scikit-learn svm初探 分类:机器学习 Sklearn
来源:互联网 发布:极小化极大算法 编辑:程序博客网 时间:2024/05/17 03:33
下面的代码仅仅作为伪代码来看,因为拟合效果不好,思考是优化求解的问题,
仅有w_bar_2_1的解是收敛的,但是拟合效果很差。
svm实现要进一步思考。
- import numpy as np
- from sklearn import svm
- from scipy.optimize import minimize
- from numpy.linalg import norm
- from sklearn import datasets
- from functools import partial
- import copy
- iris = datasets.load_iris()
- print iris.data.shape, iris.target.shape
- #w_bar is [b ,w]
- def func(w_bar):
- return norm(w_bar[1:]) ** 2 / 2
- def strain(data, target, w_bar):
- return target * np.dot(data, w_bar) - 1
- #print strain(np.array([[0, 1], [2, 3]]), np.array([0, 1]), np.array([0, 1]))
- #print np.unique(iris.target, return_counts = True)
- data = np.append(iris.data, iris.target.reshape(iris.target.shape[0], 1), axis = 1)
- data_0 = np.ndarray(shape = (0, 5))
- data_1 = np.ndarray(shape = (0, 5))
- data_2 = np.ndarray(shape = (0, 5))
- for i in range(data.shape[0]):
- data_for_append = data[i,:].reshape(1, data[i,:].shape[0])
- if data[i,-1] == 0:
- data_0 = np.append(data_0, data_for_append, axis = 0)
- elif data[i,-1] == 1:
- data_1 = np.append(data_1, data_for_append, axis = 0)
- else:
- data_2 = np.append(data_2, data_for_append, axis = 0)
- def generate_one_svm(X_y):
- #print X_y
- X = np.ones([X_y.shape[0], X_y.shape[1]])
- X[:,1:] = X_y[:,:-1]
- y = X_y[:,-1].reshape(X_y.shape[0])
- strain_0 = partial(strain, X, y)
- cons = ({
- ’type’: ‘ineq’,
- ’fun’: strain_0
- },)
- res = minimize(func, np.ones([X.shape[1]]), constraints = cons ,\
- method = ’SLSQP’, options = {‘disp’: True})
- print “w_bar :”
- print res.x
- return res.x
- data_True = copy.deepcopy(data_1)
- data_False = copy.deepcopy(data_0)
- data_mix = np.append(data_True, data_False, axis = 0)
- w_bar_1_0 = generate_one_svm(data_mix)
- data_True = copy.deepcopy(data_2)
- data_False = copy.deepcopy(data_1)
- data_mix = np.append(data_True, data_False, axis = 0)
- w_bar_2_1 = generate_one_svm(data_mix)
- data_True = copy.deepcopy(data_2)
- data_False = copy.deepcopy(data_0)
- data_mix = np.append(data_True, data_False, axis = 0)
- w_bar_2_0 = generate_one_svm(data_mix)
- data_predict = np.ndarray(shape = data.shape)
- data_predict[:,:-1] = data[:,:-1]
- for i in range(data_predict.shape[0]):
- data_row = np.ones([data_predict.shape[1]])
- data_row[1:] = data_predict[i,:-1]
- if np.dot(data_row, w_bar_1_0) > 0:
- print “np.dot(data_row, w_bar_1_0) > 0”
- if np.dot(data_row, w_bar_2_1)> 0:
- print “np.dot(data_row, w_bar_2_1) > 0”
- data_predict[i,-1] = 2
- else:
- print “else”
- data_predict[i,-1] = 1
- else:
- print “else”
- if np.dot(data_row, w_bar_2_0)> 0:
- print “np.dot(data_row, w_bar_2_0) > 0”
- data_predict[i,-1] = 2
- else:
- print “else”
- data_predict[i,-1] = 0
- print data_predict
- print np.unique(data_pr
import numpy as npfrom sklearn import svmfrom scipy.optimize import minimizefrom numpy.linalg import normfrom sklearn import datasetsfrom functools import partialimport copyiris = datasets.load_iris()print iris.data.shape, iris.target.shape
下面对对偶问题求解,可是算法不收敛,而且由于维数过大,导致求解时间很长:
- from scipy.optimize import minimize
- import numpy as np
- from sklearn import datasets
- from functools import partial
- from numpy.linalg import norm
- from math import exp
- iris = datasets.load_iris()
- data = np.append(iris.data, iris.target.reshape(iris.target.shape[0], 1),axis = 1)
- data_True = np.ndarray(shape = (0, data.shape[1]))
- data_False = np.ndarray(shape = (0, data.shape[1]))
- for i in range(data.shape[0]):
- data_for_append = data[i,:].reshape(1, data.shape[1])
- if data[i][-1] == 1:
- data_for_append[0, -1] = 1
- data_True = np.append(data_True, data_for_append, axis = 0)
- elif data[i][-1] == 0:
- data_for_append[0, -1] = -1
- data_False = np.append(data_False, data_for_append, axis = 0)
- data = np.append(data_True, data_False, axis = 0)
- def kernel_matrix(X):
- require = np.ndarray(shape = (X.shape[0], X.shape[0]))
- for i in range(require.shape[0]):
- for j in range(require.shape[1]):
- #require[i][j] = np.dot(X[i,:], X[j,:])
- require[i][j] = exp(-1 * norm(X[i,:] - X[j,:]) ** 2 / 2)
- return require
- def func(X, y, alpha):
- return np.dot(np.dot(alpha.T, kernel_matrix(X)), alpha) - np.dot(np.ones([X.shape[0]]), alpha)
- def eq_strain(y, alpha):
- return np.dot(y, alpha)
- y = data[:,-1]
- X = data[:,1:]
- eq_strain = partial(eq_strain, y)
- #set C val
- C = 10
- cons_list = [{
- ’type’: ‘eq’,
- ’fun’: eq_strain
- },]
- for i in range(y.shape[0]):
- cons_list.append({
- ’type’: ‘ineq’,
- ’fun’: lambda alpha: alpha[i]
- })
- cons_list.append({
- ’type’: ‘ineq’,
- ’fun’: lambda alpha: C - alpha[i]
- })
- cons = tuple(cons_list)
- func = partial(func, X, y)
- init_alpha = np.ones([y.shape[0]])
- res = minimize(func, init_alpha, constraints = cons,\
- method = ’SLSQP’, options = {‘disp’: True})
- print res.x
from scipy.optimize import minimizeimport numpy as npfrom sklearn import datasetsfrom functools import partialfrom numpy.linalg import normfrom math import expiris = datasets.load_iris()data = np.append(iris.data, iris.target.reshape(iris.target.shape[0], 1),axis = 1)data_True = np.ndarray(shape = (0, data.shape[1]))data_False = np.ndarray(shape = (0, data.shape[1]))for i in range(data.shape[0]): data_for_append = data[i,:].reshape(1, data.shape[1]) if data[i][-1] == 1: data_for_append[0, -1] = 1 data_True = np.append(data_True, data_for_append, axis = 0) elif data[i][-1] == 0: data_for_append[0, -1] = -1 data_False = np.append(data_False, data_for_append, axis = 0)data = np.append(data_True, data_False, axis = 0)def kernel_matrix(X): require = np.ndarray(shape = (X.shape[0], X.shape[0])) for i in range(require.shape[0]): for j in range(require.shape[1]): #require[i][j] = np.dot(X[i,:], X[j,:]) require[i][j] = exp(-1 * norm(X[i,:] - X[j,:]) ** 2 / 2) return requiredef func(X, y, alpha): return np.dot(np.dot(alpha.T, kernel_matrix(X)), alpha) - np.dot(np.ones([X.shape[0]]), alpha)def eq_strain(y, alpha): return np.dot(y, alpha)y = data[:,-1]X = data[:,1:]eq_strain = partial(eq_strain, y)
这里对参数的约束写法有问题,可能导致迭代速度慢,kernel Matrix写的也不对(漏掉了符号),可能导致得不到收敛值。后面对其进行修改。
下面对二分类问题的对偶形式实现了SVC算法,并进行了检验,得到了正确的结果。
- from scipy.optimize import minimize
- import numpy as np
- from sklearn import datasets
- from functools import partial
- from numpy.linalg import norm
- from math import exp
- iris = datasets.load_iris()
- data = np.append(iris.data, iris.target.reshape(iris.target.shape[0], 1),axis = 1)
- data_True = np.ndarray(shape = (0, data.shape[1]))
- data_False = np.ndarray(shape = (0, data.shape[1]))
- for i in range(data.shape[0]):
- data_for_append = data[i,:].reshape(1, data.shape[1])
- if data[i][-1] == 1:
- data_for_append[0, -1] = 1
- data_True = np.append(data_True, data_for_append, axis = 0)
- elif data[i][-1] == 0:
- data_for_append[0, -1] = -1
- data_False = np.append(data_False, data_for_append, axis = 0)
- data = np.append(data_True, data_False, axis = 0)
- def kernel_matrix(X, y):
- require = np.ndarray(shape = (X.shape[0], X.shape[0]))
- for i in range(require.shape[0]):
- for j in range(require.shape[1]):
- require[i][j] = np.dot(X[i,:], X[j,:]) * y[i] * y[j]
- return require
- def func(X, y, alpha):
- return np.dot(np.dot(alpha.T, kernel_matrix(X, y)), alpha) - np.dot(np.ones([X.shape[0]]), alpha)
- def eq_strain(y, alpha):
- return np.dot(y, alpha)
- y = data[:,-1]
- X = data[:,1:]
- eq_strain = partial(eq_strain, y)
- #set C val
- C = 10
- cons = ({
- ’type’: ‘eq’,
- ’fun’: eq_strain
- },)
- bnds_list = []
- for i in range(y.shape[0]):
- bnds_list.append((0, C))
- bnds = tuple(bnds_list)
- func = partial(func, X, y)
- init_alpha = np.ones([y.shape[0]])
- res = minimize(func, init_alpha, constraints = cons, bounds = bnds, \
- method = ’SLSQP’, options = {‘disp’: True})
- alpha = res.x
- #we have a success converge
- #return vector y_hat
- def decision_func(X, y, alpha):
- y_hat_list = []
- j = 0
- for i in range(y.shape[0]):
- if alpha[j] > 0 and alpha[j]< C:
- j = i
- break
- role = 0
- for i in range(y.shape[0]):
- role += y[i] * alpha[i] * np.dot(X[i,:], X[j,:])
- role = y[j] - role
- for i in range(y.shape[0]):
- SUM = 0
- for j in range(y.shape[0]):
- SUM += y[j] * alpha[j] * np.dot(X[j,:],X[i,:])
- SUM += role
- y_hat_list.append((SUM + role > 0) and 1 or -1)
- return np.array(y_hat_list)
- y_hat = decision_func(X, y, alpha)
- #Test Func
- print np.unique(y * y_hat, return_counts = True)
from scipy.optimize import minimizeimport numpy as npfrom sklearn import datasetsfrom functools import partialfrom numpy.linalg import normfrom math import expiris = datasets.load_iris()data = np.append(iris.data, iris.target.reshape(iris.target.shape[0], 1),axis = 1)data_True = np.ndarray(shape = (0, data.shape[1]))data_False = np.ndarray(shape = (0, data.shape[1]))for i in range(data.shape[0]): data_for_append = data[i,:].reshape(1, data.shape[1]) if data[i][-1] == 1: data_for_append[0, -1] = 1 data_True = np.append(data_True, data_for_append, axis = 0) elif data[i][-1] == 0: data_for_append[0, -1] = -1 data_False = np.append(data_False, data_for_append, axis = 0)data = np.append(data_True, data_False, axis = 0)def kernel_matrix(X, y): require = np.ndarray(shape = (X.shape[0], X.shape[0])) for i in range(require.shape[0]): for j in range(require.shape[1]): require[i][j] = np.dot(X[i,:], X[j,:]) * y[i] * y[j] return requiredef func(X, y, alpha): return np.dot(np.dot(alpha.T, kernel_matrix(X, y)), alpha) - np.dot(np.ones([X.shape[0]]), alpha)def eq_strain(y, alpha): return np.dot(y, alpha)y = data[:,-1]X = data[:,1:]eq_strain = partial(eq_strain, y)
更多了解请浏览:http://blog.csdn.net/sinat_30665603
阅读全文
0 0
- scikit-learn svm初探 分类:机器学习 Sklearn
- scikit-learn 回归基础 分类:机器学习Sklearn
- scikit learn(sklearn)机器学习算法选择
- scikit-learn svm初探
- scikit-learn 交叉验证绘图及原理实践 分类:机器学习Sklearn
- Python/scikit-learn机器学习库(SVM支持向量机)
- 机器学习 SVM sklearn
- scikit-learn 机器学习
- Scikit-learn实战之SVM分类
- scikit-learn 即sklearn的基础学习
- 基于scikit-learn(sklearn)做分类--2.分类
- Windows下安装Python机器学习相关的sklearn(scikit-learn)模块
- 机器学习scikit-learn入门教程
- 机器学习 scikit-learn 图谱
- 机器学习 scikit-learn安装
- scikit-learn 机器学习介绍
- 机器学习scikit-learn入门教程
- ]scikit-learn进行机器学习
- 2017驾校 科二考试流程
- hdoj1013(简单题)
- 神经网络算法
- 集合框架
- 《OpenGL超级宝典(第五版)》 在 VS2010 / VS2015中 的配置方法 +
- scikit-learn svm初探 分类:机器学习 Sklearn
- 【编程题】字符串编码
- JAVA数据类型
- java的类型
- 兔子跳跃之谜下(BZOJ2454 RabbitPuzzle(BZOJ中不是多组数据))
- 斯坦福:机器学习CS229:Exercise 1: Linear Regression线性回归(答案1)
- C++后台编程一、socket通信
- 单例模式
- BootStrap学习手册