机器学习基石 作业4 带Regularizer和Cross Validation的线性回归分类器
来源:互联网 发布:《java并发编程实战》 编辑:程序博客网 时间:2024/04/28 14:48
#!/usr/bin/env python# -*- coding: utf-8 -*-"""__title__ = 'main.py'__author__ = 'w1d2s'__mtime__ = '2015/10/30'"""from numpy import *from RidgeReg import *from Validation import *import sysimport stringdef Data_Pretreatment(path): rawData = open(path).readlines() #print rawData dataNum = len(rawData) dataDim = len(rawData[0].strip().split(' ')) - 1 dataIdx = 0 X = zeros([dataNum, dataDim]) Y = zeros(dataNum) print(dataNum, dataDim) for line in rawData: tempList = line.strip().split(' ') Y[dataIdx] = string.atoi(tempList[dataDim]) X[dataIdx, :] = tempList[0: dataDim] dataIdx += 1 return (X, Y)if __name__ == '__main__': Xtrain, Ytrain = Data_Pretreatment('train.dat') Xtest, Ytest = Data_Pretreatment('test.dat') #(Wt, p) = Validate(Xtrain, Ytrain, 120, False) (Wt, p) = Cross_Validate(Xtrain, Ytrain, 5) rate = 10 ** p W = Ridge_Regression(Xtrain, Ytrain, rate) Ein = Err_Counter(Xtrain, Ytrain, W) Eout = Err_Counter(Xtest, Ytest, W) print '** Ein : ' + str(float(Ein)/200) print '** Eout : ' + str(float(Eout)/1000)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Ridge regression (linear regression with an L2 regularizer) in closed
form, plus a 0/1 classification-error counter for the learned linear model.

__title__ = 'RidgeReg.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/30'
"""
from numpy import *
from scipy import linalg


def Err_Counter(X, Y, W):
    """Count classification errors of the linear model W on (X, Y).

    Args:
        X: (dataSize, dataDim) feature matrix, without the bias column.
        Y: labels, expected in {-1, +1}; shape (dataSize,) or (dataSize, 1).
        W: weight vector with the bias weight first, matching Y's layout.
    Returns:
        Number of samples where sign(Z[i] . W) disagrees with Y[i];
        a score of exactly 0 also counts as an error.
    """
    (dataSize, dataDim) = X.shape
    Z = ones([dataSize, dataDim + 1])  # prepend constant feature x0 = 1
    Z[:, 1: dataDim + 1] = X
    ErrCnt = 0
    # Kept as a scalar loop: it behaves identically for (n,) and (n,1)
    # shaped Y/W, both of which callers in this project pass in.
    for i in range(0, dataSize):
        if Y[i] * dot(Z[i, :], W) <= 0:
            ErrCnt = ErrCnt + 1
    return ErrCnt


def Ridge_Regression(X, Y, rate):
    """Solve ridge regression W = (Z'Z + rate*I)^-1 Z'Y in closed form.

    Args:
        X: (dataSize, dataDim) feature matrix; a bias column of ones is added.
        Y: regression targets, shape (dataSize,) or (dataSize, 1).
        rate: regularization strength lambda (>= 0).
    Returns:
        Weight vector with the bias first; trailing shape follows Y's.
    """
    (dataSize, dataDim) = X.shape
    Z = ones([dataSize, dataDim + 1])
    Z[:, 1: dataDim + 1] = X
    Zt = transpose(Z)
    ZtZ = dot(Zt, Z)
    I = identity(len(ZtZ))
    # Solve the regularized normal equations directly instead of forming the
    # explicit inverse -- cheaper and numerically more stable than inv().
    W = linalg.solve(ZtZ + rate * I, dot(Zt, Y))
    return W
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Model selection for ridge regression: a single hold-out validation pass
and K-fold cross-validation over a grid of regularization strengths
lambda = 10^p for p in [-10, 2].

__title__ = 'Validation.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/30'
"""
from numpy import *
from RidgeReg import *


def Data_Spliter(X, Y, Num4Train):
    """Split (X, Y): first Num4Train rows for training, the rest for validation."""
    Xtrain = X[0: Num4Train, :]
    Ytrain = Y[0: Num4Train]
    Xval = X[Num4Train:, :]
    Yval = Y[Num4Train:]
    return [Xtrain, Ytrain, Xval, Yval]


def Validate(X, Y, Num4Train, IsEt):
    """Grid-search log10(lambda) on a single hold-out split.

    Args:
        X, Y: full training data.
        Num4Train: rows used for training; the remainder validates.
        IsEt: if True select by training error Et, else by validation error Ev.
    Returns:
        (Wt, p): best weight vector and the chosen log10(lambda).
    """
    [Xt, Yt, Xv, Yv] = Data_Spliter(X, Y, Num4Train)
    trainSize = len(Yt)
    valSize = len(Yv)
    # Sentinels equal the worst possible error count, so the first candidate
    # is always accepted (original hard-coded 120 / 80 for one dataset size).
    minEt = trainSize
    minEv = valSize
    Wt = zeros([1, Xt.ndim + 1])
    p = 0
    for log_lambda in range(-10, 3):  # renamed from 'pow' (shadowed builtin)
        rate = 10 ** log_lambda
        W = Ridge_Regression(Xt, Yt, rate)
        Et = Err_Counter(Xt, Yt, W)
        Ev = Err_Counter(Xv, Yv, W)
        # '<=' keeps the LARGEST log lambda among ties, as the course asks.
        if IsEt == True:
            if Et <= minEt:
                [Wt, minEt, p] = [W, Et, log_lambda]
            print('== Et : ' + str(float(Et) / trainSize))
            print('== log lambda : ' + str(log_lambda))
        else:
            if Ev <= minEv:
                [Wt, minEv, p] = [W, Ev, log_lambda]
            print('== Ev : ' + str(float(Ev) / valSize))
            print('== log lambda : ' + str(log_lambda))
    Et = Err_Counter(Xt, Yt, Wt)
    Ev = Err_Counter(Xv, Yv, Wt)
    print('log lambda : ' + str(p))
    print('Et : ' + str(float(Et) / trainSize))
    print('Ev: ' + str(float(Ev) / valSize))
    return (Wt, p)


def Data_Spliter2(X, Y, folds):
    """Cut (X, Y) into `folds` consecutive chunks of equal size.

    Returns (Xlist, Ylist), the per-fold feature matrices and label vectors.
    """
    dataSize = len(Y)
    # '//' preserves the Python 2 integer-division semantics the code relied on.
    inc = dataSize // folds
    Xlist = []
    Ylist = []
    for idx in range(0, dataSize, inc):
        Xlist.append(X[idx: idx + inc, :])
        Ylist.append(Y[idx: idx + inc])
    return (Xlist, Ylist)


def Cross_Validate(X, Y, folds):
    """Grid-search log10(lambda) by `folds`-fold cross-validation.

    For each candidate lambda, averages the validation error over the folds
    and keeps the lambda with the smallest average ('<=' favours larger
    lambda among ties).

    Returns:
        (Wt, p): the last fold's weights for the winning lambda, and the
        chosen log10(lambda). Callers should retrain with 10**p.
    """
    (Xlist, Ylist) = Data_Spliter2(X, Y, folds)
    (foldSize, foldDim) = Xlist[0].shape
    # Training buffers hold every fold except the held-out one
    # (original hard-coded foldSize * 4, i.e. folds fixed at 5).
    Xt = zeros([foldSize * (folds - 1), foldDim])
    Yt = zeros([foldSize * (folds - 1), 1])
    Wt = zeros([1, foldDim + 1])
    p = 0
    minEcv = 10000
    for log_lambda in range(-10, 3):
        rate = 10 ** log_lambda
        EcvSum = 0
        for V in range(0, folds):  # V indexes the held-out validation fold
            beg = 0
            for idx in range(0, folds):
                if idx == V:
                    Xv = Xlist[idx]
                    Yv = Ylist[idx]
                else:
                    Xt[beg: beg + foldSize, :] = Xlist[idx]
                    # NOTE(review): reshapes the stored fold in place to a
                    # column vector; later iterations then see (n, 1) labels.
                    Ylist[idx].shape = (Ylist[idx].shape[0], 1)
                    Yt[beg: beg + foldSize] = Ylist[idx]
                    beg = beg + foldSize
            W = Ridge_Regression(Xt, Yt, rate)
            Ecv = Err_Counter(Xv, Yv, W)
            EcvSum = EcvSum + Ecv
        if float(EcvSum) / folds <= minEcv:
            minEcv = float(EcvSum) / folds
            (Wt, p) = (W, log_lambda)
    print('log lambda: ' + str(p))
    print('Ecv : ' + str(minEcv))
    return (Wt, p)
0 0
- 机器学习基石 作业4 带Regularizer和Cross Validation的线性回归分类器
- 机器学习--线性回归和分类
- 机器学习基石 作业3 Logistic回归/线性回归/特征转换
- 机器学习基石-Validation
- 林轩田机器学习基石及技法课程中线性分类器的总结
- 机器学习-sklearn库的Cross Validation
- 【机器学习】线性回归和逻辑回归的理解
- 机器学习:线性回归和逻辑回归的区别
- 机器学习-->sklearn.Cross-validation
- 【机器学习】交叉验证和K-折交叉验证cross-validation and k-fold cross-validation
- 初识机器学习和线性分类器
- 机器学习(五)使用sklearn库的cross validation
- [机器学习]线性回归和局部加权线性回归
- 机器学习基石第二次作业
- 《机器学习基石》作业一
- 机器学习笔记:对线性回归,logistic回归和一般回归的认识
- 斯坦福机器学习Coursera课程:第二周作业--一元和多元线性回归
- 机器学习--最小二乘法和加权线性回归
- PAT 1017. Queueing at Bank (25)
- JAVA反射机制作用是什么
- 插件管理Alcatraz和使用
- 专访张俊林:十年程序员的感悟与算法之路
- XYLX 10.21最长距离
- 机器学习基石 作业4 带Regularizer和Cross Validation的线性回归分类器
- 从 Windows 移植到 UNIX,第 1 部分: 移植 C/C++ 源代码
- ubuntu14.04利用devstack单点安装openstack(最简)
- 【Android问题及其解决】又见图片导致的OOM
- 分享一个python库的源
- 乳草的入侵
- 消息总线重构之简化客户端
- HR犀利的面试问题
- 常量指针 和 指针常量