Linear Regression and Batch Gradient Descent in Python

After working through the Stanford open-course lectures on linear regression and gradient descent, I ran my own test with reference to other people's code and wrapped it in a class. I'll extend it when I have time; there's a lot of homework right now:
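What the class minimizes is the usual sum-of-squared-errors cost, and gradientDescent takes plain batch steps on it. With design matrix $X$ (first column all ones, for the intercept), targets $y$, learning rate $\alpha$, and parameters $\theta$:

$$J(\theta) = \lVert X\theta - y \rVert^2, \qquad \theta \leftarrow \theta - \alpha\, X^\top (X\theta - y)$$

The update uses $X^\top(X\theta - y)$, i.e. the gradient of $\tfrac{1}{2}J(\theta)$, and it is not averaged over the number of points, which is why the learning rate in the code below has to be as small as 0.000005.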

import random

import numpy as np
import matplotlib.pyplot as plt


class dataMinning:
    """Linear regression fitted with batch gradient descent."""

    def __init__(self, addressD, addressL, theta, numIterations, alpha, datasets=None):
        self.addressD = addressD  # path to the data file
        self.addressL = addressL  # path to the label file
        self.theta = theta        # parameter vector [intercept, slope]
        self.numIterations = numIterations
        self.alpha = alpha        # learning rate
        self.datasets = [] if datasets is None else datasets
        self.labelsets = []
        self.npDatasets = np.zeros(1)
        self.npLabelsets = np.zeros(1)
        self.cost = []            # squared-error cost recorded per iteration

    def readFrom(self):
        # Read whitespace-separated integer rows from the data file.
        with open(self.addressD, 'r') as fd:
            for line in fd:
                self.datasets.append([int(i) for i in line.split()])
        self.npDatasets = np.array(self.datasets)
        # Read the label rows and flatten them into a single vector.
        with open(self.addressL, 'r') as fl:
            for line in fl:
                self.labelsets.append([int(i) for i in line.split()])
        tm = []
        for item in self.labelsets:
            tm += item
        self.npLabelsets = np.array(tm)

    def genData(self, numPoints, bias, variance):
        # Synthesize points along y = x + bias plus uniform noise.
        # Column 0 of genx is constant 1, so theta[0] is the intercept.
        self.genx = np.zeros(shape=(numPoints, 2))
        self.geny = np.zeros(shape=numPoints)
        for i in range(numPoints):
            self.genx[i][0] = 1
            self.genx[i][1] = i
            self.geny[i] = (i + bias) + random.uniform(0, 1) * variance

    def gradientDescent(self):
        xTrans = self.genx.transpose()
        for _ in range(self.numIterations):
            hypothesis = np.dot(self.genx, self.theta)  # current predictions
            loss = hypothesis - self.geny               # residuals
            self.cost.append(np.sum(loss ** 2))         # record the cost
            gradient = np.dot(xTrans, loss)             # unaveraged batch gradient
            self.theta = self.theta - self.alpha * gradient  # descent update

    def show(self):
        print('yes')


if __name__ == "__main__":
    c = dataMinning('c:\\city.txt', 'c:\\st.txt', np.ones(2), 100000, 0.000005)
    c.genData(100, 25, 10)
    c.gradientDescent()
    # Figure 1: how the cost falls over the iterations.
    plt.figure(1)
    plt.plot(range(len(c.cost)), c.cost)
    plt.ylim(0, 25000)
    # Figure 2: the data points and the fitted line.
    plt.figure(2)
    plt.plot(c.genx[:, 1], c.geny, 'b.')
    x = np.arange(0, 100, 0.1)
    y = x * c.theta[1] + c.theta[0]
    plt.plot(x, y)
    plt.margins(0.2)
    plt.show()

[Figure 1. The cost during the iterations]

[Figure 2. Scatter plot of the data and the fitted line]
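To confirm that 100000 iterations at such a small learning rate really converge, a minimal sanity check (assuming the dataMinning class defined above) is to compare the fitted theta against NumPy's closed-form least-squares solution:

import numpy as np

# Rebuild the same synthetic problem and run gradient descent on it.
c = dataMinning('c:\\city.txt', 'c:\\st.txt', np.ones(2), 100000, 0.000005)
c.genData(100, 25, 10)
c.gradientDescent()

# np.linalg.lstsq minimizes ||genx @ theta - geny||^2 exactly.
theta_exact, residuals, rank, sv = np.linalg.lstsq(c.genx, c.geny, rcond=None)
print('gradient descent:', c.theta)
print('closed form:     ', theta_exact)

The two estimates should agree closely; if they do not, raise numIterations or adjust alpha.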

References:

1. Writing classes in Python: http://blog.csdn.net/wklken/article/details/6313265

2. The if __name__ == "__main__" idiom in Python: http://www.cnblogs.com/herbert/archive/2011/09/27/2193482.html

3. matplotlib gallery: http://matplotlib.org/gallery.html

4. Reference code for batch gradient descent in Python: http://www.91r.net/ask/17784587.html
