Logistic Regression (LR) Code: Gradient Descent


I've spent the last few days working through the derivation of logistic regression, so I wrote an implementation myself, and found that actually coding it up is somewhat different from the formulas on paper.

I won't repeat the derivation here. Since there is no closed-form solution, gradient descent is the only option for now; later I'll optimize this into stochastic gradient descent and Newton's method, since batch gradient descent simply doesn't scale to large datasets.
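For reference, the update implemented in the code below is gradient ascent on the log-likelihood (equivalently, gradient descent on the negative log-likelihood). With labels $y_i \in \{0,1\}$, a design matrix $X$ whose rows are $[x_1, x_2, 1]$, and $\sigma(z) = 1/(1+e^{-z})$, the log-likelihood and its gradient are

$$\ell(w) = \sum_i \left[ y_i\, x_i^\top w - \log\!\left(1 + e^{x_i^\top w}\right) \right], \qquad \nabla_w \ell(w) = X^\top \left( y - \sigma(Xw) \right),$$

so each iteration applies $w \leftarrow w + \alpha\, X^\top (y - \sigma(Xw))$. Newton's method, mentioned above, replaces the fixed step $\alpha$ with curvature information: $w \leftarrow w + (X^\top R X)^{-1} X^\top (y - \sigma(Xw))$, where $R = \mathrm{diag}\big(\sigma(Xw)(1 - \sigma(Xw))\big)$.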

The code is below. I borrowed a little from other people's code, mainly for the weight update and the matrix operations; when I started from the formulas directly, I kept running into problems.

#!/usr/bin/python
# coding=utf-8
import pprint

import numpy as np
import matplotlib.pyplot as plt  # imported for plotting the result later


def load_data(path):
    '''
    :param path: path to the data file. Returns the samples and labels
                 as lists of rows, ready to be converted to matrices.
    :return: data_set, label_set
    '''
    data_set = []
    label_set = []
    file_object = open(path)
    for line in file_object.readlines():
        lineArr = [float(x) for x in line.strip().split()]
        label_set.append(int(lineArr[-1]))  # by convention the last column is the label
        lineArr[-1] = 1  # replace it with a constant 1, so the bias b is merged into w
        data_set.append(lineArr)
    # Normalization is left out for now and will be added later:
    # data_set[0] = (data_set[0]-min(data_set[0]))/(max(data_set[0])-min(data_set[0]))
    return data_set, label_set


class myLRregression(object):
    def __init__(self):
        '''
        Initialize the data and the weights.
        '''
        self.data_set = []
        self.label_set = []
        self.weight = []

    def sigmoid(self, inX):
        '''
        :param inX: input to the sigmoid function
        :return: 1 / (1 + e^(-inX))
        '''
        return 1.0 / (1 + np.exp(-inX))

    def load_data(self, path):
        '''
        :param path: path to the data file; fills self.data_set and
                     self.label_set in the same format as load_data above.
        '''
        file_object = open(path)
        for line in file_object.readlines():
            lineArr = [float(x) for x in line.strip().split()]
            self.label_set.append(int(lineArr[-1]))  # the last column is the label
            lineArr[-1] = 1  # constant 1 merges the bias b into w
            self.data_set.append(lineArr)

    def get_data_set(self):
        pprint.pprint(self.data_set)

    def train(self, train_fun):
        '''
        :return: the trained weights, plus the data and label matrices
        '''
        max_iter = 20000  # maximum number of iterations
        alpha = 0.01      # learning rate
        data_set = np.mat(self.data_set)  # convert to matrices for the linear algebra
        label_set = np.mat(self.label_set).transpose()
        weights = np.mat(np.ones((data_set.shape[1], 1)))  # weights initialized to 1
        print(weights)
        if train_fun == "gradDescent":
            for i in range(max_iter):
                # error term y - sigmoid(X*w); watch the matrix shapes here
                loss = label_set - self.sigmoid(data_set * weights)
                weights = weights + alpha * data_set.transpose() * loss
        print(weights)
        self.weight = weights
        return weights, data_set, label_set


if __name__ == '__main__':
    print("-------start load data-----")
    path = "./LR/testSet.txt"
    LR = myLRregression()
    LR.load_data(path)
    weights, data_set, label_set = LR.train(train_fun='gradDescent')
    pprint.pprint(LR.weight)
    pprint.pprint(LR.data_set)
    pprint.pprint(LR.label_set)
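As a first step toward the stochastic version mentioned above, here is a minimal sketch (the function name stoc_grad_descent and the epoch count are my own choices, not part of the original code). It updates the weights from one sample at a time, so each step costs O(number of features) rather than a full pass over the dataset:

import numpy as np

def stoc_grad_descent(data_set, label_set, alpha=0.01, num_epochs=200):
    '''Stochastic gradient ascent: one-sample updates instead of batch updates.'''
    data = np.array(data_set)      # rows are [x1, x2, 1]
    labels = np.array(label_set)
    n_samples, n_features = data.shape
    weights = np.ones(n_features)  # same initialization as the batch version
    for epoch in range(num_epochs):
        for i in range(n_samples):
            h = 1.0 / (1 + np.exp(-np.dot(data[i], weights)))  # scalar prediction
            error = labels[i] - h
            weights = weights + alpha * error * data[i]  # update from this sample only
    return weights

It plugs into the same data, e.g. stoc_grad_descent(LR.data_set, LR.label_set).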




The test data (testSet.txt) is as follows; the first two columns are the features x1 and x2, and the last column is the 0/1 label:


-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1
0.667394 12.741452 0
-2.460150 6.866805 1
0.569411 9.548755 0
-0.026632 10.427743 0
0.850433 6.920334 1
1.347183 13.175500 0
1.176813 3.167020 1
-1.781871 9.097953 0
-0.566606 5.749003 1
0.931635 1.589505 1
-0.024205 6.151823 1
-0.036453 2.690988 1
-0.196949 0.444165 1
1.014459 5.754399 1
1.985298 3.230619 1
-1.693453 -0.557540 1
-0.576525 11.778922 0
-0.346811 -1.678730 1
-2.124484 2.672471 1
1.217916 9.597015 0
-0.733928 9.098687 0
-3.642001 -1.618087 1
0.315985 3.523953 1
1.416614 9.619232 0
-0.386323 3.989286 1
0.556921 8.294984 1
1.224863 11.587360 0
-1.347803 -2.406051 1
1.196604 4.951851 1
0.275221 9.543647 0
0.470575 9.332488 0
-1.889567 9.542662 0
-1.527893 12.150579 0
-1.185247 11.309318 0
-0.445678 3.297303 1
1.042222 6.105155 1
-0.618787 10.320986 0
1.152083 0.548467 1
0.828534 2.676045 1
-1.237728 10.549033 0
-0.683565 -2.166125 1
0.229456 5.921938 1
-0.959885 11.555336 0
0.492911 10.993324 0
0.184992 8.721488 0
-0.355715 10.325976 0
-0.397822 8.058397 0
0.824839 13.730343 0
1.507278 5.027866 1
0.099671 6.835839 1
-0.344008 10.717485 0
1.785928 7.718645 1
-0.918801 11.560217 0
-0.364009 4.747300 1
-0.841722 4.119083 1
0.490426 1.960539 1
-0.007194 9.075792 0
0.356107 12.447863 0
0.342578 12.281162 0
-0.810823 -1.466018 1
2.530777 6.476801 1
1.296683 11.607559 0
0.475487 12.040035 0
-0.783277 11.009725 0
0.074798 11.023650 0
-1.337472 0.468339 1
-0.102781 13.763651 0
-0.147324 2.874846 1
0.518389 9.887035 0
1.015399 7.571882 0
-1.658086 -0.027255 1
1.319944 2.171228 1
2.056216 5.019981 1
-0.851633 4.375691 1
-1.510047 6.061992 0
-1.076637 -3.181888 1
1.821096 10.283990 0
3.010150 8.401766 1
-1.099458 1.688274 1
-0.834872 -1.733869 1
-0.846637 3.849075 1
1.400102 12.628781 0
1.752842 5.468166 1
0.078557 0.059736 1
0.089392 -0.715300 1
1.825662 12.693808 0
0.197445 9.744638 0
0.126117 0.922311 1
-0.679797 1.220530 1
0.677983 2.556666 1
0.761349 10.693862 0
-2.168791 0.143632 1
1.388610 9.341997 0
0.317029 14.739025 0
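The code imports matplotlib but never calls it yet; presumably it is meant for visualizing the result. As a rough sketch (plot_boundary is a hypothetical helper, not from the original post): the decision boundary sigmoid(x^T w) = 0.5 is the line w0*x1 + w1*x2 + w2 = 0, which can be drawn over the two classes like this:

import numpy as np
import matplotlib.pyplot as plt

def plot_boundary(data_set, label_set, weights):
    data = np.array(data_set)          # rows are [x1, x2, 1]
    labels = np.array(label_set)
    w = np.asarray(weights).flatten()  # [w0, w1, w2]
    # scatter the two classes
    plt.scatter(data[labels == 0][:, 0], data[labels == 0][:, 1], marker='o', label='class 0')
    plt.scatter(data[labels == 1][:, 0], data[labels == 1][:, 1], marker='s', label='class 1')
    # boundary: w0*x1 + w1*x2 + w2 = 0  =>  x2 = -(w0*x1 + w2) / w1
    x1 = np.linspace(data[:, 0].min(), data[:, 0].max(), 100)
    plt.plot(x1, -(w[0] * x1 + w[2]) / w[1], 'k-', label='decision boundary')
    plt.legend()
    plt.show()

For example: plot_boundary(LR.data_set, LR.label_set, weights).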

