Logistic回归

来源：互联网发布：一厢情愿的感情知乎编辑：程序博客网时间：2024/05/20 17:39

本文实现了Logistic回归算法，数据集取自《机器学习实战》中对应章节的数据。Logistic回归从概率的角度对参数进行求解：以最大似然为目标，利用梯度上升的方法迭代更新参数。
下面为具体代码，用到了批量梯度上升，每次更新权值都需要加载所有的数据集，因此适用于较小的数据集。

# Logistic regression trained with batch gradient ascent, plus a matplotlib
# animation that shows how the decision boundary moves during training.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import animation as ani

# Location of the ffmpeg executable used by FFMpegWriter when saving the video.
plt.rcParams['animation.ffmpeg_path'] = r'F:\Program Files\ffmpeg-20170418-6108805-win64-static\bin\ffmpeg.exe'

# One [bias, w1, w2] entry per training iteration; filled by batchGrid and
# consumed frame by frame in animate.
history = []


def loadDataSet(path='testSet.txt'):
    """Read the training set from a whitespace-separated text file.

    Each non-blank line must hold two float features followed by an integer
    label. A constant 1.0 is prepended to every feature row so that the
    first weight acts as the bias term.

    Args:
        path: File to read; defaults to 'testSet.txt' in the working directory.

    Returns:
        (dataMat, label): a list of [1.0, x1, x2] rows and the matching list
        of integer labels.
    """
    dataMat = []
    label = []
    with open(path) as f:
        for raw in f:
            row = raw.split()
            if not row:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1.0, float(row[0]), float(row[1])])
            label.append(int(row[2]))
    return dataMat, label


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise."""
    return 1.0 / (1 + np.exp(-x))


def batchGrid(dataMat, label, alpha=0.001, num_iter=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Every iteration uses the full data set, so this is only suitable for
    small data sets. After each update the current weights are appended to
    the module-level ``history`` list (which is not cleared between calls).

    Args:
        dataMat: m rows of n feature values (bias column included).
        label: m class labels (0 or 1).
        alpha: Step size of each update.
        num_iter: Number of full passes over the data.

    Returns:
        The learned weights as an (n, 1) numpy array.
    """
    features = np.asarray(dataMat, dtype=float)           # m x n
    m, n = features.shape
    # Reshape once, outside the loop, so the error below is m x 1.
    targets = np.asarray(label, dtype=float).reshape(m, 1)
    weight = np.ones((n, 1))

    for _ in range(num_iter):
        predicted = sigmoid(np.dot(features, weight))     # m x 1
        err = targets - predicted                         # m x 1
        weight += alpha * np.dot(features.T, err)         # n x 1
        # ravel().tolist() yields plain Python floats without converting a
        # 1-element array to a scalar (deprecated since NumPy 1.25).
        history.append(weight.ravel().tolist())
    return weight


# Shared figure objects used by plotbasefit / animate / show_gif.
fig = plt.figure()
ax = fig.add_subplot(111)
# Set the limits on the axes that is actually drawn on; calling plt.xlim
# before add_subplot can end up configuring a different, implicit axes.
ax.set_xlim(-5, 5)
ax.set_ylim(-5, 20)
line, = ax.plot([], [], 'g', lw=2)
txt = ax.text(0, 0, '')


def plotbasefit(datarr, label):
    """Scatter the samples on the shared axes, coloured by class.

    Args:
        datarr: m x 3 rows of [1.0, x, y]; columns 1 and 2 are plotted.
        label: m class labels; label 1 is drawn as red squares, everything
            else as green dots.
    """
    data = np.array(datarr)
    n = np.shape(data)[0]
    print("label:")
    print("n= %d" % n)

    pos_x, pos_y = [], []
    neg_x, neg_y = [], []
    for row, tag in zip(data, label):
        if int(tag) == 1:
            pos_x.append(row[1])
            pos_y.append(row[2])
        else:
            neg_x.append(row[1])
            neg_y.append(row[2])

    ax.scatter(pos_x, pos_y, s=30, c='red', marker='s')
    ax.scatter(neg_x, neg_y, s=30, c='green')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('Logitic Regression')


def _reset_line():
    """Animation init callback: clear the boundary line before frame 0."""
    line.set_data([], [])
    return line,


def animate(i):
    """Draw the decision boundary for training iteration ``i``.

    The boundary is where b + w1*x + w2*y == 0, i.e. y = -(b + w1*x) / w2,
    evaluated at x = -4 and x = 4.

    Returns:
        A 1-tuple with the updated line artist (required for blitting).
    """
    b, w1, w2 = history[i]
    xs = [-4, 4]
    ys = [-(b + w1 * x) / w2 for x in xs]
    line.set_data(xs, ys)
    return line,


def show_gif(data, label):
    """Animate the training history, save it as an MP4 and show the figure.

    Args:
        data: Feature rows passed on to plotbasefit.
        label: Class labels passed on to plotbasefit.
    """
    # Draw the static scatter once up front. Passing the *result* of
    # plotbasefit(...) as init_func would hand FuncAnimation a None.
    plotbasefit(data, label)
    animat = ani.FuncAnimation(
        fig,
        animate,
        init_func=_reset_line,
        frames=len(history),
        interval=500,
        repeat=True,
        blit=True,
    )
    FFwriter = ani.FFMpegWriter(fps=30, codec="h264")
    animat.save('basic_animation1.mp4', writer=FFwriter)
    plt.show()


if __name__ == '__main__':
    data, label = loadDataSet()
    weight = batchGrid(data, label)
    show_gif(data, label)
    print(weight)

最后将参数迭代的动态过程保存为MP4文件进行展示。保存mp4文件时遇到了一个小问题：在没有配置FFmpeg的情况下无法保存，解决方法如下。

如果要将动画保存为mp4格式的视频文件，则需要先安装FFmpeg。FFmpeg是一套可以用来记录、转换数字音频、视频，并能将其转化为流的开源计算机程序。采用LGPL或GPL许可证。它提供了录制、转换以及流化音视频的完整解决方案。
　　在这里下载windows的版本：Download FFmpeg for Windows，解压，然后将bin目录加入系统环境变量的路径中。如：C:\Program Files\ffmpeg-3.2.2-win64-static\bin。然后测试是否配置OK: 输入ffmpeg -version。
　　在代码开头添加ffmpeg的路径：plt.rcParams['animation.ffmpeg_path'] = r'F:\Program Files\ffmpeg-20170418-6108805-win64-static\bin\ffmpeg.exe'，然后设置movie writer：
　　FFwriter = ani.FFMpegWriter(fps=30, codec="h264") # codec="libx264"
　　animat.save('basic_animation1.mp4', writer = FFwriter )
生成的分类图片为：
　　

0 0