A Python implementation of multi-class logistic regression


Principle of multi-class logistic classification:


Fit a separate logistic regression for each class: every class gets its own weight vector and bias, each is updated by gradient descent, and the per-class scores are then normalized against each other to pick the most probable class (see the sketch below).
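Concretely, writing p_k for the normalized score a sample gets for class k and y_k for its one-hot label, each class's weights move in the direction (y_k - p_k) * x. A minimal vectorized sketch of one such batch step (the shapes, the helper name one_step, and the learning rate alpha are illustrative assumptions, not part of the original script):

import numpy as np

def one_step(W, b, X, Y, alpha=0.001):
    """One batch gradient step for K one-vs-rest logistic scores.
    W: (K, d) weights, b: (K,) biases, X: (n, d) features, Y: (n, K) one-hot labels."""
    P = 1.0 / (1.0 + np.exp(-(X @ W.T + b)))   # per-class sigmoid scores, shape (n, K)
    P = P / P.sum(axis=1, keepdims=True)       # normalize scores across the K classes
    G = Y - P                                  # residuals, shape (n, K)
    W = W + alpha * G.T @ X                    # accumulate (y_k - p_k) * x over all samples
    b = b + alpha * G.sum(axis=0)
    return W, b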

Without further ado, here is the code:
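The script reads its data from a plain CSV-style file: four numeric features followed by the class label on each line. The exact contents of E:\data\iris.txt are not shown in the post; lines in the standard UCI iris format such as the following would match what readText expects:

5.1,3.5,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica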

# -*- coding: utf-8 -*-
"""
Created on Sun Sep 10 20:47:58 2017
@author: wjw

Logistic regression classifies by estimating a model of the data distribution
from the training data, then feeding test samples into that model to obtain a
probability p and comparing p with 0.5. Here we extend this to multi-class
classification on the iris data, with 4-dimensional features and 3 classes.
"""
import numpy as np

CLASSES = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]


def readText(filePath):
    # Parse a CSV-style file: four numeric features followed by the class label.
    data = []
    dataClass = []
    for line in open(filePath, 'r'):
        dataList = line.strip().split(',')
        if len(dataList) < 5:
            continue  # skip blank or malformed lines
        data.append([float(v) for v in dataList[:4]])
        dataClass.append(dataList[4])
    return np.array(data), np.array(dataClass)


def sigmod(x):
    return 1 / (1 + np.exp(-x))


def softmax(y):
    # Normalize the per-class sigmoid scores so they sum to 1.
    # (Not a true exp-based softmax; this keeps the original script's behaviour.)
    total = sum(y)
    return np.array([node / total for node in y])


def myexp(w, b, data):
    # Unused helper kept from the original script: exp of a linear score.
    return np.exp(float(np.dot(np.ravel(w), np.ravel(data))) + float(np.ravel(b)[0]))


def normalized(data):
    # Min-max scale every feature column into [0, 1].
    for j in range(data.shape[1]):
        col = data[:, j]
        data[:, j] = (col - col.min()) / (col.max() - col.min())
    return data


def logClassification(data, dataClass, w, b):
    max_itor = 100000   # maximum number of gradient steps
    alpha = 0.001       # learning rate

    data = normalized(data)

    for n in range(1, max_itor):
        wsum = [np.zeros(data.shape[1]) for _ in range(3)]
        bsum = [0.0, 0.0, 0.0]
        for i in range(data.shape[0]):
            if dataClass[i] not in CLASSES:
                continue
            # Per-class linear score -> sigmoid -> normalize across the classes.
            t = softmax([sigmod(np.dot(w[k].ravel(), data[i]) + b[k][0])
                         for k in range(3)])
            # One-hot target for this sample's class.
            target = [1.0 if dataClass[i] == c else 0.0 for c in CLASSES]
            for k in range(3):
                wsum[k] += (target[k] - t[k]) * data[i]
                bsum[k] += (target[k] - t[k])
        # Batch gradient update on each class's weights and bias.
        for k in range(3):
            w[k] += alpha * wsum[k]
            b[k] += alpha * bsum[k]
        if n % 2000 == 0:
            print(np.array(w), '\n', np.array(b))
    return w, b


def testmodel(test, w, b):
    # Apply the same scaling as training, then score every test sample.
    data = normalized(test)
    resultSet = []
    for d in data:
        result = [sigmod(np.dot(w[k].ravel(), d) + b[k][0]) for k in range(3)]
        resultSet.append(softmax(result))
    return np.array(resultSet)


if __name__ == '__main__':
    filePath = r"E:\data\iris.txt"
    data, dataClass = readText(filePath)
    # One weight vector and bias per class, randomly initialised.
    w = [np.random.random((1, 4)) for _ in range(3)]
    b = [np.random.random(1) for _ in range(3)]
    w, b = logClassification(data, dataClass, w, b)
    # The original script reuses the training file as its "test" set.
    testpath = r"E:\data\iris.txt"
    test, testClass = readText(testpath)
    resultSet = testmodel(test, w, b)
    print(resultSet)
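As a quick cross-check (not part of the original post), the same file can be fit with scikit-learn's LogisticRegression. This assumes scikit-learn is installed and that readText from the script above is available in the same session:

# Hypothetical sanity check with scikit-learn (assumes the same
# 4-features-plus-label layout of iris.txt as above).
from sklearn.linear_model import LogisticRegression

X, y = readText(r"E:\data\iris.txt")       # reuse the parser from the script above
clf = LogisticRegression(max_iter=1000).fit(X, y)
print(clf.predict_proba(X[:5]))            # per-class probabilities, rows sum to 1
print((clf.predict(X) == y).mean())        # training accuracy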


