The Softmax Algorithm: An Extension of Logistic Regression


I have finally implemented this extension of logistic regression. Training uses gradient descent, which is quite sensitive to the learning rate: different learning rates can lead to dramatically different results (see the figures at the end).
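For reference, what the code implements is standard softmax regression as described on the page linked below. With k classes and parameter rows \theta_1, \dots, \theta_k, the class probabilities, the regularized cost, and the gradient used by the gradient-descent update are

P(y^{(i)} = j \mid x^{(i)}; \theta) = \frac{\exp(\theta_j^{T} x^{(i)})}{\sum_{l=1}^{k} \exp(\theta_l^{T} x^{(i)})}

J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{j=1}^{k} 1\{y^{(i)} = j\} \log P(y^{(i)} = j \mid x^{(i)}; \theta) + \frac{\lambda}{2} \sum_{j=1}^{k} \lVert \theta_j \rVert^{2}

\nabla_{\theta_j} J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} x^{(i)} \left( 1\{y^{(i)} = j\} - P(y^{(i)} = j \mid x^{(i)}; \theta) \right) + \lambda \theta_j

and each iteration updates \theta_j \leftarrow \theta_j - \alpha \nabla_{\theta_j} J(\theta), where \alpha is the learning rate. The delta_thetai computed in Softmax.train below is exactly this gradient, with a bias column of ones prepended to x.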

Reference: http://deeplearning.stanford.edu/wiki/index.php/Softmax%E5%9B%9E%E5%BD%92

The full Python code is as follows:

import numpy as np
import matplotlib.pylab as plt
import copy
from scipy.linalg import norm
from math import pow
from scipy.optimize import fminbound, minimize
import random


def _dot(a, b):
    # exp(a . b), used by the module-level condProb below
    mat_dot = np.dot(a, b)
    return np.exp(mat_dot)


def condProb(theta, thetai, xi):
    # softmax probability of the class whose parameter row is thetai
    numerator = _dot(thetai, xi.transpose())
    denominator = _dot(theta, xi.transpose())
    denominator = np.sum(denominator, axis=0)
    p = numerator / denominator
    return p


def costFunc(alfa, *args):
    # cost as a function of the step size alfa; only used by the
    # (unfinished) line search in Softmax.oneDimSearch
    i = args[2]
    original_thetai = args[0]
    delta_thetai = args[1]
    x = args[3]
    y = args[4]
    lamta = args[5]
    labels = set(y)
    thetai = original_thetai
    thetai[i, :] = thetai[i, :] - alfa * delta_thetai
    k = 0
    sum_log_p = 0.0
    for label in labels:
        index = y == label
        xi = x[index]
        p = condProb(original_thetai, thetai[k, :], xi)
        log_p = np.log10(p)
        sum_log_p = sum_log_p + log_p.sum()
        k = k + 1
    r = -sum_log_p / x.shape[0] + (lamta / 2.0) * pow(norm(thetai), 2)
    # print(r, alfa)
    return r


class Softmax:
    def __init__(self, alfa, lamda, feature_num, label_num, run_times=500, col=1e-6):
        self.alfa = alfa                # learning rate
        self.lamda = lamda              # weight-decay (regularization) coefficient
        self.feature_num = feature_num
        self.label_num = label_num
        self.run_times = run_times      # number of gradient-descent iterations
        self.col = col
        # one parameter row per class, plus a bias column
        self.theta = np.random.random((label_num, feature_num + 1)) + 1.0

    def oneDimSearch(self, original_thetai, delta_thetai, i, x, y, lamta):
        # line search for the best learning rate -- not finished, not used
        res = minimize(costFunc, 0.0, method='Powell',
                       args=(original_thetai, delta_thetai, i, x, y, lamta))
        return res.x

    def train(self, x, y):
        # prepend a bias column of ones
        tmp = np.ones((x.shape[0], x.shape[1] + 1))
        tmp[:, 1:tmp.shape[1]] = x
        x = tmp
        del tmp
        labels = set(y)
        self.errors = []
        old_alfa = self.alfa
        for kk in range(0, self.run_times):
            i = 0
            for label in labels:
                tmp_theta = copy.deepcopy(self.theta)
                # indicator vector 1{y == label}
                one = np.zeros(x.shape[0])
                index = y == label
                one[index] = 1.0
                thetai = np.array([self.theta[i, :]])
                prob = self.condProb(thetai, x)
                prob = np.array([one - prob])
                prob = prob.transpose()
                # gradient of the regularized cost with respect to theta_i
                delta_thetai = -np.sum(x * prob, axis=0) / x.shape[0] + self.lamda * self.theta[i, :]
                # alfa = self.oneDimSearch(self.theta, delta_thetai, i, x, y, self.lamda)
                # (one-dimensional search for the optimal learning rate -- not implemented)
                self.theta[i, :] = self.theta[i, :] - self.alfa * np.array([delta_thetai])
                i = i + 1
            self.errors.append(self.performance(tmp_theta))

    def performance(self, tmp_theta):
        # how much theta moved during the last iteration
        return norm(self.theta - tmp_theta)

    def dot(self, a, b):
        mat_dot = np.dot(a, b)
        return np.exp(mat_dot)

    def condProb(self, thetai, xi):
        # P(y = i | x) under the current parameters
        numerator = self.dot(thetai, xi.transpose())
        denominator = self.dot(self.theta, xi.transpose())
        denominator = np.sum(denominator, axis=0)
        p = numerator[0] / denominator
        return p

    def predict(self, x):
        # returns, for each sample, [most probable class, its probability]
        tmp = np.ones((x.shape[0], x.shape[1] + 1))
        tmp[:, 1:tmp.shape[1]] = x
        x = tmp
        row = x.shape[0]
        col = self.theta.shape[0]
        pre_res = np.zeros((row, col))
        for i in range(0, row):
            xi = x[i, :]
            for j in range(0, col):
                thetai = self.theta[j, :]
                p = self.condProb(np.array([thetai]), np.array([xi]))
                pre_res[i, j] = p
        r = []
        for i in range(0, row):
            tmp = []
            line = pre_res[i, :]
            ind = line.argmax()
            tmp.append(ind)
            tmp.append(line[ind])
            r.append(tmp)
        return np.array(r)

    def evaluate(self):
        pass


def samples(sample_num, feature_num, label_num):
    # generate label_num clusters of uniformly distributed points, one per class
    n = int(sample_num / label_num)
    x = np.zeros((n * label_num, feature_num))
    y = np.zeros(n * label_num, dtype=int)
    for i in range(0, label_num):
        x[i * n: i * n + n, :] = np.random.random((n, feature_num)) + i
        y[i * n: i * n + n] = i
    return [x, y]


def save(name, x, y):
    writer = open(name, 'w')
    for i in range(0, x.shape[0]):
        for j in range(0, x.shape[1]):
            writer.write(str(x[i, j]) + ' ')
        writer.write(str(y[i]) + '\n')
    writer.close()


def load(name):
    x = []
    y = []
    for line in open(name, 'r'):
        ele = line.split(' ')
        tmp = []
        for i in range(0, len(ele) - 1):
            tmp.append(float(ele[i]))
        x.append(tmp)
        y.append(int(ele[len(ele) - 1]))
    return [x, y]


def plotRes(pre, real, test_x, l):
    # scatter plot of predicted vs. true classes for 2-D data
    s = set(pre)
    col = ['r', 'b', 'g', 'y', 'm']
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i in range(0, len(s)):
        index1 = pre == i
        index2 = real == i
        x1 = test_x[index1, :]
        x2 = test_x[index2, :]
        ax.scatter(x1[:, 0], x1[:, 1], color=col[i], marker='v', linewidths=0.5)
        ax.scatter(x2[:, 0], x2[:, 1], color=col[i], marker='.', linewidths=12)
    plt.title('learning rate=' + str(l))
    plt.legend(('c1:predict', 'c1:true',
                'c2:predict', 'c2:true',
                'c3:predict', 'c3:true',
                'c4:predict', 'c4:true',
                'c5:predict', 'c5:true'), shadow=True, loc=(0.01, 0.4))
    plt.show()


if __name__ == '__main__':
    # [x, y] = samples(1000, 2, 5)
    # save('data.txt', x, y)
    [x, y] = load('data.txt')
    index = list(range(0, len(x)))
    random.shuffle(index)
    x = np.array(x)
    y = np.array(y)
    x_train = x[index[0:700], :]
    y_train = y[index[0:700]]
    # the second argument (lambda) is 0.0, i.e. no regularization: the model has
    # no higher-order terms, and regularization actually made the results worse
    softmax = Softmax(0.4, 0.0, 2, 5)
    softmax.train(x_train, y_train)
    x_test = x[index[700:1000], :]
    y_test = y[index[700:1000]]
    r = softmax.predict(x_test)
    plotRes(r[:, 0], y_test, x_test, softmax.alfa)
    t = r[:, 0] != y_test
    o = np.zeros(len(t))
    o[t] = 1
    err = sum(o)
    print('test errors: %d / %d' % (int(err), len(y_test)))
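Note that __main__ reads data.txt, so that file has to exist first. It can be produced once with the samples/save helpers that are commented out at the top of __main__, for example:

# Generate a 5-class, 2-feature synthetic data set and write it to data.txt
# (this simply re-enables the two commented-out lines in __main__).
[x, y] = samples(1000, 2, 5)
save('data.txt', x, y)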


The results are shown below. As the figures show, classification improves as the learning rate increases, but once it becomes too large (for example 1.0) the results degrade again, so choosing the learning rate is the key issue. The data used by the code can be generated automatically (see the samples/save helpers above).
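To see this learning-rate effect numerically rather than only in the plots, one could run a small sweep on top of the listing above. This is only a sketch: the candidate rates are illustrative, and the exact error counts depend on the random train/test split.

# Sweep a few learning rates and compare test errors
# (assumes the listing above has been run or imported).
[x, y] = load('data.txt')
index = list(range(0, len(x)))
random.shuffle(index)
x = np.array(x)
y = np.array(y)
x_train, y_train = x[index[0:700], :], y[index[0:700]]
x_test, y_test = x[index[700:1000], :], y[index[700:1000]]
for alfa in [0.05, 0.1, 0.4, 1.0]:      # illustrative candidate rates
    softmax = Softmax(alfa, 0.0, 2, 5)
    softmax.train(x_train, y_train)
    r = softmax.predict(x_test)
    err = int(np.sum(r[:, 0] != y_test))
    print('learning rate %.2f: %d / %d test errors' % (alfa, err, len(y_test)))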
(Figures omitted: scatter plots of predicted vs. true classes on the test set, produced by plotRes, one per learning rate.)