LogisticRegression Python实现

来源：互联网发布：php 电商系统编辑：程序博客网时间：2024/06/14 10:18
from numpy import *  # kept: other code may rely on the star-imported names
import numpy as np

try:
    # Plotting is not used by the code below; keep the name available but do
    # not make the whole module unusable when matplotlib is not installed.
    import matplotlib.pyplot as plt
except ImportError:
    plt = None


def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), element-wise for arrays."""
    return 1.0 / (1.0 + np.exp(-x))


class LogRegressionClassifier(object):
    """Binary logistic regression trained by gradient ascent.

    Attributes:
        dataMat: (m, n) matrix of samples; column 0 is the constant 1.0 bias
            term that ``loadDataSet`` prepends to every row.
        labelMat: (m, 1) matrix of integer class labels.
        weights: weight column set by ``train`` (an (n, 1) array).
    """

    def __init__(self):
        self.dataMat = list()
        self.labelMat = list()
        self.weights = list()

    def loadDataSet(self, filename):
        """Load whitespace-separated samples from *filename*.

        Each non-blank line holds the feature values followed by the label as
        the last field. A leading 1.0 is prepended to every feature row.
        Calling this again replaces any previously loaded data.
        """
        rows = []
        labels = []
        # ``with`` guarantees the file is closed even if parsing fails.
        with open(filename) as fr:
            for line in fr:
                fields = line.strip().split()
                if not fields:
                    # A blank line would otherwise produce an empty, ragged row.
                    continue
                values = [float(tok) for tok in fields]
                rows.append([1.0] + values[:-1])
                labels.append(int(values[-1]))
        # ``asmatrix`` is available in both NumPy 1.x and 2.x (``mat`` is not).
        self.dataMat = np.asmatrix(rows)
        self.labelMat = np.asmatrix(labels).transpose()

    def train(self):
        """Fit ``self.weights`` on the loaded data.

        The original called ``stocGradAscent1``, which is not defined on this
        class; the closest defined method is ``stocGradAscend1``.
        """
        self.weights = self.stocGradAscend1()

    def _asArrays(self):
        """Return (data, labels) as a float (m, n) array and a flat (m,) array."""
        data = np.asarray(self.dataMat, dtype=float)
        labels = np.asarray(self.labelMat, dtype=float).ravel()
        return data, labels

    def batchGradAscent(self):
        """Run 500 full-batch gradient-ascent steps and return (n, 1) weights."""
        data, labels = self._asArrays()
        m, n = data.shape
        alpha = 0.001
        maxCycles = 500
        target = labels.reshape(m, 1)
        weights = np.ones((n, 1))
        for _ in range(maxCycles):
            h = sigmoid(data.dot(weights))
            error = target - h
            weights += alpha * data.transpose().dot(error)
        return weights

    def stocGradAscend1(self):
        """Run one sequential pass of stochastic gradient ascent.

        Every sample is visited once, in order, with a fixed step size.
        Returns the weights as an (n, 1) array.
        """
        data, labels = self._asArrays()
        m, n = data.shape
        alpha = 0.01
        weights = np.ones(n)
        for i in range(m):
            h = sigmoid(data[i].dot(weights))
            error = labels[i] - h
            weights += alpha * error * data[i]
        return weights.reshape(n, 1)

    def stocGradAscend2(self):
        """Run stochastic gradient ascent with random order and decaying step.

        For each of ``numIter`` passes, every sample is drawn exactly once in
        random order, and the step size shrinks as the pass and sample counters
        grow. Returns the weights as an (n, 1) array.
        """
        numIter = 2
        data, labels = self._asArrays()
        m, n = data.shape
        weights = np.ones(n)
        for j in range(numIter):
            # Pool of sample indices not yet used in this pass.
            dataIndex = list(range(m))
            for i in range(m):
                alpha = 4 / (1.0 + j + i) + 0.0001  # alpha decreases with iteration
                randIndex = int(np.random.uniform(0, len(dataIndex)))
                # Map the drawn position to the actual sample and remove it
                # from the pool so it is not drawn again in this pass.
                sample = dataIndex.pop(randIndex)
                h = sigmoid(data[sample].dot(weights))
                error = labels[sample] - h
                weights += alpha * error * data[sample]
        return weights.reshape(n, 1)

    def classify(self, X):
        """Return 1.0 if the predicted probability for *X* exceeds 0.5, else 0.0.

        *X* must have one value per weight, i.e. it includes the leading 1.0
        bias term in the same position ``loadDataSet`` puts it.
        """
        features = np.asarray(X, dtype=float).ravel()
        weights = np.asarray(self.weights, dtype=float).ravel()
        prob = sigmoid(features.dot(weights))
        if prob > 0.5:
            return 1.0
        else:
            return 0.0

    def test(self):
        """Load "testData.dat" and print the weights from each trainer."""
        self.loadDataSet("testData.dat")
        weights0 = self.batchGradAscent()
        weights1 = self.stocGradAscend1()
        weights2 = self.stocGradAscend2()
        # Labels are kept as in the original output (0/1 numbering), which
        # differs from the 1/2 numbering of the method names.
        # Arrays must be converted explicitly; str + ndarray raises.
        print("batchGradAscent:" + str(weights0))
        print("stocGradAscent0:" + str(weights1))
        print("stocGradAscent1:" + str(weights2))


if __name__ == '__main__':
    lr = LogRegressionClassifier()
    lr.test()
0 0