Logistic Regression Implementation

The listing below defines the sigmoid function, trains a logistic regression model with one of three gradient-based optimizers (batch gradient descent, stochastic gradient descent, and a smoothed stochastic variant with a decaying step size and random sample order), evaluates accuracy on a test set, and plots the decision boundary for 2-D data.

from numpy import *
import matplotlib.pyplot as plt
import time

# calculate the sigmoid function
def sigmoid(inX):
    return 1.0 / (1 + exp(-inX))

# train a logistic regression model using an optional optimization algorithm
# input: train_x is a mat datatype, each row stands for one sample
#        train_y is a mat datatype too, each row is the corresponding label
#        opts holds the optimization options: step size, maximum number of
#        iterations, and the optimizer type
def trainLogRegres(train_x, train_y, opts):
    # record the training start time
    startTime = time.time()

    numSamples, numFeatures = shape(train_x)
    alpha = opts['alpha']; maxIter = opts['maxIter']
    weights = ones((numFeatures, 1))

    # optimize through a gradient descent algorithm
    for k in range(maxIter):
        if opts['optimizeType'] == 'gradDescent':  # batch gradient descent
            output = sigmoid(train_x * weights)
            error = train_y - output
            weights = weights + alpha * train_x.transpose() * error
        elif opts['optimizeType'] == 'stocGradDescent':  # stochastic gradient descent
            for i in range(numSamples):
                output = sigmoid(train_x[i, :] * weights)
                error = train_y[i, 0] - output
                weights = weights + alpha * train_x[i, :].transpose() * error
        elif opts['optimizeType'] == 'smoothStocGradDescent':  # smooth stochastic gradient descent
            # randomly select samples to optimize, reducing cyclic fluctuations
            dataIndex = list(range(numSamples))
            for i in range(numSamples):
                alpha = 4.0 / (1.0 + k + i) + 0.01  # step size decays but never reaches 0
                randIndex = int(random.uniform(0, len(dataIndex)))
                sampleIndex = dataIndex[randIndex]  # map into the not-yet-visited samples
                output = sigmoid(train_x[sampleIndex, :] * weights)
                error = train_y[sampleIndex, 0] - output
                weights = weights + alpha * train_x[sampleIndex, :].transpose() * error
                del dataIndex[randIndex]  # each sample is visited once per iteration
        else:
            raise ValueError('optimize method type %s is not supported!' % opts['optimizeType'])

    print('training took %fs' % (time.time() - startTime))
    return weights

# test the trained logistic regression model on a given test set
def testLogRegres(weights, test_x, test_y):
    numSamples, numFeatures = shape(test_x)
    matchCount = 0
    for i in range(numSamples):
        predict = sigmoid(test_x[i, :] * weights)[0, 0] > 0.5
        if predict == bool(test_y[i, 0]):
            matchCount += 1
    accuracy = float(matchCount) / numSamples
    return accuracy

# show the trained logistic regression model; only available for 2-D data
def showLogRegres(weights, train_x, train_y):
    # notice: train_x and train_y are mat datatypes
    numSamples, numFeatures = shape(train_x)
    if numFeatures != 3:  # expects a bias column plus two features
        return

    # draw all samples
    for i in range(numSamples):
        if int(train_y[i, 0]) == 0:
            plt.plot(train_x[i, 1], train_x[i, 2], 'or')
        elif int(train_y[i, 0]) == 1:
            plt.plot(train_x[i, 1], train_x[i, 2], 'ob')

    # draw the classification line, i.e. where w0 + w1*x1 + w2*x2 = 0
    min_x = train_x[:, 1].min()
    max_x = train_x[:, 1].max()
    weights = weights.getA().flatten()  # convert mat to a flat array
    y_min_x = (-weights[0] - weights[1] * min_x) / weights[2]
    y_max_x = (-weights[0] - weights[1] * max_x) / weights[2]
    plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()
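A note on the batch update above: weights + alpha * train_x.transpose() * error performs gradient ascent on the log-likelihood L(w) = sum_i [y_i * log(p_i) + (1 - y_i) * log(1 - p_i)] with p = sigmoid(X * w), whose gradient is X^T (y - p). The sketch below is my addition rather than part of the original post; it verifies that equivalence numerically with central finite differences. The helper name checkGradient and all of its parameters are illustrative, and it assumes the star import and sigmoid defined above are in scope.

# sanity-check sketch (not from the original post): compare the analytic
# gradient X^T (y - p) used by trainLogRegres against central finite
# differences of the log-likelihood on random data
def checkGradient(numSamples=20, numFeatures=3, eps=1e-6):
    random.seed(0)
    X = mat(random.randn(numSamples, numFeatures))
    y = mat((random.rand(numSamples, 1) > 0.5).astype(float))
    w = mat(random.randn(numFeatures, 1))

    def logLik(w):
        p = sigmoid(X * w)
        return float(sum(multiply(y, log(p)) + multiply(1 - y, log(1 - p))))

    analytic = X.transpose() * (y - sigmoid(X * w))  # direction used by trainLogRegres
    numeric = zeros((numFeatures, 1))
    for j in range(numFeatures):
        step = zeros((numFeatures, 1)); step[j, 0] = eps
        numeric[j, 0] = (logLik(w + step) - logLik(w - step)) / (2 * eps)
    print('max gradient difference: %e' % abs(analytic - numeric).max())

Calling checkGradient() should report a difference near the finite-difference noise floor, confirming that the update really follows the log-likelihood gradient.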
def loadData():
    train_x = []
    train_y = []
    fileIn = open('D:\\soft\\python\\testLR.txt')
    for line in fileIn.readlines():
        lineArr = line.strip().split()
        train_x.append([1.0, float(lineArr[0]), float(lineArr[1])])
        train_y.append(float(lineArr[2]))
    fileIn.close()
    return mat(train_x), mat(train_y).transpose()

if __name__ == '__main__':
    ## step 1: load data
    print("step 1: load data...")
    train_x, train_y = loadData()
    test_x = train_x; test_y = train_y

    ## step 2: training
    print("step 2: training...")
    opts = {'alpha': 0.001, 'maxIter': 500, 'optimizeType': 'stocGradDescent'}
    optimalWeights = trainLogRegres(train_x, train_y, opts)
    print("the optimal weights:\n%s" % optimalWeights)

    ## step 3: testing
    print("step 3: testing...")
    accuracy = testLogRegres(optimalWeights, test_x, test_y)
    print("the accuracy: %f" % accuracy)

    ## step 4: show the result
    print("step 4: show the result...")
    showLogRegres(optimalWeights, train_x, train_y)
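loadData expects a whitespace-separated file with two feature columns and a 0/1 label, read from a hard-coded Windows path. For readers without that file, here is a minimal usage sketch that generates two Gaussian clusters in memory instead. The makeSyntheticData helper, the cluster centers, and the opts values are my own illustration, not part of the original post; run it in the same file or session as the functions above so trainLogRegres, testLogRegres, and showLogRegres are in scope.

# hypothetical stand-in for loadData(): two 2-D Gaussian clusters with a
# bias column of 1.0 prepended, matching the mat layout the code expects
from numpy import mat, random, vstack, hstack, ones, zeros

def makeSyntheticData(numPerClass=100):
    random.seed(42)
    class0 = random.randn(numPerClass, 2) + [0.0, 0.0]  # label-0 cluster
    class1 = random.randn(numPerClass, 2) + [3.0, 3.0]  # label-1 cluster
    features = vstack((class0, class1))
    bias = ones((2 * numPerClass, 1))                   # x0 = 1.0, as in loadData
    labels = vstack((zeros((numPerClass, 1)), ones((numPerClass, 1))))
    return mat(hstack((bias, features))), mat(labels)

train_x, train_y = makeSyntheticData()
opts = {'alpha': 0.01, 'maxIter': 200, 'optimizeType': 'smoothStocGradDescent'}
weights = trainLogRegres(train_x, train_y, opts)
print("accuracy on synthetic data: %f" % testLogRegres(weights, train_x, train_y))
showLogRegres(weights, train_x, train_y)

Because the two clusters are well separated, the learned boundary should classify nearly all points correctly; swapping 'smoothStocGradDescent' for 'gradDescent' or 'stocGradDescent' in opts is an easy way to compare the three optimizers on identical data.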