Python实现SMO算法

来源:互联网 发布:阿里云 域名查询 编辑:程序博客网 时间:2024/06/05 17:26
import numpy as npimport randomdata = np.loadtxt("heart-c.txt")# 缓存样本点积dp = np.dot(data[:,0:data.shape[1]-2], np.transpose(data[:,0:data.shape[1]-2]))Lab = data[:,data.shape[1] - 1]# 初始化α向量alphas = [0] * data.shape[0]# 初始化偏置项b和惩罚参数Cb = 0C = 0.8# 初始化每个样本的估计错误率E = -Lab# 设置精度eps = 0.05# --计算目标函数值-- #def objValue(x,id1,id2):  global data,dp,Lab,alphas,b,C,E,eps    # 计算常量  con = 0  i = 0  j = 0  for i in range(data.shape[0]):    if i != id1 and i != id2:      con = con + alphas[i]    else:      continue    for j in range(data.shape[0]):      if j != id1 and j != id2:        con = con - 0.5 * Lab[i] * Lab[j] * dp[i][j] * alphas[i] * alphas[j]      else:        continue    # compute t1 and t2  t0 = list(map(np.dot, Lab, alphas))  t1 = np.dot(t0,dp[id1,:]) + b - Lab[id1]*alphas[id1]*dp[id1][id1] - Lab[id2]*x*dp[id2][id1]  t2 = np.dot(t0,dp[id2,:]) + b - Lab[id1]*alphas[id1]*dp[id1][id2] - Lab[id2]*x*dp[id2][id2]  v = alphas[id1] + x - 0.5*dp[id1][id1]*alphas[id1]**2 - 0.5*dp[id2][id2]*x**2 - \                    Lab[id1]*Lab[id2]*dp[id1][id2]*alphas[id1]*x - Lab[id1]*alphas[id1]*t1 - \                    Lab[id2]*x*t2 + con  return v# --求解两变量的优化问题-- #def varPairOpt(id1,id2):  global data,dp,Lab,alphas,b,C,E,eps    if id1 == id2:    return 0    alpha1 = alphas[id1]  alpha2 = alphas[id2]  alpha1_new = alpha1  alpha2_new = alpha2    y1 = Lab[id1]  y2 = Lab[id2]  e1 = E[id1]  e2 = E[id2]  s = y1 * y2    # 计算下限L和上限H  L = 0  H = 0  if y1 != y2:    L = max(0, alpha2 - alpha1)    H = min(C, C + alpha2 - alpha1)  else:    L = max(0, alpha2 + alpha1 - C)    H = min(C, alpha2 + alpha1)  if L == H:    return 0    eta = 2 * dp[id1][id2] - dp[id1][id1] - dp[id2][id2]  # 确定alpha2_new的值  if eta < 0:    alpha2_new = alpha2 - y2 * (e1 - e2) / eta    if alpha2_new < L:      alpha2_new = L    elif alpha2_new > H:      alpha2_new = H  else: # eta equals to zero    Lobj = objValue(L,id1,id2)    Hobj = objValue(H,id1,id2)    if Lobj > Hobj + eps:      alpha2_new = L    elif Lobj < Hobj + eps:      alpha2_new = H    else:      alpha2_new = alpha2    # alpha2没有足够大的变化则返回  if abs(alpha2-alpha2_new) < eps:    return 0  alpha1_new = alpha1 + s * (alpha2 - alpha2_new)  # 更新b  b1_new = E[id1] + Lab[id1] * (alpha1_new - alphas[id1]) * dp[id1][id2] + \                    Lab[id2] * (alpha2_new - alphas[id2]) * dp[id1][id2] + b  b2_new = E[id2] + Lab[id1] * (alpha1_new - alphas[id1]) * dp[id1][id2] + \                    Lab[id2] * (alpha2_new - alphas[id2]) * dp[id2][id2] + b  b_old = b  if alpha1_new > 0 and alpha1_new < C:    b = b1_new  elif alpha2_new > 0 and alpha2_new < C:    b = b2_new  else:    b = (b1_new + b2_new) / 2    # 更新E  E = np.add(np.add(E,b - b_old), np.add(Lab[id1]*(alpha1_new-alphas[id1])*dp[id1,:], \                    Lab[id2]*(alpha2_new-alphas[id2])*dp[id2,:]))  # 更新α  alphas[id1] = alpha1_new  alphas[id2] = alpha2_new  return 1# --选择变量-- #def select(i,Ei,entireSet):  global data,dp,Lab,alphas,b,C,E,eps    max_deltaE = 0  maxj = -1  if entireSet == 1:  # 从整个训练集中选择    index = random.randint(0, data.shape[0] - 1)    while index == i:      index = random.randint(0, data.shape[0] - 1)    return index  else:  # 启发式选择    indexs = [k for k in range(len(alphas)) if (alphas[k] > 0) and (alphas[k] < C)]    Len = len(indexs)    for j in range(Len):      if indexs[j] == i:        continue      Ej = np.dot(list(map(np.dot, Lab, alphas)), dp[indexs[j],:]) + b - Lab[indexs[j]]      deltaE = abs(Ei - Ej)      if deltaE > max_deltaE:        maxj = indexs[j]        max_deltaE = deltaE    return maxjiter_ = 0max_iter = 50entireSet = 1  # 作为一个标记看是选择全遍历还是部分遍历alpha_change = 0while (iter_ < max_iter) and ((alpha_change > 0) or entireSet):    alpha_change = 0  # 遍历整个训练集  if entireSet:    i = 0    for i in range(data.shape[0]):      Ei = np.dot(list(map(np.dot, Lab, alphas)), dp[i,:]) + b - Lab[i]      if (Lab[i]*Ei < -eps and alphas[i] < C) or (Lab[i]*Ei > eps and alphas[i] > 0):        # 选择第2个变量        j = select(i,Ei,entireSet)        # 优化        if varPairOpt(i,j):          alpha_change += 1    iter_ += 1  # 遍历满足0 < α < C的训练样本集合  else:    indexs = [k for k in range(data.shape[0] - 1) if (alphas[k] > 0) and (alphas[k] < C)]    Len = len(indexs)    i = 0    for i in range(Len):      Ei = np.dot(list(map(np.dot, Lab, alphas)), dp[indexs[i],:]) + b - Lab[indexs[i]]      if (Lab[indexs[i]]*Ei < -eps and alphas[indexs[i]] < C) or (Lab[indexs[i]]*Ei > eps and alphas[indexs[i]] > 0):        # 选择第2个变量        j = select(indexs[i],Ei,entireSet)        # 优化        if varPairOpt(indexs[i],j):          alpha_change += 1    iter_ += 1    if entireSet:  # 此次迭代为全遍历,下次变成部分遍历    entireSet = 0  elif alpha_change == 0:  # 若部分遍历没有找到需要交换的α,改为全遍历    entireSet = 1  print("第 " + str(iter_) + " 次迭代")  print(np.around(alphas,decimals=2))