Machine Learning: A Python Implementation of Linear Regression


    • I. Theory
      • Linear regression
      • Ridge regression
      • Lasso regression
      • Locally weighted linear regression
    • II. Python implementation
      • Code
      • Results
      • Data

I. Theory

1. Linear Regression

Loss function:

L(w) = \frac{1}{2m} \sum_{i=1}^{m} (y_i - x_i w)^2

Closed-form solution:

W = (X^T X)^{-1} X^T Y

If X^T X is not invertible, this method cannot be used; an optimization method such as gradient descent can be applied instead to find an approximate solution. A minimal sketch of the closed-form case follows.
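
As a quick illustration, here is a minimal NumPy sketch of the closed-form solution (the toy data and variable names are illustrative, not part of the implementation below):

import numpy as np

# Toy data: 5 samples of one feature, plus a bias column of ones.
X = np.array([[1.0], [2.0], [3.0], [4.0], [5.0]])
X = np.hstack([X, np.ones((X.shape[0], 1))])
Y = np.array([[2.1], [3.9], [6.2], [8.1], [9.8]])

# W = (X^T X)^{-1} X^T Y, valid only when X^T X is invertible.
if np.linalg.det(X.T @ X) != 0:
    W = np.linalg.inv(X.T @ X) @ X.T @ Y
    print(W)  # roughly: slope ~ 2, intercept ~ 0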

2. Ridge Regression

Equivalent to linear regression with a regularization term added.

Loss function:

L(w) = \frac{1}{2m} \sum_{i=1}^{m} (y_i - x_i w)^2 + \lambda \sum_{i=1}^{n} w_i^2

Closed-form solution:

W = (X^T X + \lambda I)^{-1} X^T Y
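
For comparison, a minimal sketch of the ridge solution on the same kind of toy data (lam is an arbitrary illustrative value):

import numpy as np

X = np.array([[1.0], [2.0], [3.0], [4.0], [5.0]])
X = np.hstack([X, np.ones((X.shape[0], 1))])
Y = np.array([[2.1], [3.9], [6.2], [8.1], [9.8]])

lam = 0.5  # regularization strength, chosen arbitrarily for illustration
# The added lambda*I term guarantees the matrix is invertible.
W = np.linalg.inv(X.T @ X + lam * np.eye(X.shape[1])) @ X.T @ Y
print(W)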

3. Lasso Regression

Equivalent to adding L1 regularization.

Loss function:

L(w) = \frac{1}{2m} \sum_{i=1}^{m} (y_i - x_i w)^2 + \lambda \sum_{i=1}^{n} |w_i|

There is no closed-form solution in this case; forward stagewise regression can be used instead (sketched below).
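
To make the idea concrete, here is a minimal sketch of forward stagewise regression (a simplified standalone version of the lasso_regression method in the code below; eps and itr_num are illustrative defaults):

import numpy as np

def forward_stagewise(X, Y, eps=0.01, itr_num=200):
    # Greedily nudge one coefficient by +/- eps per iteration,
    # keeping whichever single change lowers the squared error most.
    m, n = X.shape
    w = np.zeros((n, 1))
    for _ in range(itr_num):
        lowest_error = np.inf
        best_w = w
        for j in range(n):
            for sign in (-1, 1):
                w_try = w.copy()
                w_try[j] += eps * sign
                error = np.sum(np.square(Y - X @ w_try))
                if error < lowest_error:
                    lowest_error = error
                    best_w = w_try
        w = best_w
    return w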

4. Locally Weighted Linear Regression

Each point near the query point is assigned a weight.

Loss function:

L(\theta) = \frac{1}{2m} \sum_{i=1}^{m} w_i (y_i - x_i \theta)^2

Here, w_i is the weight of the i-th sample.
Locally weighted linear regression uses a "kernel" to give nearby points higher weight. The kernel type can be chosen freely; the most commonly used is the Gaussian kernel, whose weights are:

w_i = \exp\left( -\frac{|x_i - x|^2}{2k^2} \right)

This yields a weight matrix W with only diagonal entries, and the closer a point x_i is to x, the larger w_i becomes. The parameter k controls how much weight is assigned to nearby points, and it is the only parameter that needs tuning.

The larger k is, the more points effectively contribute to training the regression model;
the smaller k is, the fewer points contribute (see the sketch below).
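
A minimal sketch of how the Gaussian-kernel weights are built for a single query point (a simplified standalone version of what the full method below does; the names are illustrative):

import numpy as np

def lwlr_weights(test_point, X, k=1.0):
    # Diagonal weight matrix: samples closer to test_point get weights near 1.
    diff = X - test_point                       # (m, n) differences
    sq_dist = np.sum(np.square(diff), axis=1)   # squared distances, shape (m,)
    return np.diag(np.exp(-sq_dist / (2 * k ** 2)))

X = np.array([[1.0], [2.0], [3.0]])
W = lwlr_weights(np.array([2.0]), X, k=0.5)
print(np.round(np.diag(W), 4))  # the sample at x=2 gets the largest weight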

II. Python Implementation

1. Code

#encoding=utf-8
###################################################################
# Copyright: CNIC
# Author: LiuYao
# Date: 2017-9-12
# Description: implements the linear regression algorithms
###################################################################

import numpy as np
from numpy.linalg import det
from numpy.linalg import inv
from numpy import mat
from numpy import random
import matplotlib.pyplot as plt
import pandas as pd

class LinearRegression:
    '''
    implements the linear regression algorithms
    '''

    def __init__(self):
        pass

    def train(self, x_train, y_train):
        '''ordinary least squares via the normal equation'''
        x_mat = mat(x_train).T
        y_mat = mat(y_train).T
        [m, n] = x_mat.shape
        x_mat = np.hstack((x_mat, mat(np.ones((m, 1)))))  # append bias column
        self.weight = mat(random.rand(n + 1, 1))
        if det(x_mat.T * x_mat) == 0:
            print('the det of xTx is equal to zero.')
            return
        self.weight = inv(x_mat.T * x_mat) * x_mat.T * y_mat
        return self.weight

    def locally_weighted_linear_regression(self, test_point, x_train, y_train, k=1.0):
        '''solve the weighted normal equation for a single test point'''
        x_mat = mat(x_train).T
        [m, n] = x_mat.shape
        x_mat = np.hstack((x_mat, mat(np.ones((m, 1)))))
        y_mat = mat(y_train).T
        test_point_mat = mat(test_point)
        test_point_mat = np.hstack((test_point_mat, mat([[1]])))
        self.weight = mat(np.zeros((n + 1, 1)))
        test_data = np.tile(test_point_mat, [m, 1])
        # squared distances to the test point sit on the diagonal
        distances = (test_data - x_mat) * (test_data - x_mat).T / (n + 1)
        distances = np.exp(distances / (-2 * k ** 2))
        # keep only the diagonal as the Gaussian kernel weight matrix
        weights = mat(np.diag(np.diag(np.asarray(distances))))
        xTx = x_mat.T * (weights * x_mat)
        if det(xTx) == 0.0:
            print('the det of xTx is equal to zero.')
            return
        self.weight = xTx.I * x_mat.T * weights * y_mat
        return test_point_mat * self.weight

    def ridge_regression(self, x_train, y_train, lam=0.2):
        x_mat = mat(x_train).T
        [m, n] = np.shape(x_mat)
        x_mat = np.hstack((x_mat, mat(np.ones((m, 1)))))
        y_mat = mat(y_train).T
        self.weight = mat(random.rand(n + 1, 1))
        # n + 1 to match the bias column appended above
        xTx = x_mat.T * x_mat + lam * mat(np.eye(n + 1))
        if det(xTx) == 0.0:
            print('the det of xTx is zero!')
            return
        self.weight = xTx.I * x_mat.T * y_mat
        return self.weight

    def lasso_regression(self, x_train, y_train, eps=0.01, itr_num=100):
        '''forward stagewise regression as an approximation to the lasso'''
        x_mat = mat(x_train).T
        [m, n] = np.shape(x_mat)
        x_mat = (x_mat - x_mat.mean(axis=0)) / x_mat.std(axis=0)
        x_mat = np.hstack((x_mat, mat(np.ones((m, 1)))))
        y_mat = mat(y_train).T
        y_mat = (y_mat - y_mat.mean(axis=0)) / y_mat.std(axis=0)
        self.weight = mat(random.rand(n + 1, 1))
        best_weight = self.weight.copy()
        for i in range(itr_num):
            print(self.weight.T)
            lowest_error = np.inf
            for j in range(n + 1):
                for sign in [-1, 1]:
                    weight_copy = self.weight.copy()
                    weight_copy[j] += eps * sign
                    y_predict = x_mat * weight_copy
                    error = np.power(y_mat - y_predict, 2).sum()
                    if error < lowest_error:
                        lowest_error = error
                        best_weight = weight_copy
            self.weight = best_weight
        return self.weight

    def lwlr_predict(self, x_test, x_train, y_train, k=1.0):
        m = len(x_test)
        y_predict = mat(np.zeros((m, 1)))
        for i in range(m):
            y_predict[i] = self.locally_weighted_linear_regression(x_test[i], x_train, y_train, k)
        return y_predict

    def lr_predict(self, x_test):
        m = len(x_test)
        x_mat = np.hstack((mat(x_test).T, np.ones((m, 1))))
        return x_mat * self.weight

    def plot_lr(self, x_train, y_train):
        x_min = x_train.min()
        x_max = x_train.max()
        y_min = self.weight[0] * x_min + self.weight[1]
        y_max = self.weight[0] * x_max + self.weight[1]
        plt.scatter(x_train, y_train)
        plt.plot([x_min, x_max], [y_min[0, 0], y_max[0, 0]], '-g')
        plt.show()

    def plot_lwlr(self, x_train, y_train, k=1.0):
        x_min = x_train.min()
        x_max = x_train.max()
        x = np.linspace(x_min, x_max, 1000)
        y = self.lwlr_predict(x, x_train, y_train, k)
        plt.scatter(x_train, y_train)
        plt.plot(x, y.getA()[:, 0], '-g')
        plt.show()

    def plot_weight_with_lambda(self, x_train, y_train, lambdas):
        weights = np.zeros((len(lambdas),))
        for i in range(len(lambdas)):
            self.ridge_regression(x_train, y_train, lam=lambdas[i])
            weights[i] = self.weight[0]
        plt.plot(np.log(lambdas), weights)
        plt.show()

def main():
    data = pd.read_csv('/home/LiuYao/Documents/MarchineLearning/regression.csv')
    data = data / 30
    x_train = data['x'].values
    y_train = data['y'].values
    regression = LinearRegression()
    # regression.train(x_train, y_train)
    # y_predict = regression.lr_predict(x_train)
    # regression.plot_lr(x_train, y_train)
    # print('correlation matrix:', np.corrcoef(y_train, np.squeeze(y_predict)))
    # y_predict = regression.lwlr_predict([[15], [20]], x_train, y_train, k=0.1)
    # print(y_predict)
    # regression.ridge_regression(x_train, y_train, lam=3)
    # regression.plot_lr(x_train, y_train)
    regression.lasso_regression(x_train, y_train, itr_num=1000)
    regression.plot_lr(x_train, y_train)

if __name__ == '__main__':
    main()

2. Results

[Figure: linear regression fit]

[Figure: locally weighted linear regression fit]

[Figure: ridge regression (L2 penalty) fit]

[Figure: ridge regression, weight as a function of lambda]

As lambda increases, the penalty on the weights becomes heavier, so the weights shrink toward zero.
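
A usage sketch to reproduce this curve with the class above (assuming x_train and y_train are loaded as in main(); the lambda values are illustrative):

import numpy as np

# assumes LinearRegression, x_train, y_train from the code above
lambdas = np.exp(np.arange(-5.0, 6.0))  # log-spaced lambdas, for illustration
regression = LinearRegression()
regression.plot_weight_with_lambda(x_train, y_train, lambdas)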

[Figure: lasso regression (L1 penalty) fit]

Lasso regression tends to compress some dimensions of the weight vector to exactly 0; in this example the second component of weight is compressed to 0, which forces the fitted line through the origin. Ridge regression, by contrast, tends to shrink all dimensions of weight.

3. Data

Contents of regression.csv (the file read by main()):

x,y
8.8,7.55
9.9,7.95
10.75,8.55
12.3,9.45
15.65,13.25
16.55,12.0
13.6,11.9
11.05,11.35
9.6,9.0
8.3,9.05
8.1,10.7
10.5,10.25
14.5,12.55
16.35,13.15
17.45,14.7
19.0,13.7
19.6,14.4
20.9,16.6
21.5,17.75
22.4,18.1
23.65,18.75
24.9,19.6
25.8,20.3
26.45,20.7
28.15,21.55
28.55,21.4
29.3,21.95
29.15,21.0
28.35,19.95
26.9,19.0
26.05,18.9
25.05,17.95
23.6,16.8
22.05,15.55
21.85,16.1
23.0,17.8
19.0,16.6
18.8,15.55
19.3,15.1
15.15,11.9
12.05,10.8
12.75,12.7
13.8,10.65
6.5,5.85
9.2,6.4
10.9,7.25
12.35,8.55
13.85,9.0
16.6,10.15
17.4,10.85
18.25,12.15
16.45,14.55
20.85,15.75
21.25,15.15
22.7,15.35
24.45,16.45
26.75,16.95
28.2,19.15
24.85,20.8
20.45,13.5
29.95,20.35
31.45,23.2
31.1,21.4
30.75,22.3
29.65,23.45
28.9,23.35
27.8,22.3