【Machine Learning实验】梯度下降算法

来源：互联网发布：淘宝上下架规则可行吗编辑：程序博客网时间：2024/04/28 01:10

梯度下降算法是利用梯度下降的方向迭代寻找目标函数的参数的最优值。

它遵循LMS（Least Mean Square是）准则，该准则是通过使似然函数最大推导得出，即得出的参数使得样本数据集出现的概率最大。

常用的迭代方法有两种：批量梯度下降法（Batch Gradient Descent）和随机梯度下降法（Stochastic Gradient Descent）。

批量梯度下降法（Batch Gradient Descent）：

随机梯度下降法（Stochastic Gradient Descent）：

梯度下降算法对局部极值敏感，但是对于线性回归问题只有整体极值，没有局部极值，所以在这种情况下，算法总是收敛的。

对于随机梯度下降算法，其收敛速度要快于批量梯度下降算法，但是它在最小值附近震荡的幅度较大，所以可能不会收敛于true minimum^[1]。

算法实现如下：

样本数据集输入特征x[M][N]={ {1,1}, {2,1}, {1,6}, {3,4}, {5,2}, {7,9}, {8,3}, {1.5,6}, {10,11},}，输出y[M]={3,4,13,11,9,25,14,13.5,32}。

可以看出theta[0]=1;theta[1]=2;

最后通过一组实验数据进行验证：test[2]={10,6}

代码：

/*
 * Linear regression fitted with stochastic and batch gradient descent.
 *
 * The hypothesis is h(x) = sum_j theta[j] * x[j] (no intercept term).
 * Every training sample below satisfies y = 1*x[0] + 2*x[1] exactly, so the
 * fitted parameters are expected to approach theta = {1, 2}.
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/*
 * Enumeration constants are integer constant expressions, so they can size
 * file-scope arrays in C. A `const int` object cannot: it is only accepted
 * as an array bound at file scope by C++.
 */
enum {
    M = 9,      /* number of training samples */
    N = 2,      /* number of features per sample */
    MAX = 1000  /* maximum number of passes over the training set */
};

const float learningRate = 0.001f; /* step size of each parameter update */
const float errorThr = 1;          /* stop once the sum of squared errors of a pass drops below this */

/* Training inputs, one row per sample. */
float x[M][N] = {
    {1, 1}, {2, 1}, {1, 6}, {3, 4}, {5, 2}, {7, 9}, {8, 3}, {1.5, 6}, {10, 11},
};

/* Training targets, y[i] belongs to x[i]. */
float y[M] = {3, 4, 13, 11, 9, 25, 14, 13.5, 32};

float htheta(const float *x, const float *theta);
void Stochastic(float *theta);
float predict(const float *x, const float *theta);
void Batch(float *theta);

static void print_theta(int iteration, const float *theta);
static void print_summary(int iterations, float errorSquare);
static void print_check(const float *theta, const float *extra);

/* Print the parameter vector reached after pass number `iteration`. */
static void print_theta(int iteration, const float *theta)
{
    printf("****************************\n");
    printf("No.%d Theta:", iteration);
    for (int j = 0; j < N; j++) {
        printf("%f ", theta[j]);
    }
    printf("\n");
}

/* Print the final pass counter and the squared-error sum of the last pass. */
static void print_summary(int iterations, float errorSquare)
{
    printf("****************************\n");
    printf("Time of Iteratrion:%d\n", iterations);
    printf("Sum of Error Square:%f\n", errorSquare);
}

/* Print the predictions for every training sample followed by `extra`. */
static void print_check(const float *theta, const float *extra)
{
    printf("Check:");
    for (int i = 0; i < M; i++) {
        printf("%f ", predict(x[i], theta));
    }
    printf("%f ", predict(extra, theta));
}

/*
 * Stochastic gradient descent.
 *
 * theta: output array of N parameters; reset to zero, then fitted in place.
 *
 * Each sample triggers an immediate update of ALL parameters, using one
 * residual computed before any of them is modified. The squared residual is
 * accumulated once per sample, so errorSquare is the plain sum of squared
 * errors observed during the pass (measured before each update).
 * Stops after MAX passes or as soon as a pass ends with errorSquare < errorThr.
 */
void Stochastic(float *theta)
{
    int k;
    float errorSquare = 0;

    for (int j = 0; j < N; j++) {
        theta[j] = 0;
    }

    for (k = 0; k < MAX; k++) {
        errorSquare = 0;
        for (int i = 0; i < M; i++) {
            /* Evaluate the hypothesis once; the same residual drives every theta[j]. */
            const float residual = y[i] - htheta(x[i], theta);

            errorSquare += residual * residual;
            for (int j = 0; j < N; j++) {
                theta[j] += learningRate * residual * x[i][j];
            }
        }

        print_theta(k, theta);
        if (errorSquare < errorThr) {
            break;
        }
    }

    print_summary(k, errorSquare);
}

/*
 * Batch gradient descent.
 *
 * theta: output array of N parameters; reset to zero, then fitted in place.
 *
 * Each pass first accumulates the gradient over all M samples from a single
 * snapshot of theta and only then applies it, so all parameters are updated
 * simultaneously. errorSquare is the sum of squared errors of that snapshot.
 * Stops after MAX passes or as soon as a pass ends with errorSquare < errorThr.
 */
void Batch(float *theta)
{
    int k;
    float errorSquare = 0;

    for (int j = 0; j < N; j++) {
        theta[j] = 0;
    }

    for (k = 0; k < MAX; k++) {
        float gradient[N] = {0};

        errorSquare = 0;
        for (int i = 0; i < M; i++) {
            const float residual = y[i] - htheta(x[i], theta);

            errorSquare += residual * residual;
            for (int j = 0; j < N; j++) {
                gradient[j] += residual * x[i][j];
            }
        }

        /* Apply the step only after the whole gradient is known. */
        for (int j = 0; j < N; j++) {
            theta[j] += learningRate * gradient[j];
        }

        print_theta(k, theta);
        if (errorSquare < errorThr) {
            break;
        }
    }

    print_summary(k, errorSquare);
}

/*
 * Hypothesis function: dot product of the first N elements of x and theta.
 *
 * x:     feature vector with at least N elements.
 * theta: parameter vector with at least N elements.
 * Returns sum_j x[j] * theta[j].
 */
float htheta(const float *x, const float *theta)
{
    float ret = 0;

    for (int i = 0; i < N; i++) {
        ret += x[i] * theta[i];
    }
    return ret;
}

/* Predict the output for feature vector x; identical to htheta(x, theta). */
float predict(const float *x, const float *theta)
{
    return htheta(x, theta);
}

/*
 * Fit the model with both methods and print, for each, the predictions on
 * the training samples followed by the prediction for the extra point {10, 6}.
 */
int main(void)
{
    float theta[N];
    float test[N] = {10, 6};

    printf("Stochastic Gradient Descent:\n");
    Stochastic(theta);
    print_check(theta, test);
    printf("\n\n\n\n");

    printf("Batch Gradient Descent:\n");
    Batch(theta);
    print_check(theta, test);
    printf("\n\n");

    /* Report success to the environment; a non-zero status signals failure. */
    return EXIT_SUCCESS;
}

结果：

参考：

【1】http://www.stanford.edu/class/cs229/notes/cs229-notes1.pdf

http://blog.csdn.net/pennyliang/article/details/6998517