机器学习入门:线性回归及梯度下降(二)
来源:互联网 发布:电视用什么软件看电影 编辑:程序博客网 时间:2024/06/07 15:48
本文是用C++对上一篇文章机器学习入门:线性回归及梯度下降(一)的实现
测试数据
特征变量维度dim=384
训练样本sample_size=25000 下载地址save_train
测试样本test_size=25000 下载地址save_test
测试代码
// ---------------------------------------------------------------------------
// Linear regression trained with batch gradient descent (ridge-regularized).
// Feature dimension n = 384, parameters theta in R^{n+1} (with intercept),
// training sample size m = 25000.
// ---------------------------------------------------------------------------
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <stdlib.h>
#include <string.h>
#include <math.h>

using namespace std;

#define varibleNum 384
// Parenthesized so the macro stays correct inside larger expressions.
#define dimOfSpace (varibleNum + 1)
#define learnTimes 2000

// L2 (ridge) regularization strength lambda.
static double numda = 0.1;

class LiRegress
{
private:
    double thita[dimOfSpace];   // theta: model parameters, thita[0] = intercept
    double *residul;            // cached h_theta(x[i]) for every training sample
    vector<vector<double> > x;  // training features; x[i][0] == 1 (bias term)
    vector<double> y;           // training targets
    vector<double> yresult;     // predictions for the test set
    ifstream file;
    double alpha;               // learning rate
    int m;                      // number of training samples actually read
    double costBefor, costAft;

    // Parse one CSV training line "<id>,f1,...,f384,y": the leading id is
    // replaced by the constant bias feature 1, the trailing field is y.
    void getOneTrainCase(const string &line)
    {
        vector<double> onecase;
        int left = -1;  // index just past the previous comma; -1 = before first field
        for (int i = 0; i < (int)line.length(); i++)
        {
            if (line[i] == ',')
            {
                if (left == -1)
                    onecase.push_back(1);  // drop the id column, add bias feature
                else
                    onecase.push_back(atof(line.substr(left, i - left).c_str()));
                left = i + 1;
            }
        }
        x.push_back(onecase);
        y.push_back(atof(line.substr(left).c_str()));  // last field is the target y
    }

    // Parse one CSV test line "<id>,f1,...,f384" and store the prediction
    // y_hat = theta . x.
    void getOneTestCase(const string &line)
    {
        vector<double> onecase;
        int left = -1;
        for (int i = 0; i < (int)line.length(); i++)
        {
            if (line[i] == ',')
            {
                if (left == -1)
                    onecase.push_back(1);  // bias feature in place of the id
                else
                    onecase.push_back(atof(line.substr(left, i - left).c_str()));
                left = i + 1;
            }
        }
        onecase.push_back(atof(line.substr(left).c_str()));  // last feature column

        double ans = 0;
        for (int i = 0; i < dimOfSpace; i++)
            ans += onecase[i] * thita[i];
        yresult.push_back(ans);
    }

    // Recompute residul[i] = h_theta(x[i]) for every sample so the cost and
    // gradient can reuse the predictions instead of recomputing them.
    void getResidulOfAllCases(const double *_thita)
    {
        for (int i = 0; i < m; i++)
        {
            double ans = 0;
            for (int j = 0; j < dimOfSpace; j++)
                ans += x[i][j] * _thita[j];
            residul[i] = ans;
        }
    }

    // Mean-squared-error cost: J(theta) = 1/(2m) * sum_i (h(x_i) - y_i)^2.
    double CostFunction()
    {
        double jthita = 0;
        for (int i = 0; i < m; i++)
            jthita += pow(residul[i] - y[i], 2);
        return jthita * 0.5 / (double)m;
    }

public:
    LiRegress()
    {
        memset(thita, 0, sizeof(thita));  // theta starts at the zero vector
        residul = NULL;
        alpha = 0.09;
        m = 0;
    }

    // Convenience constructor: loads the training file immediately.
    LiRegress(const char *filename)
    {
        memset(thita, 0, sizeof(thita));
        residul = NULL;
        alpha = 1.0;
        m = 0;
        openFile(filename);
    }

    ~LiRegress()
    {
        // BUGFIX: the original called free() on `thita` (a non-heap member
        // array -- undefined behavior) and free() on memory obtained with
        // new[] (allocator mismatch). Only the new[] buffer must be released.
        delete[] residul;
        residul = NULL;
    }

    // Expose the learned parameter vector.
    double *get() { return thita; }

    // Load the training CSV (first line is a header) and size the residual
    // cache to the number of samples read.
    void openFile(const char *filename)
    {
        file.open(filename);
        if (!file.is_open())
        {
            cout << "error" << endl;
            return;
        }
        string line;
        getline(file, line);  // skip the header row
        while (getline(file, line))
        {
            if (!line.empty())
            {
                m++;
                getOneTrainCase(line);  // parse line by line
            }
        }
        cout << "over" << endl;
        // BUGFIX: the original tested `if (!residul)` before freeing -- i.e.
        // it freed only when the pointer was already NULL -- and used free()
        // on new[] memory. delete[] on NULL is a safe no-op.
        delete[] residul;
        residul = new double[m];
        file.close();
        file.clear();  // reset eof/fail bits so the stream can be reopened in readTest
    }

    // Run up to learnTimes gradient-descent iterations, adapting alpha as
    // the loss evolves.
    void Repeat()
    {
        getResidulOfAllCases(thita);  // initial predictions h_theta(x_i)
        costBefor = CostFunction();
        bool equal = false;
        int n = learnTimes;
        while (n--)
        {
            costAft = gradientDecent(costBefor);
            cout << learnTimes - n << " " << alpha << " " << costBefor << " "
                 << costAft << " " << costAft - costBefor << endl;
            // Empirically tuned step-size schedule after 100 iterations.
            if (learnTimes - n == 100)
                alpha = 0.0984771;
            if (costAft > costBefor)
            {
                // Overshot: the step increased the loss, fall back to the
                // smaller step size (the step itself was not committed).
                alpha = 0.0984771;
            }
            else if (costAft == costBefor)
            {
                if (equal)
                    break;     // two identical costs in a row: converged
                alpha *= 0.1;  // oscillating around a minimum: damp the step
                equal = true;
            }
            else
            {
                equal = false;
                costBefor = costAft;  // accept the improved cost
            }
        }
    }

    // One batch gradient-descent step with ridge regularization (the
    // intercept thita[0] is not regularized). The candidate theta is only
    // committed when it lowers the cost; the candidate's cost is returned
    // either way.
    double gradientDecent(double costBefor)
    {
        getResidulOfAllCases(thita);  // refresh h_theta(x_i) for current theta
        double *temp = new double[dimOfSpace];
        for (int j = 0; j < dimOfSpace; j++)
        {
            double sum = 0;
            for (int i = 0; i < m; i++)
                sum += (residul[i] - y[i]) * x[i][j];
            if (j == 0)
                temp[j] = thita[j] - alpha * sum / (double)m;
            else
                // BUGFIX: the ridge gradient is (sum + lambda*theta_j); the
                // original subtracted the regularization term, which rewards
                // large weights instead of penalizing them.
                temp[j] = thita[j] - alpha * (sum + numda * thita[j]) / (double)m;
        }
        getResidulOfAllCases(temp);
        double costAfter = CostFunction();
        if (costAfter < costBefor)  // only move when the loss decreases
        {
            for (int j = 0; j < dimOfSpace; j++)
                thita[j] = temp[j];
        }
        delete[] temp;
        return costAfter;
    }

    // Predict every row of the test CSV and write "Id,reference" pairs to
    // result.csv.
    void readTest(const char *filename)
    {
        file.open(filename);
        if (!file.is_open())
        {
            cout << "error" << endl;
            return;
        }
        string line;
        int num = 0;
        getline(file, line);  // skip the header row
        while (getline(file, line))
        {
            if (!line.empty())
            {
                num++;
                getOneTestCase(line);
            }
        }
        cout << "caculate over" << endl;
        file.close();
        file.clear();
        // Write the predictions to the output file.
        ofstream outfile("result.csv");
        outfile << "Id,reference" << endl;
        // BUGFIX: iterate over the `num` test predictions, not the `m`
        // training samples (out-of-bounds read of yresult when num < m).
        for (int i = 0; i < num; i++)
            outfile << i << "," << yresult[i] << endl;
        outfile.close();
    }
};

int main()
{
    // Stack allocation: the original leaked a heap LiRegress (and its buggy
    // destructor would have corrupted the heap via free(thita) anyway).
    LiRegress lg;
    lg.openFile("save_train.csv");
    lg.Repeat();
    lg.readTest("save_test.csv");
    return 0;
}
测试结果
刚开始alpha=0.09 下降速率很快
500次迭代后下降速率就很慢了
0 0
- 机器学习入门:线性回归及梯度下降(二)
- 机器学习入门:线性回归及梯度下降(一)
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- 机器学习入门:线性回归及梯度下降
- tcpdump命令详解
- scala(二)
- HoloLens开发——vuforia图像识别
- faster-rcnn训练成功
- C语言趣味程序(3)
- 机器学习入门:线性回归及梯度下降(二)
- jquery.dataTable.js 使用详解 _fnFeatureHtmlInfo
- Android 自定义VideoView
- mysql 技术内幕-笔记
- 整数的原码 反码与补码,大尾小尾存储
- MATLAB GUI图像处理->打开和保存图片
- 对udp组播流(MPTS)进行简单的收录
- ORA-15018: diskgroup cannot be created
- Vim命令