Logistic回归模型的训练与测试,C++ 实现

来源:互联网 发布:js上下文对象 编辑:程序博客网 时间:2024/04/27 20:59
// Logistic regression (based on "Machine Learning in Action", ch.5):
// batch gradient ascent and stochastic gradient ascent training, plus
// error-rate evaluation on a held-out test set.
//
// Data file format: one sample per line, whitespace-separated feature
// values followed by an integer class label (0 or 1).
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <cstring>
#include <stdio.h>
#include <algorithm>
#include <cmath>
#include <random>
#include <ctime>   // clock() -- was missing; only worked via transitive includes

using namespace std;

// Read a whitespace-separated dataset from `filename`.
// Each row becomes [1.0 (bias x0), x1..xn] in dataMat; the LAST value on
// the line is taken as the integer class label and appended to labelMat.
// Blank or one-value lines are skipped (the original read an uninitialized
// temporary as the label in that case).
void loadDataset(vector<vector<double>> &dataMat, vector<int> &labelMat,
                 const string &filename) {
    ifstream file(filename);
    string line;
    while (getline(file, line)) {
        istringstream record(line);
        vector<double> data;
        data.push_back(1.0);                 // bias term x0 = 1
        double value;
        while (record >> value)
            data.push_back(value);
        if (data.size() < 2)                 // need >= 1 feature + label
            continue;
        labelMat.push_back(static_cast<int>(data.back()));
        data.pop_back();                     // drop the label from the features
        dataMat.push_back(data);
    }
}

// Dot product of two equally sized vectors.
double scalarProduct(const vector<double> &w, const vector<double> &x) {
    double ret = 0.0;
    for (size_t i = 0; i < w.size(); i++)
        ret += w[i] * x[i];
    return ret;
}

// Logistic (sigmoid) function 1 / (1 + e^{-z}).
double sigmoid(double z) {
    return 1.0 / (1.0 + exp(-z));
}

// Return the transpose of dataMat (assumed rectangular and non-empty).
vector<vector<double>> matTranspose(const vector<vector<double>> &dataMat) {
    vector<vector<double>> ret(dataMat[0].size(),
                               vector<double>(dataMat.size(), 0.0));
    for (size_t i = 0; i < ret.size(); i++)
        for (size_t j = 0; j < ret[0].size(); j++)
            ret[i][j] = dataMat[j][i];
    return ret;
}

// Batch gradient ascent: 500 full passes with a fixed learning rate.
// weight is updated in place: w += alpha * X^T * (y - sigmoid(X w)).
void gradAscent(vector<double> &weight, const vector<vector<double>> &dataMat,
                const vector<int> &labelMat) {
    const int maxCycles = 500;
    const double alpha = 0.001;
    vector<vector<double>> dataMatT = matTranspose(dataMat);
    for (int cycle = 0; cycle < maxCycles; cycle++) {
        vector<double> error;
        error.reserve(labelMat.size());
        for (size_t i = 0; i < dataMat.size(); i++) {
            double h = sigmoid(scalarProduct(dataMat[i], weight));
            double dist = labelMat[i] - h;
            if (fabs(dist) < 1e-10)          // clamp numerical noise to exactly 0
                dist = 0.0;
            error.push_back(dist);
        }
        for (size_t i = 0; i < weight.size(); i++)
            weight[i] += alpha * scalarProduct(dataMatT[i], error);
    }
}

// Stochastic gradient ascent: one sample at a time, in a fresh random
// order each epoch, with a learning rate that decays with both the epoch
// number k and the within-epoch position i.
void stocGradAscent(vector<double> &weight, const vector<vector<double>> &dataMat,
                    const vector<int> &labelMat, int numIter = 150) {
    vector<size_t> order(dataMat.size());
    for (size_t i = 0; i < order.size(); i++)
        order[i] = i;
    // std::random_shuffle was deprecated in C++14 and removed in C++17;
    // std::shuffle with an explicit engine is the portable replacement.
    mt19937 rng(random_device{}());
    for (int k = 0; k < numIter; k++) {
        shuffle(order.begin(), order.end(), rng);
        for (size_t i = 0; i < dataMat.size(); i++) {
            double alpha = 4.0 / (1.0 + k + i) + 0.01;   // decaying step size
            const vector<double> &x = dataMat[order[i]];
            double h = sigmoid(scalarProduct(x, weight));
            double error = labelMat[order[i]] - h;
            for (size_t j = 0; j < weight.size(); j++)
                weight[j] += alpha * error * x[j];
        }
    }
}

// Predict the class (0 or 1) of one sample.
// BUG FIX: the original compared the raw score w.x against 0.5. The correct
// logistic decision rule is sigmoid(w.x) > 0.5, which is equivalent to
// w.x > 0 -- comparing the un-squashed score to 0.5 shifts the boundary.
int classify(const vector<double> &data, const vector<double> &weights) {
    return sigmoid(scalarProduct(data, weights)) > 0.5 ? 1 : 0;
}

// Fraction of misclassified samples in the test set (0.0 for an empty set,
// instead of the original's 0/0 NaN).
double testResult(const vector<vector<double>> &testDataMat,
                  const vector<int> &testDataLabel,
                  const vector<double> &weight) {
    if (testDataMat.empty())
        return 0.0;
    double errCount = 0.0;
    for (size_t i = 0; i < testDataMat.size(); i++)
        if (classify(testDataMat[i], weight) != testDataLabel[i])
            errCount += 1.0;
    return errCount / static_cast<double>(testDataMat.size());
}

int main() {
    vector<vector<double>> trainMat;
    vector<int> trainLabel;
    string trainFile("horseColicTraining.txt");
    loadDataset(trainMat, trainLabel, trainFile);

    vector<vector<double>> testMat;
    vector<int> testLabel;
    string testFile("horseColicTest.txt");
    loadDataset(testMat, testLabel, testFile);

    // Guard: the original indexed trainMat[0] unconditionally, which is UB
    // when the training file is missing or empty.
    if (trainMat.empty()) {
        cerr << "failed to load training data from " << trainFile << endl;
        return 1;
    }

    vector<double> weight(trainMat[0].size(), 1.0);
    clock_t start_time = clock();
    gradAscent(weight, trainMat, trainLabel);
    double err = testResult(testMat, testLabel, weight);
    clock_t end_time = clock();
    (void)start_time;   // timing captured but, as in the original, not printed
    (void)end_time;

    for (double v : weight)
        cout << v << endl;
    cout << "the error rate is: " << err << endl;

    // To try stochastic gradient ascent instead (the original commented-out
    // code referenced nonexistent dataMat/labelMat variables):
    //   vector<double> weight2(trainMat[0].size(), 1.0);
    //   stocGradAscent(weight2, trainMat, trainLabel);
    return 0;
}


0 0
原创粉丝点击