Implementing an MLP Neural Network in C++


I had been training models with Theano, but recently needed to port the work to C or C++. After looking at other people's code online I settled on C++, but every cpp implementation I read had bugs of one kind or another, which was frustrating. Since my coding skills are limited, fixing them took quite a while. I also added functions for writing and reading the weights, so the training result can be saved. Below is a description of the basic functionality the code implements.

Problem description:

Rewrite an MLP, i.e. an ordinary multi-layer neural network, in C++. It must support several hidden layers connected to an output layer, with a softmax layer doing the classification.
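For reference, the softmax used by the classification layer turns the raw output scores x_1, ..., x_k into probabilities p_i = exp(x_i - m) / Σ_j exp(x_j - m), where m is the largest score. Subtracting m (or any other constant) from every score leaves the result unchanged but keeps exp() from overflowing; the LogisticRegression::softmax implementation below uses this trick.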

Test example:

The test example is constructed by hand: a 3-bit binary number has 8 possible decimal values, and I map them to labels 0-7. For example, 001 corresponds to label 1, 111 corresponds to label 7, and so on.
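As a quick illustration (this helper is not part of the original code), the label index is simply the decimal value of the three bits:

// Hypothetical helper, for illustration only: map a 3-bit input to its label index.
int labelIndex(const double bits[3])
{
    return (int)(bits[0] * 4 + bits[1] * 2 + bits[2]);   // e.g. {0,0,1} -> 1, {1,1,1} -> 7
}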

Now let's look at the code:

main.cpp

#include <iostream>
#include "NeuralNetwork.h"
#include "util.h"
using namespace std;

/* The two functions called in main do the same job:
 * classify a 3-bit binary number into its decimal value.
 * test_lr uses a single softmax regression layer;
 * mlp is the neural network with multiple hidden layers. */
int main()
{
    cout << "****softmax****" << endl;
    test_lr();
    cout << "****mlp****" << endl;
    mlp();
    return 0;
}

The test_lr function tests the softmax layer; let's first look at its related files.

LogisticRegression.h

#ifndef LOGISTICREGRESSIONLAYER
#define LOGISTICREGRESSIONLAYER

class LogisticRegression
{
public:
    LogisticRegression(int n_i, int i_o, int);
    ~LogisticRegression();
    void forward_propagation(double* input_data);
    void back_propagation(double* input_data, double* label, double lr);
    void softmax(double* x);
    void printwb();
    void writewb(const char *pcname);
    long readwb(const char *pcname, long);
    void setwb(double ppdw[][3], double [8]);
    void train(double *x, double *y, double lr);
    int predict(double *);
    double cal_error(double **ppdtest, double* pdlabel, int ibatch);
    //double cal_error(double* label);
    void makeLabels(int* pimax, double (*pplabels)[8]);

    // Output of this layer's forward pass; also the final prediction
    double* output_data;
    // Values needed during back-propagation
    double* delta;

public:
    int n_in;
    int n_out;
    int n_train;
    double** w;
    double* b;
};

void test_lr();
void testwb();

#endif

The testwb function in this header exists only to test reading and writing the weights; you can ignore it for now.

LogisticRegression.cpp

#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include "LogisticRegression.h"
#include "util.h"
using namespace std;

LogisticRegression::LogisticRegression(int n_i, int n_o, int n_t)
{
    n_in = n_i;
    n_out = n_o;
    n_train = n_t;
    w = new double* [n_out];
    for(int i = 0; i < n_out; ++i)
    {
        w[i] = new double [n_in];
    }
    b = new double [n_out];
    double a = 1.0 / n_in;
    srand((unsigned)time(NULL));
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
            w[i][j] = uniform(-a, a);
        b[i] = uniform(-a, a);
    }
    delta = new double [n_out];
    output_data = new double [n_out];
}

LogisticRegression::~LogisticRegression()
{
    for(int i = 0; i < n_out; i++)
        delete []w[i];
    delete[] w;
    delete[] b;
    delete[] output_data;
    delete[] delta;
}

void LogisticRegression::printwb()
{
    cout << "'****w****\n";
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
            cout << w[i][j] << ' ';
        cout << endl;
    }
    cout << "'****b****\n";
    for(int i = 0; i < n_out; ++i)
    {
        cout << b[i] << ' ';
    }
    cout << endl;
    cout << "'****output****\n";
    for(int i = 0; i < n_out; ++i)
    {
        cout << output_data[i] << ' ';
    }
    cout << endl;
}

void LogisticRegression::softmax(double* x)
{
    double _max = 0.0;
    double _sum = 0.0;
    for(int i = 0; i < n_out; ++i)
    {
        if(_max < x[i])
            _max = x[i];
    }
    for(int i = 0; i < n_out; ++i)
    {
        x[i] = exp(x[i] - _max);
        _sum += x[i];
    }
    for(int i = 0; i < n_out; ++i)
    {
        x[i] /= _sum;
    }
}

void LogisticRegression::forward_propagation(double* input_data)
{
    for(int i = 0; i < n_out; ++i)
    {
        output_data[i] = 0.0;
        for(int j = 0; j < n_in; ++j)
        {
            output_data[i] += w[i][j] * input_data[j];
        }
        output_data[i] += b[i];
    }
    softmax(output_data);
}

void LogisticRegression::back_propagation(double* input_data, double* label, double lr)
{
    for(int i = 0; i < n_out; ++i)
    {
        delta[i] = label[i] - output_data[i];
        for(int j = 0; j < n_in; ++j)
        {
            w[i][j] += lr * delta[i] * input_data[j] / n_train;
        }
        b[i] += lr * delta[i] / n_train;
    }
}

int LogisticRegression::predict(double *x)
{
    forward_propagation(x);
    cout << "***result is ***" << endl;
    int iresult = getMaxIndex(output_data, n_out);
    cout << iresult << endl;
    if (iresult == 1)
        printArr(output_data, n_out);
    return iresult;
}

void LogisticRegression::train(double *x, double *y, double lr)
{
    forward_propagation(x);
    back_propagation(x, y, lr);
}

// This function is not used yet; you can skip it
double LogisticRegression::cal_error(double **ppdtest, double* pdlabel, int ibatch)
{
    double error = 0.0, dmax = 0;
    int imax = -1, ierrNum = 0;
    for (int i = 0; i < ibatch; ++i)
    {
        imax = predict(ppdtest[i]);
        if (imax != pdlabel[i])
            ++ierrNum;
    }
    error = (double)ierrNum / ibatch;
    return error;
}

void LogisticRegression::writewb(const char *pcname)
{
    savewb(pcname, w, b, n_out, n_in);
}

long LogisticRegression::readwb(const char *pcname, long dstartpos)
{
    return loadwb(pcname, w, b, n_out, n_in, dstartpos);
}

void LogisticRegression::setwb(double ppdw[][3], double szib[8])
{
    for (int i = 0; i < n_out; ++i)
    {
        for (int j = 0; j < n_in; ++j)
            w[i][j] = ppdw[i][j];
        b[i] = szib[i];
    }
    cout << "setwb----------" << endl;
    printArrDouble(w, n_out, n_in);
    printArr(b, n_out);
}

void LogisticRegression::makeLabels(int* pimax, double (*pplabels)[8])
{
    for (int i = 0; i < n_train; ++i)
    {
        for (int j = 0; j < n_out; ++j)
            pplabels[i][j] = 0;
        int k = pimax[i];
        pplabels[i][k] = 1.0;
    }
}

void test_lr()
{
    srand(0);
    double learning_rate = 0.1;
    int n_epochs = 200;
    int test_N = 2;
    const int trainNum = 8, n_in = 3, n_out = 8;
    double train_X[trainNum][n_in] = {
        {1, 1, 1},
        {1, 1, 0},
        {1, 0, 1},
        {1, 0, 0},
        {0, 1, 1},
        {0, 1, 0},
        {0, 0, 1},
        {0, 0, 0}
    };
    // szimax holds the index of the maximum, i.e. the target label of each sample
    int szimax[trainNum];
    for (int i = 0; i < trainNum; ++i)
        szimax[i] = trainNum - i - 1;
    double train_Y[trainNum][n_out];

    // construct LogisticRegression
    LogisticRegression classifier(n_in, n_out, trainNum);
    classifier.makeLabels(szimax, train_Y);

    // train online
    for(int epoch = 0; epoch < n_epochs; epoch++)
    {
        for(int i = 0; i < trainNum; i++)
        {
            //classifier.trainEfficient(train_X[i], train_Y[i], learning_rate);
            classifier.train(train_X[i], train_Y[i], learning_rate);
        }
    }

    const char *pcfile = "test.wb";
    classifier.writewb(pcfile);

    LogisticRegression logistic(n_in, n_out, trainNum);
    logistic.readwb(pcfile, 0);

    // test data (each row is one n_in-dimensional input)
    double test_X[2][n_in] = {
        {1, 0, 1},
        {0, 0, 1}
    };

    // test
    cout << "before readwb ---------" << endl;
    for(int i = 0; i < test_N; i++)
    {
        classifier.predict(test_X[i]);
        cout << endl;
    }
    cout << "after readwb ---------" << endl;
    for(int i = 0; i < trainNum; i++)
    {
        logistic.predict(train_X[i]);
        cout << endl;
    }
    cout << "*********\n";
}

void testwb()
{
    int test_N = 2;
    const int trainNum = 8, n_in = 3, n_out = 8;
    double train_X[trainNum][n_in] = {
        {1, 1, 1},
        {1, 1, 0},
        {1, 0, 1},
        {1, 0, 0},
        {0, 1, 1},
        {0, 1, 0},
        {0, 0, 1},
        {0, 0, 0}
    };
    double szib[n_out] = {1, 2, 3, 3, 3, 3, 2, 1};

    // construct LogisticRegression
    LogisticRegression classifier(n_in, n_out, trainNum);
    classifier.setwb(train_X, szib);
    const char *pcfile = "test.wb";
    classifier.writewb(pcfile);
    LogisticRegression logistic(n_in, n_out, trainNum);
    logistic.readwb(pcfile, 0);
}
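A note on the update rule: in back_propagation, delta[i] = label[i] - output_data[i] is (up to sign) the gradient of the cross-entropy loss with respect to the pre-softmax scores, so the w[i][j] += lr * delta[i] * input_data[j] update is plain gradient descent on that loss. If you want to monitor training, something like the following could stand in for the commented-out cal_error call; this is a minimal sketch of my own, not part of the classes above:

#include <cmath>   // for log

// Hypothetical helper: cross-entropy loss for one sample, given the softmax
// output and a one-hot label, both of length n_out.
double cross_entropy(const double *output, const double *label, int n_out)
{
    double loss = 0.0;
    for (int i = 0; i < n_out; ++i)
    {
        if (label[i] > 0.0)
            loss -= label[i] * log(output[i] + 1e-12);   // small epsilon guards against log(0)
    }
    return loss;
}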

Next is the code that assembles the MLP, i.e. wires the hidden layers to the softmax layer. This part took a very long time to debug...

NeuralNetwork.h

#ifndef NEURALNETWORK_H
#define NEURALNETWORK_H
#include "HiddenLayer.h"
#include "LogisticRegression.h"

class NeuralNetwork
{
public:
    NeuralNetwork(int n, int n_i, int n_o, int nhl, int *hls);
    ~NeuralNetwork();
    void train(double** in_data, double** in_label, double lr, int epochs);
    void predict(double** in_data, int n);
    void writewb(const char *pcname);
    void readwb(const char *pcname);

private:
    int N;                   // number of training samples
    int n_in;                // input dimension
    int n_out;               // output dimension
    int n_hidden_layer;      // number of hidden layers
    int* hidden_layer_size;  // sizes of the hidden layers, e.g. {3,4} means two hidden layers with 3 and 4 units
    HiddenLayer **sigmoid_layers;
    LogisticRegression *log_layer;
};

void mlp();

#endif
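Only the last two constructor arguments describe the topology, so switching to a different layout is a one-line change. A minimal sketch, with sizes chosen arbitrarily for illustration:

// Hypothetical construction sketch (requires #include "NeuralNetwork.h"):
// 8 training samples, 3 inputs, 8 outputs, three hidden layers of 4, 6 and 4 units.
int hidden[3] = {4, 6, 4};
NeuralNetwork net(8, 3, 8, 3, hidden);
// The constructor stores the raw pointer, so `hidden` must outlive `net`.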


mlp() exercises the basic functionality of the whole network; it is a bit messy. Essentially it trains one MLP and saves its weights, then a second MLP loads the weights and runs prediction.
NeuralNetwork.cpp

#include <iostream>
#include "NeuralNetwork.h"
#include "util.h"
//#include "HiddenLayer.h"
//#include "LogisticRegression.h"
using namespace std;

const int n_train = 8, innode = 3, outnode = 8;

NeuralNetwork::NeuralNetwork(int n, int n_i, int n_o, int nhl, int *hls)
{
    N = n;
    n_in = n_i;
    n_out = n_o;
    n_hidden_layer = nhl;
    hidden_layer_size = hls;

    // Build the network structure
    sigmoid_layers = new HiddenLayer* [n_hidden_layer];
    for(int i = 0; i < n_hidden_layer; ++i)
    {
        if(i == 0)
        {
            sigmoid_layers[i] = new HiddenLayer(n_in, hidden_layer_size[i]);   // first hidden layer
        }
        else
        {
            sigmoid_layers[i] = new HiddenLayer(hidden_layer_size[i-1], hidden_layer_size[i]);   // other hidden layers
        }
    }
    log_layer = new LogisticRegression(hidden_layer_size[n_hidden_layer-1], n_out, N);   // final softmax layer
}

NeuralNetwork::~NeuralNetwork()
{
    // What a pointer-to-pointer refers to is not necessarily a 2D array
    for(int i = 0; i < n_hidden_layer; ++i)
        delete sigmoid_layers[i];   // each element is a single object, so no []
    delete[] sigmoid_layers;
    // log_layer is an ordinary object pointer and must not be deleted as an array
    delete log_layer;
}

void NeuralNetwork::train(double** ppdinData, double** ppdinLabel, double dlr, int iepochs)
{
    printArrDouble(ppdinData, N, n_in);
    cout << "******label****" << endl;
    printArrDouble(ppdinLabel, N, n_out);

    // Iterate over the whole training set iepochs times
    for(int epoch = 0; epoch < iepochs; ++epoch)
    {
        double e = 0.0;
        for(int i = 0; i < N; ++i)
        {
            // Forward pass
            for(int n = 0; n < n_hidden_layer; ++n)
            {
                if(n == 0)   // the first hidden layer reads the raw input
                {
                    sigmoid_layers[n]->forward_propagation(ppdinData[i]);
                }
                else         // later hidden layers read the previous layer's output
                {
                    sigmoid_layers[n]->forward_propagation(sigmoid_layers[n-1]->output_data);
                }
            }
            // The softmax layer reads the last hidden layer's output
            log_layer->forward_propagation(sigmoid_layers[n_hidden_layer-1]->output_data);
            //e += log_layer->cal_error(ppdinLabel[i]);

            // Backward pass
            log_layer->back_propagation(sigmoid_layers[n_hidden_layer-1]->output_data, ppdinLabel[i], dlr);
            for(int n = n_hidden_layer-1; n >= 1; --n)
            {
                if(n == n_hidden_layer-1)
                {
                    sigmoid_layers[n]->back_propagation(sigmoid_layers[n-1]->output_data,
                        log_layer->delta, log_layer->w, log_layer->n_out, dlr, N);
                }
                else
                {
                    double *pdinputData;
                    pdinputData = sigmoid_layers[n-1]->output_data;
                    sigmoid_layers[n]->back_propagation(pdinputData,
                        sigmoid_layers[n+1]->delta, sigmoid_layers[n+1]->w, sigmoid_layers[n+1]->n_out, dlr, N);
                }
            }
            // The first hidden layer reads the raw input; its "next layer" is either the
            // second hidden layer or, if there is only one hidden layer, the softmax layer
            if (n_hidden_layer > 1)
                sigmoid_layers[0]->back_propagation(ppdinData[i],
                    sigmoid_layers[1]->delta, sigmoid_layers[1]->w, sigmoid_layers[1]->n_out, dlr, N);
            else
                sigmoid_layers[0]->back_propagation(ppdinData[i],
                    log_layer->delta, log_layer->w, log_layer->n_out, dlr, N);
        }
        //if (epoch % 100 == 1)
        //    cout << "iepochs number is " << epoch << "   cost function is " << e / (double)N << endl;
    }
}

void NeuralNetwork::predict(double** ppdata, int n)
{
    for(int i = 0; i < n; ++i)
    {
        for(int n = 0; n < n_hidden_layer; ++n)
        {
            if(n == 0)   // the first hidden layer reads the raw input
            {
                sigmoid_layers[n]->forward_propagation(ppdata[i]);
            }
            else         // later hidden layers read the previous layer's output
            {
                sigmoid_layers[n]->forward_propagation(sigmoid_layers[n-1]->output_data);
            }
        }
        // The softmax layer reads the last hidden layer's output
        log_layer->predict(sigmoid_layers[n_hidden_layer-1]->output_data);
        //log_layer->forward_propagation(sigmoid_layers[n_hidden_layer-1]->output_data);
    }
}

void NeuralNetwork::writewb(const char *pcname)
{
    for(int i = 0; i < n_hidden_layer; ++i)
    {
        sigmoid_layers[i]->writewb(pcname);
    }
    log_layer->writewb(pcname);
}

void NeuralNetwork::readwb(const char *pcname)
{
    long dcurpos = 0, dreadsize = 0;
    for(int i = 0; i < n_hidden_layer; ++i)
    {
        dreadsize = sigmoid_layers[i]->readwb(pcname, dcurpos);
        cout << "hiddenlayer " << i + 1 << " read bytes: " << dreadsize << endl;
        if (-1 != dreadsize)
            dcurpos += dreadsize;
        else
        {
            cout << "read wb error from HiddenLayer" << endl;
            return;
        }
    }
    dreadsize = log_layer->readwb(pcname, dcurpos);
    if (-1 != dreadsize)
        dcurpos += dreadsize;
    else
    {
        cout << "read wb error from softmaxLayer" << endl;
        return;
    }
}

//double **makeLabelSample(double **label_x)
double **makeLabelSample(double label_x[][outnode])
{
    double **pplabelSample;
    pplabelSample = new double*[n_train];
    for (int i = 0; i < n_train; ++i)
    {
        pplabelSample[i] = new double[outnode];
    }
    for (int i = 0; i < n_train; ++i)
    {
        for (int j = 0; j < outnode; ++j)
            pplabelSample[i][j] = label_x[i][j];
    }
    return pplabelSample;
}

double **maken_train(double train_x[][innode])
{
    double **ppn_train;
    ppn_train = new double*[n_train];
    for (int i = 0; i < n_train; ++i)
    {
        ppn_train[i] = new double[innode];
    }
    for (int i = 0; i < n_train; ++i)
    {
        for (int j = 0; j < innode; ++j)
            ppn_train[i][j] = train_x[i][j];
    }
    return ppn_train;
}

void mlp()
{
    // Training inputs
    double X[n_train][innode] = {
        {0,0,0}, {0,0,1}, {0,1,0}, {0,1,1},
        {1,0,0}, {1,0,1}, {1,1,0}, {1,1,1}
    };
    // One-hot labels: row i has a 1 at position i
    double Y[n_train][outnode] = {
        {1, 0, 0, 0, 0, 0, 0, 0},
        {0, 1, 0, 0, 0, 0, 0, 0},
        {0, 0, 1, 0, 0, 0, 0, 0},
        {0, 0, 0, 1, 0, 0, 0, 0},
        {0, 0, 0, 0, 1, 0, 0, 0},
        {0, 0, 0, 0, 0, 1, 0, 0},
        {0, 0, 0, 0, 0, 0, 1, 0},
        {0, 0, 0, 0, 0, 0, 0, 1},
    };
    const int ihiddenSize = 2;
    int phidden[ihiddenSize] = {5, 5};
    //printArr(phidden, 1);
    NeuralNetwork neural(n_train, innode, outnode, ihiddenSize, phidden);
    double **train_x, **ppdlabel;
    train_x = maken_train(X);
    //printArrDouble(train_x, n_train, innode);
    ppdlabel = makeLabelSample(Y);
    neural.train(train_x, ppdlabel, 0.1, 3500);
    cout << "training complete..." << endl;

    // pcname is the file that stores the weights
    const char *pcname = "mlp55new.wb";
    neural.writewb(pcname);

    NeuralNetwork neural2(n_train, innode, outnode, ihiddenSize, phidden);
    cout << "readwb start..." << endl;
    neural2.readwb(pcname);
    cout << "readwb end..." << endl;

    neural.predict(train_x, n_train);
    cout << "----------after readwb________" << endl;
    neural2.predict(train_x, n_train);

    for (int i = 0; i != n_train; ++i)
    {
        delete []train_x[i];
        delete []ppdlabel[i];
    }
    delete []train_x;
    delete []ppdlabel;
    cout << endl;
}
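Since NeuralNetwork::predict takes a double** plus a sample count, classifying a single new pattern after training only needs a small wrapper. A usage sketch (not part of the original mlp(), assumed to run right after neural.train):

// Hypothetical usage sketch: classify one 3-bit pattern with the trained network.
double sample[innode] = {0, 1, 1};     // binary 011, so the expected label is 3
double *psample[1] = { sample };
neural.predict(psample, 1);            // prints the predicted class via LogisticRegression::predict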

HiddenLayer.h

#ifndef HIDDENLAYER_H
#define HIDDENLAYER_H

class HiddenLayer
{
public:
    HiddenLayer(int n_i, int n_o);
    ~HiddenLayer();
    void forward_propagation(double* input_data);
    void back_propagation(double *pdinputData, double *pdnextLayerDelta,
        double** ppdnextLayerW, int iNextLayerOutNum, double dlr, int N);
    void writewb(const char *pcname);
    long readwb(const char *pcname, long);

    // Output of this layer's forward pass, used as the next layer's input
    double* output_data;
    // Values needed during back-propagation
    double* delta;

public:
    int n_in;
    int n_out;
    double** w;
    double* b;
};

#endif

HiddenLayer.cpp

#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include "HiddenLayer.h"
#include "util.h"
using namespace std;

HiddenLayer::HiddenLayer(int n_i, int n_o)
{
    n_in = n_i;
    n_out = n_o;
    w = new double* [n_out];
    for(int i = 0; i < n_out; ++i)
    {
        w[i] = new double [n_in];
    }
    b = new double [n_out];
    double a = 1.0 / n_in;
    srand((unsigned)time(NULL));
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
            w[i][j] = uniform(-a, a);
        b[i] = uniform(-a, a);
    }
    delta = new double [n_out];
    output_data = new double [n_out];
}

HiddenLayer::~HiddenLayer()
{
    for(int i = 0; i < n_out; i++)
        delete []w[i];
    delete[] w;
    delete[] b;
    delete[] output_data;
    delete[] delta;
}

void HiddenLayer::forward_propagation(double* pdinputData)
{
    for(int i = 0; i < n_out; ++i)
    {
        output_data[i] = 0.0;
        for(int j = 0; j < n_in; ++j)
        {
            output_data[i] += w[i][j] * pdinputData[j];
        }
        output_data[i] += b[i];
        output_data[i] = sigmoid(output_data[i]);
    }
}

void HiddenLayer::back_propagation(double *pdinputData, double *pdnextLayerDelta,
    double** ppdnextLayerW, int iNextLayerOutNum, double dlr, int N)
{
    /*
     * pdinputData        input data for this layer
     * pdnextLayerDelta   the next layer's delta, an array of size iNextLayerOutNum
     * ppdnextLayerW      weights from this layer to the next layer
     * iNextLayerOutNum   simply the next layer's n_out
     * dlr                learning rate
     * N                  total number of training samples
     */

    // sigma must have as many elements as this layer has units;
    // the code found online got this wrong (apparently it was never tested)
    //double* sigma = new double[iNextLayerOutNum];
    double* sigma = new double[n_out];
    //double sigma[10];
    for(int i = 0; i < n_out; ++i)
        sigma[i] = 0.0;
    for(int i = 0; i < iNextLayerOutNum; ++i)
    {
        for(int j = 0; j < n_out; ++j)
        {
            sigma[j] += ppdnextLayerW[i][j] * pdnextLayerDelta[i];
        }
    }
    // Compute this layer's delta
    for(int i = 0; i < n_out; ++i)
    {
        delta[i] = sigma[i] * output_data[i] * (1 - output_data[i]);
    }
    // Update this layer's weights
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
        {
            w[i][j] += dlr * delta[i] * pdinputData[j];
        }
        b[i] += dlr * delta[i];
    }
    delete[] sigma;
}

void HiddenLayer::writewb(const char *pcname)
{
    savewb(pcname, w, b, n_out, n_in);
}

long HiddenLayer::readwb(const char *pcname, long dstartpos)
{
    return loadwb(pcname, w, b, n_out, n_in, dstartpos);
}
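In formula form, what back_propagation computes for hidden unit j is delta_j = (Σ_i nextW[i][j] · nextDelta[i]) · o_j · (1 − o_j), where o_j is the unit's sigmoid output, the sum runs over the iNextLayerOutNum units of the next layer, and o_j(1 − o_j) is the derivative of the sigmoid. This is also why sigma needs one element per unit of this layer (n_out of them) rather than iNextLayerOutNum elements, which is the bug mentioned in the comment inside back_propagation.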

Below is a utility file.

util.h

#ifndef UTIL_H
#define UTIL_H
#include <iostream>   // the templates below use std::cout

typedef unsigned char BYTE;

double sigmoid(double x);
double uniform(double _min, double _max);
//void printArr(T *parr, int num);
//void printArrDouble(double **pparr, int row, int col);
void initArr(double *parr, int num);
int getMaxIndex(double *pdarr, int num);
void savewb(const char *pcname, double **ppw, double *pb, int irow, int icol);
long loadwb(const char *pcname, double **ppw, double *pb, int irow, int icol, long dstartpos);
void readonefile(const char *pcname);
void writeonefile(const char *pcname);

template <typename T>
void printArr(T *parr, int num)
{
    std::cout << "****printArr****" << std::endl;
    for (int i = 0; i < num; ++i)
        std::cout << parr[i] << ' ';
    std::cout << std::endl;
}

template <typename T>
void printArrDouble(T **pparr, int row, int col)
{
    std::cout << "****printArrDouble****" << std::endl;
    for (int i = 0; i < row; ++i)
    {
        for (int j = 0; j < col; ++j)
        {
            std::cout << pparr[i][j] << ' ';
        }
        std::cout << std::endl;
    }
}

#endif

util.cpp

#include "util.h"#include <iostream>#include <ctime>#include <cmath>using namespace std;int getMaxIndex(double *pdarr, int num){double dmax = -1;int imax = -1;for(int i = 0; i < num; ++i){if (pdarr[i] > dmax){dmax = pdarr[i];imax = i;}}return imax;}double sigmoid(double x){return 1.0/(1.0+exp(-x));}double uniform(double _min, double _max){return rand()/(RAND_MAX + 1.0) * (_max - _min) + _min;}void initArr(double *parr, int num){for (int i = 0; i < num; ++i)parr[i] = 0.0;}void savewb(const char *pcname, double **ppw, double *pb, int irow, int icol){FILE *pf;if( (pf = fopen(pcname, "ab" )) == NULL ) { printf( "File coulkd not be opened " ); return;} int isizeofelem = sizeof(double);for (int i = 0; i < irow; ++i){if (fwrite((const void*)ppw[i], isizeofelem, icol, pf) != icol){fputs ("Writing ppw error",stderr);return;}}if (fwrite((const void*)pb, isizeofelem, irow, pf) != irow){fputs ("Writing ppw error",stderr);return;}fclose(pf);}long loadwb(const char *pcname, double **ppw, double *pb,int irow, int icol, long dstartpos){FILE *pf;long dtotalbyte = 0, dreadsize;if( (pf = fopen(pcname, "rb" )) == NULL ) { printf( "File coulkd not be opened " ); return -1;} //让文件指针偏移到正确位置fseek(pf, dstartpos , SEEK_SET);int isizeofelem = sizeof(double);for (int i = 0; i < irow; ++i){dreadsize = fread((void*)ppw[i], isizeofelem, icol, pf);if (dreadsize != icol){fputs ("Reading ppw error",stderr);return -1;}//每次成功读取,都要加到dtotalbyte中,最后返回dtotalbyte += dreadsize;}dreadsize = fread(pb, isizeofelem, irow, pf);if (dreadsize != irow){fputs ("Reading pb error",stderr);return -1;}dtotalbyte += dreadsize;dtotalbyte *= isizeofelem;fclose(pf);return dtotalbyte;}void readonefile(const char *pcname){FILE *pf;if( (pf = fopen(pcname, "rb" )) == NULL ) { printf( "File could not be opened " ); return;} /*int isizeofelem = sizeof(BYTE);BYTE ielem;*/int isizeofelem = sizeof(double);double ielem;while(1 == fread((void*)(&ielem), isizeofelem, 1, pf))cout << ielem << endl;fclose(pf);}void writeonefile(const char *pcname){FILE *pf;if( (pf = fopen(pcname, "wb" )) == NULL ) { printf( "File could not be opened " ); return;} //int isizeofelem = sizeof(BYTE);//BYTE ielem = (BYTE)16;int isizeofelem = sizeof(int);int ielem = 16;if(1 == fwrite((void*)(&ielem), isizeofelem, 1, pf))cout << ielem << endl;fclose(pf);}

That is all of the code; in my tests it runs. I do not write much code, so please forgive any remaining issues.

There is also a VS2008 project containing this code; if you do not want to set up a project yourself, you can simply download and run it. The only thing the project lacks is the weight-saving functions.
