Neural Network prototype in C++

来源:互联网 发布:sqlserver分页 编辑:程序博客网 时间:2024/05/01 08:23

/*--------------------------------------------------------------------------------------------
 *  Neural Network Prototype
 *
 *  Perceptron Learning (See Russell & Norvig  PP 742)
 *
 *  Terry  10 Nov. 2004
 *------------------------------------------------------------------------------------------*/

#include <iostream>
#include <fstream>
#include <string>
#include <math.h>
//#include < cmath >
//#include <conio.h>
#include <time.h>
#include <stdlib.h>
   
using namespace std;

////////////////////////////////////////////////////////////////////////////////

class NeuralNet {
public:
    // Classification rule: an input pattern of NUM_INPUT random bits belongs
    // to class "10" when fewer than RULE bits are set, else to class "01".
    int RULE;
    double UPPERBOUND;   // target activation for the "on"  output unit
    double LOWERBOUND;   // target activation for the "off" output unit

    static const int NUM_INPUT  = 10;   // units in the input layer
    static const int NUM_HIDDEN = 6;    // units in the hidden layer
    static const int NUM_OUTPUT = 2;    // units in the output layer (one-hot pair)

    static const int NUM_PATTERNS = 200;        // size of the training set
    int    xp[NUM_PATTERNS][NUM_INPUT];         // training inputs (0/1 bits)
    double yp[NUM_PATTERNS][NUM_OUTPUT];        // training targets

    static const int NUM_DATA = 1000;           // size of the test set
    int    xt[NUM_DATA][NUM_INPUT];             // test inputs (0/1 bits)
    double yt[NUM_DATA][NUM_OUTPUT];            // test targets

    // Layer weights.  Column 0 of each row is the bias weight, paired with a
    // constant -1 input (Russell & Norvig convention); columns 1..N hold the
    // weights from the previous layer's units.
    double weights1[NUM_HIDDEN][NUM_INPUT + 1];     // input  -> hidden
    double weights2[NUM_OUTPUT][NUM_HIDDEN + 1];    // hidden -> output

    // FIX: `static const double` with an in-class initializer is ill-formed in
    // standard C++ (only integral types allow it); a floating-point member
    // needs `constexpr`.
    static constexpr double alpha = 0.1;            // learning rate
    bool fullytrained;

////////////////////////////////////////////////////////////////////////////////
    // Seed the RNG deterministically and initialise the weights: bias weights
    // start at 0, all other weights uniformly in [-0.25, +0.25).
    NeuralNet() {
        UPPERBOUND = 1;
        LOWERBOUND = 0;
        RULE = 6;
        srand(100);     // fixed seed -> reproducible runs

        for (int j = 0; j < NUM_HIDDEN; j++) {
            weights1[j][0] = 0.0;                   // bias weight
            for (int k = 1; k <= NUM_INPUT; k++)
                weights1[j][k] = (double)(rand() % 1000) / 2000 - 0.25;
        }
        for (int i = 0; i < NUM_OUTPUT; i++) {
            weights2[i][0] = 0.0;                   // bias weight
            // FIX: the original inner loop started at j=0 and immediately
            // overwrote the bias with a random value, making the line above
            // dead code; start at 1 so layer 2 is initialised like layer 1.
            for (int j = 1; j <= NUM_HIDDEN; j++)
                weights2[i][j] = (double)(rand() % 1000) / 2000 - 0.25;
        }
    }

    ~NeuralNet() {}
////////////////////////////////////////////////////////////////////////////////

    // Dump both weight matrices to stdout (debugging aid).
    void PrintWeights() {
        for (int j = 0; j < NUM_HIDDEN; j++) {
            for (int k = 0; k <= NUM_INPUT; k++)
                cout << "  w1[" << j << "][" << k << "]:" << weights1[j][k] << endl;
            cout << endl;
        }
        for (int i = 0; i < NUM_OUTPUT; i++) {
            for (int j = 0; j <= NUM_HIDDEN; j++)
                cout << "  w2[" << i << "][" << j << "]:" << weights2[i][j] << endl;
            cout << endl;
        }
    }
////////////////////////////////////////////////////////////////////////////////

    // Weighted input reaching hidden unit j for input vector vct.
    // The bias contributes -1 * weights1[j][0].
    double CalInput2Hidden(int j, int vct[NUM_INPUT]) {
        double in_to_hidden = (-1.0) * weights1[j][0];
        for (int k = 1; k <= NUM_INPUT; k++)
            in_to_hidden += weights1[j][k] * vct[k - 1];
        return in_to_hidden;
    }

    // Weighted input reaching output unit i for hidden activations vct.
    // The bias contributes -1 * weights2[i][0].
    double CalInput2Output(int i, double vct[NUM_HIDDEN]) {
        double in_to_output = (-1.0) * weights2[i][0];
        for (int j = 1; j <= NUM_HIDDEN; j++)
            in_to_output += weights2[i][j] * vct[j - 1];
        return in_to_output;
    }
////////////////////////////////////////////////////////////////////////////////

    // Logistic activation g.
    double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); }

    // Derivative of the sigmoid, g'(x) = g(x) * (1 - g(x)).
    double derivative(double x) {
        double sig = sigmoid(x);
        return sig * (1.0 - sig);
    }
////////////////////////////////////////////////////////////////////////////////

    // Collapse a two-unit activation vector to the label "10" or "01",
    // whichever unit is larger ("01" wins ties).
    string FormattedOutput(double input[NUM_OUTPUT]) {
        if (input[0] > input[1])
            return "10";
        else
            return "01";
    }

 ///////////////////////////////////////////////////////
    // Fill xt/yt with NUM_DATA random bit patterns and their true labels.
    void CreateTestData() {
        cout << "Generating " << NUM_DATA << " random testing data ...";
        for (int p = 0; p < NUM_DATA; p++) {
            int sum = 0;
            for (int i = 0; i < NUM_INPUT; i++) {
                xt[p][i] = rand() % 2;
                sum += xt[p][i];
            }
            // One-hot target: unit 0 fires when sum < RULE, unit 1 otherwise.
            yt[p][0] = (sum <  RULE ? UPPERBOUND : LOWERBOUND);
            yt[p][1] = (sum >= RULE ? UPPERBOUND : LOWERBOUND);
        }
        cout << "   Done!" << endl;
    }

///////////////////////////////////////////////////////
    // Fill xp/yp with NUM_PATTERNS random training patterns and targets.
    void CreateTrainingPatterns() {
        // FIX: message said "/nCenerating" (broken escape + typo).
        cout << "\nGenerating " << NUM_PATTERNS << " training patterns...";
        for (int p = 0; p < NUM_PATTERNS; p++) {
            int sum = 0;
            for (int i = 0; i < NUM_INPUT; i++) {
                xp[p][i] = rand() % 2;
                sum += xp[p][i];
            }
            yp[p][0] = (sum <  RULE ? UPPERBOUND : LOWERBOUND);
            yp[p][1] = (sum >= RULE ? UPPERBOUND : LOWERBOUND);
        }
        cout << "   Done!" << endl;
    }

////////////////////////////////////////////////////////
    // Back-propagation training: 50 epochs over the first
    // number_of_pattern_used training patterns (see Russell & Norvig,
    // back-prop pseudo-code).
    void Train(int number_of_pattern_used) {
        int times = 50;     // training epochs

        cout << "Begin training  (epoch=" << times << ", using the first "
             << number_of_pattern_used << " patterns) ... ";

        for (int epoch = 0; epoch < times; epoch++) {
            double in_to_hidden[NUM_HIDDEN];
            double a_hidden[NUM_HIDDEN];
            double delta_hidden[NUM_HIDDEN];

            double in_to_output[NUM_OUTPUT];
            double a_output[NUM_OUTPUT];
            double delta_output[NUM_OUTPUT];

            double err_output[NUM_OUTPUT];

            for (int p = 0; p < number_of_pattern_used; p++) {
                // Forward pass for pattern p.
                CalAll(xp[p], in_to_hidden, a_hidden, in_to_output, a_output);

                // Output-layer deltas: (target - prediction) * g'(in).
                for (int i = 0; i < NUM_OUTPUT; i++) {
                    err_output[i]   = yp[p][i] - a_output[i];
                    delta_output[i] = err_output[i] * derivative(in_to_output[i]);
                }

                // Hidden-layer deltas via back-propagation.
                for (int j = 0; j < NUM_HIDDEN; j++) {
                    double tmp = 0.0;
                    for (int i = 0; i < NUM_OUTPUT; i++) {
                        // FIX: the weight from hidden unit j to output i is
                        // weights2[i][j+1] (column 0 is the bias — see
                        // CalInput2Output); the original used weights2[i][j],
                        // back-propagating through the wrong weights.
                        tmp += delta_output[i] * weights2[i][j + 1];
                    }
                    delta_hidden[j] = derivative(in_to_hidden[j]) * tmp;
                }

                // Adjust hidden->output weights (bias input is constant -1).
                for (int i = 0; i < NUM_OUTPUT; i++) {
                    weights2[i][0] += alpha * (-1.0) * delta_output[i];
                    for (int j = 1; j <= NUM_HIDDEN; j++)
                        weights2[i][j] += alpha * a_hidden[j - 1] * delta_output[i];
                }

                // Adjust input->hidden weights.
                for (int j = 0; j < NUM_HIDDEN; j++) {
                    weights1[j][0] += alpha * (-1.0) * delta_hidden[j];
                    for (int k = 1; k <= NUM_INPUT; k++)
                        weights1[j][k] += alpha * xp[p][k - 1] * delta_hidden[j];
                }
            }   // end of pattern loop
        }       // end of epoch loop

        cout << "  Done!" << endl;
    }

///////////////////////////////////////////////////////////
    // Forward pass for one input vector: fills the raw weighted inputs and
    // sigmoid activations of the hidden layer (i_hid / o_hid) and of the
    // output layer (i_out / o_out).
    void CalAll(int in[NUM_INPUT], double * i_hid, double *o_hid, double * i_out, double *o_out) {
        for (int j = 0; j < NUM_HIDDEN; j++) {
            i_hid[j] = CalInput2Hidden(j, in);
            o_hid[j] = sigmoid(i_hid[j]);
        }
        for (int i = 0; i < NUM_OUTPUT; i++) {
            i_out[i] = CalInput2Output(i, o_hid);
            o_out[i] = sigmoid(i_out[i]);
        }
    }
//////////////////////////////////////////////////////////

    // Classify every test pattern and report the percentage whose predicted
    // label matches the true label.
    void Test() {
        cout << "Testing using the randomly generated " << NUM_DATA << " testing data...";

        int num = 0;    // correctly classified patterns
        for (int data_idx = 0; data_idx < NUM_DATA; data_idx++) {
            double youtputpre[NUM_OUTPUT];
            double yhiddenpre[NUM_HIDDEN];
            double in_to_hidden[NUM_HIDDEN];
            double in_to_output[NUM_OUTPUT];

            CalAll(xt[data_idx], in_to_hidden, yhiddenpre, in_to_output, youtputpre);

            if (FormattedOutput(yt[data_idx]) == FormattedOutput(youtputpre))
                num += 1;
        }

        // FIX: trailing "/n" was a broken "\n" escape.
        cout << " Done!  ( Rate of Correction : "
             << 100.0 * (float)num / (float)NUM_DATA << "% )\n" << endl;
    }

};    // end of class

//////////////////////////////////////////////////////////////

     
 
//////////////////////////////////////////////////////////////// 
 
 

int main( int argc, char * argv[] ){
 
  for( int number_of_patterns_4_training=0;number_of_patterns_4_training<=200 ; number_of_patterns_4_training+=20)
  {
      NeuralNet *ass2net = new NeuralNet();  
      ass2net->CreateTrainingPatterns();
      ass2net->CreateTestData();
      ass2net->Train(number_of_patterns_4_training);
//    ass2net->PrintWeights();
      ass2net->Test();
  }
}

/*-------------------------------------------------------------------------------------------

Development Notes:

This is deliberately designed to be close to the form (identifier setc) that Russell& Norvig use.  See their perceptron pseudo-code,
Neural Networks section.

-------------------------------------------------------------------------------------------*/

原创粉丝点击