DeepLearning（深度学习）原理与实现（二）

来源：互联网　发布：淘宝网的古悦堂怎么样　编辑：程序博客网　时间：2024/05/28 05:17

下面贴出 RBM 的 C++ 版本代码（由一些大牛编写），结合上篇博文可以加深大家对 RBM 理论的理解。

RBM类定义声明：

[cpp] view plaincopyprint?

/**
 * Restricted Boltzmann Machine with binary visible and hidden units,
 * trained with contrastive divergence (CD-k).
 *
 * Ownership: the constructor either allocates W, hbias and vbias itself
 * (when the corresponding argument is NULL) or adopts the pointers it is
 * given; the destructor releases all three with delete[] in both cases.
 * Caller-supplied arrays must therefore come from new[] and must not be
 * freed by the caller.
 *
 * Because the object owns raw arrays, copying it would make two objects
 * delete the same memory. Copy construction and copy assignment are
 * therefore disabled.
 */
class RBM {
public:
  int N;          // divisor applied to every parameter update in contrastive_divergence
  int n_visible;  // number of visible units
  int n_hidden;   // number of hidden units
  double **W;     // weights, indexed W[hidden][visible]
  double *hbias;  // hidden biases, n_hidden entries
  double *vbias;  // visible biases, n_visible entries

  // (N, n_visible, n_hidden, W or NULL, hbias or NULL, vbias or NULL)
  RBM(int, int, int, double**, double*, double*);
  ~RBM();

  // One CD-k update from a single sample: (input, learning rate, k).
  void contrastive_divergence(int*, double, int);

  // (visible sample in, hidden means out, hidden samples out)
  void sample_h_given_v(int*, double*, int*);
  // (hidden sample in, visible means out, visible samples out)
  void sample_v_given_h(int*, double*, int*);

  // Activation probability of one hidden unit: (visible sample, weight row, bias).
  double propup(int*, double*, double);
  // Activation probability of one visible unit: (hidden sample, visible index, bias).
  double propdown(int*, int, double);

  // One Gibbs step h -> v -> h:
  // (hidden sample in, v means out, v samples out, h means out, h samples out)
  void gibbs_hvh(int*, double*, int*, double*, int*);

  // (visible sample in, reconstructed visible probabilities out)
  void reconstruct(int*, double*);

private:
  // Non-copyable (pre-C++11 idiom): declared private and intentionally
  // never defined, so accidental copies fail at compile or link time
  // instead of double-freeing W/hbias/vbias at run time.
  RBM(const RBM&);
  RBM& operator=(const RBM&);
};

/**
 * Restricted Boltzmann Machine with binary units, trained with
 * contrastive divergence. W, hbias and vbias are raw arrays that the
 * destructor releases with delete[].
 */
class RBM
{
public:
    // Model dimensions.
    int N;          // divisor used when scaling each parameter update
    int n_visible;  // visible unit count
    int n_hidden;   // hidden unit count

    // Model parameters.
    double **W;     // weight matrix, W[hidden][visible]
    double *hbias;  // one bias per hidden unit
    double *vbias;  // one bias per visible unit

    // Construction / destruction.
    RBM(int, int, int, double**, double*, double*);
    ~RBM();

    // Training.
    void contrastive_divergence(int*, double, int);

    // Sampling.
    void sample_h_given_v(int*, double*, int*);
    void sample_v_given_h(int*, double*, int*);
    double propup(int*, double*, double);
    double propdown(int*, int, double);
    void gibbs_hvh(int*, double*, int*, double*, int*);

    // Inference.
    void reconstruct(int*, double*);
};

从上面的声明中可以很直观地看出，各成员与上篇文章中的公式符号一一对应。下面是代码实现部分：

[cpp] view plaincopyprint?

#include "RBM.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <iostream>
using namespace std;
/**
 * Draws a pseudo-random double by scaling a rand()-based fraction in
 * [0, 1) onto the span that starts at min and has width (max - min).
 *
 * Uses the global rand() state, so results depend on the last srand().
 */
double uniform(double min, double max) {
  const double fraction = rand() / (RAND_MAX + 1.0);  // RAND_MAX + 1.0 keeps it below 1
  const double width = max - min;
  return fraction * width + min;
}
/**
 * Counts successes in n independent trials that each succeed with
 * probability p, drawing one rand() value per trial.
 *
 * @param n number of trials; a non-positive n yields 0
 * @param p success probability; values below 0 or above 1 yield 0
 *          without drawing any random numbers
 * @return number of trials whose draw was below p
 */
int binomial(int n, double p) {
  const bool p_out_of_range = (p > 1) || (p < 0);
  if (p_out_of_range) {
    return 0;
  }

  int successes = 0;
  for (int trial = 0; trial < n; ++trial) {
    const double draw = rand() / (RAND_MAX + 1.0);  // in [0, 1)
    successes += (draw < p) ? 1 : 0;
  }
  return successes;
}
/** Logistic function: maps x to 1 / (1 + e^-x). */
double sigmoid(double x) {
  const double decay = exp(-x);
  return 1.0 / (decay + 1.0);
}
/**
 * Builds an RBM with n_v visible and n_h hidden units.
 *
 * @param size stored as N, the divisor used when scaling updates
 * @param n_v  number of visible units
 * @param n_h  number of hidden units
 * @param w    weights as w[hidden][visible], or NULL to allocate an
 *             n_h x n_v matrix filled with uniform(-1/n_v, 1/n_v) draws
 * @param hb   hidden biases, or NULL to allocate n_h zeros
 * @param vb   visible biases, or NULL to allocate n_v zeros
 *
 * Non-NULL pointers are stored as-is, not copied; the destructor will
 * delete[] them just like the arrays allocated here.
 */
RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb)
    : N(size), n_visible(n_v), n_hidden(n_h) {
  // Weights: adopt the caller's matrix or create a randomly initialised one.
  if (w != NULL) {
    W = w;
  } else {
    const double bound = 1.0 / n_visible;
    W = new double*[n_hidden];
    for (int h = 0; h < n_hidden; ++h) {
      W[h] = new double[n_visible];
    }
    for (int h = 0; h < n_hidden; ++h) {
      double *row = W[h];
      for (int v = 0; v < n_visible; ++v) {
        row[v] = uniform(-bound, bound);
      }
    }
  }

  // Hidden biases: adopt or zero-initialise.
  if (hb != NULL) {
    hbias = hb;
  } else {
    hbias = new double[n_hidden];
    for (int h = 0; h < n_hidden; ++h) {
      hbias[h] = 0;
    }
  }

  // Visible biases: adopt or zero-initialise.
  if (vb != NULL) {
    vbias = vb;
  } else {
    vbias = new double[n_visible];
    for (int v = 0; v < n_visible; ++v) {
      vbias[v] = 0;
    }
  }
}
/**
 * Frees the bias vectors, every row of W, and W itself with delete[],
 * regardless of whether they were allocated by the constructor or
 * supplied by the caller.
 */
RBM::~RBM() {
  delete[] vbias;
  delete[] hbias;

  for (int row = 0; row < n_hidden; ++row) {
    delete[] W[row];
  }
  delete[] W;
}
/**
 * Performs one CD-k parameter update from a single training sample.
 *
 * Positive phase: hidden units are sampled from the input. Negative
 * phase: k Gibbs steps (h -> v -> h) are run starting from that hidden
 * sample. W, hbias and vbias are then moved by lr * (positive statistic -
 * negative statistic) / N.
 *
 * @param input binary visible vector with n_visible entries
 * @param lr    learning rate
 * @param k     number of Gibbs steps; values below 1 leave the model
 *              untouched, because without at least one step there are no
 *              negative-phase statistics to compare against
 */
void RBM::contrastive_divergence(int *input, double lr, int k) {
  // Without this guard the negative-phase buffers would be read while
  // still uninitialised when the Gibbs loop never runs.
  if (k < 1) return;

  double *ph_mean = new double[n_hidden];
  int *ph_sample = new int[n_hidden];
  double *nv_means = new double[n_visible];
  int *nv_samples = new int[n_visible];
  double *nh_means = new double[n_hidden];
  int *nh_samples = new int[n_hidden];

  /* CD-k */
  sample_h_given_v(input, ph_mean, ph_sample);

  // The first Gibbs step starts from the positive-phase hidden sample;
  // every later step continues from the previous step's hidden sample.
  gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
  for(int step=1; step<k; step++) {
    gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
  }

  // Gradient step: data-driven statistics minus model-driven statistics.
  for(int i=0; i<n_hidden; i++) {
    for(int j=0; j<n_visible; j++) {
      W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
    }
    hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
  }
  for(int i=0; i<n_visible; i++) {
    vbias[i] += lr * (input[i] - nv_samples[i]) / N;
  }

  delete[] ph_mean;
  delete[] ph_sample;
  delete[] nv_means;
  delete[] nv_samples;
  delete[] nh_means;
  delete[] nh_samples;
}
/**
 * For every hidden unit, computes its activation probability given the
 * visible sample and draws a 0/1 state from it.
 *
 * @param v0_sample visible states (n_visible entries)
 * @param mean      out: activation probabilities (n_hidden entries)
 * @param sample    out: sampled binary states (n_hidden entries)
 */
void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample) {
  for (int h = 0; h < n_hidden; ++h) {
    mean[h] = propup(v0_sample, W[h], hbias[h]);
    // A single Bernoulli trial with the probability just stored.
    sample[h] = binomial(1, mean[h]);
  }
}
/**
 * For every visible unit, computes its activation probability given the
 * hidden sample and draws a 0/1 state from it.
 *
 * @param h0_sample hidden states (n_hidden entries)
 * @param mean      out: activation probabilities (n_visible entries)
 * @param sample    out: sampled binary states (n_visible entries)
 */
void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample) {
  for (int v = 0; v < n_visible; ++v) {
    mean[v] = propdown(h0_sample, v, vbias[v]);
    // A single Bernoulli trial with the probability just stored.
    sample[v] = binomial(1, mean[v]);
  }
}
/**
 * Activation probability of one hidden unit.
 *
 * @param v visible states (n_visible entries)
 * @param w that hidden unit's weight row (n_visible entries)
 * @param b that hidden unit's bias
 * @return sigmoid(w . v + b)
 */
double RBM::propup(int *v, double *w, double b) {
  double weighted_sum = 0.0;
  for (int idx = 0; idx < n_visible; ++idx) {
    weighted_sum += w[idx] * v[idx];
  }
  return sigmoid(weighted_sum + b);
}
/**
 * Activation probability of one visible unit.
 *
 * @param h hidden states (n_hidden entries)
 * @param i index of the visible unit, i.e. the column of W to use
 * @param b that visible unit's bias
 * @return sigmoid(sum over hidden units of W[hidden][i] * h[hidden], plus b)
 */
double RBM::propdown(int *h, int i, double b) {
  double weighted_sum = 0.0;
  for (int hid = 0; hid < n_hidden; ++hid) {
    weighted_sum += W[hid][i] * h[hid];
  }
  return sigmoid(weighted_sum + b);
}
/**
 * One Gibbs sampling step: hidden -> visible -> hidden.
 *
 * @param h0_sample  starting hidden states (n_hidden entries)
 * @param nv_means   out: visible activation probabilities
 * @param nv_samples out: sampled visible states
 * @param nh_means   out: hidden activation probabilities given nv_samples
 * @param nh_samples out: sampled hidden states
 */
void RBM::gibbs_hvh(int *h0_sample,
                    double *nv_means, int *nv_samples,
                    double *nh_means, int *nh_samples)
{
  // Down pass: sample the visible layer from the given hidden states.
  sample_v_given_h(h0_sample, nv_means, nv_samples);
  // Up pass: resample the hidden layer from those visible states.
  sample_h_given_v(nv_samples, nh_means, nh_samples);
}
void RBM::reconstruct(int *v, double *reconstructed_v) {
double *h = new double[n_hidden];
double pre_sigmoid_activation;
for(int i=0; i<n_hidden; i++) {
h[i] = propup(v, W[i], hbias[i]);
}
for(int i=0; i<n_visible; i++) {
pre_sigmoid_activation = 0.0;
for(int j=0; j<n_hidden; j++) {
pre_sigmoid_activation += W[j][i] * h[j];
}
pre_sigmoid_activation += vbias[i];
reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
}
delete[] h;
}
/**
 * Demo: trains an RBM with 6 visible and 3 hidden units on six
 * hard-coded binary patterns using CD-1, then prints the reconstruction
 * of two test patterns, one per line with five decimals per value.
 *
 * The generator is seeded with srand(0) before anything else runs.
 */
void test_rbm() {
  srand(0);

  const double lr = 0.1;
  const int epochs = 1000;
  const int cd_steps = 1;

  const int n_train = 6;
  const int n_test = 2;
  const int visible_units = 6;
  const int hidden_units = 3;

  // training data
  int train_X[6][6] = {
    {1, 1, 1, 0, 0, 0},
    {1, 0, 1, 0, 0, 0},
    {1, 1, 1, 0, 0, 0},
    {0, 0, 1, 1, 1, 0},
    {0, 0, 1, 0, 1, 0},
    {0, 0, 1, 1, 1, 0}
  };

  // construct RBM; NULL lets the constructor allocate weights and biases
  RBM rbm(n_train, visible_units, hidden_units, NULL, NULL, NULL);

  // train: one CD update per sample, repeated every epoch
  for (int epoch = 0; epoch < epochs; ++epoch) {
    for (int s = 0; s < n_train; ++s) {
      rbm.contrastive_divergence(train_X[s], lr, cd_steps);
    }
  }

  // test data
  int test_X[2][6] = {
    {1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 0}
  };
  double reconstructed_X[2][6];

  // test: reconstruct each pattern and print it on its own line
  for (int s = 0; s < n_test; ++s) {
    double *row = reconstructed_X[s];
    rbm.reconstruct(test_X[s], row);
    for (int u = 0; u < visible_units; ++u) {
      printf("%.5f ", row[u]);
    }
    cout << endl;
  }
}
/** Program entry point: runs the RBM demo and exits with status 0. */
int main()
{
  test_rbm();
  return 0;
}

// NOTE: the three lines below appear to repeat the implementation listing
// shown earlier in this document (uniform, binomial, sigmoid, the RBM member
// functions, test_rbm and main) with its line breaks removed. In this form
// the #include directives are fused onto one line and each `//` comment runs
// into the code that follows it, so it is not compilable as written; use the
// formatted listing above instead.
#include <iostream>#include <math.h>#include "RBM.h"using namespace std;double uniform(double min, double max) {  return rand() / (RAND_MAX + 1.0) * (max - min) + min;}int binomial(int n, double p) {  if(p < 0 || p > 1) return 0;    int c = 0;  double r;    for(int i=0; i<n; i++) {    r = rand() / (RAND_MAX + 1.0);    if (r < p) c++;  }  return c;}double sigmoid(double x) {  return 1.0 / (1.0 + exp(-x));}RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) {  N = size;  n_visible = n_v;  n_hidden = n_h;  if(w == NULL) {    W = new double*[n_hidden];    for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];    double a = 1.0 / n_visible;    for(int i=0; i<n_hidden; i++) {      for(int j=0; j<n_visible; j++) {        W[i][j] = uniform(-a, a);      }    }  } else {    W = w;  }  if(hb == NULL) {    hbias = new double[n_hidden];    for(int i=0; i<n_hidden; i++) hbias[i] = 0;  } else {    hbias = hb;  }  if(vb == NULL) {    vbias = new double[n_visible];    for(int i=0; i<n_visible; i++) vbias[i] = 0;  } else {    vbias = vb;  }}RBM::~RBM() {  for(int i=0; i<n_hidden; i++) delete[] W[i];  delete[] W;  delete[] hbias;  delete[] vbias;}void RBM::contrastive_divergence(int *input, double lr, int k) {  double *ph_mean = new double[n_hidden];  int *ph_sample = new int[n_hidden];  double *nv_means = new double[n_visible];  int *nv_samples = new int[n_visible];  double *nh_means = new double[n_hidden];  int *nh_samples = new int[n_hidden];  /* CD-k */  sample_h_given_v(input, ph_mean, ph_sample);  for(int step=0; step<k; step++) {    if(step == 0) {      gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);    } else {      gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);    }  }  for(int i=0; i<n_hidden; i++) {    for(int j=0; j<n_visible; j++) {      W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;    }    hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;  }  for(int i=0; i<n_visible; i++) {  
  vbias[i] += lr * (input[i] - nv_samples[i]) / N;  }  delete[] ph_mean;  delete[] ph_sample;  delete[] nv_means;  delete[] nv_samples;  delete[] nh_means;  delete[] nh_samples;}void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample) {  for(int i=0; i<n_hidden; i++) {    mean[i] = propup(v0_sample, W[i], hbias[i]);    sample[i] = binomial(1, mean[i]);  }}void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample) {  for(int i=0; i<n_visible; i++) {    mean[i] = propdown(h0_sample, i, vbias[i]);    sample[i] = binomial(1, mean[i]);  }}double RBM::propup(int *v, double *w, double b) {  double pre_sigmoid_activation = 0.0;  for(int j=0; j<n_visible; j++) {    pre_sigmoid_activation += w[j] * v[j];  }  pre_sigmoid_activation += b;  return sigmoid(pre_sigmoid_activation);}double RBM::propdown(int *h, int i, double b) {  double pre_sigmoid_activation = 0.0;  for(int j=0; j<n_hidden; j++) {    pre_sigmoid_activation += W[j][i] * h[j];  }  pre_sigmoid_activation += b;  return sigmoid(pre_sigmoid_activation);}void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples, \                    double *nh_means, int *nh_samples) {  sample_v_given_h(h0_sample, nv_means, nv_samples);  sample_h_given_v(nv_samples, nh_means, nh_samples);}void RBM::reconstruct(int *v, double *reconstructed_v) {  double *h = new double[n_hidden];  double pre_sigmoid_activation;  for(int i=0; i<n_hidden; i++) {    h[i] = propup(v, W[i], hbias[i]);  }  for(int i=0; i<n_visible; i++) {    pre_sigmoid_activation = 0.0;    for(int j=0; j<n_hidden; j++) {      pre_sigmoid_activation += W[j][i] * h[j];    }    pre_sigmoid_activation += vbias[i];    reconstructed_v[i] = sigmoid(pre_sigmoid_activation);  }  delete[] h;}void test_rbm() {  srand(0);  double learning_rate = 0.1;  int training_epochs = 1000;  int k = 1;    int train_N = 6;  int test_N = 2;  int n_visible = 6;  int n_hidden = 3;  // training data  int train_X[6][6] = {    {1, 1, 1, 0, 0, 0},    {1, 0, 1, 0, 0, 0},  
  {1, 1, 1, 0, 0, 0},    {0, 0, 1, 1, 1, 0},    {0, 0, 1, 0, 1, 0},    {0, 0, 1, 1, 1, 0}  };  // construct RBM  RBM rbm(train_N, n_visible, n_hidden, NULL, NULL, NULL);  // train  for(int epoch=0; epoch<training_epochs; epoch++) {    for(int i=0; i<train_N; i++) {      rbm.contrastive_divergence(train_X[i], learning_rate, k);    }  }  // test data  int test_X[2][6] = {    {1, 1, 0, 0, 0, 0},    {0, 0, 0, 1, 1, 0}  };  double reconstructed_X[2][6];  // test  for(int i=0; i<test_N; i++) {    rbm.reconstruct(test_X[i], reconstructed_X[i]);    for(int j=0; j<n_visible; j++) {      printf("%.5f ", reconstructed_X[i][j]);    }    cout << endl;  }}int main() {  test_rbm();  return 0;}

干脆把运行结果也贴出来，给那些终极极品思考者提供一些方便：

0.98472 0.67248 0.99120 0.01000 0.01311 0.01020
0.01021 0.00720 0.99525 0.65553 0.98403 0.00497

转载请注明出处：http://blog.csdn.net/cuoqu/article/details/8887882