DPM(Deformable Parts Model) 源码分析-训练（三）

来源：互联网发布：网络ping大包丢包编辑：程序博客网时间：2024/06/07 00:13

DPM(Deformable Parts Model)原理

首先调用格式：

example:
pascal('person', 2); % train and evaluate a 2 component person model

pascal_train.m

function model = pascal_train(cls, n)
% model = pascal_train(cls, n)
% Train a model using the PASCAL dataset.
%
%   cls  object class name (used to build the cache file names)
%   n    number of components (one root filter is trained per component)
%
% Each training stage caches its result under cachedir; a stage is skipped
% when its cache file can be loaded.

globals;

% Load positive and negative examples.
% Annotator's note (not verifiable from this file): pos.im / neg.im hold the
% image paths, pos.x1..pos.y2 hold the bounding box, negatives have no box.
[pos, neg] = pascal_data(cls);

% Split the positives into n groups, one per component.
% Annotator's note: the split is by aspect ratio, which fixes the component
% label in phase 1; from phase 2 on it is treated as a latent variable.
spos = split(pos, n);

% The original code indexed neg(1:200) unconditionally, which raises an
% index error when fewer than 200 negatives are available.
nneg = min(200, length(neg));

% ---- phase 1: train root filters using warped positives & random negatives
try
  load([cachedir cls '_random']);
catch
  for i=1:n
    % initialize the root filter of component i
    models{i} = initmodel(spos{i});
    models{i} = train(cls, models{i}, spos{i}, neg, 1, 1, 1, 1, 2^28);
  end
  save([cachedir cls '_random'], 'models');
end

% ---- phase 2: merge models and train using latent detections & hard negatives
try
  load([cachedir cls '_hard']);
catch
  model = mergemodels(models);
  model = train(cls, model, pos, neg(1:nneg), 0, 0, 2, 2, 2^28, true, 0.7);
  save([cachedir cls '_hard'], 'model');
end

% ---- phase 3: add parts and update models using latent detections & hard
% negatives
try
  load([cachedir cls '_parts']);
catch
  for i=1:n
    model = addparts(model, i, 6);
  end
  % use more data mining iterations in the beginning
  model = train(cls, model, pos, neg(1:nneg), 0, 0, 1, 4, 2^30, true, 0.7);
  model = train(cls, model, pos, neg(1:nneg), 0, 0, 6, 2, 2^30, true, 0.7, true);
  save([cachedir cls '_parts'], 'model');
end

% update models using full set of negatives.
try
  load([cachedir cls '_mine']);
catch
  model = train(cls, model, pos, neg, 0, 0, 1, 3, 2^30, true, 0.7, true, ...
                0.003*model.numcomponents, 2);
  save([cachedir cls '_mine'], 'model');
end

% train bounding box prediction
try
  load([cachedir cls '_final']);
catch
  % NOTE(review): the annotator observes that the paper describes a
  % least-squares fit while trainbox appears to divide directly without
  % guarding against a singular matrix -- confirm against trainbox.m.
  model = trainbox(cls, model, pos, 0.7);
  save([cachedir cls '_final'], 'model');
end

initmodel.m

function model = initmodel(pos, sbin, size)
% model = initmodel(pos, sbin, size)
% Initialize a one-component model structure (offset + root filter).
%
%   pos   struct array of positive examples with fields x1, y1, x2, y2
%   sbin  (optional) HOG cell size in pixels; 8 when omitted or empty
%   size  (optional) root filter size [height width] in HOG cells; when
%         omitted or empty it is computed from statistics of the positive
%         examples
%
% Fields set here:
%   model.sbin                     HOG cell size
%   model.interval                 10
%   model.numblocks                2 (offset block and root filter block)
%   model.numcomponents            1
%   model.blocksizes               (1) = 1, (2) = height * ceil(width/2) * 31
%   model.regmult                  [0 1]
%   model.learnmult                [20 1]
%   model.lowerbounds              -100 for every parameter
%   model.maxsize, model.minsize   root filter size
%   model.rootfilters{1}           .size, .w (zeros), .blocklabel = 2
%   model.offsets{1}               .w = 0, .blocklabel = 1
%   model.components{1}            .rootindex = 1, .offsetindex = 1,
%                                  .parts = {}, .numblocks = 2,
%                                  .dim = 2 + blocksizes(1) + blocksizes(2)
%   model.partfilters, model.defs  empty cell arrays
%
% Annotator's notes (not verifiable from this file): block labels number the
% offset, root filter, part filter and deformation blocks in one shared
% sequence; partfilters{i} (.w, .blocklabel) and defs{i} (.anchor, .w,
% .blocklabel) are presumably filled in by later training phases.

% pick mode of aspect ratios
h = [pos(:).y2]' - [pos(:).y1]' + 1;
w = [pos(:).x2]' - [pos(:).x1]' + 1;
xx = -2:.02:2;
% smoothing kernel: 1 at the centre, falling to exp(-25) at both ends
filter = exp(-[-100:100].^2/400);
aspects = hist(log(h./w), xx);
aspects = convn(aspects, filter, 'same');
[peak, I] = max(aspects);
% the most common log(h/w) after smoothing gives the typical aspect ratio
aspect = exp(xx(I));

% pick 20 percentile area
areas = sort(h.*w);
% clamp the index to 1: with fewer than 5 examples floor(...) is 0, which
% is not a valid index
area = areas(max(1, floor(length(areas) * 0.2)));
% keep the area within [3000, 5000] pixels
area = max(min(area, 5000), 3000);

% pick dimensions
w = sqrt(area/aspect);
h = w*aspect;

% size of HOG features
% (the original tested nargin < 4, which is always true for a function with
% three parameters, so a caller-supplied sbin was silently ignored)
if nargin < 2 || isempty(sbin)
  model.sbin = 8;
else
  model.sbin = sbin;
end

% size of root filter
% (the original tested nargin < 5, which likewise ignored a supplied size)
if nargin < 3 || isempty(size)
  model.rootfilters{1}.size = [round(h/model.sbin) round(w/model.sbin)];
else
  model.rootfilters{1}.size = size;
end

% set up offset
model.offsets{1}.w = 0;
model.offsets{1}.blocklabel = 1;
model.blocksizes(1) = 1;
model.regmult(1) = 0;
model.learnmult(1) = 20;
model.lowerbounds{1} = -100;

% set up root filter
model.rootfilters{1}.w = zeros([model.rootfilters{1}.size 31]);
height = model.rootfilters{1}.size(1);
% root filter is symmetric, so only half of the columns (rounded up) are
% counted as parameters
% (the original assigned this to "fwidth" but used "width" below)
width = ceil(model.rootfilters{1}.size(2)/2);
model.rootfilters{1}.blocklabel = 2;
model.blocksizes(2) = width * height * 31;
model.regmult(2) = 1;
model.learnmult(2) = 1;
model.lowerbounds{2} = -100*ones(model.blocksizes(2),1);

% set up one component model
model.components{1}.rootindex = 1;
model.components{1}.offsetindex = 1;
model.components{1}.parts = {};
model.components{1}.dim = 2 + model.blocksizes(1) + model.blocksizes(2);
model.components{1}.numblocks = 2;

% initialize the rest of the model structure
model.interval = 10;
model.numcomponents = 1;
model.numblocks = 2;
model.partfilters = {};
model.defs = {};
model.maxsize = model.rootfilters{1}.size;
model.minsize = model.rootfilters{1}.size;

learn.cc

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/time.h>
#include <errno.h>

/*
 * Optimize LSVM objective function via gradient descent.
 *
 * We use an adaptive cache mechanism.  After a negative example
 * scores beyond the margin multiple times it is removed from the
 * training set for a fixed number of iterations.
 */

// Data File Format
// EXAMPLE*
//
// EXAMPLE:
//  long label          ints
//  blocks              int
//  dim                 int
//  DATA{blocks}
//
// DATA:
//  block label         float
//  block data          floats
//
// Internal Binary Format
//  len           int (byte length of EXAMPLE)
//  EXAMPLE       <see above>
//  unique flag   byte

// number of iterations
#define ITER 5000000

// small cache parameters
#define INCACHE 3
#define WAIT 10

// error checking: print the failed expression with strerror(errno) and exit
#define check(e) \
((e) ? (void)0 : (printf("%s:%u error: %s\n%s\n", __FILE__, __LINE__, #e, strerror(errno)), exit(1)))

// number of non-zero blocks in example ex
#define NUM_NONZERO(ex) (((int *)(ex))[labelsize+1])

// float pointer to data segment of example ex
// (skips the length int, the labelsize label ints, blocks and dim)
#define EX_DATA(ex) ((float *)((ex) + sizeof(int)*(labelsize+3)))

// class label (+1 or -1) for the example
#define LABEL(ex) (((int *)(ex))[1])

// block label (converted to 0-based index)
#define BLOCK_IDX(data) (((int)(data)[0])-1)

// number of ints in the extended label; read from the header file
int labelsize;
// sum of all block sizes; computed in main
int dim;

// comparison function for sorting examples
// a and b point to char* example buffers (this is the qsort callback).
int comp(const void *a, const void *b) {
  // sort by extended label first, and whole example second...
  // memcmp compares byte by byte over the labelsize label ints.
  // Annotator's note: the label is [label id level x y], so examples are
  // ordered by class, then id, level, x, y.
  int c = memcmp(*((char **)a) + sizeof(int),
                 *((char **)b) + sizeof(int),
                 labelsize*sizeof(int));
  if (c)
    return c;

  // labels are the same
  // Annotator's note: this presumably happens from phase 2 on, where
  // examples generated from the same image share an id.
  int alen = **((int **)a);
  int blen = **((int **)b);
  if (alen == blen)
    // same byte length: compare the whole example
    return memcmp(*((char **)a) + sizeof(int),
                  *((char **)b) + sizeof(int),
                  alen);
  // otherwise order by byte length
  return ((alen < blen) ? -1 : 1);
}

// a collapsed example is a sequence of examples
struct collapsed {
  char **seq;
  int num;
};

// set of collapsed examples
struct data {
  collapsed *x;
  int num;
  int numblocks;
  int *blocksizes;
  float *regmult;
  float *learnmult;
};

// seed the random number generator with the current time
void seed_time() {
  struct timeval tp;
  check(gettimeofday(&tp, NULL) == 0);
  srand48((long)tp.tv_usec);
}

static inline double min(double x, double y) { return (x <= y ? x : y); }
static inline double max(double x, double y) { return (x <= y ? y : x); }

// gradient descent
//   C, J  scale the update step (J is applied to positive examples only)
//   X     collapsed training set plus per-block sizes and multipliers
//   w     per-block weight vectors, updated in place
//   lb    per-block lower bounds applied to w after every pass
// Annotator's note: this follows the steps listed after equation 17 of the
// paper; typical arguments are C = 0.0002 and J = 1.
void gd(double C, double J, data X, double **w, double **lb) {
  int num = X.num;  // number of collapsed examples (groups)

  // state for random permutations
  int *perm = (int *)malloc(sizeof(int)*X.num);
  check(perm != NULL);

  // state for small cache
  int *W = (int *)malloc(sizeof(int)*num);
  check(W != NULL);
  for (int j = 0; j < num; j++)
    W[j] = 0;

  int t = 0;
  while (t < ITER) {
    // pick random permutation
    // Examples are visited in a freshly shuffled order, so every example
    // in the cache is visited once per pass.
    for (int i = 0; i < num; i++)
      perm[i] = i;
    for (int swapi = 0; swapi < num; swapi++) {
      // drand48 is uniform in [0, 1)
      int swapj = (int)(drand48()*(num-swapi)) + swapi;
      int tmp = perm[swapi];
      perm[swapi] = perm[swapj];
      perm[swapj] = tmp;
    }

    // count number of examples in the small cache
    int cnum = 0;
    for (int i = 0; i < num; i++) {
      if (W[i] <= INCACHE)
        cnum++;
    }

    for (int swapi = 0; swapi < num; swapi++) {
      // select example
      int i = perm[swapi];
      collapsed x = X.x[i];

      // skip if example is not in small cache
      // W[i] counts up each time a negative scores beyond the margin and
      // is reset to 0 on a margin violation.  When it reaches INCACHE it
      // is set to WAIT, and the example is skipped (counting down) until
      // W[i] is back at INCACHE.
      if (W[i] > INCACHE) {
        W[i]--;
        continue;
      }

      // learning rate (the +1000 offset keeps the first steps small)
      double T = t + 1000.0;
      double rateX = cnum * C / T;
      double rateR = 1.0 / T;

      if (t % 10000 == 0) {
        printf(".");
        fflush(stdout);
      }
      t++;

      // compute max over latent placements
      // Pick the member of the group with the highest score under w.
      int M = -1;
      double V = 0;
      for (int m = 0; m < x.num; m++) {
        double val = 0;
        char *ptr = x.seq[m];
        float *data = EX_DATA(ptr);
        int blocks = NUM_NONZERO(ptr);
        // each block is stored as a block label followed by its data
        for (int j = 0; j < blocks; j++) {
          int b = BLOCK_IDX(data);
          data++;
          for (int k = 0; k < X.blocksizes[b]; k++)
            val += w[b][k] * data[k];
          data += X.blocksizes[b];
        }
        if (M < 0 || val > V) {
          M = m;
          V = val;
        }
      }

      // update model
      // Regularization step: shrink every weight, whether or not the
      // example violates the margin.
      for (int j = 0; j < X.numblocks; j++) {
        double mult = rateR * X.regmult[j] * X.learnmult[j];
        for (int k = 0; k < X.blocksizes[j]; k++) {
          w[j][k] -= mult * w[j][k];
        }
      }
      char *ptr = x.seq[M];
      int label = LABEL(ptr);
      if (label * V < 1.0) {
        // margin violated: step along label * features and put the
        // example back in the cache
        W[i] = 0;
        float *data = EX_DATA(ptr);
        int blocks = NUM_NONZERO(ptr);
        for (int j = 0; j < blocks; j++) {
          int b = BLOCK_IDX(data);
          double mult = (label > 0 ? J : -1) * rateX * X.learnmult[b];
          data++;
          for (int k = 0; k < X.blocksizes[b]; k++)
            w[b][k] += mult * data[k];
          data += X.blocksizes[b];
        }
      } else if (label == -1) {
        if (W[i] == INCACHE)
          W[i] = WAIT;
        else
          W[i]++;
      }
    }

    // apply lowerbounds
    for (int j = 0; j < X.numblocks; j++) {
      for (int k = 0; k < X.blocksizes[j]; k++) {
        w[j][k] = max(w[j][k], lb[j][k]);
      }
    }
  }

  free(perm);
  free(W);
}

// score examples
// Returns a malloc'ed array holding w . features for each of the num
// examples; the caller frees it.
double *score(data X, char **examples, int num, double **w) {
  double *s = (double *)malloc(sizeof(double)*num);
  check(s != NULL);
  for (int i = 0; i < num; i++) {
    s[i] = 0.0;
    float *data = EX_DATA(examples[i]);
    int blocks = NUM_NONZERO(examples[i]);
    for (int j = 0; j < blocks; j++) {
      int b = BLOCK_IDX(data);
      data++;
      for (int k = 0; k < X.blocksizes[b]; k++)
        s[i] += w[b][k] * data[k];
      data += X.blocksizes[b];
    }
  }
  return s;
}

// merge examples with identical labels
// examples must be sorted so that equal labels are adjacent and must hold
// at least one entry.  Each group's seq points into the examples array.
void collapse(data *X, char **examples, int num) {
  collapsed *x = (collapsed *)malloc(sizeof(collapsed)*num);
  check(x != NULL);
  int i = 0;
  x[0].seq = examples;
  x[0].num = 1;
  for (int j = 1; j < num; j++) {
    if (!memcmp(x[i].seq[0]+sizeof(int), examples[j]+sizeof(int),
                labelsize*sizeof(int))) {
      // same extended label: extend the current group
      x[i].num++;
    } else {
      i++;
      x[i].seq = &(examples[j]);
      x[i].num = 1;
    }
  }
  X->x = x;
  X->num = i+1;
}

// usage: learn C J hdrfile datfile modfile inffile lobfile
int main(int argc, char **argv) {
  seed_time();
  int count;
  data X;

  // command line arguments
  check(argc == 8);
  double C = atof(argv[1]);
  double J = atof(argv[2]);
  char *hdrfile = argv[3];
  char *datfile = argv[4];
  char *modfile = argv[5];
  char *inffile = argv[6];
  char *lobfile = argv[7];

  // read header file
  FILE *f = fopen(hdrfile, "rb");
  check(f != NULL);
  int header[3];
  count = fread(header, sizeof(int), 3, f);
  check(count == 3);
  int num = header[0];       // total number of examples
  labelsize = header[1];     // annotator's note: 5, [label id level x y]
  X.numblocks = header[2];
  // reject headers that would give empty or negative allocations
  check(num > 0);
  check(labelsize > 0);
  check(X.numblocks > 0);
  X.blocksizes = (int *)malloc(X.numblocks*sizeof(int));
  check(X.blocksizes != NULL);
  count = fread(X.blocksizes, sizeof(int), X.numblocks, f);
  check(count == X.numblocks);
  X.regmult = (float *)malloc(sizeof(float)*X.numblocks);
  check(X.regmult != NULL);
  count = fread(X.regmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  X.learnmult = (float *)malloc(sizeof(float)*X.numblocks);
  check(X.learnmult != NULL);
  count = fread(X.learnmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  fclose(f);
  printf("%d examples with label size %d and %d blocks\n",
         num, labelsize, X.numblocks);
  printf("block size, regularization multiplier, learning rate multiplier\n");
  dim = 0;
  for (int i = 0; i < X.numblocks; i++) {
    dim += X.blocksizes[i];
    printf("%d, %.2f, %.2f\n", X.blocksizes[i], X.regmult[i], X.learnmult[i]);
  }

  // read examples
  // In-memory layout of examples[i]:
  //   1 int                len, byte length of the example
  //   labelsize+2 ints     extended label, number of blocks, dim
  //   floats               per block: block label followed by block data
  //   1 byte               unique flag
  f = fopen(datfile, "rb");
  check(f != NULL);
  printf("Reading examples\n");
  char **examples = (char **)malloc(num*sizeof(char *));
  check(examples != NULL);
  // heap buffer instead of a variable-length array (not standard C++)
  int *buf = (int *)malloc(sizeof(int)*(labelsize+2));
  check(buf != NULL);
  for (int i = 0; i < num; i++) {
    // we use an extra byte in the end of each example to mark unique
    // we use an extra int at the start of each example to store the
    // example's byte length (excluding unique flag and this int)
    count = fread(buf, sizeof(int), labelsize+2, f);
    check(count == labelsize+2);
    // a negative float count would corrupt the length computed below
    check(buf[labelsize+1] >= 0);
    // byte length of an example's data segment
    int hdrbytes = sizeof(int)*(labelsize+2);
    int len = hdrbytes + sizeof(float)*buf[labelsize+1];
    // memory for data, an initial integer, and a final byte
    examples[i] = (char *)malloc(sizeof(int)+len+1);
    check(examples[i] != NULL);
    // set data segment's byte length
    ((int *)examples[i])[0] = len;
    // set the unique flag to zero
    examples[i][sizeof(int)+len] = 0;
    // copy label data into example
    for (int j = 0; j < labelsize+2; j++)
      ((int *)examples[i])[j+1] = buf[j];
    // read the rest of the data segment into the example
    count = fread(examples[i]+sizeof(int)*(labelsize+3), 1,
                  len-hdrbytes, f);
    check(count == len-hdrbytes);
  }
  free(buf);
  fclose(f);
  printf("done\n");

  // sort
  // Ascending order by comp: extended label first, then byte length or
  // full contents.
  printf("Sorting examples\n");
  char **sorted = (char **)malloc(num*sizeof(char *));
  check(sorted != NULL);
  memcpy(sorted, examples, num*sizeof(char *));
  qsort(sorted, num, sizeof(char *), comp);
  printf("done\n");

  // find unique examples
  // The first example of each run of identical examples gets its unique
  // flag set to 1 and is compacted to the front of sorted; duplicates keep
  // flag 0 and remain reachable through examples.
  int i = 0;
  int len = *((int *)sorted[0]);
  sorted[0][sizeof(int)+len] = 1;
  for (int j = 1; j < num; j++) {
    int alen = *((int *)sorted[i]);
    int blen = *((int *)sorted[j]);
    if (alen != blen ||
        memcmp(sorted[i] + sizeof(int), sorted[j] + sizeof(int), alen)) {
      i++;
      sorted[i] = sorted[j];
      sorted[i][sizeof(int)+blen] = 1;
    }
  }
  int num_unique = i+1;
  printf("%d unique examples\n", num_unique);

  // collapse examples
  // Group unique examples that share the same extended label; gd treats
  // each group as one example and picks its highest-scoring member.
  collapse(&X, sorted, num_unique);
  printf("%d collapsed examples\n", X.num);

  // initial model
  double **w = (double **)malloc(sizeof(double *)*X.numblocks);
  check(w != NULL);
  f = fopen(modfile, "rb");
  // the original read from f without checking that the file opened
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    w[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(w[i] != NULL);
    count = fread(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // lower bounds
  double **lb = (double **)malloc(sizeof(double *)*X.numblocks);
  check(lb != NULL);
  f = fopen(lobfile, "rb");
  // the original read from f without checking that the file opened
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    lb[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(lb[i] != NULL);
    count = fread(lb[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // train
  printf("Training");
  gd(C, J, X, w, lb);
  printf("done\n");

  // save model (overwrites modfile with the trained weights)
  printf("Saving model\n");
  f = fopen(modfile, "wb");
  check(f != NULL);
  for (int i = 0; i < X.numblocks; i++) {
    count = fwrite(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // score examples
  // Raw scores for all examples (duplicates included), not multiplied by
  // the label.
  printf("Scoring\n");
  double *s = score(X, examples, num, w);

  // Write info file
  printf("Writing info file\n");
  f = fopen(inffile, "w");
  check(f != NULL);
  for (int i = 0; i < num; i++) {
    int len = ((int *)examples[i])[0];
    // label, score, unique flag
    count = fprintf(f, "%d\t%f\t%d\n", ((int *)examples[i])[1], s[i],
                    (int)examples[i][sizeof(int)+len]);
    check(count > 0);
  }
  fclose(f);

  printf("Freeing memory\n");
  for (int i = 0; i < X.numblocks; i++) {
    free(w[i]);
    free(lb[i]);
  }
  free(w);
  free(lb);
  free(s);
  for (int i = 0; i < num; i++)
    free(examples[i]);
  free(examples);
  free(sorted);
  free(X.x);
  free(X.blocksizes);
  free(X.regmult);
  free(X.learnmult);

  return 0;
}