Latent Structural SVM

来源：互联网发布：linux c 串口中断级别编辑：程序博客网时间：2024/05/13 06:59

隐结构SVM（Latent Structural SVM）

令 x∈X、y∈Y={−1,1} 和 h∈H 分别表示我们问题中的输入、输出和隐变量。在我们的场景中，x、y、h 分别对应一幅图片、它的标签和一个窗口（即 bounding box）。沿用[1]中的设定，我们在联合的输入/输出/隐变量空间 O 中考虑一个线性的预测规则。我们定义一个输入/输出/隐变量映射 $\Phi: X \times Y \times H \to O \subset \mathbb{R}^d$，其形式如下：

$$\Phi(x, y, h) = \begin{cases} \vec{0}, & \text{if } y \leqslant 0 \\ \phi(x, h), & \text{if } y > 0 \end{cases}$$

这里 $\vec{0}$ 是 d 维的零向量，$\phi(x, h)$ 是从图像 x 中截取的窗口 h 的特征表示。例如，在我们的实验中它是 4096 维的 DeCAF 特征[2]。

预测

$$y^{*} = \arg\max_{y \in Y} \left( \max_{h \in H} \; w \cdot \Phi(x, y, h) \right).$$

目标函数由训练样本上的经验损失加上一个 2 范数正则项构成：

$$L(w; S; \lambda) = \frac{1}{2} \lambda \lVert w \rVert^{2} + \sum_{i=1}^{n} l(w, x_i, y_i)$$

max-margin function

$$l_m(w, x_i, y_i) = \max_{y, h} \left( w \cdot \Phi(x_i, y, h) + \Delta(y_i, y) \right) - \max_{h} \; w \cdot \Phi(x_i, y_i, h)$$

soft-max function

$$l_s(w, x_i, y_i) = \log \sum_{y, h} \exp\left( w \cdot \Phi(x_i, y, h) + \Delta(y_i, y) \right) - \log \sum_{h} \exp\left( w \cdot \Phi(x_i, y_i, h) \right)$$

% matlab code
% define objective function
%
% Example training data (one 3-by-nBoxes matrix per image; the number in
% parentheses is the number of columns):
% featTrain.x
% [0.6303411;-0.7763183;1](1)
% [0.8341424,0.4734712;0.5515491,0.8808093;1,1](2)
% [-0.4017371,-0.9285000,-0.2376855;-0.9157550,-0.3713324,-0.9713422;1,1,1](3)
% [0.8490803,0.8840563,0.9590592;0.5282639,0.4673803,0.2832058;1,1,1](3)
% [0.7834677;-0.6214325;1](1)
% [-0.5194708,-0.1672711,-0.6840945;-0.8544882,-0.9859110,-0.7293934;1,1,1](3)
% [-0.5074486,-0.6840267,-0.8343239;-0.8616820,-0.7294570,-0.5512746;1,1,1](3)
% [0.4799071,0.6633653,0.4913931;0.8773193,0.7482957,0.8709378;1,1,1](3)
% [0.8006670,0.9598586;0.5991096,0.2804843;1,1](2)
% [0.6355712,0.8618172;0.7720423,0.5072189;1,1](2)
%
% labelTrain
% -1    1   -1  1   1   -1  -1  1   1   1
% lambda = single(1e-5);
% beta = single(1)
funObj = @(w)SLSVMLossC2(w,featTrain,labelTrain,lambda,beta);

% How the MATLAB variables map to the variables in SLSVMLossC2.c
% matlab :  C
% w -> w
% featTrain -> tmp
% labelTrain -> y
% lambda ->lambda=
% beta -> beta=1
% nfields is the number of fields (features)
% /* get input arguments */
% number of fields of the struct array:
% nfields(=1) = mxGetNumberOfFields(prhs[1]);
% number of elements of the array:
% NStructElems(=10) = mxGetNumberOfElements(prhs[1]);
% nImgs=10 is the number of labels, i.e. the number of images
% nVars=3 is the number of weights (w)

% learn soft-max latent svm vector
W = minFunc(funObj,W0,options);

/* Hakan Bilen * August 5, 2015 * * Implementation of soft-max latent SVM in * "Weakly Supervised Object Detection with Posterior Regularization" in  * BMVC 2014.  * * Warning : posterior regularization for symmetry and mutual exclusion are * not implemented in this file! * 该代码一共有两个返回量 * f:惩罚 * g:梯度 */#include <math.h>#include <limits.h>#include <omp.h>#include "mex.h"/* This function may not exit gracefully on bad input! */float myLogSumExp(const float * vec, int dim) ;void computeProb(const float * in, int dim, float * out) ;void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){  /* Variable Declarations */  double *w, f, *g, *y;  int nVars, nImgs;  float lambda, beta;  float lpos2neg,lneg2pos;  /* Get Input Pointers */  w      = mxGetPr(prhs[0]);  y      = mxGetPr(prhs[2]);  lambda = mxGetScalar(prhs[3]);  beta   = mxGetScalar(prhs[4]);  float np = 0;  float nn = 0;  nImgs = (int)mxGetNumberOfElements(prhs[2]);  nVars = (int)mxGetNumberOfElements(prhs[0]);  int        ifield, nfields;  mwIndex    jstruct;  mwSize     NStructElems;  mwSize     ndim;  if(!mxIsStruct(prhs[1]))    mexErrMsgIdAndTxt( "MATLAB:SLSVMC2:inputNotStruct",            "Input must be a structure.");  /* get input arguments */  nfields = mxGetNumberOfFields(prhs[1]);  NStructElems = mxGetNumberOfElements(prhs[1]);  if (NStructElems!=nImgs)        mexErrMsgIdAndTxt( "MATLAB:SLSVMC2:WrongNumImgs",            "Wrong number of images!");  /*number of features (boxes) for each image */  int * nBoxes = mxCalloc(nImgs,sizeof(int));  int * cumNBoxes = mxCalloc(nImgs+1,sizeof(int));  cumNBoxes[0] = 0;  int i,b,d;  for(i=1;i<=nImgs;i++) {    const mxArray *tmp = mxGetFieldByNumber(prhs[1], i-1, 0);    if(tmp == NULL) {      mexPrintf("%s%d\t%s%d\n", "FIELD: ", ifield+1, "STRUCT INDEX :", 1);      mexErrMsgIdAndTxt( "MATLAB:data:fieldEmpty",              "Above field is empty!");    }    nBoxes[i-1] = (int)mxGetDimensions(tmp)[1];    //这里是第i个样本中窗口的个数    if 
(mxGetDimensions(tmp)[0]!=nVars)      mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wrongDim","Wrong feature dimensionality!");    cumNBoxes[i] = cumNBoxes[i-1] + nBoxes[i-1];    //这里cumNBoxes是前i个样本中获取总窗口的数量  }  /*  mexPrintf("X[0,end] %f\n",X[4096]);  mexPrintf("nImgs %d nVars %d\n",nImgs,nVars);  mexPrintf("X[2,0] %f\n",X[2*4097]);  mexPrintf("X[2,end] %f\n",X[3*4097-1]);   */  /* Allocated Memory for Function Variables *//*  plhs[0] = mxCreateDoubleScalar(0); */  plhs[1] = mxCreateDoubleMatrix(nVars,1,mxREAL);  g = mxGetPr(plhs[1]);  float * fs = mxCalloc(nImgs,sizeof(float));  float * gs = mxCalloc(nImgs*nVars,sizeof(float));  /* get number of positives and negatives */  for(i=0;i<nImgs;i++) {    if(y[i]>0) {      np++;      //np是正样本的个数(6)    }    else if(y[i]<0) {      nn++;      //nn是负样本的个数(4)    }  }  if (nn==0 || np==0)     mexErrMsgIdAndTxt( "MATLAB:data:wlabel",              "No pos or neg label!");  /* balanced loss for pos and neg */  lpos2neg = 0.5 * (np+nn) / np;  lneg2pos = 0.5 * (np+nn) / nn;  float ** convProbs = (float **)mxCalloc(nImgs,sizeof(float*));  float ** concProbs = (float **)mxCalloc(nImgs,sizeof(float*));  float ** scores    = (float **)mxCalloc(nImgs,sizeof(float*));  float ** augScores = (float **)mxCalloc(nImgs,sizeof(float*));  for(i=0;i<nImgs;i++) {    convProbs[i] = (float *)mxCalloc(2*(int)nBoxes[i],sizeof(float));    concProbs[i] = (float *)mxCalloc((int)nBoxes[i],sizeof(float));    scores[i]    = (float *)mxCalloc((int)nBoxes[i],sizeof(float));    augScores[i] = (float *)mxCalloc(2*(int)nBoxes[i],sizeof(float));  }#pragma omp parallel for schedule(dynamic) private(i)  for(i=0;i<nImgs;i++) {    if(y[i]==0)      continue;    const mxArray *tmp = mxGetFieldByNumber(prhs[1], i, 0);    if(tmp == NULL) {      mexPrintf("%s%d\t%s%d\n", "FIELD: ", ifield+1, "STRUCT INDEX :", 1);      mexErrMsgIdAndTxt( "MATLAB:data:fieldEmpty",              "Above field is empty!");    }    const float * x = (float *)mxGetData(tmp);    //应该是获取第i幅图像的窗口数量    int nB 
= (int)nBoxes[i];    if((int)mxGetDimensions(tmp)[1]!=nB)      mexErrMsgIdAndTxt("MATLAB:SLSVMC2:empty","mxGetDimensions(tmp)[1]!=nB");    if(nB==0)      mexErrMsgIdAndTxt("MATLAB:SLSVMC2:zeroval","zero num bb");    if (mxGetDimensions(tmp)[0]!=nVars)      mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wrongdim","wrong feat dim");    /*    mexPrintf("y[%d] = %f nB %d\n",i,y[i],nB); */    float concScore = 0;    int b, d;    for(b=0;b<nB;b++) {      for(d=0;d<nVars;d++) {        //对每个窗口进行得分相加        scores[i][b] += (float)w[d] * x[nVars*b+d];      }      //计算第i幅图像的第b个窗口的得分      scores[i][b] *= beta;    }    //凹的部分    /* concave part */    if(y[i]>0) {      concScore = myLogSumExp(scores[i],nB);      computeProb(scores[i],nB,concProbs[i]);    }    else if(y[i]<0) {      concScore = logf((float)nB);      for(b=0;b<nB;b++) {        concProbs[i][b] = 0;      }    }    else {      mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wlabel","wrong label");    }    //凸的部分    /* convex part */    if(y[i]>0) {      for(b=0;b<nB;b++) {        augScores[i][b] = scores[i][b];        augScores[i][b+nB] = beta * lpos2neg;      }    }    else if(y[i]<0) {      for(b=0;b<nB;b++) {        augScores[i][b] = scores[i][b] + beta * lneg2pos;        augScores[i][b+nB] = 0;      }    }    else {      mexErrMsgIdAndTxt("MATLAB:SLSVMC2:wlabel","wrong label");    }    computeProb(augScores[i],2*nB,convProbs[i]);    for(b=0;b<nB;b++) {      float difp = (convProbs[i][b]-concProbs[i][b]);      for(d=0;d<nVars;d++) {        gs[i*nVars + d] += x[nVars*b+d] * difp;      }    }    float convScore = myLogSumExp(augScores[i],2*nB);    fs[i] = convScore - concScore;  }  for(i=0;i<nImgs;i++) {    mxFree(augScores[i]);    mxFree(scores[i]);    mxFree(convProbs[i]);    mxFree(concProbs[i]);  }  mxFree(augScores);  mxFree(scores);  mxFree(convProbs);  mxFree(concProbs);  /* sum objval and grads over all images */  for(i=0;i<nImgs;i++) {    if(y[i]==0)      continue;    f += fs[i];    for(d=0;d<nVars;d++) {      g[d] += gs[i*nVars+d];  
  }  }  for(d=0;d<nVars;d++) {    g[d] /= (nn+np);  }  f /= beta * (nn+np);  /* add regularization */  for(d=0;d<nVars-1;d++) {    f += 0.5 * lambda * w[d] * w[d] ;  }  for(d=0;d<nVars-1;d++) {    g[d] += lambda * w[d] ;  }  mxFree(cumNBoxes);  mxFree(nBoxes);  mxFree(gs);  mxFree(fs);/*  mxFree(gconcProbs);  mxFree(gconvProbs);  mxFree(gscores);  mxFree(gaugScores); */  plhs[0] = mxCreateDoubleScalar(f);}/*---------------------------------------------------------------------------*/float myLogSumExp(const float * vec, int dim) {  float maxScore = -FLT_MAX ;  int i=0;  for (i=0;i<dim;i++) {    if(maxScore<vec[i])      maxScore = vec[i];  }  float sumScore = 0.f;  for (i=0;i<dim;i++) {    sumScore += expf(vec[i]-maxScore);  }  return logf(sumScore)+maxScore;}/*---------------------------------------------------------------------------*/void computeProb(const float * in, int dim, float * out) {  float maxScore = -FLT_MAX ;  int i=0;  for (i=0;i<dim;i++) {    if(maxScore<in[i])      maxScore = in[i];  }  //获取当前图片中的最大得分记为maxScore  float sumExp = 0.f;  for (i=0;i<dim;i++) {    sumExp += expf(in[i]-maxScore);  }  mxAssert(sumExp>0.f,"");  const float rSumExp = 1.f / sumExp;  for (i=0;i<dim;i++) {    out[i] = expf(in[i]-maxScore) * rSumExp;  }}

[1] C. John Yu and T. Joachims. Learning structural svms with latent variables. In ICML, pages 1169–1176, 2009.
[2] Chaitanya Desai, Deva Ramanan, and Charless C Fowlkes. Discriminative models for multi-class object layout. International journal of computer vision, 95(1):1–12, 2011.

0 0