sparse Coding的C++代码——详细注释版

来源：互联网发布：排版打印软件编辑：程序博客网时间：2024/06/05 16:56

<span style="font-size:18px;">#include "stdafx.h"#include <stdlib.h>#include <Windows.h>#include <io.h>#include <cv.h>#include <ml.h>#include <highgui.h>#include <cxcore.h>#include "sparse_coding.h"#include "retr_data_base.h"using namespace cv;// 归一化的操作。void normalizing(float* f, int nFeature){for(int i=0; i<nFeature; i++)f[i] = f[i] / 255.0f;}void extrsct_image_feature(float* f, IplImage* src_image, int width, int height){// 调整image的大小为15*15。IplImage* resizedImage = cvCreateImage(cvSize(width,height), IPL_DEPTH_8U,1);cvResize(src_image,resizedImage);for(int i=0; i<height; i++)for(int j=0; j<width; j++)// CV_IMAGE_ELEM用于读取image中的像素值。f[i*width+j] = (float)CV_IMAGE_ELEM(resizedImage,uchar,i,j);// 将image归一化。normalizing(f,width*height);cvReleaseImage(&resizedImage);}void load_basis(float** basis, int* label, int width, int height, char*root_path, vector<string>* label_name){// 获取得到文件数，子文件名，每个子文件中image的名字。ClassInfo* info = TraverClassFolder(root_path);char src_full_path[300];int sampleCount = 0;// 对于每一个类别，也即是label。for(int i=0; i<info->classCount; i++){(*label_name).push_back(info->className[i].c_str());// 对于每个类别中的每一个image。info->fileCounts[i]表示每个类别中的文件数。for(int j=0; j<info->fileCounts[i]; j++){sprintf(src_full_path,"%s\\%s\\%s.jpg", root_path,info->className[i].c_str(),info->fileName[i][j].c_str());IplImage* src_image = cvLoadImage(src_full_path,0);extrsct_image_feature(basis[sampleCount],src_image,width,height);label[sampleCount] = i;sampleCount++;cvReleaseImage(&src_image);}}}float cal_recovery_error(float** trainSample, int* trainLabel, int nClass, int nSample,int nFeature,float* testSample, float* x, int class_label){float* temp = new float[nFeature];float error = 0;// 给一个临时变量。for(int i=0; i<nFeature; i++){temp[i] = testSample[i];}for(int i=0; i<nClass*nSample; i++){if(x[i] != 0 && trainLabel[i] == class_label){for(int j=0; j<nFeature; j++){// 相当于乘以一个x后，所有的行数相减。temp[j] -= x[i]*trainSample[i][j];}}}for(int j=0; j<nFeature; j++)error += temp[j]*temp[j];delete temp;return sqrt(error);}int sparse_coding_classifier(float** trainSample, int* trainLabel, int nClass, int nSample,int nFeature,float* testSample, float* x){float minError = 1e10;float minIndex = 0;float error;for(int i=0; i<nClass; i++){error = cal_recovery_error(trainSample,trainLabel,nClass,nSample,nFeature,testSample,x,i);if(error<minError){minError = error;minIndex = i;}}return minIndex;}int _tmain(int argc, _TCHAR* argv[]){// 一共含有 38个人。int nClass = 38;// 每个人含有 30 张照片。int nSample = 30;// 照片的大小调节为 15 x 15。int width = 15;int height = 15;// 定义basis，label。用于存放样本和标签。// basis是一个二维的数组，用于存放所有的图片。每一行对于一张图片。// 申请空间。首先对于每一行 new，也就是含有的样本数。float ** basis = new float*[nClass * nSample];// 其次，对于每一列 new， 也就是图片的大小。for(int i=0; i<nClass*nSample; i++)basis[i] = new float[width*height];// 定义数组，来存放相对应的每行image的label。 大小为总的样本数。int *label = new int[nClass*nSample];// 存放每个人所在的文件名。vector<string> label_name;// A,b; 存放矩阵相乘后的矩阵。float** A = new float*[nClass*nSample];for(int i=0; i<nClass*nSample; i++)A[i] = new float[nClass*nSample];float* b = new float[nClass*nSample];float* x = new float[nClass*nSample]; // 训练样本和测试样本存放的文件夹。char train_root_path[300] = "Train";char test_root_path[300] = "Test";// 得到basis,label,label_name。 &label_name表示地址。load_basis(basis,label,width,height,train_root_path,&label_name);// Testfloat lambda = 0.1;ClassInfo* info = TraverClassFolder(test_root_path);// 用于存放extrsct_image_feature后的image。float* testSample = new float[width*height];char src_full_path[300];int predict_label;// cal the matrix A = trainSample'*trainSample size(trainSample) = [nTrainSample, nFeature]for(int i=0; i<nClass*nSample; i++){for(int j=0; j<nClass*nSample; j++){A[i][j] = 0;for(int k=0; k<width*height; j++)A[i][j] += basis[i][k]*basis[j][k];}}// A[i][i] 对角线上的元素。for(int i=0; i<nClass*nSample; i++)A[i][i] += 1e-4;int posCount = 0;int totalCount = 0;// 对于每一个类别。for(int i=0; i<info->classCount; i++){// 对于对于每一个类别中的image的数量。for(int j=0; j<info->fileCounts[i]; j++){sprintf(src_full_path,"%s\\%s\\%s.jpg",test_root_path,info->className[i].c_str(),info->fileName[i][j].c_str());IplImage* src_image = cvLoadImage(src_full_path,0);extrsct_image_feature(testSample, src_image,width,height);// 对于test image 每个像与train image相乘后累加，for(int j=0; j<nClass*nSample; j++){b[j] = 0;for(int k=0; k<width*height; k++){b[j] += -basis[j][k] * testSample[k];}}sparse_coding(A,b,x,lambda,nClass*nSample);predict_label = sparse_coding_classifier(basis,label,nClass,nSample,width*height,testSample,x);printf("%s  %s",label_name[predict_label].c_str(), info->className[i].c_str());//判断预测是否正确。if(label_name[predict_label].compare(info->className[i].c_str())==0){posCount++;}totalCount++;printf("  %d/%d -- %f\n",posCount,totalCount,(float)posCount/totalCount);cvReleaseImage(&src_image);}}system("pause");return 0;}</span>

retr_data_base.h

<span style="font-size:18px;">#ifndef RETR_DATA_BASE#define RETR_DATA_BASE#include "windows.h"#include "stdio.h"#include <vector>#include <cstring>using namespace std;#define PATH_LENGTH 300struct ClassInfo{// 类别数。int classCount;// 记录每个类别的文件数。vector<int> fileCounts;// 子文件名。 也是类别名字。vector<string> className;// 每个子文件中的image的文件名。vector<vector<string>> fileName;};ClassInfo* TraverClassFolder(const char* path);#endif</span>

retr_data_base.cpp

<span style="font-size:18px;">#include "retr_data_base.h"ClassInfo* TraverClassFolder(const char* path){ClassInfo* classInfo = new ClassInfo();vector<string> pathVec;vector<string> fileVec;string rootPath = path;rootPath.append("\\");pathVec.push_back(rootPath);while(!pathVec.empty()){WIN32_FIND_DATAA findFileData;HANDLE hFind = INVALID_HANDLE_VALUE;string searchPath = pathVec.front();searchPath.append("*");hFind = FindFirstFileA(searchPath.c_str(),&findFileData);if(hFind == INVALID_HANDLE_VALUE){return classInfo;}fileVec.clear();int folderCount = 0;int fileCount = 0;while(FindNextFileA(hFind,&findFileData)){if(strcmp(findFileData.cFileName,"..")!=0 ){if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_HIDDEN || findFileData.dwFileAttributes == 38 || findFileData.dwFileAttributes == FILE_ATTRIBUTE_SYSTEM)continue;if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY){string dir = findFileData.cFileName;classInfo->className.push_back(dir);string rootTemp = rootPath;dir = rootPath.append(dir);rootPath = rootTemp;dir.append("\\");pathVec.push_back(dir);folderCount ++;}else if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_NORMAL || findFileData.dwFileAttributes == FILE_ATTRIBUTE_ARCHIVE){string file = findFileData.cFileName;char extension[200];_splitpath(file.c_str(),NULL,NULL,NULL,extension);string withoutExtension = file;withoutExtension = withoutExtension.substr(0,withoutExtension.length()-strlen(extension));fileVec.push_back(withoutExtension);fileCount ++;}}}if(folderCount!=0)classInfo->classCount = folderCount;if(fileCount != 0){classInfo->fileCounts.push_back(fileCount);classInfo->fileName.push_back(fileVec);}pathVec.erase(pathVec.begin());}return classInfo;}</span>

sparse_coding.h

<span style="font-size:18px;">#include <cv.h>//Reference://Efficient sparse coding algorithms (Honglak Lee 2007)//min 1/2 x'*A*x + b'*x + lambda*|x|void sparse_coding(float** A,float* b, float* x_out,float lambda, int n);</span>

sparse_coding.cpp

<span style="font-size:18px;">#include "sparse_coding.h"//The structure defined for efficiency. In most cases, x is very sparse, //the computation cost will significantly reduced if the calculation is only implemented on nonzero variablesstruct SolutionX {int n;// dimension of xfloat* x;int nPos;//the number of nonzero variablesint* index;//the index of the nonzero variablesint* sign;//the signs of x};void create_solutionx(SolutionX *s, int n){s->n = n;s->nPos = 0;s->index = new int[n];s->sign = new int[n];s->x = new float[n];memset(s->index,0,n*sizeof(int));memset(s->sign,0,n*sizeof(int));memset(s->x,0,n*sizeof(float));}void release_solutionx(SolutionX *s){delete[] s->index;delete[] s->sign;delete[] s->x;}inline void cal_gradient(float** A, float* b, SolutionX* x,int n, float* gradient){memset(gradient,0,n*sizeof(float));for (int i=0;i<n;i++){for (int j=0;j<x->nPos;j++)gradient[i] += A[i][x->index[j]]*x->x[x->index[j]];}for(int i=0;i<n;i++)gradient[i] += b[i];}void reset_solutionx(SolutionX* x){for (int i=0;i<x->nPos;i++){x->x[x->index[i]] = 0;x->sign[x->index[i]] = 0;}x->nPos = 0;}void copy_solutionx(SolutionX *src,SolutionX* dst){if(dst->n != src->n)printf("error in copy_solutionx -- dst->n != src->n");reset_solutionx(dst);dst->nPos = src->nPos;for (int i=0;i<src->nPos;i++){dst->index[i] = src->index[i];dst->sign[src->index[i]] = src->sign[src->index[i]];dst->x[src->index[i]] = src->x[src->index[i]];}}// Asub*x + bsub + lambda*sign(xsub) = 0// cvSolve is employed to solve A*x = b type problem, CHOLESKY method is used(A is a positive definite matrix)void solve_reduced_problem(float** A, float* b, SolutionX *x, float lambda){int nPos = x->nPos;CvMat* Asub = cvCreateMat(nPos,nPos,CV_32FC1);CvMat* bsub = cvCreateMat(nPos,1,CV_32FC1);CvMat* xsub = cvCreateMat(nPos,1,CV_32FC1);for (int i=0;i<nPos;i++){CV_MAT_ELEM(*bsub,float,i,0) = -(b[x->index[i]] + lambda*x->sign[x->index[i]]);for (int j=0;j<nPos;j++){CV_MAT_ELEM(*Asub,float,i,j) = A[x->index[i]][x->index[j]];}}int flag = 0;int count = 0;while (true){flag = cvSolve(Asub,bsub,xsub,CV_CHOLESKY);//cvSolve return 0, if A is Singularif(flag!=0)break;for (int i=0;i<nPos;i++)CV_MAT_ELEM(*Asub,float,i,i) = A[x->index[i]][x->index[i]] + pow(10.0f,count-6);count++;}for (int i=0;i<nPos;i++){x->x[x->index[i]] = CV_MAT_ELEM(*xsub,float,i,0);}cvReleaseMat(&Asub);cvReleaseMat(&bsub);cvReleaseMat(&xsub);}// fval = 1/2 x'*A*x + b'*x + lambda*|x|double cal_function_val(float** A,float* b, SolutionX* x_in,float lambda){int n = x_in->nPos;int* index = x_in->index;float* x = x_in->x;double fval = 0;for (int i=0;i<n;i++){for (int j=0;j<n;j++){fval += 0.5*A[index[i]][index[j]]*x[index[i]]*x[index[j]];}}for (int i=0;i<n;i++){fval += b[index[i]]*x[index[i]]+ lambda*abs(x[index[i]]);}return fval;}int mySign(float x){if(x>0) return 1;if(x<0) return -1;return 0;}//min 1/2 x'*A*x + b'*x + lambda*|x|void sparse_coding(float** A,float* b, float* x_out,float lambda, int n){float* gradient = new float[n];int maxIndex = 0;float maxValue = 0;float fval;float eps = 1e-9;SolutionX x;SolutionX x_new;SolutionX xs;SolutionX xmin;//Create solutions to store the current X and the temporary X valuescreate_solutionx(&x,n);create_solutionx(&x_new,n);create_solutionx(&xs,n);create_solutionx(&xmin,n);//Check the gradient for every dimension, and find the dimension with the largest gradientcal_gradient(A,b,&x,n,gradient);maxValue = -1e10;for(int i=0;i<n;i++){if(abs(x.sign[i])!=0) continue;if(abs(gradient[i])>maxValue){maxIndex = i;maxValue = abs(gradient[i]);}}while(true){//bring in a variable according the gradientcopy_solutionx(&x,&x_new);//If the absolute value of the largest gradient is less than lambda, then the function reaches convergence;//else set the dimension with the largest gradient activeif(gradient[maxIndex]>lambda+eps){x.x[maxIndex] = (lambda - gradient[maxIndex]) / A[maxIndex][maxIndex];//The initialization is important for the variable kick out sectionx.sign[maxIndex] = -1;x.index[x.nPos] = maxIndex;x.nPos++;x_new.sign[maxIndex] = -1;x_new.index[x_new.nPos] = maxIndex;x_new.nPos++;}else if(gradient[maxIndex]<-lambda-eps){x.x[maxIndex] = (-lambda - gradient[maxIndex]) / A[maxIndex][maxIndex];x.sign[maxIndex] = 1;x.index[x.nPos] = maxIndex;x.nPos++;x_new.sign[maxIndex] = 1;x_new.index[x_new.nPos] = maxIndex;x_new.nPos++;}else{break;}//Solve the reduced problem, and get the function value with the new value of Xsolve_reduced_problem(A,b,&x_new,lambda);fval = cal_function_val(A,b,&x_new,lambda);copy_solutionx(&x_new,&xmin);//kick out variables//if the old signs are not consistent with the new solution, try to kick the inconsistent solution out if the funVal reduces//Note: the new variable which was just brought in this round is an exception//Doing discrete line search to find the X with the minimum function valuefor (int i=0;i<(x_new.nPos-1)/*the new variable is an exception*/;i++){if (mySign(x_new.x[x_new.index[i]]) != x_new.sign[x_new.index[i]]){copy_solutionx(&x_new,&xs);float wxnew,wx;float xx_new, xx;xx_new = x_new.x[x_new.index[i]];xx = x.x[x.index[i]];wx = xx_new/(xx_new-xx+eps);wxnew = (1-wx);for(int j=0;j<xs.nPos;j++){xs.x[xs.index[j]] = wxnew*x_new.x[xs.index[j]] + wx*x.x[xs.index[j]];}xs.x[x_new.index[i]] = 0;float temp_fval = cal_function_val(A,b,&xs,lambda);if (temp_fval<fval){fval = temp_fval;copy_solutionx(&xs,&xmin);}}}//Keep the signs are consistent with the valueint posCount = 0;for (int i=0;i<n;i++){xmin.sign[i] = mySign(xmin.x[i]);if (xmin.sign[i]!=0){//printf("%d %f\n",i,xmin.x[i]);xmin.index[posCount] = i;posCount++;}}xmin.nPos = posCount;//Update X with the value with minimum function valuecopy_solutionx(&xmin,&x);//Calculate the gradient for the current Xcal_gradient(A,b,&x,n,gradient);maxValue = -1e10;for(int i=0;i<n;i++){if(x.x[i]!=0) continue;if(abs(gradient[i])>maxValue){maxIndex = i;maxValue = abs(gradient[i]);}}}//Output the final value of Xmemset(x_out,0,n*sizeof(float));for (int i=0;i<x.nPos;i++){x_out[x.index[i]] = x.x[x.index[i]];}//Releasedelete[] gradient;release_solutionx(&x);release_solutionx(&x_new);release_solutionx(&xs);release_solutionx(&xmin);}</span>

0 0