opencv中PCA源码理解与训练、使用

来源:互联网 发布:中国联通网络研究院 编辑:程序博客网 时间:2024/06/07 11:43
/****************************************************************************************\
*                                          PCA                                           *
\****************************************************************************************/

// Default constructor: creates an empty PCA object (no mean/eigenvectors computed yet).
PCA::PCA() {}

// Convenience constructor: immediately runs the analysis, keeping at most
// maxComponents principal components (0 => keep all meaningful ones).
PCA::PCA(InputArray data, InputArray _mean, int flags, int maxComponents)
{
    operator()(data, _mean, flags, maxComponents);
}

// Convenience constructor: immediately runs the analysis, keeping as many
// components as needed to retain the given fraction of total variance (0..1].
PCA::PCA(InputArray data, InputArray _mean, int flags, double retainedVariance)
{
    computeVar(data, _mean, flags, retainedVariance);
}

// Performs PCA on `_data`. Samples are rows (CV_PCA_DATA_AS_ROW) or columns
// (CV_PCA_DATA_AS_COL) of the matrix. `__mean` may be empty, in which case the
// mean is computed from the data. Fills this->mean, this->eigenvalues and
// this->eigenvectors (eigenvectors stored one per row). Returns *this.
PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComponents)
{
    Mat data = _data.getMat(), _mean = __mean.getMat();
    int covar_flags = CV_COVAR_SCALE;
    int i, len, in_count;
    Size mean_sz;

    CV_Assert( data.channels() == 1 );
    if( flags & CV_PCA_DATA_AS_COL )
    {
        // each column is one sample
        len = data.rows;        // number of features per sample
        in_count = data.cols;   // number of samples
        covar_flags |= CV_COVAR_COLS;
        mean_sz = Size(1, len);
    }
    else
    {
        // each row is one sample
        len = data.cols;        // number of features per sample
        in_count = data.rows;   // number of samples
        covar_flags |= CV_COVAR_ROWS;
        mean_sz = Size(len, 1);
    }

    // at most min(#features, #samples) meaningful components exist
    int count = std::min(len, in_count), out_count = count;
    if( maxComponents > 0 )
        out_count = std::min(count, maxComponents);

    // "scrambled" way to compute PCA (when cols(A)>rows(A)):
    // B = A'A; B*x=b*x; C = AA'; C*y=c*y -> AA'*y=c*y -> A'A*(A'*y)=c*(A'*y) -> c = b, x=A'*y
    if( len <= in_count )
        covar_flags |= CV_COVAR_NORMAL;

    int ctype = std::max(CV_32F, data.depth());
    mean.create( mean_sz, ctype );

    Mat covar( count, count, ctype );

    if( _mean.data )
    {
        // caller supplied a precomputed mean: use it instead of recomputing
        CV_Assert( _mean.size() == mean_sz );
        _mean.convertTo(mean, ctype);
        covar_flags |= CV_COVAR_USE_AVG;
    }

    calcCovarMatrix( data, covar, mean, covar_flags, ctype );
    eigen( covar, eigenvalues, eigenvectors );

    // "scrambled" case: recover feature-space eigenvectors from the small
    // (#samples x #samples) eigenproblem
    if( !(covar_flags & CV_COVAR_NORMAL) )
    {
        // CV_PCA_DATA_AS_ROW: cols(A)>rows(A). x=A'*y -> x'=y'*A
        // CV_PCA_DATA_AS_COL: rows(A)>cols(A). x=A''*y -> x'=y'*A'
        Mat tmp_data, tmp_mean = repeat(mean, data.rows/mean.rows, data.cols/mean.cols);
        if( data.type() != ctype || tmp_mean.data == mean.data )
        {
            data.convertTo( tmp_data, ctype );
            subtract( tmp_data, tmp_mean, tmp_data );
        }
        else
        {
            // reuse tmp_mean's buffer for the centered data to avoid a copy
            subtract( data, tmp_mean, tmp_mean );
            tmp_data = tmp_mean;
        }

        Mat evects1(count, len, ctype);
        gemm( eigenvectors, tmp_data, 1, Mat(), 0, evects1,
            (flags & CV_PCA_DATA_AS_COL) ? CV_GEMM_B_T : 0);
        eigenvectors = evects1;

        // normalize eigenvectors
        for( i = 0; i < out_count; i++ )
        {
            Mat vec = eigenvectors.row(i);
            normalize(vec, vec);
        }
    }

    if( count > out_count )
    {
        // use clone() to physically copy the data and thus deallocate the original matrices
        eigenvalues = eigenvalues.rowRange(0,out_count).clone();
        eigenvectors = eigenvectors.rowRange(0,out_count).clone();
    }
    return *this;
}

// Returns the smallest number L of leading eigenvalues whose cumulative sum
// exceeds retainedVariance times the total sum, clamped to at least 2.
// T is the element type of `eigenvalues` (float or double).
template <typename T>
int computeCumulativeEnergy(const Mat& eigenvalues, double retainedVariance)
{
    CV_DbgAssert( eigenvalues.type() == DataType<T>::type );

    // g(i,0) = sum of the first i+1 eigenvalues (cumulative energy)
    Mat g(eigenvalues.size(), DataType<T>::type);

    for(int ig = 0; ig < g.rows; ig++)
    {
        g.at<T>(ig, 0) = 0;
        for(int im = 0; im <= ig; im++)
        {
            g.at<T>(ig,0) += eigenvalues.at<T>(im,0);
        }
    }

    int L;
    for(L = 0; L < eigenvalues.rows; L++)
    {
        // fraction of total variance captured by the first L+1 components
        double energy = g.at<T>(L, 0) / g.at<T>(g.rows - 1, 0);
        if(energy > retainedVariance)
            break;
    }

    // always keep at least 2 components
    L = std::max(2, L);

    return L;
}

// Like operator(), but instead of a fixed component count it keeps as many
// leading components as needed to retain `retainedVariance` (in (0,1]) of the
// total variance. Fills this->mean, this->eigenvalues, this->eigenvectors.
PCA& PCA::computeVar(InputArray _data, InputArray __mean, int flags, double retainedVariance)
{
    Mat data = _data.getMat(), _mean = __mean.getMat();
    int covar_flags = CV_COVAR_SCALE;
    int i, len, in_count;
    Size mean_sz;

    CV_Assert( data.channels() == 1 );
    if( flags & CV_PCA_DATA_AS_COL )
    {   // each column represents one sample
        len = data.rows;        // number of features
        in_count = data.cols;   // number of samples
        covar_flags |= CV_COVAR_COLS;   // column layout
        mean_sz = Size(1, len);
    }
    else
    {   // each row represents one sample
        len = data.cols;        // number of features
        in_count = data.rows;   // number of samples
        covar_flags |= CV_COVAR_ROWS;
        mean_sz = Size(len, 1);
    }

    CV_Assert( retainedVariance > 0 && retainedVariance <= 1 );

    int count = std::min(len, in_count);    // count = min(#samples, #features)

    // "scrambled" way to compute PCA (when cols(A)>rows(A)):
    // B = A'A; B*x=b*x; C = AA'; C*y=c*y -> AA'*y=c*y -> A'A*(A'*y)=c*(A'*y) -> c = b, x=A'*y
    if( len <= in_count )   // if #features <= #samples use the normal covariance
        covar_flags |= CV_COVAR_NORMAL;

    int ctype = std::max(CV_32F, data.depth());
    mean.create( mean_sz, ctype );

    Mat covar( count, count, ctype );

    if( _mean.data )
    {
        CV_Assert( _mean.size() == mean_sz );
        _mean.convertTo(mean, ctype);
    }

    // Compute the covariance matrix; returns covar and mean. The data is not
    // centered beforehand -- calcCovarMatrix handles the mean internally.
    calcCovarMatrix( data, covar, mean, covar_flags, ctype );
    // Compute eigenvalues and eigenvectors of the covariance matrix.
    eigen( covar, eigenvalues, eigenvectors );

    // dimensionality >> number of samples: recover feature-space eigenvectors
    if( !(covar_flags & CV_COVAR_NORMAL) )
    {
        // CV_PCA_DATA_AS_ROW: cols(A)>rows(A). x=A'*y -> x'=y'*A
        // CV_PCA_DATA_AS_COL: rows(A)>cols(A). x=A''*y -> x'=y'*A'
        Mat tmp_data, tmp_mean = repeat(mean, data.rows/mean.rows, data.cols/mean.cols);
        if( data.type() != ctype || tmp_mean.data == mean.data )
        {
            data.convertTo( tmp_data, ctype );
            // subtract the mean
            subtract( tmp_data, tmp_mean, tmp_data );
        }
        else
        {
            subtract( data, tmp_mean, tmp_mean );
            tmp_data = tmp_mean;
        }

        Mat evects1(count, len, ctype);
        gemm( eigenvectors, tmp_data, 1, Mat(), 0, evects1,
            (flags & CV_PCA_DATA_AS_COL) ? CV_GEMM_B_T : 0);
        eigenvectors = evects1;

        // normalize all eigenvectors
        for( i = 0; i < eigenvectors.rows; i++ )
        {
            Mat vec = eigenvectors.row(i);
            // normalize this eigenvector to unit length
            normalize(vec, vec);
        }
    }

    // compute the cumulative energy content for each eigenvector
    // i.e. how many leading eigenvalues make up the requested share
    // (e.g. 95%) of the total variance
    int L;
    if (ctype == CV_32F)
        L = computeCumulativeEnergy<float>(eigenvalues, retainedVariance);
    else
        L = computeCumulativeEnergy<double>(eigenvalues, retainedVariance);

    // use clone() to physically copy the data and thus deallocate the original matrices
    eigenvalues = eigenvalues.rowRange(0,L).clone();
    eigenvectors = eigenvectors.rowRange(0,L).clone();

    return *this;
}

// Projects input samples into the principal-component subspace:
// centers the data with this->mean, then multiplies by the eigenvector basis.
void PCA::project(InputArray _data, OutputArray result) const
{
    Mat data = _data.getMat();  // input data
    CV_Assert( mean.data && eigenvectors.data &&
        ((mean.rows == 1 && mean.cols == data.cols) || (mean.cols == 1 && mean.rows == data.rows)));
    // tile the mean so it matches the data matrix shape
    Mat tmp_data, tmp_mean = repeat(mean, data.rows/mean.rows, data.cols/mean.cols);
    int ctype = mean.type();
    if( data.type() != ctype || tmp_mean.data == mean.data )
    {
        data.convertTo( tmp_data, ctype );
        subtract( tmp_data, tmp_mean, tmp_data );   // center the samples; output in tmp_data
    }
    else
    {
        subtract( data, tmp_mean, tmp_mean );       // center the samples; output in tmp_mean
        tmp_data = tmp_mean;
    }
    // gemm performs the matrix product:
    //   row samples:    result = tmp_data * eigenvectors'
    //   column samples: result = eigenvectors * tmp_data
    if( mean.rows == 1 )
        gemm( tmp_data, eigenvectors, 1, Mat(), 0, result, GEMM_2_T );
    else
        gemm( eigenvectors, tmp_data, 1, Mat(), 0, result, 0 );
}

// Convenience overload returning the projection by value.
Mat PCA::project(InputArray data) const
{
    Mat result;
    project(data, result);
    return result;
}

// Reconstructs (approximately) the original data from its compressed
// PCA representation: result = data * eigenvectors + mean (row layout).
void PCA::backProject(InputArray _data, OutputArray result) const
{
    Mat data = _data.getMat();
    CV_Assert( mean.data && eigenvectors.data &&
        ((mean.rows == 1 && eigenvectors.rows == data.cols) ||
         (mean.cols == 1 && eigenvectors.rows == data.rows)));

    Mat tmp_data, tmp_mean;
    data.convertTo(tmp_data, mean.type());
    if( mean.rows == 1 )
    {
        tmp_mean = repeat(mean, data.rows, 1);
        // matrix product plus mean: result = tmp_data * eigenvectors + mean
        // (i.e. XX = base * xRot in PCA-whitening notation)
        gemm( tmp_data, eigenvectors, 1, tmp_mean, 1, result, 0 );
    }
    else
    {
        tmp_mean = repeat(mean, 1, data.cols);
        gemm( eigenvectors, tmp_data, 1, tmp_mean, 1, result, GEMM_1_T );
    }
}

// Convenience overload returning the reconstruction by value.
Mat PCA::backProject(InputArray data) const
{
    Mat result;
    backProject(data, result);
    return result;
}

}   // presumably closes "namespace cv" -- the opening brace is outside this excerpt

// Free-function wrapper: compute PCA keeping at most maxComponents components.
void cv::PCACompute(InputArray data, InputOutputArray mean,
                    OutputArray eigenvectors, int maxComponents)
{
    PCA pca;
    pca(data, mean, 0, maxComponents);
    pca.mean.copyTo(mean);
    pca.eigenvectors.copyTo(eigenvectors);
}

// Free-function wrapper: compute PCA keeping retainedVariance of the variance.
void cv::PCAComputeVar(InputArray data, InputOutputArray mean,
                    OutputArray eigenvectors, double retainedVariance)
{
    PCA pca;
    pca.computeVar(data, mean, 0, retainedVariance);
    pca.mean.copyTo(mean);
    pca.eigenvectors.copyTo(eigenvectors);
}

// Free-function wrapper around PCA::project using a caller-supplied basis.
void cv::PCAProject(InputArray data, InputArray mean,
                    InputArray eigenvectors, OutputArray result)
{
    PCA pca;
    pca.mean = mean.getMat();
    pca.eigenvectors = eigenvectors.getMat();
    pca.project(data, result);
}

// Free-function wrapper around PCA::backProject using a caller-supplied basis.
void cv::PCABackProject(InputArray data, InputArray mean,
                    InputArray eigenvectors, OutputArray result)
{
    PCA pca;
    pca.mean = mean.getMat();
    pca.eigenvectors = eigenvectors.getMat();
    pca.backProject(data, result);
}
opencv中的使用
1.训练
opencv中可以直接调用PCA接口对输入的图片序列进行训练,得到PCA主成分向量
<span style="font-family: Arial, Helvetica, sans-serif;"> </span><pre name="code" class="cpp">/* *PcaTrain,train the module of pca*In: imageListName, the list name of train images *    modelName, the name of modelName to save the model */ void PcaTrain(String imageListName,String modelName) {    vector<Mat> images;    read_imgList(imageListName, images);  //the change the input image go format the Pca Input format,将输入的二维图像转化成一个行向量模式Mat pcaIn = asRowMatrix(images, CV_32FC1);PCA pca(pcaIn, Mat(), CV_PCA_DATA_AS_ROW, 0.95);//第三个参数表示按照行模式读取数据,第四个参数表示所选主成分占比Mat pcaEigenvectors = pca.eigenvectors.clone();//计算得到的主成分特征 Mat pcaEigenvalues = pca.eigenvalues.clone();//计算得到的主成分特征值save(modelName, pcaEigenvalues, pcaEigenvectors);//将计算得到的主成分向特征和特征值存储 }

2.使用
直接将待计算图片和计算出的主成分特征做运算,得到降维后的向量值
<pre name="code" class="cpp"> /* *PcaCompute,compute a set of values of linearly uncorrelated variables by pri    ncipal component analysis *In: srcImg, the image that go to compute pca *    modelName, the name of modelName to load the model */ Mat PcaCompute(Mat srcImg, String modelName) {Mat pcaEigenvalues;Mat pcaEigenvectors;load(modelName, &pcaEigenvalues, &pcaEigenvectors);//加载计算出的主成分特征模型 Mat testIn,pcaIn;//chage the image to graycvtColor(srcImg,  srcImg, CV_RGB2GRAY );pcaIn = norm_0_255(srcImg);//图片格式化pcaIn.reshape(1, 1).convertTo(testIn, CV_32FC1, 1/255.);//输入图片转化成一行Mat pcaRes;gemm(testIn, pcaEigenvectors.reshape(0, pcaEigenvectors.cols), 1,  Mat(),     0, pcaRes);//图片行向量和主成分特征相乘,得到降维后向量cout<<"pcaRes rows="<< pcaRes.rows<<" cols="<<pcaRes.cols<<endl<<pcaRes<<    endl;return pcaRes; }


                                             
0 0