PCA Dimensionality Reduction with OpenCV


I spent the last couple of days looking at PCA dimensionality reduction and tried it out with OpenCV, mainly following [1] and [2]. The code below records my understanding.


#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <stdio.h>
#include <stdlib.h>

using namespace cv;
using namespace std;

#define DIMENTIONS 7
#define SAMPLE_NUM 31

float Coordinates[DIMENTIONS * SAMPLE_NUM] = {
    101.5, 100.4,  97.0,  98.7, 100.8, 114.2, 104.2,
    100.8,  93.5,  95.9, 100.7, 106.7, 104.3, 106.4,
    100.8,  97.4,  98.2,  98.2,  99.5, 103.6, 102.4,
     99.4,  96.0,  98.2,  97.8,  99.1,  98.3, 104.3,
    101.8,  97.7,  99.0,  98.1,  98.4, 102.0, 103.7,
    101.8,  96.8,  96.4,  92.7,  99.6, 101.3, 103.4,
    101.3,  98.2,  99.4, 103.7,  98.7, 101.4, 105.3,
    101.9, 100.0,  98.4,  96.9, 102.7, 100.3, 102.3,
    100.3,  98.9,  97.2,  97.4,  98.1, 102.1, 102.3,
     99.3,  97.7,  97.6, 101.1,  96.8, 110.1, 100.4,
     98.7,  98.4,  97.0,  99.6,  95.6, 107.2,  99.8,
     99.7,  97.7,  98.0,  99.3,  97.3, 104.1, 102.7,
     97.6,  96.5,  97.6, 102.5,  97.2, 100.6,  99.9,
     98.0,  98.4,  97.1, 100.5, 101.4, 103.0,  99.9,
    101.1,  98.6,  98.7, 102.4,  96.9, 108.2, 101.7,
    100.4,  98.6,  98.0, 100.7,  99.4, 102.4, 103.3,
     99.3,  96.9,  94.0,  98.1,  99.7, 109.7,  99.2,
     98.6,  97.4,  96.4,  99.8,  97.4, 102.1, 100.0,
     98.2,  98.2,  99.4,  99.3,  99.7, 101.5,  99.9,
     98.5,  96.3,  97.0,  97.7,  98.7, 112.6, 100.4,
     98.4,  99.2,  98.1, 100.2,  98.0,  98.2,  97.8,
     99.2,  97.4,  95.7,  98.9, 102.4, 114.8, 102.6,
    101.3,  97.9,  99.2,  98.8, 105.4, 111.9,  99.9,
     98.5,  97.8,  94.6, 102.4, 107.0, 115.0,  99.5,
     98.3,  96.3,  98.5, 106.2,  92.5,  98.6, 101.6,
     99.3, 101.1,  99.4, 100.1, 103.6,  98.7, 101.3,
     99.2,  97.3,  96.2,  99.7,  98.2, 112.6, 100.5,
    100.0,  99.9,  98.2,  98.3, 103.6, 123.2, 102.8,
    102.2,  99.4,  96.2,  98.6, 102.4, 115.3, 101.2,
    100.1,  98.7,  97.4,  99.8, 100.6, 112.4, 102.5,
    104.3,  98.7, 100.2, 116.1, 105.2, 101.6, 102.6
};

float Coordinates_test[DIMENTIONS] = {
    104.3, 98.7, 100.2, 116.1, 105.2, 101.6, 102.6
};

#define PCA_MEAN "mean"
#define PCA_EIGEN_VECTOR "eigen_vector"

int main()
{
    // Load the samples into a matrix, one sample per row
    Mat SampleSet(SAMPLE_NUM, DIMENTIONS, CV_32FC1);
    for (int i = 0; i < SAMPLE_NUM; ++i)
    {
        for (int j = 0; j < DIMENTIONS; ++j)
        {
            SampleSet.at<float>(i, j) = Coordinates[i * DIMENTIONS + j];
        }
    }

    // Training
    PCA *pca = new PCA(SampleSet, Mat(), CV_PCA_DATA_AS_ROW);
    cout << "eigenvalues:" << endl << pca->eigenvalues << endl << endl;
    //cout << "eigenvectors" << endl << pca->eigenvectors << endl;

    // Test input
    Mat input(1, DIMENTIONS, CV_32FC1);
    for (int j = 0; j < DIMENTIONS; ++j)
    {
        input.at<float>(0, j) = Coordinates_test[j];
    }

    // Decide how many dimensions to keep: the smallest leading set of
    // eigenvalues that covers more than 90% of the total variance
    int index = pca->eigenvalues.rows - 1;  // fallback: keep everything
    float sum = 0, sum0 = 0, ratio;
    for (int d = 0; d < pca->eigenvalues.rows; ++d)
    {
        sum += pca->eigenvalues.at<float>(d, 0);
    }
    for (int d = 0; d < pca->eigenvalues.rows; ++d)
    {
        sum0 += pca->eigenvalues.at<float>(d, 0);
        ratio = sum0 / sum;
        if (ratio > 0.9)
        {
            index = d;
            break;
        }
    }

    // Eigenvectors of the reduced dimension: keep the first index+1 rows
    Mat eigenvectors_d;
    eigenvectors_d.create(index + 1, DIMENTIONS, CV_32FC1);
    for (int i = 0; i < index + 1; ++i)
    {
        pca->eigenvectors.row(i).copyTo(eigenvectors_d.row(i));
    }
    cout << "eigenvectors" << endl << eigenvectors_d << endl;

    // Write the mean and the reduced eigenvector matrix into an XML file
    FileStorage fs_w("config.xml", FileStorage::WRITE);
    fs_w << PCA_MEAN << pca->mean;
    fs_w << PCA_EIGEN_VECTOR << eigenvectors_d;
    fs_w.release();

    // Encoding: load mean/eigenvectors back and project the input
    PCA *pca_encoding = new PCA();
    FileStorage fs_r("config.xml", FileStorage::READ);
    fs_r[PCA_MEAN] >> pca_encoding->mean;
    fs_r[PCA_EIGEN_VECTOR] >> pca_encoding->eigenvectors;
    fs_r.release();
    Mat output_encode(1, pca_encoding->eigenvectors.rows, CV_32FC1);
    pca_encoding->project(input, output_encode);
    cout << endl << "pca_encode:" << endl << output_encode;

    // Decoding: reconstruct the input from its projection
    PCA *pca_decoding = new PCA();
    FileStorage fs_d("config.xml", FileStorage::READ);
    fs_d[PCA_MEAN] >> pca_decoding->mean;
    fs_d[PCA_EIGEN_VECTOR] >> pca_decoding->eigenvectors;
    fs_d.release();
    Mat output_decode(1, DIMENTIONS, CV_32FC1);
    pca_decoding->backProject(output_encode, output_decode);
    cout << endl << "pca_Decode:" << endl << output_decode;

    delete pca;
    delete pca_encoding;
    delete pca_decoding;
    return 0;
}


The output is:

eigenvalues:
[43.182041; 14.599923; 9.2121401; 4.0877957; 2.8236785; 0.88751495; 0.66496396]
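
A quick sanity check on the 90% threshold: the seven eigenvalues sum to about 75.458. The first three components cover (43.182 + 14.600 + 9.212) / 75.458 ≈ 0.888, which is still below 0.9, while adding the fourth gives (43.182 + 14.600 + 9.212 + 4.088) / 75.458 ≈ 0.942 > 0.9. That is why index ends up as 3 and exactly four eigenvectors are kept: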


eigenvectors
[0.01278889, 0.03393811, -0.099844977, -0.13044992, 0.20732452, 0.96349025, -0.020049129;
  0.15659945, 0.037932698, 0.12129638, 0.89324093, 0.39454412, 0.046447847, 0.060190294;
  0.21434425, 0.018043749, -0.0012475925, -0.40428901, 0.81335503, -0.22759444, 0.2773709;
  0.43591988, -0.047541384, 0.19851086, -0.0035106051, -0.35545754, 0.10898948, 0.79376709]


pca_encode:
[-5.6273661, 17.138182, -0.078819014, 0.68144321]
pca_Decode:
[102.88557, 98.402702, 100.33086, 116.21081, 105.37261, 101.63729, 103.39891]
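
As an aside, the manual eigenvalue loop is not strictly necessary: if I remember correctly, OpenCV 2.4.1 and later let the PCA constructor take a retained-variance fraction and pick the number of components itself. A minimal sketch under that version assumption:

    // Keep just enough leading components to retain 90% of the variance
    PCA pca_var(SampleSet, Mat(), CV_PCA_DATA_AS_ROW, 0.9);
    cout << pca_var.eigenvectors.rows << endl;  // should print 4 for this data

With the data above this should select the same four components as the threshold loop.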

In this example, the result of the final backProject is not all that different from the original input:

float Coordinates_test[DIMENTIONS]={
104.3,98.7,100.2,116.1,105.2,101.6,102.6
};
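
To put a number on "not all that different", you can append a relative-error check to the end of the program (a minimal sketch; input and output_decode are the Mats from the code above):

    // L2 distance between original and reconstruction, relative to the
    // input's own L2 norm
    double err = norm(input, output_decode, NORM_L2) / norm(input, NORM_L2);
    cout << endl << "relative reconstruction error: " << err << endl;

For the numbers printed above this works out to roughly 0.6%, so four components really do preserve most of this vector.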


Principal component analysis, as the name suggests, extracts the most important information in the data and discards the secondary parts, so as to reduce the amount of data.

The concrete steps are as follows:

1. For each sample, extract the useful information into a vector;

2. Compute the mean of all the sample vectors;

3. Subtract this mean vector from each sample vector and assemble the results into a matrix;

4. Multiplying that matrix by its own transpose gives the covariance matrix (up to a constant scale factor); this covariance matrix is diagonalizable, the entries left after diagonalization are the eigenvalues, and each eigenvalue corresponds to an eigenvector (the eigenvectors must be normalized);

5. Select the N largest eigenvalues (N is the number of principal components (PCs) of the PCA; I feel this choice is the core of PCA: you pick N yourself, and the smaller N is, the more the data volume shrinks, but the worse the recognition result gets), and assemble the N corresponding eigenvectors into a new matrix;

6. Transpose this new matrix and multiply it by each (mean-subtracted) sample vector to obtain the dimension-reduced data (this data captures the relatively important part of the original, and its volume is usually far smaller than the original's, though of course that depends on how many principal components you pick). A sketch of these steps with plain OpenCV calls follows below.
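
The cv::PCA class wraps all of this up, but the steps can also be spelled out with lower-level OpenCV calls. A minimal sketch of steps 2 to 6, reusing SampleSet from the program above and hard-coding N = 4 to mirror the 90% result (the flag names follow the old C-style API used in the code):

    Mat covar, mean;
    // Steps 2-4: compute the mean, center the samples, and form the
    // covariance matrix (CV_COVAR_SCALE divides by the sample count)
    calcCovarMatrix(SampleSet, covar, mean,
                    CV_COVAR_NORMAL | CV_COVAR_ROWS | CV_COVAR_SCALE, CV_32F);

    // Steps 4-5: eigen-decompose the symmetric covariance matrix;
    // cv::eigen returns eigenvalues in descending order and stores the
    // normalized eigenvectors as rows
    Mat eigenvalues, eigenvectors;
    eigen(covar, eigenvalues, eigenvectors);
    int N = 4;                                // number of PCs to keep
    Mat basis = eigenvectors.rowRange(0, N);  // N largest components

    // Step 6: subtract the mean from every sample and project onto the
    // reduced basis; projected is SAMPLE_NUM x N
    Mat centered = SampleSet - repeat(mean, SAMPLE_NUM, 1);
    Mat projected = centered * basis.t();

Projecting a single row vector this way is exactly what cv::PCA::project does internally, with backProject applying the basis the other way around and adding the mean back.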



[1] http://blog.csdn.net/yang_xian521/article/details/7445536

[2] http://blog.csdn.net/abcjennifer/article/details/8002329
