学习pca的好资料集合

来源：互联网发布：网上买电影票软件编辑：程序博客网时间：2024/06/05 16:25

首先了解特征向量的概念

然后，http://www.cad.zju.edu.cn/home/chenlu/pca.htm了解pca概念

http://jacobyuan.blog.sohu.com/148317731.html进一步解释

下面是转的代码

/*
PCA program (using covariance)
Written by Y. Bin Mao
Video and Image Processing and Analysis Group (VIPAG)
School of Automation, NJUST
Jan. 6, 2008
Here is a matlab description of the algorithm
% PCA1: Perform PCA using covariance.
% data --- MxN matrix of input data (M dimensions, N trials)
% signals --- MxN matrix of projected data
% PC --- each column is a PC
% V --- Mx1 matrix of variances
%
function [signals, PC, V] = pca1( data )
[M, N] = size( data );
% subtract off the mean for each dimension
mn = mean( data, 2 );
data = data - repmat( mn, 1, N );
% calculate the covariance matrix
covariance = 1/ (N-1) * data * data';
% find the eigenvectors and eigenvalues
[PC, V] = eig( covariance );
% extract diagonal of matrix as vector
V = diag(V);
% sort the variances in decreasing order
[junk, rindices] = sort(-1*V);
V = V(rindices);
PC = PC(:,rindices);
% project the original data set
signals = PC' * data;
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define ALGO_DEBUG
/*
用豪斯荷尔德（Householder）变换将n阶实对称矩阵约化为对称三对角阵
徐士良. 常用算法程序集（C语言描述），第3版. 清华大学出版社. 2004
double a[] --- 维数为nxn。存放n阶实对称矩阵A
int n --- 实对称矩阵A的阶数
double q[] --- 维数nxn。返回Householder变换的乘积矩阵Q。当与
Tri_Symmetry_Diagonal_Eigenvector()函数联用，若将
Q矩阵作为该函数的一个参数时，可计算实对称阵A的特征
值与相应的特征向量
double b[] --- 维数为n。返回对称三对角阵中的主对角线元素
double c[] --- 长度n。前n－1个元素返回对称三对角阵中的次对角线元素。
*/
void Householder_Tri_Symetry_Diagonal( double a[], int n, double q[],
double b[], double c[] )
{
int i, j, k, u;
double h, f, g, h2;
for ( i = 0; i = n-1; i++ )
for ( j = 0; j = n-1; j++ )
{
u = i * n + j;
q[u] = a[u];
}
for ( i = n-1; i >= 1; i-- )
{
h = 0.0;
if ( i > 1 )
for ( k = 0; k = i-1; k++ )
{
u = i * n + k;
h = h + q[u] * q[u];
}
if ( h + 1.0 == 1.0 )
{
c[i] = 0.0;
if ( i == 1 ) c[i] = q[i*n+i-1];
b[i] = 0.0;
}
else
{
c[i] = sqrt( h );
u = i * n + i - 1;
if ( q[u] > 0.0 ) c[i] = -c[i];
h = h - q[u] * c[i];
q[u] = q[u] - c[i];
f = 0.0;
for ( j = 0; j = i - 1; j++ )
{
q[j*n+i] = q[i*n+j] / h;
g = 0.0;
for ( k = 0; k = j; k++ )
g = g + q[j*n+k] * q[i*n+k];
if ( j + 1 = i-1 )
for ( k = j+1; k = i-1; k++ )
g = g + q[k*n+j] * q[i*n+k];
c[j] = g / h;
f = f + g * q[j*n+i];
}
h2 = f / ( h + h );
for ( j = 0; j = i-1; j++ )
{
f = q[i*n+j];
g = c[j] - h2 * f;
c[j] = g;
for ( k = 0; k = j; k++ )
{
u = j * n + k;
q[u] = q[u] - f * c[k] - g * q[i*n+k];
}
}
b[i] = h;
}
}
for ( i = 0; i = n-2; i++ ) c[i] = c[i+1];
c[n-1] = 0.0;
b[0] = 0.0;
for ( i = 0; i = n-1; i++ )
{
if ( ( b[i] != 0.0 ) && ( i-1 >= 0 ) )
for ( j = 0; j = i-1; j++ )
{
g = 0.0;
for ( k = 0; k = i-1; k++ )
g = g + q[i*n+k] * q[k*n+j];
for ( k = 0; k = i-1; k++ )
{
u = k * n + j;
q[u] = q[u] - g * q[k*n+i];
}
}
u = i * n + i;
b[i] = q[u]; q[u] = 1.0;
if ( i - 1 >= 0 )
for ( j = 0; j = i - 1; j++ )
{
q[i*n+j] = 0.0; q[j*n+i] = 0.0;
}
}
return;
}
/*
计算实对称三对角阵的全部特征值与对应特征向量
徐士良. 常用算法程序集（C语言描述），第3版. 清华大学出版社. 2004
int n --- 实对称三对角阵的阶数
double b --- 长度为n，存放n阶对称三对角阵的主对角线上的各元素；
返回时存放全部特征值
double c --- 长度为n，前n－1个元素存放n阶对称三对角阵的次对角
线上的元素
double q --- 维数为nxn，若存放单位矩阵，则返回n阶实对称三对角
阵T的特征向量组；若存放Householder_Tri_Symetry_Diagonal()
函数所返回的一般实对称矩阵A的豪斯荷尔得变换的乘
积矩阵Q，则返回实对称矩阵A的特征向量组。其中，q中
的的j列为与数组b中第j个特征值对应的特征向量
double eps --- 本函数在迭代过程中的控制精度要求
int l --- 为求得一个特征值所允许的最大迭代次数
返回值：
若返回标记小于0，则说明迭代了l次还未求得一个特征值，并有fail
信息输出；若返回标记大于0，则说明程序工作正常，全部特征值由一
维数组b给出，特征向量组由二维数组q给出
*/
int Tri_Symmetry_Diagonal_Eigenvector( int n, double b[], double c[],
double q[], double eps, int l )
{
int i, j, k, m, it, u, v;
double d, f, h, g, p, r, e, s;
c[n-1] = 0.0; d = 0.0; f = 0.0;
for ( j = 0; j = n-1; j++ )
{
it = 0;
h = eps * ( fabs( b[j] ) + fabs( c[j] ) );
if ( h > d ) d = h;
m = j;
while ( ( m = n-1 ) && ( fabs( c[m] ) > d ) ) m = m+1;
if ( m != j )
{
do
{
if ( it == l )
{
#ifdef ALGO_DEBUG
printf( "fail\n" );
#endif
return( -1 );
}
it = it + 1;
g = b[j];
p = ( b[j+1] - g ) / ( 2.0 * c[j] );
r = sqrt( p * p + 1.0 );
if ( p >= 0.0 )
b[j] = c[j] / ( p + r );
else
b[j] = c[j] / ( p - r );
h = g - b[j];
for ( i = j+1; i = n-1; i++ )
b[i] = b[i] - h;
f = f + h; p = b[m]; e = 1.0; s = 0.0;
for ( i = m-1; i >= j; i-- )
{
g = e * c[i]; h = e * p;
if ( fabs( p ) >= fabs( c[i] ) )
{
e = c[i] / p; r = sqrt( e * e + 1.0 );
c[i+1] = s * p * r; s = e / r; e = 1.0 / r;
}
else
{
e = p / c[i]; r = sqrt( e * e + 1.0 );
c[i+1] = s * c[i] * r;
s = 1.0 / r; e = e / r;
}
p = e * b[i] - s * g;
b[i+1] = h + s * ( e * g + s * b[i] );
for ( k = 0; k = n-1; k++ )
{
u = k * n + i + 1; v = u - 1;
h = q[u]; q[u] = s * q[v] + e * h;
q[v] = e * q[v] - s * h;
}
}
c[j] = s * p; b[j] = e * p;
}
while ( fabs( c[j] ) > d );
}
b[j] = b[j] + f;
}
for ( i = 0; i = n-1; i++ )
{
k = i; p = b[i];
if ( i+1 = n-1 )
{
j = i+1;
while ( ( j = n-1 ) && ( b[j] = p ) )
{
k = j; p = b[j]; j = j+1;
}
}
if ( k != i )
{
b[k] = b[i]; b[i] = p;
for ( j = 0; j = n-1; j++ )
{
u = j * n + i; v = j * n + k;
p = q[u]; q[u] = q[v]; q[v] = p;
}
}
}
return( 1 );
}
# define EPS 0.000001 /* 计算精度 */
# define Iteration 60 /* 求取特征量的最多迭代次数 */
/*
计算实对称阵的全部特征值与对应特征向量
int n --- 实对称阵的阶数
double * CovMatrix --- 维数为nxn，存放n阶对称阵的各元素；
double * Eigen --- 长度为n，为n个特征值
double * EigenVector --- 维数为nxn，返回n阶实对称阵的特征向量组其中，
EigenVector中的j列为与数组Eigen中第j个特征值
对应的特征向量
返回值：
若返回标记小于0，则说明迭代了Iteration次还未求得一个特征值；
若返回标记大于0，则说明程序工作正常，全部特征值由一
维数组Eigen给出，特征向量组由二维数组EigenVector给出
*/
int SymmetricRealMatrix_Eigen( double CovMatrix[], int n,
double Eigen[], double EigenVector[] )
{
int k;
double * subDiagonal;
subDiagonal = ( double * )malloc( sizeof( double ) * n );
Householder_Tri_Symetry_Diagonal( CovMatrix, n, EigenVector, Eigen, subDiagonal );
k = Tri_Symmetry_Diagonal_Eigenvector( n, Eigen, subDiagonal, EigenVector, EPS, Iteration );
free( subDiagonal );
return( k );
}
/*
PCA1: Perform PCA using covariance.
data --- MxN matrix of input data ( M dimensions, N trials )
行数为原始数据维数；每列数据为一个样本
signals --- MxN matrix of projected data
PC --- each column is a PC
V --- Mx1 matrix of variances
row = M dimensions, col = N trials
*/
int pca1( double * data, int row, int col,
double * signals, double * PC, double * V )
{
int x, y, k;
double * mean;
double * data1, * tPC, * tV;
double * covariance, temp;
int rvalue, * no, tp;
if ( row = 1 || col = 1 ) return( -1 );
/* subtract off the mean for each dimension */
mean = ( double * )malloc( sizeof( double ) * row );
data1 = ( double * )malloc( sizeof( double ) * row * col );
for ( y = 0; y row; y++ ) /* calculate mean */
{
mean[y] = 0;
for ( x = 0; x col; x++ )
mean[y] += data[y*col+x];
}
for ( y = 0; y row; y++ ) mean[y] = mean[y]/col;
for ( y = 0; y row; y++ ) /* subtract mean */
for ( x = 0; x col; x++ )
{
data1[y*col+x] = data[y*col+x] - mean[y];
}
free( mean );
/* calculate the covariance matrix */
covariance = ( double * )malloc( sizeof( double ) * row * row );
for ( y = 0; y row; y++ )
for ( x = 0; x row; x++ )
{
temp = 0;
for ( k = 0; k col; k++ )
temp += data1[y*col+k] * data1[x*col+k];
temp = temp / ( col-1 );
covariance[x*row+y] = temp;
}
/* find the eigenvectors and eigenvalues */
rvalue = SymmetricRealMatrix_Eigen( covariance, row, V, PC );
free( covariance );
/* sort the variances in decreasing order */
no = ( int * )malloc( sizeof( int ) * row );
for ( x = 0; x row; x++ ) no[x] = x;
for ( x = 0; x row-1; x++ )
{
temp = V[x];
for ( k = x; k row; k++ )
if ( temp V[k] )
{
tp = no[k];
no[k] = no[x];
no[x] = tp;
temp = V[k];
}
}
/* exchange eigen value and vector in decreasing order */
tV = ( double * )malloc( sizeof( double ) * row );
tPC = ( double * )malloc( sizeof( double ) * row * row );
for ( x = 0; x row; x++ ) tV[x] = V[x];
for ( x = 0; x row; x++ )
for ( y = 0; y row; y++ )
tPC[x*row+y] = PC[x*row+y];
for ( x = 0; x row; x++ )
{
if ( no[x] != x )
{
for ( y = 0; y row; y++ )
PC[y*row+x] = tPC[y*row+no[x]];
V[x] = tV[no[x]];
}
}
free( no );
free( tV );
free( tPC );
/* project the original data: signals = PC' * data; */
for ( y = 0; y row; y++ )
for ( x = 0; x col; x++ )
{
signals[y*col+x] = 0;
for ( k = 0; k row; k++ )
signals[y*col+x] += PC[k*row+y] * data1[k*col+x];
}
free( data1 );
return( rvalue );
}
/*
投影到主元素向量上
double * newdata --- 要投影到主元素分量上的数据矩阵
int row, col --- 数据矩阵的维数
row（行数）为主元向量维数（即特征数）
col（列数）为采样数
double * PC --- 主元特征向量（列向量）
double * newsignals --- 投影后获得的信号的系数（row*col）
double * ShiftValue --- 如果一个有row个元素的偏移量，如果改值为NULL
则计算均值，并减去均值，否则减去改值
*/
int project2PCA( double * newdata, int row, int col,
double * PC, double * newsignals, double * ShiftValue )
{
int x, y, k;
double * mean, * data1;
/* subtract off the mean for each dimension */
data1 = ( double * )malloc( sizeof( double ) * row * col );
/* if ShiftValue <> NULL */
if ( ShiftValue != NULL )
{
for ( y = 0; y row; y++ ) /* subtract ShiftValue */
for ( x = 0; x col; x++ )
{
data1[y*col+x] = newdata[y*col+x] - ShiftValue[y];
}
}
else
{
mean = ( double * )malloc( sizeof( double ) * row );
for ( y = 0; y row; y++ ) /* calculate mean */
{
mean[y] = 0;
for ( x = 0; x col; x++ )
mean[y] += newdata[y*col+x];
}
if ( col = 1 )
for ( y = 0; y row; y++ ) mean[y] = 0;
else
for ( y = 0; y row; y++ ) mean[y] = mean[y]/col;
for ( y = 0; y row; y++ ) /* subtract mean */
for ( x = 0; x col; x++ )
{
data1[y*col+x] = newdata[y*col+x] - mean[y];
}
free( mean );
}
/* project the original data: signals = PC' * data; */
for ( y = 0; y row; y++ )
for ( x = 0; x col; x++ )
{
newsignals[y*col+x] = 0;
for ( k = 0; k row; k++ )
newsignals[y*col+x] += PC[k*row+y] * data1[k*col+x];
}
free( data1 );
return( 1 );
}
/*
test: main function
*/
int main()
{
double data[20] = { 2, 3, 8, 2, 3,
7, 9, 29, 3, 5,
3, 8, 22, 12, 1,
3, 12, 12, 33, 2};
int row = 4, col = 5;
double signals[20], PC[16], V[4];
int x, y;
pca1( data, row, col, signals, PC, V );
printf( "Project to Principal Component: \n" );
for ( y = 0; y row; y++ )
{
for ( x = 0; x col; x++ )
{
printf( "%7.4f ", signals[y*col+x] );
}
printf( "\n" );
}
printf( "Eigen Values (in deceasing order): \n" );
for ( y = 0; y row; y++ )
printf( "%7.4f ", V[y] );
printf( "\n" );
printf( "Eigen Vectors: \n" );
for ( y = 0; y row; y++ )
{
for ( x = 0; x row; x++ )
{
printf( "%7.4f ", PC[y*row+x] );
}
printf( "\n" );
}
return( 1 );
}