dlcf

来源:互联网 发布:java 递归查找父节点 编辑:程序博客网 时间:2024/06/09 15:49
#include <string.h>#include <stdio.h>#include <stdlib.h>#include <math.h>#include<time.h>#include<io.h>#include<direct.h>//目录控制头文件#include<iostream>using namespace std;#define WIN 20     //窗口大小#define ASSCODE 5 //编码长度const int FOLD= 5;  //交叉验证迭代次数const int M_num=50;//主成分个数double W_AUC;//最大AUCdouble AUC;//临时AUCdouble BestThreshold;double sen,spe,ppv,npv,acc,cc,sen1,spe1,ppv1,npv1,acc1,cc1;void write_m(char* filename, double* a, int r, int c){FILE *fp;fp=fopen(filename,"w");for(int i=0;i<r;i++){for(int j=0;j<c;j++)fprintf(fp,"%lf ",a[i*c+j]);fprintf(fp,"\n");}fclose(fp);}void write_mm(char* filename, double** a, int r, int c){FILE *fp;fp=fopen(filename,"w");for(int i=0;i<r;i++){for(int j=0;j<c;j++)fprintf(fp,"%lf ",a[i][j]);fprintf(fp,"\n");}fclose(fp);}void file_pre_train(int block)//功能:将裂解样本和非裂解样本,分为9:1份,即校正集和预测集{char a[32];FILE *cleafp,*noncleafp;if((cleafp=fopen("indata\\cleavage.txt","r"))==NULL ){printf("cannot open cleavage.txt\n");getchar();return;}if((noncleafp=fopen("indata\\noncleavage.txt","r"))==NULL ){printf("cannot open noncleavage\n");getchar();return;}FILE *trainfp,*prefp;if((trainfp=fopen("out\\train.txt","w"))==NULL )//训练{printf("cannot write_m train\n");getchar();return;}if((prefp=fopen("out\\predict.txt","w"))==NULL )//预测{printf("cannot open infile\n");getchar();return;}for(int i=0;i<1160;i++){  fread(a,31,1,cleafp);//裂解样本  if((i/116)==block)fwrite(a,31,1,prefp);  //预测样本  elsefwrite(a,31,1,trainfp);  //训练样本}for(int i=0;i<1160;i++){  fread(a,31,1,noncleafp); //非裂解样本  if((i/116)==block) fwrite(a,31,1,prefp);  else fwrite(a,31,1,trainfp);}fclose(cleafp);fclose(noncleafp);fclose(trainfp);fclose(prefp);}void file_pre()//功能:将裂解样本和非裂解样本合并为预测数据out\\PredictData.txt{char a[32];int i,j;int c=31;int r=2320;FILE *pre_fp;FILE *cleafp,*noncleafp;if((cleafp=fopen("indata\\cleavage.txt","r"))==NULL ){printf("file_pre() cannot open cleavage.txt\n");getchar();return;}if((noncleafp=fopen("indata\\noncleavage.txt","r"))==NULL ){printf("file_pre cannot open noncleavage\n");getchar();return;}if((pre_fp=fopen("out\\PredictData.txt","w"))==NULL )//预测数据{printf("cannot open infile\n");getchar();return;}/*size_t fread( void *buffer,size_t size,size_t count,FILE *stream );*/for(i=0;i<1160;i++){   fread(a,sizeof( char ),31,cleafp);   fwrite(a,sizeof( char ),31,pre_fp);}for(i=0;i<1160;i++){       fread(a,sizeof( char ),31,noncleafp);   fwrite(a,sizeof( char ),31,pre_fp);}fclose(cleafp);fclose(noncleafp);fclose(pre_fp);}//编码变换void code(FILE *pre_fp,double *x,double *y,int r,int c)  //pre_fp——train。txt{int i,j,k;char *pat,*temp;pat=(char *) calloc(r*WIN,sizeof(char));//将对应窗口大小的数据x存储进数组中,以及裂解值yfor(i=0;i<r;i++){for(k=0;k<(28-WIN)/2;k++)fscanf(pre_fp,"%c",&temp);for(j=0;j<WIN;j++)fscanf(pre_fp,"%c",&pat[i*WIN+j]);//按窗口读取for(k=0;k<(28-WIN)/2;k++)fscanf(pre_fp,"%c",&temp);for(j=0;j<1;j++)    fscanf(pre_fp,"%lf",&y[i*1+j]);  //读入labelfgetc(pre_fp); } fclose(pre_fp);//编码变换二进制for(i=0;i<r;i++){     for(k=0;k<c;k++)     x[i*c+k]=0.0;     for(j=0;j<WIN;j++) {      if ( ASSCODE==3){switch(pat[i*WIN+j]){case'A':x[i*c+j*ASSCODE +0]= 0.07;x[i*c+j*ASSCODE +1]=-1.73;x[i*c+j*ASSCODE +2]= 0.09;break;case'C':x[i*c+j*ASSCODE +0]= 0.71;x[i*c+j*ASSCODE +1]=-0.97;x[i*c+j*ASSCODE +2]= 4.13;break;case'D':x[i*c+j*ASSCODE +0]= 3.64;x[i*c+j*ASSCODE +1]= 1.13;x[i*c+j*ASSCODE +2]= 2.36;break;case'E':x[i*c+j*ASSCODE +0]= 3.08;x[i*c+j*ASSCODE +1]= 0.39;x[i*c+j*ASSCODE +2]=-0.07;break;case'F':x[i*c+j*ASSCODE +0]=-4.92;x[i*c+j*ASSCODE +1]= 1.30;x[i*c+j*ASSCODE +2]= 0.45;break;case'G':x[i*c+j*ASSCODE  +0]= 2.23;x[i*c+j*ASSCODE  +1]=-5.36;x[i*c+j*ASSCODE  +2]= 0.30;break;case'H':x[i*c+j*ASSCODE  +0]= 2.41;x[i*c+j*ASSCODE  +1]= 1.74;x[i*c+j*ASSCODE  +2]= 1.11;break;case'I':x[i*c+j*ASSCODE  +0]=-4.44;x[i*c+j*ASSCODE  +1]=-1.68;x[i*c+j*ASSCODE  +2]=-1.03;break;case'K':x[i*c+j*ASSCODE  +0]= 2.84;x[i*c+j*ASSCODE  +1]= 1.41;x[i*c+j*ASSCODE  +2]=-3.14;break;case'L':x[i*c+j*ASSCODE  +0]=-4.19;x[i*c+j*ASSCODE  +1]=-1.03;x[i*c+j*ASSCODE  +2]=-0.98;break;case'M':x[i*c+j*ASSCODE  +0]=-2.49;x[i*c+j*ASSCODE  +1]=-0.27;x[i*c+j*ASSCODE  +2]=-0.41;break;case'N':x[i*c+j*ASSCODE  +0]= 3.22;x[i*c+j*ASSCODE  +1]= 1.45;x[i*c+j*ASSCODE  +2]= 0.84;break;case'P':x[i*c+j*ASSCODE  +0]=-1.22;x[i*c+j*ASSCODE  +1]= 0.88;x[i*c+j*ASSCODE  +2]= 2.23;break;case'Q':x[i*c+j*ASSCODE  +0]= 2.18;x[i*c+j*ASSCODE  +1]= 0.53;x[i*c+j*ASSCODE  +2]=-1.14;break;case'R':x[i*c+j*ASSCODE  +0]= 2.88;x[i*c+j*ASSCODE  +1]= 2.52;x[i*c+j*ASSCODE  +2]=-3.44;break;case'S':x[i*c+j*ASSCODE  +0]= 1.96;x[i*c+j*ASSCODE  +1]=-1.63;x[i*c+j*ASSCODE  +2]= 0.57;break;case'T':x[i*c+j*ASSCODE  +0]= 0.92;x[i*c+j*ASSCODE  +1]=-2.09;x[i*c+j*ASSCODE  +2]=-1.40;break;case'V':x[i*c+j*ASSCODE  +0]=-2.69;x[i*c+j*ASSCODE  +1]=-2.53;x[i*c+j*ASSCODE  +2]=-1.29;break;case'W':x[i*c+j*ASSCODE  +0]=-4.75;x[i*c+j*ASSCODE  +1]= 3.65;x[i*c+j*ASSCODE  +2]= 0.85;break;case'Y':x[i*c+j*ASSCODE  +0]=-1.39;x[i*c+j*ASSCODE  +1]= 2.32;x[i*c+j*ASSCODE +2]= 0.01;break;default:break;}}if(ASSCODE == 4){switch(pat[i*WIN+j]){case'A':x[i*c+j*ASSCODE  + 0] = -0.840;x[i*c+j*ASSCODE  + 1] = -0.524;x[i*c+j*ASSCODE  + 2] = -0.348;x[i*c+j*ASSCODE  + 3] = -0.911;break;case'C':x[i*c+j*ASSCODE  + 0] = 0.579;x[i*c+j*ASSCODE  + 1] = -1.674;x[i*c+j*ASSCODE  + 2] = 0.840;x[i*c+j*ASSCODE  + 3] = -1.056;break;case'D':x[i*c+j*ASSCODE  + 0] = -0.091;x[i*c+j*ASSCODE  + 1] = 2.048;x[i*c+j*ASSCODE  + 2] = 1.589;x[i*c+j*ASSCODE  + 3] = -0.185;break;case'E':x[i*c+j*ASSCODE  + 0] = 0.495;x[i*c+j*ASSCODE  + 1] = 2.083;x[i*c+j*ASSCODE  + 2] = 1.354;x[i*c+j*ASSCODE  + 3] = 0.120;break;case'F':x[i*c+j*ASSCODE  + 0] = 1.796;x[i*c+j*ASSCODE  + 1] = -1.911;x[i*c+j*ASSCODE  + 2] = -0.156;x[i*c+j*ASSCODE  + 3] = 0.789;break;case'G':x[i*c+j*ASSCODE  + 0] = -1.883;x[i*c+j*ASSCODE  + 1] = -0.440;x[i*c+j*ASSCODE  + 2] = -0.373;x[i*c+j*ASSCODE  + 3] = -0.907;break;case'H':x[i*c+j*ASSCODE  + 0] = 0.857;x[i*c+j*ASSCODE  + 1] = 1.302;x[i*c+j*ASSCODE  + 2] = -0.212;x[i*c+j*ASSCODE  + 3] = -0.354;break;case'I':x[i*c+j*ASSCODE  + 0] = 1.536;x[i*c+j*ASSCODE  + 1] = -2.023;x[i*c+j*ASSCODE  + 2] = -0.360;x[i*c+j*ASSCODE  + 3] = -0.453;break;case'K':x[i*c+j*ASSCODE  + 0] = 0.974;x[i*c+j*ASSCODE  + 1] = 2.882;x[i*c+j*ASSCODE  + 2] = -0.888;x[i*c+j*ASSCODE  + 3] = 0.413;break;case'L':x[i*c+j*ASSCODE  + 0] = 1.401;x[i*c+j*ASSCODE  + 1] = -1.638;x[i*c+j*ASSCODE  + 2] = -0.232;x[i*c+j*ASSCODE  + 3] = -0.263;break;case'M':x[i*c+j*ASSCODE  + 0] = 1.127;x[i*c+j*ASSCODE  + 1] = -1.158;x[i*c+j*ASSCODE  + 2] = -0.192;x[i*c+j*ASSCODE  + 3] = 0.071;break;case'N':x[i*c+j*ASSCODE  + 0] = -0.211;x[i*c+j*ASSCODE  + 1] = 0.963;x[i*c+j*ASSCODE  + 2] = 0.039;x[i*c+j*ASSCODE  + 3] = -0.019;break;case'P':x[i*c+j*ASSCODE  + 0] = -0.495;x[i*c+j*ASSCODE  + 1] = -0.057;x[i*c+j*ASSCODE  + 2] = -0.937;x[i*c+j*ASSCODE  + 3] = 0.714;break;case'Q':x[i*c+j*ASSCODE  + 0] = 0.267;x[i*c+j*ASSCODE  + 1] = 1.027;x[i*c+j*ASSCODE  + 2] = 0.044;x[i*c+j*ASSCODE  + 3] = 0.360;break;case'R':x[i*c+j*ASSCODE  + 0] = 1.744;x[i*c+j*ASSCODE  + 1] = 2.899;x[i*c+j*ASSCODE  + 2] = -1.134;x[i*c+j*ASSCODE  + 3] = -0.110;break;case'S':x[i*c+j*ASSCODE  + 0] = -0.954;x[i*c+j*ASSCODE  + 1] = 0.332;x[i*c+j*ASSCODE  + 2] = -0.164;x[i*c+j*ASSCODE  + 3] = -0.584;break;case'T':x[i*c+j*ASSCODE  + 0] = -0.399;x[i*c+j*ASSCODE  + 1] = 0.234;x[i*c+j*ASSCODE  + 2] = -0.093;x[i*c+j*ASSCODE  + 3] = -0.272;break;case'V':x[i*c+j*ASSCODE  + 0] = 0.603;x[i*c+j*ASSCODE  + 1] = -1.534;x[i*c+j*ASSCODE  + 2] = -0.386;x[i*c+j*ASSCODE  + 3] = -0.440;break;case'W':x[i*c+j*ASSCODE  + 0] = 2.707;x[i*c+j*ASSCODE  + 1] = -1.413;x[i*c+j*ASSCODE  + 2] = -0.112;x[i*c+j*ASSCODE  + 3] = 1.781;break;case'Y':x[i*c+j*ASSCODE  + 0] = 2.655;x[i*c+j*ASSCODE  + 1] = -0.792;x[i*c+j*ASSCODE  + 2] = 1.562;x[i*c+j*ASSCODE  + 3] = 0.569;break;case'*':x[i*c+j*ASSCODE  + 0] = -11.869;x[i*c+j*ASSCODE  + 1] = -0.605;x[i*c+j*ASSCODE  + 2] = 0.158;x[i*c+j*ASSCODE  + 3] = 0.738;break;default:break;}}if(ASSCODE == 5){switch(pat[i*WIN+j]){case'A':x[i*c+j*ASSCODE  + 0] = -1.359;x[i*c+j*ASSCODE  + 1] = -1.680;x[i*c+j*ASSCODE  + 2] = -0.531;x[i*c+j*ASSCODE  + 3] = -1.186;x[i*c+j*ASSCODE  + 4] = 0.045;break;case'C':x[i*c+j*ASSCODE  + 0] = 1.340;x[i*c+j*ASSCODE  + 1] = -2.070;x[i*c+j*ASSCODE  + 2] = 1.226;x[i*c+j*ASSCODE  + 3] = -1.221;x[i*c+j*ASSCODE  + 4] = -0.844;break;case'D':x[i*c+j*ASSCODE  + 0] = -1.820;x[i*c+j*ASSCODE  + 1] = 1.850;x[i*c+j*ASSCODE  + 2] = 2.418;x[i*c+j*ASSCODE  + 3] = -0.561;x[i*c+j*ASSCODE  + 4] = -0.116;break;case'E':x[i*c+j*ASSCODE  + 0] = -1.104;x[i*c+j*ASSCODE  + 1] = 2.371;x[i*c+j*ASSCODE  + 2] = 2.047;x[i*c+j*ASSCODE  + 3] = -0.140;x[i*c+j*ASSCODE  + 4] = -0.020;break;case'F':x[i*c+j*ASSCODE  + 0] = 3.283;x[i*c+j*ASSCODE  + 1] = -1.073;x[i*c+j*ASSCODE  + 2] = -0.349;x[i*c+j*ASSCODE  + 3] = 0.970;x[i*c+j*ASSCODE  + 4] = 0.221;break;case'G':x[i*c+j*ASSCODE  + 0] = -2.885;x[i*c+j*ASSCODE  + 1] = -2.463;x[i*c+j*ASSCODE  + 2] = -0.555;x[i*c+j*ASSCODE  + 3] = -0.882;x[i*c+j*ASSCODE  + 4] = 0.565;break;case'H':x[i*c+j*ASSCODE  + 0] = 0.259;x[i*c+j*ASSCODE  + 1] = 1.737;x[i*c+j*ASSCODE  + 2] = -0.484;x[i*c+j*ASSCODE  + 3] = -0.287;x[i*c+j*ASSCODE  + 4] = -1.297;break;case'I':x[i*c+j*ASSCODE  + 0] = 2.893;x[i*c+j*ASSCODE  + 1] = -1.543;x[i*c+j*ASSCODE  + 2] = -0.675;x[i*c+j*ASSCODE  + 3] = -0.636;x[i*c+j*ASSCODE  + 4] = -0.904;break;case'K':x[i*c+j*ASSCODE  + 0] = -1.199;x[i*c+j*ASSCODE  + 1] = 3.887;x[i*c+j*ASSCODE  + 2] = -1.287;x[i*c+j*ASSCODE  + 3] = 0.444;x[i*c+j*ASSCODE  + 4] = -0.381;break;case'L':x[i*c+j*ASSCODE  + 0] = 2.491;x[i*c+j*ASSCODE  + 1] = -1.154;x[i*c+j*ASSCODE  + 2] = -0.449;x[i*c+j*ASSCODE  + 3] = -0.495;x[i*c+j*ASSCODE  + 4] = -0.531;break;case'M':x[i*c+j*ASSCODE  + 0] = 1.839;x[i*c+j*ASSCODE  + 1] = -0.732;x[i*c+j*ASSCODE  + 2] = -0.355;x[i*c+j*ASSCODE  + 3] = -0.042;x[i*c+j*ASSCODE  + 4] = -0.123;break;case'N':x[i*c+j*ASSCODE  + 0] = -1.370;x[i*c+j*ASSCODE  + 1] = 0.799;x[i*c+j*ASSCODE  + 2] = 0.203;x[i*c+j*ASSCODE  + 3] = -0.480;x[i*c+j*ASSCODE  + 4] = 0.875;break;case'P':x[i*c+j*ASSCODE  + 0] = -1.059;x[i*c+j*ASSCODE  + 1] = -0.600;x[i*c+j*ASSCODE  + 2] = -1.386;x[i*c+j*ASSCODE  + 3] = 0.592;x[i*c+j*ASSCODE  + 4] = 1.612;break;case'Q':x[i*c+j*ASSCODE  + 0] = -0.734;x[i*c+j*ASSCODE  + 1] = 1.301;x[i*c+j*ASSCODE  + 2] = 0.229;x[i*c+j*ASSCODE  + 3] = -0.038;x[i*c+j*ASSCODE  + 4] = 0.967;break;case'R':x[i*c+j*ASSCODE  + 0] = -0.505;x[i*c+j*ASSCODE  + 1] = 4.325;x[i*c+j*ASSCODE  + 2] = -1.750;x[i*c+j*ASSCODE  + 3] = 0.138;x[i*c+j*ASSCODE  + 4] = -0.850;break;case'S':x[i*c+j*ASSCODE  + 0] = -2.027;x[i*c+j*ASSCODE  + 1] = -0.656;x[i*c+j*ASSCODE  + 2] = -0.187;x[i*c+j*ASSCODE  + 3] = -0.936;x[i*c+j*ASSCODE  + 4] = 0.555;break;case'T':x[i*c+j*ASSCODE  + 0] = -1.249;x[i*c+j*ASSCODE  + 1] = -0.338;x[i*c+j*ASSCODE  + 2] = -0.073;x[i*c+j*ASSCODE  + 3] = -0.498;x[i*c+j*ASSCODE  + 4] = 0.735;break;case'V':x[i*c+j*ASSCODE  + 0] = 1.336;x[i*c+j*ASSCODE  + 1] = -1.684;x[i*c+j*ASSCODE  + 2] = -0.663;x[i*c+j*ASSCODE  + 3] = -0.587;x[i*c+j*ASSCODE  + 4] = -0.499;break;case'W':x[i*c+j*ASSCODE  + 0] = 4.237;x[i*c+j*ASSCODE  + 1] = 0.352;x[i*c+j*ASSCODE  + 2] = -0.225;x[i*c+j*ASSCODE  + 3] = 2.115;x[i*c+j*ASSCODE  + 4] = 0.857;break;case'Y':x[i*c+j*ASSCODE  + 0] = 3.507;x[i*c+j*ASSCODE  + 1] = 0.588;x[i*c+j*ASSCODE  + 2] = 2.324;x[i*c+j*ASSCODE  + 3] = 0.681;x[i*c+j*ASSCODE  + 4] = 0.322;break;case'*':x[i*c+j*ASSCODE  + 0] = -5.358;x[i*c+j*ASSCODE  + 1] = -3.217;x[i*c+j*ASSCODE  + 2] = 0.521;x[i*c+j*ASSCODE  + 3] = 3.050;x[i*c+j*ASSCODE  + 4] = -1.189;break;default:break;}}            if (ASSCODE==20){     switch(pat[i*WIN+j]) {              case'A':x[i*c+j*ASSCODE+0]=1;break;              case'C':x[i*c+j*ASSCODE+1]=1;break;      case'D':x[i*c+j*ASSCODE+2]=1;break;      case'E':x[i*c+j*ASSCODE+3]=1;break;      case'F':x[i*c+j*ASSCODE+4]=1;break;      case'G':x[i*c+j*ASSCODE+5]=1;break;      case'H':x[i*c+j*ASSCODE+6]=1;break;      case'I':x[i*c+j*ASSCODE+7]=1;break;      case'K':x[i*c+j*ASSCODE+8]=1;break;      case'L':x[i*c+j*ASSCODE+9]=1;break;      case'M':x[i*c+j*ASSCODE+10]=1;break;      case'N':x[i*c+j*ASSCODE+11]=1;break;      case'P':x[i*c+j*ASSCODE+12]=1;break;      case'Q':x[i*c+j*ASSCODE+13]=1;break;      case'R':x[i*c+j*ASSCODE+14]=1;break;      case'S':x[i*c+j*ASSCODE+15]=1;break;      case'T':x[i*c+j*ASSCODE+16]=1;break;      case'V':x[i*c+j*ASSCODE+17]=1;break;      case'W':x[i*c+j*ASSCODE+18]=1;break;      case'Y':x[i*c+j*ASSCODE+19]=1;break;  default:break; }            } }    }}void normalize(double *x,int r,int c)//数据标准化{int i,j;double m=0,v=0;double *aver;//均值if ((aver=(double *) calloc(c*1, sizeof(double))) == NULL) printf("aver is wrong");double *covar;//方差if ((covar=(double *) calloc(c*1, sizeof(double))) == NULL) printf("covar is wrong");    for(j=0;j<c;j++)   {      aver[j]=0.0;  for(i=0;i<r;i++)  aver[j]+=x[i*c+j];  aver[j]/=r;   }for(j=0;j<c;j++)  {  covar[j]=0.0;  for(i=0;i<r;i++)         {    x[i*c+j]=x[i*c+j]-aver[j];            covar[j]+=pow(x[i*c+j],2);          }          covar[j]=sqrt(covar[j]/(r-1));  }for(j=0;j<c;j++){for(i=0;i<r;i++)x[i*c+j]=(x[i*c+j]-aver[j])/covar[j];}return;}void normalize2(double **x,int r,int c,double *aver,double *covar)//数据标准化{int i,j;double m=0,v=0;    for(j=0;j<c;j++)   {      aver[j]=0.0;  for(i=0;i<r;i++)  aver[j]+=x[i][j];  aver[j]/=r;   }for(j=0;j<c;j++)  {  covar[j]=0.0;  for(i=0;i<r;i++)         {    x[i][j]=x[i][j]-aver[j];            covar[j]+=pow(x[i][j],2);          }          covar[j]=sqrt(covar[j]/(r-1));  }for(j=0;j<c;j++){for(i=0;i<r;i++)x[i][j]=(x[i][j]-aver[j])/covar[j];}return;}double* cov(double *x,int r,int c,double *cov_x)///??{int i,k;for(int j=0;j<c;j++)   {  for(k=0;k<c;k++)     {    cov_x[j*c+k]=0;    for(i=0;i<r;i++)       cov_x[j*c+k]+=x[i*c+j]*x[i*c+k];    cov_x[j*c+k]/=(r-1);     }   }return 0;}void householder(double** m, int n, double *d, double *e){int l,k,j,i;double scale, hh, h, g, f;for(i=n-1;i>0;i--){l=i-1;h=scale=0;if(l>0){for(k=0;k<l+1;k++){scale+=fabs(m[i][k]);}if(scale==0.0){e[i]=m[i][l];}else{for(k=0;k<l+1;k++){m[i][k]/=scale;//m[i][k]=m[i][k]/scale;h+=m[i][k]*m[i][k];}f=m[i][l];g=(f>=0.0?-sqrt(h):sqrt(h));  //alphae[i]=scale*g;h-=f*g;m[i][l]=f-g;f=0.0;for(j=0;j<l+1;j++){m[j][i]=m[i][j]/h;g=0.0;for(k=0;k<j+1;k++){g+=m[j][k]*m[i][k];}for(k=j+1;k<l+1;k++){g+=m[k][j]*m[i][k];}e[j]=g/h;f+=e[j]*m[i][j];}hh=f/(h+h);for(j=0;j<l+1;j++){f=m[i][j];e[j]=g=e[j]-hh*f;for(k=0;k<j+1;k++){m[j][k]-=( f*e[k]+g*m[i][k] );}}}}else{e[i]=m[i][l];}d[i]=h;}d[0]=0.0;e[0]=0.0;for(i=0;i<n;i++){l=i;if(d[i]!=0.0){for(j=0;j<l;j++){g=0.0;for(k=0;k<l;k++){g+=m[i][k]*m[k][j];}for(k=0;k<l;k++){m[k][j]-=g*m[k][i];}}}d[i]=m[i][i];m[i][i]=1.0;for(j=0;j<l;j++){m[j][i]=m[i][j]=0.0;}}}void ql(double** z, int n,  double* d, double* e){int m,l,iter,i,k;double s,r,p,g,f,dd,c,b;for(i=1;i<n;i++)e[i-1]=e[i];e[n-1]=0.0;for(l=0;l<n;l++){iter=0;do{for(m=l;m<n-1;m++){dd=fabs(d[m])+fabs(d[m+1]);if(fabs(e[m])+dd==dd)  break;}if(m!=l){if(iter++==30) return;g=(d[l+1]-d[l])/(e[l]+e[l]);r=sqrt(g*g+1.0);g=d[m]-d[l]+e[l]/( g + g>0?(r>0?r:-r):(r>0?-r:r) );s=c=1.0;p=0.0;for(i=m-1;i>=l;i--){f=s*e[i];b=c*e[i];e[i+1]=(r=sqrt(f*f+g*g));if(r==0.0){d[i+1]-=p;e[m]=0.0;break;}s=f/r;  c=g/r;  g=d[i+1]-p;r=(d[i]-g)*s+2.0*c*b;  d[i+1]=g+(p=s*r);  g=c*r-b;for(k=0;k<n;k++){f=z[k][i+1];z[k][i+1]=s*z[k][i]+c*f;z[k][i]=c*z[k][i]-s*f;}}if(r==0.0 && i>=l)  continue;d[l]-=p;  e[l]=g;  e[m]=0.0;}}while(m!=l);}}////对特征值特征向量排序void order(double** q, double* val, int c) {for(int n=0;n<c-1;n++){for(int m=n+1;m<c;m++){if(val[n]< val[m] ){swap(val[n], val[m]);           //特征值特征向量排序for(int k=0;k<c;k++)swap(q[k][n],q[k][m]);}}}double sum=0.0;double sum1=0.0;for(int i=0;i<c;i++){  sum+=val[i];}for(int p=0;p<c;p++){sum1+=val[p];double t=sum1/sum;if(t>=0.80&&t<=0.85)printf("主成分占得比例:%.3f  取主成分个数:%d\n",t,p);}}/////////////////////////////////////////////////////////////////void eigen(double *covar,int c,double *val){int i,j;double** Rx=new double*[c]; //Rx<-->covar  二维与一维的转换for(i=0;i<c;i++) Rx[i]=new double[c];for(i=0;i<c;i++)for(j=0;j<c ;j++)  Rx[i][j]=covar[i*c+j];double* e=new double[c]; //临时向量householder(Rx,c,val,e);ql(Rx,c,val,e);delete []e;order(Rx,val,c); for(i=0;i<c;i++)for(j=0;j<c ;j++)  covar[i*c+j]=Rx[i][j];}void whiten(double *x,double *d,int row,int col,double *L){   int i,j;   double *L1;//白化阵   if ((L1=(double *) calloc(row*col, sizeof(double))) == NULL) printf("L1 is wrong");   for(i=0;i<col;i++)  for(j=0;j<row;j++) L1[i*row+j]=x[j*col+i]/sqrt(d[i]);   for(int i=0;i<row;i++)//转置   for(int j=0;j<col;j++)   L[i*col+j]=L1[j*row+i];}void mult(double *a,double *b,int r1,int c1,int c2,double **x){int i,j,k;for(i=0;i<r1;i++){for(j=0;j<c2;j++){x[i][j]=0;for(k=0;k<c1;k++)x[i][j]+=a[i*c1+k]*b[k*c2+j];}}}void mult1(double *a,double **b,int r1,int c1,int c2,double **x){int i,j,k;for(i=0;i<r1;i++){for(j=0;j<c2;j++){x[i][j]=0;for(k=0;k<c1;k++)x[i][j]+=a[i*c1+k]*b[k][j];}}}void mult2(double **a,double **b,int r1,int c1,int c2,double **x){int i,j,k;for(i=0;i<r1;i++){for(j=0;j<c2;j++){x[i][j]=0;for(k=0;k<c1;k++)x[i][j]+=a[i][k]*b[k][j];}}}////////////////////////////////////////////////////////////////////////////////////////void permute1(double** q, double* d, int c){double sum=0.0;double sum1=0.0;for(int n=0;n<c-1;n++){for(int m=n+1;m<c;m++){if(d[n]<d[m] ){swap(d[n],d[m]);for(int k=0;k<c;k++) swap(q[k][n],q[k][m]);}}}}double** orthnormalize(double** w1,int q){double** w=new double*[q];//以下为对W1的正交标准化,求得W2for(int i=0;i<q;i++) w[i]=new double[q];double** w1p=new double*[q];//以下为对W1的正交标准化,求得W2for(int i=0;i<q;i++) w1p[i]=new double[q];for(int i=0;i<q;i++)for(int j=0;j<q;j++)w1p[i][j]=w1[j][i];mult2(w1,w1p,q,q,q,w);double* d=new double[q];double* e=new double[q];householder(w,q,d,e);ql(w,q,d,e);permute1(w,d,q);double** B=new double*[q];for(int i=0;i<q;i++) B[i]=new double[q];for(int i=0;i<q;i++){for(int j=0;j<q;j++){B[i][j]=0;for(int k=0;k<q;k++){B[i][j]+=w[i][k]*w[j][k]/sqrt(d[k]);}}}    double** w2=new double*[q];//W2for(int i=0;i<q;i++) w2[i]=new double[q];mult2(B,w1,q,q,q,w2);for(int i=0;i<q;i++) delete[]w[i];    delete[] w;    w=NULL;for(int i=0;i<q;i++) delete[]w1p[i];    delete[] w1p;    w1p=NULL;for(int i=0;i<q;i++) delete[]B[i];    delete[] B;    B=NULL;delete[]d;delete[]e;return w2;}double** G(double** a, int r, int c){double** ga=new double*[r];for(int i=0;i<r;i++) ga[i]=new double[c];for(int i=0;i<r;i++){for(int j=0;j<c;j++){ga[i][j]=tanh(a[i][j]);}}return ga;}double** GP(double** a, int r, int c){double** gga=new double*[r];for(int i=0;i<r;i++) gga[i]=new double[c];for(int i=0;i<r;i++){for(int j=0;j<c;j++){gga[i][j]=1-tanh(a[i][j])*tanh(a[i][j]);}}return gga;}void setvalue(double** from,double** to, int c){for(int i=0;i<c;i++){for(int j=0;j<c;j++){to[i][j]=from[i][j];}}}double** w_function(double **Z,int r,int q){int i,j,k;double** W1=new double*[q];//随机阵for(i=0;i<q;i++) W1[i]=new double[q];double** W2=new double*[q];//随机阵for(i=0;i<q;i++) W2[i]=new double[q];for(i=0;i<q;i++)for(j=0;j<q;j++)W1[i][j]=(rand()%1000000)/1000000.0;//write_mm("W1.txt",W1,q,q);double** X=new double*[r];for(i=0;i<r;i++) X[i]=new double[q];W2=orthnormalize(W1,q);//write_mm("W2.txt",W2,q,q);setvalue(W2,W1,q);//以下循环求得W3int iter=0;double zelta=0;do{iter++;setvalue(W2,W1,q);//旧的等于新值,迭代mult2(Z,W1,r,q,q,X);//write_mm("X.txt",X,r,q);double** g=G(X,r,q);double** gp=GP(X,r,q);for(j=0;j<q;j++){for(k=0;k<q;k++){W2[j][k]=0;for(i=0;i<r;i++)//均值W2[j][k]+=(Z[i][j]*g[i][k]-W1[j][k]*gp[i][k]);W2[j][k]/=r;}}for(i=0;i<r;i++) delete[]X[i];    delete[] X;    X=NULL;for(i=0;i<r;i++) delete[]g[i];    delete[] g;    g=NULL;for(i=0;i<r;i++) delete[]gp[i];    delete[] gp;    gp=NULL;    orthnormalize(W2,q);setvalue(W2,W1,q);//交换值    double sum1=0;//判断zelta大小    double sum2=0;    for(i=0;i<q;i++)       {    for(j=0;j<q;j++)       {      sum1+=(W2[i][j]-W1[i][j])*(W2[i][j]-W1[i][j]);      sum2+=W1[i][j]*W1[i][j];       }       }    zelta=sqrt(sum1/sum2/q/q);}while( zelta>1e-7 && iter<300 );return W2;}int test(double *coef1,double *coef,int q){int i,count=0;double temp,temp1=0.0;for(i=0;i<q;i++){temp=coef1[i]-coef[i];coef[i]=coef1[i];temp1+=temp*temp;}temp=sqrt(temp1);if(temp>1e-10)return 1;else    return 0;}double coefficient(double **x,double *y,double *aver_x,double *covar_x,double aver_y,double covar_y,double *coef,double b0,int r,int q){int i,j,k;int error;double *train_x1;double *train_y1;if((train_x1=(double *) calloc(r*q,sizeof(double)))==NULL) printf("the train_x1 is wrong");if((train_y1=(double *) calloc(r*1,sizeof(double)))==NULL) printf("the train_y1 is wrong");for(i=0;i<q;i++)for(j=0;j<q;j++)    {  train_x1[i+j*q]=0.0;  for(k=0;k<r;k++)                 train_x1[i*q+j]+=x[k][i]*x[k][j];     }    for(i=0;i<q;i++){for(k=0;k<r;k++)            train_y1[i]+=x[k][i]*y[k]; }double *coef1;coef1=(double *) calloc(q, sizeof(double)); for(i=0;i<q;i++) { coef1[i]=0;     coef[i]=0; } do {    for(i=0;i<q;i++){       double temp=0.0;   for(j=0;j<q;j++)   if(i!=j)  temp+=train_x1[i*q+j]*coef1[j];   temp=train_y1[i]-temp;   coef1[i]=temp/train_x1[i+i*q];}error=test(coef1,coef,q); }while(error); double temp=0.0; for(j=0;j<q;j++)    {coef[j]*=covar_y/covar_x[j];    temp+=coef[j]*aver_x[j];        b0=aver_y-temp;        }return b0;}int result(FILE *outfp,double *test_y,double *predictvalue,int r,double th)//评价参数{int i,j;    double  a,b,c,d,e;double tn=0;double tp=0;double fp=0;double fn=0;for(i=0;i<r;i++){if(test_y[i]==1.0){if(predictvalue[i]<th)fn+=1;elsetp+=1;}if(test_y[i]==0.0){if(predictvalue[i]<th)tn+=1;elsefp+=1;}}a=tp+fn;b=tn+fp;c=tp+fp;d=tn+fn;e=tp+fp+tn+fn;sen=tp/a;spe=tn/b;ppv=tp/c;npv=tn/d;acc=(tp+tn)/e;cc=(tp*tn-fn*fp)/sqrt((tn+fn)*(tn+fp)*(tp+fn)*(tp+fp));if(fabs(sen-spe)<0.1){   fprintf(outfp,"\nth       tn      fp      tp      fn          sen         spe          ppv        npv       acc        cc     \n");   fprintf(outfp,"%-4.3lf    %-4.0lf    %-4.0lf    %-4.0lf    %-4.0lf    %lf    %lf    %lf    %lf   %lf    %lf    \n",th,tn,fp,tp,fn,sen,spe,ppv,npv,acc,cc );}elsereturn 0;   return 0;}int AUC_curve(double *test_y,double *predictvalue,int r){int i,j,k=0;    double  a,b,c,d,e;AUC=0;double *Sen,*Spe;Sen=(double *)calloc(r,sizeof(double));//创建数组Spe=(double *)calloc(r,sizeof(double));double *threshold=new double[r];double *se_add_sp=new double[r];double *accuracy=new double[r];//正确率double *cc=new double[r];//相关系数double tempPrey=0;double tempy=0;for (i=0;i<r-1;i++){for (j=i+1;j<r;j++){if (predictvalue[i]<predictvalue[j]){tempPrey=predictvalue[i];predictvalue[i]=predictvalue[j];predictvalue[j]=tempPrey;tempy=test_y[i];test_y[i]=test_y[j];test_y[j]=tempy;}}}double temptheshold=0;double sen1=0,spe1=0;double sen2=0,spe2=0;for(i=0;i<r;i++) //计算sp  se{if(i!=(r-1)){temptheshold=(predictvalue[i]+predictvalue[i+1])/2.0;threshold[i]=temptheshold;}else{temptheshold=predictvalue[i]-0.5;threshold[i]=temptheshold;}double tn=0;double tp=0;double fp=0;double fn=0;for(j=0;j<r;j++)//计数循环{if(test_y[j]==1.0){if(predictvalue[j]<temptheshold)fn+=1;elsetp+=1;}if(test_y[j]==0.0){if(predictvalue[j]<temptheshold)tn+=1;elsefp+=1;}}a=tp+fn;  //真正 总数b=tn+fp;  //负类 的总个数c=tp+fp;d=tn+fn;e=tp+fp+tn+fn;sen1=tp/a;spe1=tn/b;   //se_add_sp[i]=sen1+spe1;spe1=1-spe1; //最后sp等于1-spsen1=sen1;AUC += (spe1-spe2)*(sen1+sen2)/2.0;  //计算AUCsen2=sen1;Sen[i]=sen1;spe2=spe1;Spe[i]=spe1;accuracy[i]=(tp+tn)/e;//正确率cc[i]=(tp*tn-fn*fp)/sqrt(a*b*c*d);//相关系数}FILE *auc=fopen("out\\all_AUC.txt","a");fprintf(auc,"se\t1-sp\n");for(i=0;i<r;i++)fprintf(auc,"%lf %lf\n",Sen[i],Spe[i]); //打印  sp,sefprintf(auc,"AUC=%lf\n",AUC);fclose(auc);if(W_AUC<AUC){W_AUC=AUC;FILE *auc_fp=fopen("out\\best_AUC.txt","w"); fprintf(auc_fp,"se \t\t1-sp\t\tthreshold\tse_add_sp\taccuracy\tcc\n");double tempac=0.0;for(i=0;i<r;i++){fprintf(auc_fp,"%lf \t%lf \t%lf \t%lf \t%lf \t%lf\n",Sen[i],Spe[i],threshold[i],se_add_sp[i],accuracy[i],cc[i]); //打印sp,seif(accuracy[i]>tempac){tempac=accuracy[i];BestThreshold=threshold[i];}}printf("BestAUC=%lf, BestAC=%lf, BestThreshold= %lf\n",W_AUC,tempac,BestThreshold);fprintf(auc_fp,"AUC=%lf\n",AUC);fclose(auc_fp);}return 0;}int accessfile(){/*检查文件夹是否存在,不存在则创建*/if( (_access( "indata", 0 )) != -1 )  {  printf( "读取数据文件夹 indata 存在\n" );  }else{printf( "数据文件目录 indata 不存在,先创建目录放入数据后在运行\n" ); return -1;}if( (_access( "out", 0 )) != -1 )  {  printf( "输出文件夹 out 存在\n" );  }else{if( _mkdir( "out" ) == 0 ){printf( "Directory 'out' was successfully created\n" );}elseprintf( "Problem creating directory 'out'\n" );}}int main(){if(accessfile()==-1)return -1;/*记录开始时间*/struct  tm *newtime;time_t  long_time,timebegin,timeend;timebegin=time(NULL);time(&long_time);// 等价于 long_time=time(NULL);newtime=localtime( &long_time );printf( "%s", asctime( newtime ));W_AUC=0.0;//对输入文件指针赋值remove("out\\all_AUC.txt");int row_train=2088;//校正集行数    int row_all=2320;//留一交叉验证,校正部分行数int rcount=ASSCODE*WIN; //也是100int column=rcount;   double b0=0;//常数项double C=0;//主链贡献率int i,j,k;double *train_x;  //训练集 样本double *train_y;if((train_x=(double *) calloc(row_train*column,sizeof(double)))==NULL)printf("the train_x is wrong");if((train_y=(double *) calloc(row_train,sizeof(double)))==NULL) printf("the train_y is wrong");double *aver_xx;//独立成分的均值if ((aver_xx=(double *) calloc(column, sizeof(double))) == NULL)printf("aver_xx is wrong");double *covar_xx;//训练样本每一列的方差if ((covar_xx=(double *) calloc(column, sizeof(double))) == NULL) printf("covar_xx is wrong");double *covar;//方差阵if ((covar=(double *) calloc(column*column, sizeof(double))) == NULL) printf("covar is wrong");double *eigval=new double[column];//正交阵的特征值 double *L;//白化阵转置后得到if((L=(double*) calloc(column*M_num,sizeof(double)))==NULL) printf("L is wrong");double *test_x;   //测试集 预测集double *test_y;if((test_x=(double *) calloc(row_all*column,sizeof(double)))==NULL) printf("the test_x is wrong");if((test_y=(double *) calloc(row_all,sizeof(double)))==NULL) printf("the test_y is wrong");//double** Tmain=new double*[row_train];//主成份//for(i=0;i<row_train;i++) Tmain[i]=new double[column];double** Z=new double*[row_train];//白化数据for(i=0;i<row_train;i++) Z[i]=new double[M_num];double** W2=new double*[M_num];//wfor(i=0;i<M_num;i++) W2[i]=new double[M_num];double** Y=new double*[row_train];//独立成分for(i=0;i<row_train;i++) Y[i]=new double[M_num];double *aver_x;//独立成分的均值if ((aver_x=(double *) calloc(M_num*1, sizeof(double))) == NULL) printf("aver_x is wrong");double *covar_x;//独立成分的方差if ((covar_x=(double *) calloc(M_num*1, sizeof(double))) == NULL) printf("covar_x is wrong");double aver_y=0,covar_y=0;double** B=new double*[column];//解混矩阵for(i=0;i<column;i++) B[i]=new double[M_num];double *coef;//系数coef=(double *) calloc(M_num, sizeof(double));double *Coef;//权重系数Coef= (double *) calloc(column, sizeof(double));double** TEST=new double*[row_all];//解混矩阵处理后的测试集for(i=0;i<row_all;i++)TEST[i]=new double[M_num]; double *predict_value;predict_value=(double *) calloc(row_all, sizeof(double)); FILE *b_fp; b_fp=fopen("out\\b0.txt","w"); fprintf(b_fp,"窗口大小为%d\n",WIN); FILE *outfp; outfp=fopen("out\\parameter.txt","w"); fprintf(outfp,"窗口大小为%d\n",WIN);for(int it=0;it<FOLD;it++)/////////////////////交叉验证  FOLD ////////////////////////////////////////{        printf("第%d个样本\n",it+1);file_pre_train(it );//分类训练集和测试集FILE *train_fp=fopen("out\\train.txt","r");if(train_fp==NULL)printf("fp=null\n" );code(train_fp,train_x,train_y,row_train,column);//编码变换normalize(train_x,row_train,column);//原始数据标准化:(x-均值)/标准差//write_m("train_x.txt",train_x,row_train,c);for(i=0;i<column;i++)  //求每一列的标准差{for(j=0;j<row_train;j++)aver_xx[i]+=train_x[j*column+i];aver_xx[i]/=row_train;for(j=0;j<row_train;j++)covar_xx[i]+=pow((train_x[j*column+i]-aver_xx[i]),2);covar_xx[i]=sqrt(covar_xx[i]/(row_train-1));}for(i=0;i<row_train;i++)//IC50值标准化aver_y+=train_y[i];aver_y/=row_train;for(i=0;i<row_train;i++)covar_y+=pow((train_y[i]-aver_y),2);covar_y=sqrt(covar_y/(row_train-1));for(i=0;i<row_train;i++)train_y[i]=(train_y[i]-aver_y)/covar_y;//printf("%lf%lf\n",aver_y,covar_y);//write_m("train_y.txt",train_y,row_train,1);cov(train_x,row_train,column,covar);//返回协方差阵covar//write_m("cov.txt",covar,column,column);eigen(covar,column,eigval);//covar:特征向量column:列数,eigval特征值//write_m("eigen.txt",covar,column,column);//mult(train_x,covar,row_train,column,M_num,Tmain);//主成份T//write_mm("zhuchenfen.txt",T,row_train,c);whiten(covar,eigval,column,M_num,L);//矩阵L[column,M_num]作为返回值//write_m("L3.txt",L,column,M_num);mult(train_x,L,row_train,column,M_num,Z);//白化数据Z[row_train,M_num]write_mm("白化数据Z.txt",Z,row_train,M_num);W2=w_function(Z,row_train,M_num);//单位正交阵W//write_mm("W2.txt",W2,q,q);mult2(Z,W2,row_train,M_num,M_num,Y);//计算独立成分矩阵//write_mm("独立成分Y.txt",Y,row_train,c);mult1(L,W2,column,M_num,M_num,B);//计算解混矩阵//write_mm("解混矩阵B.txt",B,c,q);normalize2(Y,row_train,M_num ,aver_x,covar_x);//独立成分矩阵做标准化/*write_mm("Y.txt",Y,row_train,c);write_m("aver_x.txt",aver_x,q,1);write_m("covar_x.txt",covar_x,q,1);*/b0=coefficient(Y,train_y,aver_x,covar_x,aver_y,covar_y,coef,b0,row_train,M_num);printf("b0: %lf\n",b0);//write_m("系数coef值.txt",coef,1,q);fprintf(b_fp,"%lf\n",b0);//记录 b0//////////////---预测---//////////////////////////////////////////////////////////////file_pre(); //将裂解和非裂解各去1160,合并为一个预测文件,前面的预测是交叉验证预测FILE *test_fp=fopen("out\\PredictData.txt","r");      //全部序列 1/0int row=0;char temparray[35];while(!feof(test_fp)){row++;fgets(temparray,35,test_fp);}printf("predict row=%d\n",row);rewind(test_fp);code(test_fp,test_x,test_y,row_all,column);normalize(test_x,row_all,column);/*mult1(test_x,B,row_all,c,q,TEST);write_mm("TEST_x.txt",TEST,row_all,q);*/for(i=0;i<row_all;i++)    {   for(j=0;j<M_num;j++)   {  TEST[i][j]=0;  for(k=0;k<column;k++)  TEST[i][j]+=test_x[i*column+k]*B[k][j];//解混矩阵B   }    }//write_mm("TEST_x.txt",TEST,row_all,q);   for(i=0;i<row_all;i++)   predict_value[i]=0.0;   for(i=0;i<row_all;i++)//预测y=a*x+b    {for(j=0;j<M_num;j++)predict_value[i]+=TEST[i][j]*coef[j];predict_value[i]+=b0;     }   write_m("out\\predict_value.txt",predict_value,row_all,1);//记录预测值       double sum;   for(i=0;i<column; i++)       {        sum=0.0 ;        for(j=0;j<M_num;j++ )        sum+=B[i][j]*coef[j];        Coef[i]=sum;       }   for(i=0;i<column;i++)   Coef[i]/=covar_xx[i];   write_m("out\\权重系数Coef.txt",Coef,column,1);   for(i=0;i<column;i++)       C-=Coef[i]*aver_xx[i];   //printf("主链贡献值const=%lf\n",C);   fprintf(outfp,"\n第%d个样本\n",it+1);   fprintf(outfp,"主成分个数为%d\n",M_num);   double th;   for(th=0.4;th<0.7;th+=0.005)   result(outfp,test_y,predict_value,row_all,th);       //outfp parameter.txt 文件                                                                //test——y 末尾0/1串                                                                //predict_value TEST(row_all*row_all)全部测试集*coef+b0                                                                //row_all总数2320                                                                //th 0.4:0.005:0.7   AUC_curve(test_y,predict_value,row_all);    //打印AUC   }/*记录结束时间,并打印到屏幕*/timeend=time(NULL);/*显示当前时间字符串*/time( &long_time );newtime = localtime( &long_time );printf("\n%s",asctime( newtime ) );fprintf(outfp,"%s\n",asctime( newtime ));int mm=(int)difftime(timeend,timebegin);printf(" The total time you take is: %d:%d:%d\n",mm/3600,mm%3600/60,mm%3600%60);fprintf(outfp," The total time you take is: %d:%d:%d\n",mm/3600,mm%3600/60,mm%3600%60);return 0;}

原创粉丝点击