SAS CE - CE3 Var Redu (Variable Reduction)

来源:互联网 发布:windows xp 32位 64位 编辑:程序博客网 时间:2024/06/16 12:51
/**********************************************************************************************************************
 *** 3. Variable reduction and ranking
 ***
 *** Macros defined in this section:
 ***   CE_Var_Redu        master macro: runs every enabled reduction method and combines the results
 ***   Univ_Reg           univariate (logistic or linear) regression for one variable
 ***   Single_Group_Prin  principal components analysis for one group of variables
 ***   Single_Group_Clus  variable clustering for one group of variables
 ***   Single_Group_Reg   forward linear regression for one group of variables
 ***   Single_Group_Log   forward logistic regression for one group of variables
 ***   Info_Val_Var       information value, gini and ks for one variable
 ***   Maxx_Corr          flags variables that are too highly correlated with a better-ranked variable
 ***
 *** All macros read settings from macro variables that are not defined in this section and must
 *** exist before the macros are called: samplesize keep_list dep_var prefix weight redu_weight
 *** binary_dv univ_reg maxpuni correlation corrcut principal nprin minprin cluster maxc maxratio
 *** regression alphareg logistic alphalog information decile infvcut sources ind_correlation
 *** maxcorr ind_dv_corr max_dv_corr path_output.
 **********************************************************************************************************************/

/*---------------------------------------------------------------------------------------------------------------------
  CE_Var_Redu: master variable reduction macro.

  Parameters:
    insdn=  input data set (default out.CE2_Recoded). Must contain mod_val_test, the dependent
            variable and the candidate variables.

  Steps:
    1. Builds WORK.WORKFILE from the rows of insdn with mod_val_test ^= 3, randomly thinned when
       the row count exceeds samplesize.
    2. Lists the candidate variables (everything in WORKFILE except keep_list) in random order.
    3. Runs each method whose switch is Y and builds one result data set per method.
    4. Merges all results with basic statistics into out.CE3_Var_Redu and counts, per variable,
       how many methods selected it (num_sources).
    5. Optionally flags variables through Maxx_Corr and through the correlations stored in
       out.CE2_corr.
    6. Writes an Excel report and a text file holding a %let varlist_redu statement.

  Note: Univ_Reg and Info_Val_Var are run on WORKFILE, while the Single_Group_* macros are run
  on insdn itself.
---------------------------------------------------------------------------------------------------------------------*/
%macro CE_Var_Redu(insdn=out.CE2_Recoded);

  %** Create working file;
  proc sql noprint;
    select count(*) into : nobs from &insdn where mod_val_test^=3;
  quit;
  %put observation count is &nobs;

  data workfile;
    set &insdn (where=(mod_val_test^=3));
    /* Random down-sampling, only generated when there are more rows than samplesize */
    %if &nobs > &samplesize %then %do;
      if uniform(131071)<=1.0*&samplesize/&nobs;
    %end;
  run;

  %** Get independent variables;
  proc contents data=workfile (drop=&keep_list)
    out=vars (keep=name label) noprint;
  run;
  proc sql noprint;
    select count(*) into : varnum from vars;
  quit;
  %put Number of variables is &varnum;

  %** Reorder variables to avoid bias when working in groups;
  data vars;
    set vars;
    rannum = ranuni(274923);
  run;
  proc sort data = vars;
    by rannum;
  run;

  %** Using Univariate Regression method **;
  %if %upcase(&univ_reg)=Y %then %do;
    %do i=1 %to &varnum;
      /* Pick the i-th variable name */
      data _null_;
        set vars;
        if _n_=&i;
        call symputx('curvar',name);
      run;
      %Univ_Reg(workfile,&curvar);
      /* First result creates the output file, later results are appended */
      %if &i = 1 %then %do;
        data outsdnuniv;
          length variable $32.;
          set univ_tmp;
        run;
      %end;
      %else %do;
        data outsdnuniv;
          set outsdnuniv univ_tmp;
        run;
      %end;
    %end;
    proc datasets library=work nolist;
      delete univ_tmp;
    quit;
    run;
    %* Finalize file;
    data outsdnuniv;
      set outsdnuniv;
      if PValue <= &maxpuni then univ_flag = 1;
      else univ_flag = 0;
    run;
    proc sort data = outsdnuniv;
      by variable;
    run;
  %end;

  %** Using correlation method **;
  %if %upcase(&correlation)=Y %then %do;
    proc corr data=workfile noprint outp=outsdncorr;
      var &dep_var;
      with &PREFIX.:;
      %if %upcase(&redu_weight) = Y %then %do; weight &weight; %end;
    run;
    %* Finalize file;
    data outsdncorr (drop=_type_);
      length variable $32.;
      set outsdncorr (where=(_type_='CORR') rename=(_name_=variable &dep_var=corr));
      if abs(corr) >= &corrcut then corr_flag=1;
      else corr_flag=0;
    run;
    proc sort data= outsdncorr;
      by variable;
    run;
  %end;

  %** Using factor/principal analysis method **;
  %if %upcase(&principal)=Y %then %do;
    /* Cannot extract more factors than there are variables */
    %if &varnum <= &nprin %then %do;
      %let nprin = %eval(&varnum-1);
    %end;
    %** Split variables into manageable groups;
    %let group=%sysfunc(max(%eval(&varnum/(&nprin*5)),1));
    data tmp;
      set vars;
      rank=ceil(_n_/(&varnum/&group));
    run;
    %** Process variables by group;
    %do i = 1 %to &group;
      proc sql noprint;
        select name into : regvlist separated by ' '
        from tmp where rank = &i;
      quit;
      %single_group_prin(&insdn,prin_tmp,&regvlist);
      %if &i = 1 %then %do;
        data outsdnprin;
          length variable $32.;
          set prin_tmp;
        run;
      %end;
      %else %do;
        data outsdnprin;
          set outsdnprin prin_tmp;
        run;
      %end;
    %end;
    proc datasets library=work nolist;
      delete prin_tmp tmp;
    quit;
    run;
    %* Finalize file;
    /* With several groups, rerun the analysis once on the survivors of all groups */
    %if &group > 1 %then %do;
      proc sql noprint;
        select variable into : regvlist separated by ' '
        from outsdnprin where Factor >= &minprin;
      quit;
      %single_group_prin(&insdn,outsdnprin,&regvlist);
    %end;
    data outsdnprin;
      set outsdnprin;
      if Factor >= &minprin then prin_flag=1;
      else prin_flag=0;
    run;
    proc sort data = outsdnprin;
      by variable;
    run;
  %end;

  %** Using Clustering method **;
  %if %upcase(&cluster)=Y %then %do;
    /* Cannot ask for more clusters than there are variables */
    %if &varnum <= &maxc %then %do;
      %let maxc = %eval(&varnum-1);
    %end;
    %** Split variables into manageable groups;
    %let group=%sysfunc(max(%eval(&varnum/(&maxc*5)),1));
    data tmp;
      set vars;
      rank=ceil(_n_/(&varnum/&group));
    run;
    %** Process variables by group;
    %do i = 1 %to &group;
      proc sql noprint;
        select name into : regvlist separated by ' '
        from tmp where rank = &i;
      quit;
      %single_group_clus(&insdn,clus_tmp,&regvlist);
      %if &i = 1 %then %do;
        data outsdnclus;
          length variable $32.;
          set clus_tmp;
        run;
      %end;
      %else %do;
        data outsdnclus;
          set outsdnclus clus_tmp;
        run;
      %end;
    %end;
    proc datasets library=work nolist;
      delete clus_tmp tmp;
    quit;
    run;
    %* Finalize file;
    /* With several groups, rerun the clustering once on the survivors of all groups */
    %if &group > 1 %then %do;
      proc sql noprint;
        select variable into : regvlist separated by ' '
        from outsdnclus where RSquareRatio <= &maxratio;
      quit;
      %single_group_clus(&insdn,outsdnclus,&regvlist);
    %end;
    data outsdnclus;
      set outsdnclus;
      if RSquareRatio <= &maxratio then clus_flag=1;
      else clus_flag=0;
    run;
    proc sort data = outsdnclus;
      by variable;
    run;
  %end;

  %** Using regression method **;
  %if %upcase(&regression)=Y %then %do;
    %** Split variables into manageable groups;
    %let group=%sysfunc(ceil(&varnum/100));
    data tmp;
      set vars;
      rank=ceil(_n_/(&varnum/&group));
    run;
    %** Process variables by group;
    %do i = 1 %to &group;
      proc sql noprint;
        select name into : regvlist separated by ' '
        from tmp where rank = &i;
      quit;
      %single_group_reg(&insdn,reg_tmp,&regvlist);
      %if &i = 1 %then %do;
        data outsdnreg;
          length variable $32.;
          set reg_tmp;
        run;
      %end;
      %else %do;
        data outsdnreg;
          set outsdnreg reg_tmp;
        run;
      %end;
    %end;
    proc datasets library=work nolist;
      delete reg_tmp tmp;
    quit;
    run;
    %* Finalize file;
    /* With several groups, rerun the selection once on the variables entered in any group */
    %if &group > 1 %then %do;
      proc sql noprint;
        select variable into : regvlist separated by ' '
        from outsdnreg;
      quit;
      %single_group_reg(&insdn,outsdnreg,&regvlist);
    %end;
    proc sort data = outsdnreg;
      by variable;
    run;
  %end;

  %** Using logistic method **;
  %if %upcase(&logistic)=Y and %upcase(&Binary_dv)=Y %then %do;
    %** Split variables into manageable groups;
    %let group=%sysfunc(ceil(&varnum/100));
    data tmp;
      set vars;
      rank=ceil(_n_/(&varnum/&group));
    run;
    %** Process variables by group;
    %do i = 1 %to &group;
      proc sql noprint;
        select name into : regvlist separated by ' '
        from tmp where rank = &i;
      quit;
      %single_group_log(&insdn,log_tmp,&regvlist);
      %if &i = 1 %then %do;
        data outsdnlog;
          length variable $32.;
          set log_tmp;
        run;
      %end;
      %else %do;
        data outsdnlog;
          set outsdnlog log_tmp;
        run;
      %end;
    %end;
    proc datasets library=work nolist;
      delete log_tmp tmp;
    quit;
    run;
    %* Finalize file;
    /* With several groups, rerun the selection once on the variables entered in any group */
    %if &group > 1 %then %do;
      proc sql noprint;
        select variable into : regvlist separated by ' '
        from outsdnlog;
      quit;
      %single_group_log(&insdn,outsdnlog,&regvlist);
    %end;
    proc sort data = outsdnlog;
      by variable;
    run;
  %end;

  %** Using information value method **;
  %if %upcase(&information)=Y %then %do;
    %do i=1 %to &varnum;
      /* Pick the i-th variable name */
      data _null_;
        set vars;
        if _n_=&i;
        call symputx('curvar',name);
      run;
      %Info_Val_Var(workfile,&curvar,&varnum);
      %if &i = 1 %then %do;
        data outsdninfv;
          length variable $32.;
          set infv_tmp;
        run;
      %end;
      %else %do;
        data outsdninfv;
          set outsdninfv infv_tmp;
        run;
      %end;
    %end;
    proc datasets library=work nolist;
      delete infv_tmp;
    quit;
    run;
    %* Finalize file;
    data outsdninfv;
      set outsdninfv;
      if infv >= &infvcut then infv_flag = 1;
      else infv_flag = 0;
    run;
    proc sort data = outsdninfv;
      by variable;
    run;
  %end;

  %* Get basic metrics for variables;
  /* Reads the macro input data set (was hard-coded to out.CE2_Recoded, the default of insdn) */
  proc means data=&insdn noprint ;
    var &prefix: ;
    output out=cnt n= ;
    output out=min min= ;
    output out=max max= ;
    output out=mean mean= ;
  run;
  /* Stack the four one-row statistic files and label each row */
  data stats;
    set cnt (in=a) min (in=b) max (in=c) mean (in=d);
    length var $8;
    if a then var = "Count";
    else if b then var = "Minimum";
    else if c then var = "Maximum";
    else if d then var = "Mean";
    drop _freq_ _type_;
  run;
  /* One row per variable, one column per statistic */
  proc transpose data=stats out=stats name=Variable label=Label;
    id var;
  run;
  proc sort data=stats;
    by variable;
  run;
  data stats;
    length variable $32.;
    set stats;
  run;

  %*combine results from all methods together;
  data out.CE3_Var_Redu;
    merge stats
      %if %upcase(&univ_reg)=Y %then %do; outsdnuniv(in=tuniv) %end;
      %if %upcase(&correlation)=Y %then %do; outsdncorr(in=tcorr) %end;
      %if %upcase(&principal)=Y %then %do; outsdnprin(in=tprin) %end;
      %if %upcase(&cluster)=Y %then %do; outsdnclus(in=tclus) %end;
      %if %upcase(&logistic)=Y and %upcase(&Binary_dv)=Y %then %do; outsdnlog(in=tlog) %end;
      %if %upcase(&regression)=Y %then %do; outsdnreg(in=treg) %end;
      %if %upcase(&information)=Y %then %do; outsdninfv(in=tinfv) %end;
    ;
    by variable;
    /* One 0/1 source indicator per enabled method */
    %if %upcase(&univ_reg)=Y %then %do;
      if tuniv and univ_flag=1 then univsource=1; else univsource=0;
      drop univ_flag;
    %end;
    %if %upcase(&correlation)=Y %then %do;
      if tcorr and corr_flag=1 then corrsource=1; else corrsource=0;
      drop corr_flag;
    %end;
    %if %upcase(&principal)=Y %then %do;
      if tprin and prin_flag=1 then prinsource=1; else prinsource=0;
      drop prin_flag;
    %end;
    %if %upcase(&cluster)=Y %then %do;
      if tclus and clus_flag=1 then clussource=1; else clussource=0;
      drop clus_flag;
    %end;
    /* Logistic and regression results only hold entered variables, so presence is enough */
    %if %upcase(&logistic)=Y and %upcase(&Binary_dv)=Y %then %do;
      if tlog then logsource=1; else logsource=0;
    %end;
    %if %upcase(&regression)=Y %then %do;
      if treg then regsource=1; else regsource=0;
    %end;
    %if %upcase(&information)=Y %then %do;
      if tinfv and infv_flag=1 then infvsource=1; else infvsource=0;
      drop infv_flag;
    %end;
    num_sources=sum(of
      %if %upcase(&univ_reg)=Y %then %do; univsource %end;
      %if %upcase(&correlation)=Y %then %do; corrsource %end;
      %if %upcase(&principal)=Y %then %do; prinsource %end;
      %if %upcase(&cluster)=Y %then %do; clussource %end;
      %if %upcase(&logistic)=Y and %upcase(&Binary_dv)=Y %then %do; logsource %end;
      %if %upcase(&regression)=Y %then %do; regsource %end;
      %if %upcase(&information)=Y %then %do; infvsource %end;
    );
  run;
  proc sort data=out.CE3_Var_Redu;
    by descending num_sources
      %if %upcase(&information)=Y %then %do; descending infv %end;
    ;
  run;

  %** Exclude highly correlated variables **;
  %if %upcase(&ind_correlation)=Y %then %do;
    %Maxx_Corr(workfile);
  %end;

  %** Exclude variables highly correlated to dependent variable **;
  %if %upcase(&ind_dv_corr)=Y %then %do;
    proc sql;
      create table tmp as
      select a.*,
             b.correlation as dv_corr,
             case when b.correlation > &max_dv_corr then 'Y' end as drop_dv_corr
      from out.CE3_Var_Redu a
      left join out.CE2_corr b
        on a.variable = b.variable;
    quit;
    data out.CE3_Var_Redu;
      set tmp;
    run;
    proc datasets library=work nolist;
      delete tmp;
    quit;
    run;
  %end;

  %** Create report in Excel;
  ods listing close;
  ods Tagsets.ExcelxP body="&Path_output.CE3_Var_Redu Results.xls" style=sasweb;
  ods tagsets.excelxp options(sheet_name="Variables");
  proc print data=out.CE3_Var_Redu (drop=
      %if %upcase(&logistic)=Y and %upcase(&Binary_dv)=Y %then %do; LogStep %end;
      %if %upcase(&regression)=Y %then %do; RegStep %end;
      %if %upcase(&cluster)=Y %then %do; Cluster %end;
    ) noobs;
  run;
  ods Tagsets.ExcelxP close;
  ods listing;

  %*Output list of top variables;
  data selected;
    set out.CE3_Var_Redu;
    %if %upcase(&ind_correlation)=Y %then %do; if missing(drop_corr); %end;
    %if %upcase(&ind_dv_corr)=Y %then %do; if missing(drop_dv_corr); %end;
    if num_sources>=&sources
      %if %upcase(&logistic)=Y and %upcase(&Binary_dv)=Y %then %do; or logsource=1 %end;
      %if %upcase(&regression)=Y %then %do; or regsource=1 %end;
    ;
  run;

  %let rck = 0;
  /* Write the selected names, seven per line, as a %let varlist_redu statement */
  data _null_;
    set selected end=eof;
    m = mod(_N_,7);
    length v $256.;
    retain v ;
    if m = 1 then v = variable;
    else v = strip(v) || " " || strip(variable);
    FILE  "&Path_output.CE3_Varlist_redu.txt" lrecl=256;
    if _N_=1 then PUT '%let varlist_redu =';
    if m = 0 or eof then PUT v '0d'x;
    if eof then PUT ';';
    if eof then call symputx("rck" ,_N_);
  run;
  %put number of selected variables = &rck;

  /* Remove the work files created by this macro */
  proc datasets nolist;
    delete stats cnt min max mean selected workfile vars
      %if %upcase(&univ_reg)=Y %then %do; outsdnuniv %end;
      %if %upcase(&correlation)=Y %then %do; outsdncorr %end;
      %if %upcase(&principal)=Y %then %do; outsdnprin %end;
      %if %upcase(&cluster)=Y %then %do; outsdnclus %end;
      %if %upcase(&logistic)=Y and %upcase(&Binary_dv)=Y %then %do; outsdnlog %end;
      %if %upcase(&regression)=Y %then %do; outsdnreg %end;
      %if %upcase(&information)=Y %then %do; outsdninfv %end;
    ;
  quit;
  run;
%mend;

/*---------------------------------------------------------------------------------------------------------------------
  Univ_Reg: using univariate regression to do variable reduction.

  Parameters:
    insdn  input data set
    var    name of the single independent variable to test

  Output: WORK.UNIV_TMP with one row holding variable, sign, PValue and CC_RSQ.
  A logistic model is fitted when binary_dv is Y, otherwise a one-step forward linear regression.
---------------------------------------------------------------------------------------------------------------------*/
%macro Univ_Reg(insdn,var);
  /* Start from an empty result so a stale file is never picked up */
  data univ_tmp;
    set _NULL_;
  run;
  %PUT ***Univ_Reg STEP, CURRENT VARIABLE: &var***;

  %if %upcase(&Binary_dv) = Y %then %do;
    %* Run univariate logistic regression;
    ods listing close;
    ods output parameterestimates=parm association=fitstat1;
    proc logistic data=&insdn desc namelen=32;
      model &dep_var=&var;
      %if %upcase(&redu_weight) = Y %then %do; weight &weight; %end;
    run;
    ods listing;
    /* Take the first value of the association table, or missing when no table was produced */
    %if %sysfunc(exist(fitstat1)) %then %do;
      data fitstat1 (keep=CC_RSQ);
        set fitstat1(keep=cvalue1 obs=1);
        CC_RSQ = input(cvalue1,best8.);
      run;
    %end;
    %else %do;
      data fitstat1;
        CC_RSQ = .;
        output;
      run;
    %end;
    /* Second row of the estimates is the variable itself (first row is the intercept) */
    data parm (keep=ProbChiSq sign rename=(ProbChiSq=PValue));
      length sign $6. ;
      set parm (firstobs=2 obs=2 keep=variable Estimate ProbChiSq);
      if Estimate>0 then sign='(+)';
      else sign='(-)';
    run;
  %end;
  %else %do;
    %* Run univariate simple regression;
    ods listing close;
    ods output SelParmEst=parm SelectionSummary=fitstat1;
    proc reg data=&insdn;
      model &dep_var=&var /selection=forward MAXSTEP=1 slentry=0.999;
      %if %upcase(&redu_weight) = Y %then %do; weight &weight; %end;
    run;
    ods listing;
    %if %sysfunc(exist(fitstat1)) %then %do;
      data fitstat1 (rename=(ModelRsquare=CC_RSQ));
        set fitstat1(keep=ModelRsquare obs=1);
      run;
    %end;
    %else %do;
      data fitstat1;
        CC_RSQ = .;
        output;
      run;
    %end;
    data parm (keep=ProbF sign rename=(ProbF=PValue));
      length sign $6. ;
      set parm (firstobs=2 obs=2 keep=variable Estimate ProbF);
      if Estimate>0 then sign='(+)';
      else sign='(-)';
    run;
  %end;

  %* Combine;
  data univ_tmp;
    length variable $32.;
    merge parm fitstat1;
    variable="&var";
  run;

  %* Clean up;
  proc datasets library=work nolist;
    delete fitstat1 parm;
  quit;
  run;
%mend;

/*---------------------------------------------------------------------------------------------------------------------
  Single_Group_Prin: using principal components to do variable reduction.

  Parameters:
    insdn   input data set
    outsdn  output data set: one row per variable with its largest absolute factor correlation
    vlist   space separated list of variables to analyse
---------------------------------------------------------------------------------------------------------------------*/
%macro Single_Group_Prin(insdn,outsdn,vlist);
  %local j;
  data &outsdn;
    set _NULL_;
  run;

  %* Run factor / principal components analysis;
  proc factor data=&insdn out=tmpdataa2 method=prin priors=one nfact=&nprin noprint;
    var &vlist;
    %if %upcase(&redu_weight) = Y %then %do; weight &weight; %end;
  run;

  %* Get correlations;
  proc corr data=tmpdataa2 out=tmpdataa3 noprint;
    var Factor: ;
    with &vlist;
    %if %upcase(&redu_weight) = Y %then %do; weight &weight; %end;
  run;

  %* Rearrange dataset;
  /* One output row per variable and factor, holding the absolute correlation */
  data tmpdataa4 (drop=_type_ _name_);
    set tmpdataa3 (where=(_type_='CORR'));
    length variable $32;
    variable=_name_;
    %do j = 1 %to &nprin;
      factor=abs(factor&j);
      output;
    %end;
  run;

  %* Get best correlation for each variable;
  proc sort data=tmpdataa4 (keep=variable factor);
    by variable descending factor;
  run;
  data &outsdn;
    set tmpdataa4;
    by variable;
    if first.variable;
  run;

  proc datasets library=work nolist;
    delete tmpdataa: ;
  quit;
  run;
%mend;

/*---------------------------------------------------------------------------------------------------------------------
  Single_Group_Clus: using clustering to do variable reduction.

  Parameters:
    insdn   input data set
    outsdn  output data set with variable, cluster and RSquareRatio
    vlist   space separated list of variables to cluster
---------------------------------------------------------------------------------------------------------------------*/
%macro Single_Group_Clus(insdn,outsdn,vlist);
  data &outsdn;
    set _NULL_;
  run;

  %* Run cluster procedure;
  ods listing close;
  ods output rsquare=&outsdn;
  proc varclus data=&insdn (keep=&vlist &weight) minc=&maxc maxc=&maxc short;
    var &vlist;
    %if %upcase(&redu_weight) = Y %then %do; weight &weight; %end;
  run;
  ods listing;

  %* Clean up file;
  /* Carry the cluster label down onto rows where it is blank */
  data &outsdn (keep=variable cluster RSquareRatio);
    length variable $32.;
    set &outsdn;
    retain clustertemp;
    if cluster ne ' ' then clustertemp=cluster;
    else cluster=clustertemp;
  run;
%mend;

/*---------------------------------------------------------------------------------------------------------------------
  Single_Group_Reg: using linear regression to do variable reduction.

  Parameters:
    insdn   input data set
    outsdn  output data set with variable, RegStep and RegPValue for each variable entered
    vlist   space separated list of candidate variables
---------------------------------------------------------------------------------------------------------------------*/
%macro Single_Group_Reg(insdn,outsdn,vlist);
  data &outsdn;
    set _NULL_;
  run;

  ods listing close;
  ods output SelectionSummary=&outsdn;
  proc reg data=&insdn;
    model &dep_var = &vlist /selection=forward slentry=&alphareg;
    %if %upcase(&redu_weight) = Y %then %do; weight &weight; %end;
  run;
  ods listing;

  data &outsdn (keep=variable RegStep RegPValue);
    length variable $32.;
    set &outsdn (rename=(VarEntered = variable step=RegStep ProbF = RegPValue));
  run;
%mend;

/*---------------------------------------------------------------------------------------------------------------------
  Single_Group_Log: using logistic regression to do variable reduction.

  Parameters:
    insdn   input data set
    outsdn  output data set with variable, LogStep and LogPValue for each effect entered
    vlist   space separated list of candidate variables
---------------------------------------------------------------------------------------------------------------------*/
%macro Single_Group_Log(insdn,outsdn,vlist);
  data &outsdn;
    set _NULL_;
  run;

  ods listing close;
  ods output ModelBuildingSummary=&outsdn;
  proc logistic data=&insdn desc namelen=32;
    model &dep_var = &vlist /selection=forward slentry=&alphalog;
    %if %upcase(&redu_weight) = Y %then %do; weight &weight; %end;
  run;
  ods listing;

  data &outsdn (keep=variable LogStep LogPValue);
    length variable $32.;
    set &outsdn (rename=(EffectEntered = variable step=LogStep ProbChiSq = LogPValue));
  run;
%mend;

/*---------------------------------------------------------------------------------------------------------------------
  Info_Val_Var: using information value to do variable reduction.

  Parameters:
    insdn     input data set
    var       name of the variable to evaluate
    set_size  divisor used in the contribution of bins where badratio or goodratio is zero
              (the master macro passes the number of candidate variables)

  Output: WORK.INFV_TMP with one row holding variable, infv, gini and ks.
  Side effect: deletes every WORK data set whose name starts with tmp.
---------------------------------------------------------------------------------------------------------------------*/
%macro Info_Val_Var(insdn,var,set_size);
  data infv_tmp;
    set _NULL_;
  run;

  %* Check number of unique values;
  proc sql noprint;
    select count(distinct(&var)) into : unq from &insdn;
  quit;

  %if %eval(&unq>&decile) %then %do;
    %if %upcase(&redu_weight) = Y %then %do;
      %* Create bins if there is weighting;
      proc sql noprint;
        select sum(&weight) into : cumwgt from &insdn;
      quit;
      proc sort data=&insdn (keep=&dep_var &var &weight) out=tmp;
        by &var;
      run;
      /* Bin on the running sum of weights */
      data tmp (drop=rank);
        set tmp;
        retain rank;
        rank + &weight;
        bin = (floor(rank*&decile/(&cumwgt+1)));
      run;
      %* Summarize data to bins;
      proc summary data=tmp;
        weight &weight;
        var &var &dep_var;
        class bin;
        output out=tmp2 (drop=_freq_) sumwgt=cnt mean(&dep_var)=mean_dv max(&dep_var)=max_dv
                    mean(&var)=mean_var min(&var)=min_var max(&var)=max_var /noinherit;
      run;
    %end;
    %else %do;
      %* Create bins if there is no weighting;
      proc rank data=&insdn (keep=&dep_var &var) out=tmp groups=&decile;
        var &var;
        ranks bin;
      run;
      %* Summarize data to bins;
      proc summary data=tmp;
        var &var &dep_var;
        class bin;
        output out=tmp2 (rename=(_freq_=cnt)) mean(&dep_var)=mean_dv max(&dep_var)=max_dv
                    mean(&var)=mean_var min(&var)=min_var max(&var)=max_var /noinherit;
      run;
    %end;
  %end;
  %else %do;
    %* Summarize to &var value when less than &decile;
    %if %upcase(&redu_weight) = Y %then %do;
      proc summary data=&insdn;
        weight &weight;
        var &dep_var;
        class &var;
        output out=tmp2 (drop=_freq_ rename=(&var=bin)) sumwgt=cnt mean(&dep_var)=mean_dv max(&dep_var)=max_dv /noinherit;
      run;
    %end;
    %else %do;
      proc summary data=&insdn;
        var &dep_var;
        class &var;
        output out=tmp2 (rename=(_freq_=cnt &var=bin)) mean(&dep_var)=mean_dv max(&dep_var)=max_dv /noinherit;
      run;
    %end;
    /* Each distinct value is its own bin, so the bin value stands in for mean, min and max */
    data tmp2;
      set tmp2;
      mean_var = bin;
      min_var = bin;
      max_var = bin;
    run;
  %end;

  %* Create global metrics;
  /* _type_ = 0 is the overall row of the summary, _type_ = 1 the per-bin rows */
  proc sql noprint;
    select mean_dv into : norm from tmp2 where _type_ = 0;
    select cnt into : ntotal from tmp2 where _type_ = 0;
    select max(max_dv) into : maxdv from tmp2 where _type_ = 0;
    select sum(cnt*mean_dv) into: totalresp from tmp2 where _type_ = 1;
  quit;

  %* Calculate information value, ks and gini statistics;
  data tmp3;
    set tmp2 (where=(_type_=1));
    lift_index = (mean_dv/&norm)*100;
    retain cumresp cumtotal;
    if _n_ = 1 then do;
      cumresp = 0;
      cumtotal = 0;
    end;
    cumresp = cumresp + (mean_dv*cnt);
    cumpct_resp = cumresp/&totalresp;
    cumtotal = cumtotal + cnt;
    cumpct_freq = cumtotal/&ntotal;
    cumavg = cumresp/cumtotal;
    cumindex = (cumavg/&norm)*100;
    /* Dividing by the maximum of the dependent variable scales the bin mean into a rate */
    badrate = mean_dv/&maxdv;
    goodrate = 1 - badrate;
    badcnt = badrate * cnt;
    goodcnt = goodrate * cnt;
    badratio = (cnt * mean_dv)/&totalresp;
    goodratio = (cnt - badcnt)/(&ntotal-&totalresp/&maxdv);
    drop cumresp cumtotal;
  run;

  data infv_tmp;
    set tmp3 end=eof;
    length variable $32.;
    variable = "&var";
    retain infv gini ks cumbad cumgood 0;
    prev_dv = lag(cumpct_resp);
    prev_pct = lag(cumpct_freq);
    if _n_ ne 1 then
      gini = gini+2*((cumpct_freq+prev_pct)/2-(cumpct_resp+prev_dv)/2)*(cumpct_freq-prev_pct);
    if badratio+goodratio = 0 then contribution = 0;
    else if badratio*goodratio=0 then contribution=cnt*1.0/&set_size*max(badratio,goodratio);
    else contribution = (badratio-goodratio)*log(Max(badratio/goodratio,0.00001));
    infv = infv+(contribution);
    cumbad = cumbad + badratio;
    cumgood = cumgood + goodratio;
    ks = max(ks,abs(cumbad-cumgood));
    /* Only the last row carries the accumulated totals */
    if eof then output;
    keep variable infv gini ks;
  run;

  proc datasets library=work nolist;
    delete tmp: ;
  quit;
  run;
%mend;

/*---------------------------------------------------------------------------------------------------------------------
  Maxx_Corr: eliminate variables that are highly correlated.

  Parameters:
    insdn  data set used to compute the correlation matrix

  Reads and rewrites out.CE3_Var_Redu: adds drop_corr and sets it to Y for every variable whose
  absolute correlation with a retained variable exceeds maxcorr. Candidates are processed in
  descending order of absolute correlation with the dependent variable, so the variable kept
  from a correlated set is the one that comes first in that order.
---------------------------------------------------------------------------------------------------------------------*/
%macro Maxx_Corr(insdn);
  %* Add field to variable dataset;
  data out.CE3_Var_Redu;
    set out.CE3_Var_Redu;
    length drop_corr $1.;
  run;

  %* Get variables to evaluate;
  data tmp;
    set out.CE3_Var_Redu;
    if num_sources>=&sources
      %if %upcase(&logistic)=Y and %upcase(&Binary_dv)=Y %then %do; or logsource=1 %end;
      %if %upcase(&regression)=Y %then %do; or regsource=1 %end;
    ;
  run;
  proc sql noprint;
    select variable into : varlist_redu separated by ' ' from tmp;
  quit;

  %* Correlation table;
  proc corr data=&insdn out=tmp noprint;
    var &dep_var &varlist_redu;
  run;
  proc sql;
    create table corrmatrix (drop=_type_) as
    select * from tmp where _type_ = 'CORR' and _name_ ^= "&dep_var" order by abs(&dep_var) desc;
  quit;

  %* Cycle through to eliminate variable with correlations that are too high;
  %let breakdelcorr=0;
  %do %while(&breakdelcorr=0);
    /* cnt = number of columns this row is too highly correlated with (itself included) */
    data corrmatrix (drop=i);
      set corrmatrix;
      varnum = _n_;
      cnt = 0;
      array vars{*} &prefix.: ;
      do i = 1 to dim(vars);
        if &maxcorr < abs(vars{i}) then cnt = cnt + 1;
      end;
    run;
    proc sql noprint;
      select count(*) into: vcnt from corrmatrix where cnt>1;
    quit;

    %* No more variables to drop;
    %if &vcnt = 0 %then %let breakdelcorr=1;
    %else %do;
      /* Reset both counters every pass so values from an earlier pass are never reused */
      %let ndropvar=0;
      %let ndrop=0;
      /* First remaining row with a conflict is the variable to keep */
      data _null_;
        set corrmatrix (where=(cnt>1) obs=1) end=eof;
        call symputx("keepvar",_name_);
        if eof then call symputx("ndropvar",1);
      run;
      /* Every other row too highly correlated with the kept variable is dropped */
      data _null_;
        set corrmatrix (where=(abs(&keepvar) > &maxcorr and _name_ ^= "&keepvar")) end=lastob;
        call symputx('dropvar'||left(_n_),_name_);
        if lastob then call symputx('ndrop',_n_);
      run;

      %* No more variables to drop;
      %if &ndropvar=0 or &ndrop=0 %then %let breakdelcorr=1;
      %else %do;
        %* More variables to drop;
        /* Remove the dropped variables from the matrix, both as columns and as rows */
        data corrmatrix;
          set corrmatrix (drop= %do i = 1 %to &ndrop; &&dropvar&i %end; );
          %do i = 1 %to &ndrop;
            if upcase(_name_) = upcase("&&dropvar&i") then delete;
          %end;
        run;
        data out.CE3_Var_Redu;
          set out.CE3_Var_Redu;
          %do i = 1 %to &ndrop;
            if upcase(variable) = upcase("&&dropvar&i") then do;
              drop_corr = 'Y';
            end;
          %end;
        run;
      %end;
    %end;
  %end;

  proc datasets nolist;
    delete tmp corrmatrix;
  quit;
  run;
%mend;

0 0
原创粉丝点击