Bagging算法在SAS中的实现
来源:互联网 发布:阿里云智能家居 编辑:程序博客网 时间:2024/05/21 19:41
原文地址:《Bagging算法在SAS中的实现》 作者:文穗
%macro bagging(data = , y = , numx = , catx = , ntrees = 10);
/***********************************************************
 * THIS SAS MACRO IS AN ATTEMPT TO IMPLEMENT BAGGING
 * PROPOSED BY LEO BREIMAN (1996)
 * =======================================================
 * PARAMETERS:
 *  DATA   : INPUT SAS DATA TABLE
 *  Y      : RESPONSE VARIABLE WITH 0/1 VALUE
 *  NUMX   : A LIST OF NUMERIC ATTRIBUTES
 *  CATX   : A LIST OF CATEGORICAL ATTRIBUTES
 *  NTREES : # OF TREES TO DO THE BAGGING (DEFAULT 10)
 * =======================================================
 * OUTPUTS:
 *  1. A SAS CATALOG FILE NAMED "TREEFILES" IN THE WORKING
 *     DIRECTORY CONTAINING ALL SCORING FILES IN BAGGING
 *  2. A LST FILE SHOWING KS STATISTICS OF THE BAGGING
 *     CLASSIFIER AND EACH TREE CLASSIFIER
 * =======================================================
 * CONTACT:
 *  WENSUI.LIU@53.COM, LOSS FORECASTING & RISK MODELING
 ***********************************************************/

  /* Keep the loop index and the working macro variables out of any */
  /* enclosing symbol table so the caller's variables are untouched. */
  %local i seed nobs max_ks min_ks bag_ks;

  /* NOTE: these options are set globally and are not restored on exit. */
  options mprint mlogic nocenter nodate nonumber;

  /* Master random seed; change it to obtain a different bag of trees. */
  %let seed = 20110613;

  /* Library pointing at the working folder (empty path). */
  libname _path '';

  /* Derive one seed per tree from the master seed and store them in   */
  /* macro variables random1, random2, ... The 3. format limits the    */
  /* suffix to three digits.                                           */
  data _null_;
    do i = 1 to &ntrees;
      random = put(ranuni(&seed) * (10 ** 8), 8.);
      name = compress("random"||put(i, 3.), ' ');
      call symput(name, random);
    end;
  run;

  /* Remove catalogs left over from a previous run. */
  proc datasets library = _path nolist;
    delete TreeFiles tmp / memtype = catalog;
  run;
  quit;

  /* Bootstrap sample size = number of rows with a valid 0/1 response. */
  proc sql noprint;
    select count(*) into :nobs from &data where &y in (1, 0);
  quit;

  /* Working copy of the modelling columns with a sequential row id.   */
  /* NOTE(review): rows whose response is not 0/1 are counted out of   */
  /* nobs above but are NOT filtered out here, so they can still be    */
  /* sampled and scored. Confirm whether that is intended.             */
  data _tmp1 (keep = &y &numx &catx _id_);
    set &data;
    _id_ + 1;
  run;

  %do i = 1 %to &ntrees;
    %put &&random&i;

    /* Bootstrap sample: unrestricted random sampling with replacement; */
    /* NumberHits (renamed _hits) is the draw count of each row.        */
    proc surveyselect data = _tmp1 method = urs n = &nobs seed = &&random&i
      out = sample&i(rename = (NumberHits = _hits)) noprint;
    run;

    /* Build the data mining database and catalog read by proc split. */
    proc dmdb data = sample&i out = db_sample&i dmdbcat = cl_sample&i;
      class &y &catx;
      var &numx;
      target &y;
      freq _hits;
    run;

    /* Temporary catalog entry that receives the generated scoring code. */
    filename out_tree catalog "_path.tmp.out_tree.source";

    /* Binary-split decision tree with the Gini criterion, mimicking CART. */
    proc split data = db_sample&i dmdbcat = cl_sample&i
      criterion  = gini
      assess     = impurity
      maxbranch  = 2
      splitsize  = 100
      subtree    = assessment
      exhaustive = 0
      nsurrs     = 0;
      code file = out_tree;
      input &numx / level = interval;
      input &catx / level = nominal;
      target &y / level = binary;
      freq _hits;
    run;

    /* Permanent catalog entry holding the scoring code of tree i. */
    filename in_tree catalog "_path.TreeFiles.tree&i..source";

    /* Copy the generated code, dropping its first three lines. */
    data _null_;
      infile out_tree;
      input;
      file in_tree;
      if _n_ > 3 then put _infile_;
    run;

    /* Score the original data with the scoring code of tree i. */
    data _score&i (keep = p_&y.1 p_&y.0 &y _id_);
      set _tmp1;
      %include in_tree;
    run;

    /* KS statistic of tree i. The printed output is redirected to      */
    /* lst_out so that only the ODS table is kept.                      */
    /* NOTE(review): lst_out is not assigned as a fileref in this macro; */
    /* verify where proc printto resolves it.                           */
    proc printto new print = lst_out;
    run;

    ods output kolsmir2stats = _kstmp(where = (label1 = 'KS'));
    proc npar1way wilcoxon edf data = _score&i;
      class &y.;
      var p_&y.1;
    run;

    proc printto;
    run;

    /* Stack the scores and the KS statistics across trees. */
    %if &i = 1 %then %do;
      data _tmp2;
        set _score&i;
      run;

      data _ks;
        set _kstmp (keep = nvalue2);
        tree_id = &i;
        seed = &&random&i;
        ks = round(nvalue2 * 100, 0.0001);
      run;
    %end;
    %else %do;
      data _tmp2;
        set _tmp2 _score&i;
      run;

      data _ks;
        set _ks _kstmp(in = a keep = nvalue2);
        if a then do;
          tree_id = &i;
          seed = &&random&i;
          ks = round(nvalue2 * 100, 0.0001);
        end;
      run;
    %end;
  %end;

  /* Bagged prediction = mean of the tree predictions for each row id. */
  proc summary data = _tmp2 nway;
    class _id_;
    output out = _tmp3(drop = _type_ rename = (_freq_ = freq))
      mean(p_&y.1) = mean(p_&y.0) = mean(&y) = ;
  run;

  /* KS statistic of the bagged classifier, stored with tree_id = 0. */
  proc printto new print = lst_out;
  run;

  ods output kolsmir2stats = _kstmp(where = (label1 = 'KS'));
  proc npar1way wilcoxon edf data = _tmp3;
    class &y;
    var p_&y.1;
  run;

  proc printto;
  run;

  data _ks;
    set _ks _kstmp (in = a keep = nvalue2);
    if a then do;
      tree_id = 0;
      seed = &seed;
      ks = round(nvalue2 * 100, 0.0001);
    end;
  run;

  proc sort data = _ks;
    by tree_id;
  run;

  proc sql noprint;
    select max(ks) into :max_ks from _ks where tree_id > 0;
    select min(ks) into :min_ks from _ks where tree_id > 0;
    select ks into :bag_ks from _ks where tree_id = 0;
  quit;

  /* Report the KS of the bagged classifier and of every tree in the bag. */
  title "MAX KS = &max_ks, MIN KS = &min_ks, BAGGING KS = &bag_ks";
  proc print data = _ks noobs;
    var tree_id seed ks;
  run;
  title;

  /* Drop the temporary catalog; TreeFiles is kept as the macro output. */
  proc datasets library = _path nolist;
    delete tmp / memtype = catalog;
  run;
  quit;
%mend bagging;

/* Example call: numeric predictors in x1, categorical predictor in x2. */
%let x1 = tot_derog tot_tr age_oldest_tr tot_open_tr tot_rev_tr tot_rev_debt
          tot_rev_line rev_util bureau_score ltv tot_income;
%let x2 = purpose;

/* The backslashes of this Windows path were missing from the published text. */
libname data 'D:\SAS_CODE\bagging';

/* The published text had the leading percent-b-a of this call corrupted. */
%bagging(data = data.accepts, y = bad, numx = &x1, catx = &x2, ntrees = 10);
0 0
- Bagging算法在SAS中的实现
- Akka 在Bagging投票算法中的简单应用
- Bagging算法的R语言实现
- Bagging算法的R语言实现
- Matlab实现Bagging(集成学习)算法
- Bagging算法细讲
- Bagging 的python实现
- 机器学习算法--bagging算法
- boosting和bagging算法学习
- Bagging算法与随机森林
- 集成算法-xgboost/bagging/voting
- 用SAS宏实现oracle中的decode函数
- 机器学习中的bagging技巧
- MD5算法在JS中的实现
- 常用算法在VB中的实现
- MD5算法在PB中的实现
- Md5算法在VFP中的实现
- Md5算法在VFP中的实现
- iOS数据存取
- 泡泡带你看java script
- Solr索引建立
- POI读写excel实例和JXL实现excel的方式 (2)
- 不一样的ViewPager
- Bagging算法在SAS中的实现
- 第6章深入使用Hibernate 6.2 继承映射
- DaoCloud 如何获取树莓派
- 纯代码创建UI界面入门(二)
- hadoop2.6整合hbase 1.1.2完全分布式安装
- fork和lockf应用
- Gallery 和ImageSwitcher实现照片墙功能
- 剖析CSS relative相对定位用法
- 第6章深入使用Hibernate 6.3 Hibernate的批量处理