SAS NLP Model

来源:互联网 发布:中国大学生编程大赛 编辑:程序博客网 时间:2024/06/16 16:03
/*Declare Library where the datasets are placed*/LIBNAME lib ' ';/*----------------------------------------------------------------------------------------------------------------------------*//*Datasets required are as follwoing:*//*1) Transaction data subsetted for Order_status_cd in ('IN','MI') and order_type_cd in ('I', 'C') and sales channel filter *//*2) Quotes data*//*3) Warranty data*//*4) Fimrographics variables at site level including No. of contact variables. It should not have any duplicates*//*----------------------------------------------------------------------------------------------------------------------------*//*This code proceeds in the following manner:*//*Creates the transactional variables like 'Total 3 year revenue', etc from transaction data*//*Flags the responses*//*Creates quotes variables(quotes_var)*//*Creates warranty variable and other datasets*//*Calculates Average order gap*//*Split the dataset into Refresh and Conversion*//*----------------------------------------------------------------------------------------------------------------------*//*This code outputs quarter level datasets for LOB Refresh and LOB Conversion Model*/options mlogic mprint symbolgen compress =yes reuse =yes;/*declare name of following datasets*/%let trans=  anz_nlp_q3end_trx_inmi_v3;%let quotes= anz_nlp_q3end_quotes_final;%let warranty= anz_nlp_q3end_warranty_v2;%let firmo= anz_firmographics_final;/*Creating LOB level modeling dataset for various quarters*/%macro base(qtr=,last_date=, lob=, lob_name=);proc sql;create table temp_var1 asselect distinct dim_plat_site_id,/*rev variables*/sum(case when %eval(&qtr.-30) <=qtr< &qtr. then TOTAL_REV_USD end) as Total_3yr_rev,sum(case when %eval(&qtr.-20) <=qtr< &qtr. then TOTAL_REV_USD end) as Total_2yr_rev,sum(case when %eval(&qtr.-10) <=qtr< &qtr. and sku_lob1='SNP' then TOTAL_REV_USD end) as SNP_1yr_rev,sum(case when %eval(&qtr.-30) <=qtr< &qtr. and sku_lob1='DT' then TOTAL_REV_USD end) as DT_3yr_rev,/*margin variables*/sum(case when %eval(&qtr.-10) <=qtr< &qtr. and sku_lob1='Services' then net_margin_usd end) as SVC_1yr_margin,/*recency of transaction-line of business wise*/(1+&last_date.-max(case when total_rev_usd>0 then order_date end)) as overall_recency,(1+&last_date.-max(case when sku_lob1='NB' and total_rev_usd>0 then order_date end)) as NB_recency,(1+&last_date.-max(case when sku_lob1='Services' and total_rev_usd>0 then order_date end)) as SVC_recency,/*Tie Group  Past 3yr rev*/sum(case when %eval(&qtr.-30) <=qtr< &qtr. and tie_lob1=&lob. then total_rev_usd end) as tie_&lob_name._3yr_revfrom lib.&trans.where %eval(&qtr.-30) <=qtr< &qtr.group by dim_plat_site_idhaving round(Total_3yr_rev,1)>0order by dim_plat_Site_id;quit;proc sql;create table temp_var2 asselect distinct dim_plat_site_id,count(distinct case when %eval(&qtr.-10) <=qtr< &qtr. and total_rev_usd>0 then order_date end) as Total_1yr_orders,count(distinct case when %eval(&qtr.-10) <=qtr< &qtr. and sku_lob1='SV' and total_rev_usd>0 then order_date end) as SV_1yr_orders,count(distinct case when %eval(&qtr.-20) <=qtr< &qtr. and sku_lob1='SNP' and total_rev_usd>0 then order_date end) as SNP_2yr_orders,count(distinct case when %eval(&qtr.-20) <=qtr< &qtr. and sku_lob1='Services' and total_rev_usd>0 then order_date end) as SVC_2yr_orders,count(distinct case when %eval(&qtr.-30) <=qtr< &qtr. and sku_lob1='Services' and total_rev_usd>0 then order_date end) as SVC_3yr_ordersfrom lib.&trans.where %eval(&qtr.-30) <=qtr< &qtr.and dim_plat_site_id in (select distinct dim_plat_site_id from temp_var1)group by dim_plat_site_idorder by dim_plat_Site_id;quit;/*Response flag*/proc sql;create table temp asselect distinct dim_plat_site_id, 1 as flag_&lob_name., sum(total_Rev_usd) as total_revfrom lib.&trans.where qtr= &qtr.and tie_lob1=&lob.group by dim_plat_site_idhaving round(total_rev,1)>0order by dim_plat_site_id;quit;data temp_merged;merge temp_var1(in=a) temp_var2(in=b) temp(in=c drop=total_rev);by dim_plaT_site_id;if a;run;data temp_merged;set temp_merged;if flag_&lob_name. =. then flag_&lob_name.=0;run;/*Quotes Data*//*Revise Quotes variable creation*/proc sql;create table qtr_flags asselect *, (case when (&qtr.-(case when mod(&qtr.,10) in (1,2) then 8 else 2 end )) <= qtr < &qtr. then 1 end) as f_qt_last_2from lib."es.;quit;proc sql;create table quotes_var_1 asselect dim_plat_site_id,count(distinct (case when f_qt_last_2 = 1 then quote_num end)) as no_of_qt_last_2,max(f_qt_last_2) as f_qt_last_2from qtr_flagsgroup by dim_plat_site_id order by dim_plat_site_id;quit;data quotes_var;set quotes_var_1;if f_qt_last_2 = . then f_qt_last_2 = 0;run;/*Warranty flags*/proc sql;create table warranty_Q&Qtr. asselect distinct dim_plat_site_id,count(case when war_end_qtr = &qtr. and lob in ('NB') then fact_warranty_sid end) as NB_Wrnty_End_cq_cntfrom lib.&warranty.group by dim_plat_site_id;quit;data warranty_Q&Qtr._v1;set warranty_Q&Qtr.;array zero _numeric_;do over zero; if zero=. then zero=0; end; run;data  lib.warranty_Q&Qtr._final(drop=NB_Wrnty_End_cq_cnt);set  warranty_Q&Qtr._v1;if NB_Wrnty_End_cq_cnt>0 then NB_Wrnty_End_cq_flag=1;else NB_Wrnty_End_cq_flag=0;run;/*merging warranty, quotes and Last lob flags data*/proc sort data=lib.warranty_Q&Qtr._final out=lib.warranty_&qtr._final;by dim_plaT_site_id;run;proc sort data=temp_merged out=temp_merged;by dim_plaT_site_id;run;proc sort data=quotes_var out=quotes_var;by dim_plaT_site_id;run;/*ORDERGAP, CONSIDER ONLY PURCHASE ORDERS*/proc sql;create table avg_order_gap_ds as select dim_plat_site_id,sum(case when %eval(&qtr.-30) <=qtr< &qtr. then TOTAL_REV_USD end) as Total_3yr_rev,max(case when total_rev_usd>0 then order_date end) as order_date_1, min(case when total_rev_usd>0 then order_date end) as order_date_2,count(distinct case when total_rev_usd>0 then order_date end) as no_ordersfrom lib.&trans.where %eval(&qtr.-30) <=qtr< &qtr.group by dim_plat_site_idhaving round(Total_3yr_rev,1)>0order by dim_plat_Site_id;quit;data avg_order_gap_ds(drop= order_date_1 order_date_2 no_orders);set avg_order_gap_ds;if no_orders>1 then avg_order_gap=(order_date_1-order_date_2)/(no_orders-1);else avg_order_gap=.;run;proc sort data=avg_order_gap_ds out=avg_order_gap_ds_sort;by dim_plat_Site_id;run;data temp_merged_v1;merge temp_merged(in=a) lib.warranty_&qtr._final(in=b) quotes_var(in=d)  avg_order_gap_ds_sort(in=f) lib.&firmo.(in=g);by dim_plat_site_id;if a;run;data temp_merged_v1;set temp_merged_v1;qtr=&qtr.;run;data lib.&lob_name._&qtr._refresh_check lib.&lob_name._&qtr._conv_check;set temp_merged_v1;if tie_&lob_name._3yr_rev >0 then output lib.&lob_name._&qtr._refresh_check;else output lib.&lob_name._&qtr._conv_check;run;%mend; /*---------------------Calling the dataset creation macro with respective attributes--------------------------------------------*//*Last date is the last date of the observation period(past 3 year period) for a quarter*//*Specify the quarters for which the dataset needs to be created*/%let RespQtr= ;/*Specify the quarter for which the scores are required ex: 20143*/%let last_Date1= ;/*Specify the last date of the observation period(past 3 year period) for the Resp Quarter ex: '01nov2013'd for 20144*/%let PrevQtr=  ;/*Specify the quarter preceeding the quarter for which the scores are required ex:20144*/%let last_date2= ;/*Specify the last date of the observation period(past 3 year period) for the Prev Quarter ex: '02aug2013'd for 20143*/%base(qtr=&PrevQtr. ,last_Date=&last_date2., lob='NB', lob_name=NB);%base(qtr=&PrevQtr. ,last_Date=&last_date2., lob='DT', lob_name=DT);%base(qtr=&PrevQtr. ,last_Date=&last_date2., lob='SV', lob_name=SV);%base(qtr=&RespQtr. ,last_Date=&last_Date1., lob='DT', lob_name=DT);%base(qtr=&RespQtr. ,last_Date=&last_Date1., lob='NB', lob_name=NB);%base(qtr=&RespQtr. ,last_Date=&last_Date1., lob='SV', lob_name=SV);/*This Code has scoring macro for the following models:1) DT Conversion2) DT Refresh3) NB Conversion4) NB Refresh5) SV Conversion6) SV RefreshIndividual scoring macro performs the following operations on the respective modeling  datasets:1) Missing Value Treatment2) Outlier Treatment3) Additional Variable Creation4) Predictive Scoring-------------------------------------------------------------------------------------------------------------------------------------*//*Scoring Macro For DT ConversionInput: DT Conversion Modeling Dataset and Library Name*//*----------------------------------------------------------------------------------------------------------------------------*/%macro score_dtc(ds, lib_name);/*MVT*/data &ds.;set &lib_name..&ds.;if overall_recency = . then overall_recency = 1100;if SNP_recency = . then SNP_recency = 1100; if SVC_recency = . then SVC_recency = 1100;array zero _numeric_;do over zero; if zero=. then zero=0; end;run;/*Outlier*/data &ds.;set &ds.;if NoOfContacts > 17 then NoOfContacts =  17;if SVC_3yr_orders > 6 then SVC_3yr_orders =  6;if buy_power_value > 48618.6015625 then buy_power_value =  48618.6015625;run;/*Flag Creation*/proc sql;create table &ds._f asSelect * ,(case when NoOfContacts > 3 then 1 else 0 end)  as f_NoOfContacts,(case when state_prov ="NSW" then 1 else 0 end)  as f_NSW,(case when SVC_3yr_orders > 1 then 1 else 0 end)  as f_SVC_3yr_orders,(case when SVC_recency < 150 then 1 else 0 end)  as f_SVC_recency,(case when buy_power_value > 1500 then 1 else 0 end)  as f_buy_power_value,(case when sb_segment ="SB+" then 1 else 0 end)  as f_segment_sb_plsfrom &ds.;quit;/*Scoring*/data &lib_name..&ds._scrd(drop=k);set &ds._f;k=-5.0616617 +f_NSW * 0.16598 +f_NoOfContacts * 0.2067096 +f_SVC_3yr_orders * 0.4129997 +f_SVC_recency * 0.13224 +f_buy_power_value * 1.0158809 +f_segment_sb_pls * 0.1968826 +overall_recency * -0.0014509;dt_c=(exp(k)/(exp(k)+1));run;%mend score_dtc;/*Scoring Macro For DT RefreshInput: DT Refresh Modeling Dataset and Library Name*//*------------------------------------------------------------------------------------------------------------------------------*//*DT Refresh*/%macro score_dtr(ds, lib_name);/*MVT*/data &ds.;set &lib_name..&ds.;if Overall_recency=. then Overall_recency=1100;if buy_power_value=. then buy_power_value=0;if SNP_2YR_Orders=. then SNP_2YR_Orders=0;if Total_1yr_orders=. then Total_1yr_orders=0;if NoOfContacts=. then NoOfContacts=0;if avg_order_gap=. then avg_order_gap=0;run;/*Outlier*/data &ds.;set &ds.;If buy_power_value >82544 then buy_power_value = 82544 ;If SNP_2yr_orders >27 then SNP_2yr_orders = 27 ;If Total_1yr_orders >19 then Total_1yr_orders = 19 ;If Total_3yr_rev >80880 then Total_3yr_rev = 80880 ;If NoOfContacts >24 then NoOfContacts = 24 ;run;/*Flags*/data &ds._f;set &ds.;if avg_order_gap_3yr > 50 and avg_order_gap_3yr<100 then avg_order_gap_3yr_50to100_flg =  1; else avg_order_gap_3yr_50to100_flg=0;if buy_power_value > 3500 then buy_power_value_gt3500_flg =  1; else buy_power_value_gt3500_flg=0;if SNP_2YR_Orders > 2 then SNP_2YR_Orders_gt2_flg =  1; else SNP_2YR_Orders_gt2_flg=0;if Total_1yr_orders > 1 then Total_1yr_orders_gt1_flg =  1; else Total_1yr_orders_gt1_flg=0;if Total_3yr_rev > 4600 then Total_3yr_rev_gt4600_flg =  1; else Total_3yr_rev_gt4600_flg=0;if NoOfContacts > 5 then NoOfContacts_gt5_flg =  1; else NoOfContacts_gt5_flg=0;run;/*Scoring*/data &lib_name..&ds._scrd(drop=j);set &ds._f;j=(-3.2664)+(avg_order_gap_3yr_50to100_flg*0.1313)+(buy_power_value_gt3500_flg*0.7603)+(overall_recency*-0.00275)+(SNP_2YR_Orders_gt2_flg*0.4222)+(Total_1yr_orders_gt1_flg*0.3954)+(Total_3yr_rev_gt4600_flg*0.2843)+(NoOfContacts_gt5_flg*0.112);dt_r=(exp(j)/(exp(j)+1));run;%mend score_dtr;/*Scoring Macro For NB ConversionInput: NB Conversion Modeling Dataset and Library Name*//*----------------------------------------------------------------------------------------------------------------------------------------*/%macro score_nbc(ds, lib_name);/*MVT*/data &ds.;set &lib_name..&ds.;if buy_power_value=. then buy_power_value=0;if overall_recency=. then overall_recency=1100;if SNP_1yr_rev=. then SNP_1yr_rev=0;if SVC_1yr_margin=. then SVC_1yr_margin=0;if SVC_2yr_orders=. then SVC_2yr_orders=0;run;/*Outlier Treatment*/data &ds.;set &ds.;If SNP_1yr_rev <0 then SNP_1yr_rev = 0;If buy_power_value >27559.5 then buy_power_value = 27559.5 ;If SNP_1yr_rev >3965.458984375 then SNP_1yr_rev = 3965.458984375 ;If SVC_1yr_margin >873.195007324218 then SVC_1yr_margin = 873.195007324218 ;If SVC_2yr_orders >4 then SVC_2yr_orders = 4 ;run;/*Flags*/data &ds._f;set &ds.;if buy_power_value > 1400 then buy_power_value_gt1400 =  1; else buy_power_value_gt1400=0;if SNP_1yr_rev > 800 then SNP_1yr_rev_gt800 =  1; else SNP_1yr_rev_gt800=0;if SVC_1yr_margin > 60 then SVC_1yr_margin_gt60 =  1; else SVC_1yr_margin_gt60=0;if SVC_2yr_orders > 2 then SVC_2yr_orders_gt2 =  1; else SVC_2yr_orders_gt2=0;if sb_segment="SB+" then segment_sb_plus =1; else  segment_sb_plus=0;run;/*Scoring*/data &lib_name..&ds._scrd(drop=sum);set &ds._f;sum=(-4.3762)+(buy_power_value_gt1400*0.9858)+(overall_recency*-0.00159)+(SNP_1yr_rev_gt800*0.3206)+(SVC_1yr_margin_gt60*0.1655)+(SVC_2yr_orders_gt2*0.544)+(segment_sb_plus*0.4085);nb_c=(exp(sum)/(exp(sum)+1));run;%mend score_nbc;/*Scoring Macro For NB RefreshInput: NB Refresh Modeling Dataset and Library Name*//*---------------------------------------------------------------------------------------------------------------------------------*//*NB Refresh*/%macro score_nbr(ds, lib_name);/*MVT*/data &ds.;set &lib_name..&ds.;if DT_3yr_rev=. then DT_3yr_rev=0;if NB_Wrnty_End_cq_flag=. then NB_Wrnty_End_cq_flag=0;if NB_recency=. then NB_recency=1100;if buy_power_value=. then buy_power_value=0;if S_flag_job_area_IT=. then S_flag_job_area_IT=0;run;/*Outlier*/data &ds.;set &ds.;If DT_3yr_rev > 4923.7841796875 then DT_3yr_rev = 4923.7841796875;If buy_power_value > 195507.703125 then buy_power_value = 195507.703125;run;/*Flags*/data &ds._f;set &ds.;if sb_segment ="SB+" then Sb_segment_sbp = 1; else sb_segment_sbp = 0;if buy_power_value >= 2900 then buy_power_value_gteq2900 =  1; else buy_power_value_gteq2900=0;/*scoring*/data &lib_name..&ds._scrd(drop=sum);set &ds._f;sum=(DT_3yr_rev*7.06477E-06)+(-3.0792399)+(NB_Wrnty_End_cq_flag*0.5415848)+(NB_recency*-0.0028946)+(S_flag_job_area_IT*0.4029746)+(Sb_segment_sbp*1.1333576)+(buy_power_value_gteq2900*0.9691902);nb_r=(exp(sum)/(exp(sum)+1));run;%mend score_nbr;/*Scoring Macro For SV ConversionInput: SV Conversion Modeling Dataset and Library Name*//*-----------------------------------------------------------------------------------------------------------------------*/%macro score_svc(ds, lib_name);/*MVT*/data &ds.;set &lib_name..&ds.;if Total_2yr_rev=. then Total_2yr_rev=0;if buy_power_value=. then buy_power_value=0;if no_of_qt_last_2=. then no_of_qt_last_2=0;if overall_recency=. then overall_recency=1100;run;/*Outlier*/data &ds.;set &ds.;if Total_2yr_rev < 0 then Total_2yr_rev =  0;if Total_2yr_rev > 18946.51953125 then Total_2yr_rev =  18946.51953125;if buy_power_value > 27730.80078125 then buy_power_value =  27730.80078125;run;/*Flags*/proc sql;create table &ds._f asselect *,(case when buy_power_value > 1600 then 1 else 0 end)  as f_buy_power_value,(case when no_of_qt_last_2 >= 2 then 1 else 0 end)  as f_no_of_qt_last_2,(case when sb_segment = "SB+" then 1 else 0 end) as f_segment_sbpfrom &ds.;quit;/*Scoring*/data &lib_name..&ds._scrd (drop=y);set &ds._f;y = -7.0495908 +Total_2yr_rev * 0.0000384 +f_buy_power_value * 1.1023197 +f_no_of_qt_last_2 * 0.303036 +f_segment_sbp * 2.2749556 +overall_recency * -0.002347;sv_c= exp(y)/(1+exp(y));run;%mend score_svc;/*Scoring Macro For SV RefreshInput: SV Refresh Modeling Dataset and Library Name*//*---------------------------------------------------------------------------------------------------------------------------------------*/%macro score_svr(ds, lib_name);/*MVT*/data &ds.;set &lib_name..&ds.;if avg_order_gap =. then avg_order_gap =1100 ;if SVC_recency =. then SVC_recency =1100 ;if SV_1yr_orders=. then SV_1yr_orders=0;if overall_recency=. then overall_recency=1100;run;/*Outlier*//*Flag*/proc sql;create table &ds._f as select *, (case when avg_order_gap >180 then 1 else 0 end)  as f180_avg_order_gap,(case when SVC_recency <=30 then 1 else 0 end)  as f_SVC_recency,(case when SV_1yr_orders >=2 then 1 else 0 end)  as f_SV_1yr_orders,(case when sb_segment ="SB+" then 1 else 0 end)  as f_sb_segmentfrom &ds.;quit;/*Scoring*/data &lib_name..&ds._scrd (drop=sum);set &ds._f;sum=-3.6086828+(-0.3996449*f180_avg_order_gap)+(0.7048313*f_SVC_recency)+(1.4622138*f_SV_1yr_orders)+(1.2451883*f_sb_segment)+(0.8346774*flg_sic_mining)+(0.457318*flg_sic_pub_utility)+(-0.0037737*overall_recency);sv_r=(exp(sum)/(exp(sum)+1));run;%mend score_svr;/*This Code calls the scoring macro with the requried attributes and outputs the dataset with predicted scoresThe macro 'check_missing_score' checks for the missing values in predicted scores for a LOBThe macro 'rank' groups the scored dataset on the basis of the predicted scores and assigns 'H' rank for top 10% scores,'L' rank for the lowest 70% scores and the remaining (20%) scores are assigned as 'M' rank*/libname datalib '/DELL_SMB_APJ/samarth/ANZ_Q3_NLP';    /*declare location of datasets*/%let RespQtr= 20144;/*Declare the quarter like 20144 for which the recommendations are required*/%let PrevQtr= 20143;/*Declare the previous quarter like 20143*/options mlogic mprint symbolgen spool;/*Calling the model scoring macros with their respective attributes: Modeling Dataset Name and Library Name*/%score_dtc(dt_&PrevQtr._conv_check, datalib);%score_nbc(nb_&PrevQtr._conv_check, datalib);%score_svc(sv_&PrevQtr._conv_check, datalib);%score_dtc(dt_&RespQtr._conv_check, datalib);%score_nbc(nb_&RespQtr._conv_check, datalib);%score_svc(sv_&RespQtr._conv_check, datalib);%score_dtr(dt_&PrevQtr._refresh_check, datalib);%score_nbr(nb_&PrevQtr._refresh_check, datalib);%score_svr(sv_&PrevQtr._refresh_check, datalib);%score_dtr(dt_&RespQtr._refresh_check, datalib);%score_nbr(nb_&RespQtr._refresh_check, datalib);%score_svr(sv_&RespQtr._refresh_check, datalib);/*----------------------------------------------------------------------------------------------------------------------------*//* QC :Checking if missing values were generated as model scores------------------------------------------------------------------------------------------------------------------------------*/%macro check_missing_score(lob);proc sql;select count(distinct dim_plat_site_id) as &lob._r_missing_&PrevQtr. from datalib.&lob._&PrevQtr._refresh_check_scrd where &lob._r=.;quit;proc sql;select count(distinct dim_plat_site_id) as &lob._c_missing_&PrevQtr. from datalib.&lob._&PrevQtr._conv_check_scrd where &lob._c=.;quit;proc sql;select count(distinct dim_plat_site_id) as &lob._r_missing_&RespQtr. from datalib.&lob._&RespQtr._refresh_check_scrd where &lob._r=.;quit;proc sql;select count(distinct dim_plat_site_id) as &lob._c_missing_&RespQtr. from datalib.&lob._&RespQtr._conv_check_scrd where &lob._c=.;quit;%mend check_missing_score;%check_missing_score(dt);%check_missing_score(nb);%check_missing_score(sv);/*Check if the output of all the above macros is 0;if any of then outputs is not zero,it indicates that there are missing score for one of the LOBs*//*Ranking the individual scores*/%macro rank(ds, lib, var_name,type);proc rank data=&lib..&ds. out=&ds.  groups=10 descending ;var &var_name.;ranks &var_name._grp;run;data &ds. ;set &ds.;if &var_name._grp=0 then &var_name._rank='H';else if &var_name._grp=1 or &var_name._grp=2 then &var_name._rank='M';     else if &var_name._grp>2 then &var_name._rank='L';run;proc sql;create table &ds._1 as select *, &var_name._rank||&type. as &var_name._rank2 from &ds.;quit;data &lib..&ds.(drop=&var_name._rank  rename=(&var_name._rank2=&var_name._rank));set &ds._1;run;%mend rank;/*Calling the ranking macro*/%rank(dt_&PrevQtr._conv_check_scrd, datalib,dt_c,'c');%rank(nb_&PrevQtr._conv_check_scrd, datalib,nb_c,'c');%rank(sv_&PrevQtr._conv_check_scrd, datalib,sv_c,'c');%rank(dt_&RespQtr._conv_check_scrd, datalib,dt_c,'c');%rank(nb_&RespQtr._conv_check_scrd, datalib,nb_c,'c');%rank(sv_&RespQtr._conv_check_scrd, datalib,sv_c,'c');%rank(dt_&PrevQtr._refresh_check_scrd, datalib, dt_r,'r');%rank(nb_&PrevQtr._refresh_check_scrd, datalib,nb_r,'r');%rank(sv_&PrevQtr._refresh_check_scrd, datalib,sv_r,'r');%rank(dt_&RespQtr._refresh_check_scrd, datalib,dt_r,'r');%rank(nb_&RespQtr._refresh_check_scrd, datalib,nb_r,'r');%rank(sv_&RespQtr._refresh_check_scrd, datalib,sv_r,'r');/*This code checks for the next purchase behavior(during the preceeding quarter) of the site list of the preceeding quarterand makes a bucket level transition matrix for the preceeding quarter*/libname datalib '/DELL_SMB_APJ/samarth/ANZ_Q3_NLP';    /*declare location of datasets*/%let PrevQtr= 20143;/*Declare the previous quarter like 20143*/%let Trans=anz_nlp_q3end_trx_inmi_v3;/*Declare the name of tansaction dataset*/options mlogic mprint symbolgen spool;/*Appending all the scored datasets together for LOB and Quarter*/%macro append_ds(lib, lob, qtr);data &lob._&qtr.;set &lib..&lob._&qtr._conv_check_scrd(keep=dim_plaT_Site_id &lob._c &lob._c_grp &lob._c_rank)&lib..&lob._&qtr._refresh_check_scrd(keep=dim_plaT_site_id &lob._r &lob._r_grp &lob._r_rank );&lob._score=sum(&lob._c,&lob._r);&lob._grp=sum(&lob._c_grp,&lob._r_grp);if &lob._c_rank='' then &lob._rank=&lob._r_rank;else if &lob._r_rank='' then &lob._rank=&lob._c_rank;run;data &lob._&qtr.(keep=dim_plat_site_id &lob._score &lob._grp &lob._rank);set &lob._&qtr.;run;proc sort data=&lob._&qtr out=&lib..&lob._&qtr;by dim_plaT_Site_id;run;%mend append_ds;/*Calling the append_ds macro with respective attributes*/%append_ds(datalib,DT,&PrevQtr.);%append_ds(datalib,NB,&PrevQtr.);%append_ds(datalib,SV,&PrevQtr.);/*merging all the datasets together*/data site_bucket_&PrevQtr.;merge datalib.dt_&PrevQtr.(in=a) datalib.nb_&PrevQtr.(in=b) datalib.sv_&PrevQtr.(in=c);by dim_plaT_site_id;if a or b or c;bucket=cat(trim(dt_rank), trim(nb_rank), trim(sv_rank));run;/*---------------------------------------------------------------------------------------------------------------------------------*//*Now we have site list with buckets for Prev QuarterThe following code finds out the first purchase behviour of these sites*//*find first purchase date of previous quarter*/proc sql;create table trx_subsetLOB_&PrevQtr.  asselect *from datalib.anz_nlp_q3end_trx_inmi_v3where  tie_lob1 in ("DT", "NB", "SV", "ST", "WS", "NW") and total_rev_usd>0 and order_status_cd in ('IN','MI') and order_type_cd in ('I','C')and dim_plat_site_id in (select distinct dim_plat_site_id from site_bucket_&PrevQtr.)and qtr = &PrevQtr.;quit;/*get first purchase out of the trxn data subset */proc sql;create table first_prch_Date_site  asselect dim_plat_site_id,min(order_date)  as first_prch_date from trx_subsetLOB_&PrevQtr.group by dim_plat_site_idorder by dim_plat_site_id;quit;/*subset the transaction data for only first purchase date corresponding to each site id*/proc sort data=trx_subsetLOB_&PrevQtr.; by dim_plat_site_id; run;proc sql;create table first_prch_data asselect a.dim_plat_site_id, total_rev_usd, tie_lob1, order_date,qtr from first_prch_Date_site a left join trx_subsetLOB_&PrevQtr. b on (a.dim_plat_site_id=b.dim_plat_sitE_id and a.first_prch_date=b.order_date);quit;/*flag creation for all lobs to specify first purchase*/proc sql;create table temp_3 asselect *, (case when tie_lob1 = 'DT' then 1 else 0 end) as frst_purch_dt,(case when tie_lob1 = 'NB' then 1 else 0 end) as frst_purch_nb,(case when tie_lob1 = 'SV' then 1 else 0 end) as frst_purch_sv,(case when tie_lob1 = 'ST' then 1 else 0 end) as frst_purch_st,(case when tie_lob1 = 'WS' then 1 else 0 end) as frst_purch_ws,(case when tie_lob1 = 'NW' then 1 else 0 end) as frst_purch_nwfrom first_prch_datawhere total_rev_usd > 0;quit;/*roll up to site id level*/proc sql;create table temp_4 asselect dim_plat_site_id,max(frst_purch_dt) as frst_purch_dt,max(frst_purch_nb) as frst_purch_nb,max(frst_purch_sv) as frst_purch_sv,max(frst_purch_st) as frst_purch_st,max(frst_purch_ws) as frst_purch_ws,max(frst_purch_nw) as frst_purch_nwfrom temp_3group by dim_plat_site_idorder by dim_plat_site_id;quit;/*merge the data with buckets and the data with first purchase flags*/data site_bucket_first_purch_&PrevQtr.;merge site_bucket_&PrevQtr.(in=a) temp_4(in=b);by dim_plat_site_id;if a;run;data site_bucket_first_purch_&PrevQtr.;set site_bucket_first_purch_&PrevQtr.;sum_flags = frst_purch_dt+frst_purch_nb+frst_purch_sv;if sum_flags > 1 then multi = 1;else multi = 0;run;/*calculate no. of accounts in each bucket for single lob purchases in first purchase*/proc sql;create table matrix_part_1 asselect bucket, sum(frst_purch_dt) as DT,sum(frst_purch_nb) as NB,sum(frst_purch_sv) as SV,sum(frst_purch_st) as ST,sum(frst_purch_ws) as WS,sum(frst_purch_nw) as NWfrom site_bucket_first_purch_&PrevQtr.where sum_flags = 1group by bucketorder by bucket;quit;/*calculate no. of accounts in each bucket for multiple lob purchases in first purchase*/data site_bucket_first_purch_&PrevQtr._m;set site_bucket_first_purch_&PrevQtr.;if multi=1 then output;run;data site_bucket_first_purch_&PrevQtr._m;set site_bucket_first_purch_&PrevQtr._m;if frst_purch_sv=1 then do;frst_purch_dt=0;frst_purch_nb=0;end;if frst_purch_sv=0 and frst_purch_dt=1 then do;frst_purch_nb=0;end;new_sum_flags=frst_purch_sv+frst_purch_dt+frst_purch_nb;run;proc sql;create table matrix_part_2 asselect bucket, sum(frst_purch_dt) as DT,sum(frst_purch_nb) as NB,sum(frst_purch_sv) as SV,sum(frst_purch_st) as ST,sum(frst_purch_ws) as WS,sum(frst_purch_nw) as NWfrom site_bucket_first_purch_&PrevQtr._mgroup by bucketorder by bucket;quit;/*append single and multiple lob purchases*/data transition_matrix_&PrevQtr.;set matrix_part_1 matrix_part_2;run;proc sql;create table transition_matrix_&PrevQtr._f as select bucket,sum(DT) as DT,sum(NB) as NB,sum(SV) as SV,sum(ST) as ST,sum(WS) as WS,sum(NW) as NWfrom transition_matrix_&PrevQtr.group by bucketorder by bucket;quit;/*getting count sites in each bucket*/proc sql;create table countsites_bucket asselect bucket, count(distinct dim_plaT_Site_id) as Count_Sites from site_bucket_&PrevQtr.group by bucketorder by bucket;quit;/*merge the matrix with count of accounts in all the buckets*/data datalib.transition_matrix_&PrevQtr.;merge transition_matrix_&PrevQtr._f(in=a) countsites_bucket(in=b);by bucket;if b;run;/*Shortlisting buckets from previous quarter on the basis of their respective respose rateswhile taking average response rate as the threshold*/proc sql;create table foo asselect sum(sum(NB),sum(DT),sum(SV)) as response, sum(count_sites) as sum_count_Sites, calculated response*100/calculated sum_count_Sites as avg_rrfrom datalib.transition_matrix_&PrevQtr;select avg_rr into :avg_rr from foo ; quit;data datalib.transition_matrix_&PrevQtr.;set datalib.transition_matrix_&PrevQtr.;rr=sum(NB,DT,SV)*100/count_sites;target_flag=0;if rr>=&avg_rr. then target_flag=1;run;/*This code process the transaction matrix(at bucket level) and provides recommendations on the basis of the transition counts. Further, the code merges the recommendations with the site list of the resp. quarter*//*MVT*/libname datalib '/DELL_SMB_APJ/samarth/ANZ_Q3_NLP';%let RespQtr= 20144;/*Declare the quarter like 20144 for which the recommendations are required*/%let PrevQtr= 20143;/*Declare the previous quarter like 20143*/options mlogic mprint symbolgen;/*--------------------------------------------------------------------------------------------------------------------------------*/data transition_matrix_&PrevQtr.;set datalib.transition_matrix_&PrevQtr.;array zero _numeric_;do over zero; if zero=. then zero=0; end;run;%macro recommend_process(dataname);/*--------------------------------------------------------------------------------------------------------------------------------*/%macro recommend(ds=);data &ds.;set &ds.;max_val=max(DT,NB,SV);max_count=0;dt_max=0;nb_max=0;sv_max=0;if max_val=DT then dt_max=1;if max_val=NB then nb_max=1;if max_val=SV then sv_max=1;max_count=dt_max+sv_max+nb_max;;run;Data &ds._1;set &ds.(where=(max_count=1));Pri_reco="NA";if dt_max=1 then do;pri_reco="DT";DT=-1;end;if nb_max=1 then do;pri_reco="NB";NB=-1;end;if sv_max=1 then do;pri_reco="SV";SV=-1;end;run;/*Seperating out cases where max_count>1*/data &ds._23;set &ds.(where=(max_count>1));if DT_max=1 then dt_rank=substr(bucket,1,1);else dt_rank="O";if nb_max=1 then nb_rank=substr(bucket,3,1);else nb_rank="O";if sv_max=1 then sv_rank=substr(bucket,5,1);else sv_Rank="O";run;%macro num_char_rank(lb=);data &ds._23(drop=&lb._rank);set &ds._23;if &lb._rank='O' then &lb._numrank=0;if &lb._rank='H' then &lb._numrank=3;if &lb._rank='M' then &lb._numrank=2;if &lb._rank='L' then &lb._numrank=1;run;%mend;%num_char_rank (lb=dt);%num_char_rank(lb=nb);%num_char_rank (lb=sv);data &ds._23;set &ds._23(drop= max_count max_val dt_max nb_max sv_max);maxrank=max(DT_numrank,NB_numrank,SV_numrank);maxrank_count=0;dt_maxrank=0;nb_maxrank=0;sv_maxrank=0;if maxrank=DT_numrank then dt_maxrank=1;if maxrank=NB_numrank then nb_maxrank=1;if maxrank=SV_numrank then sv_maxrank=1;maxrank_count=dt_maxrank+sv_maxrank+nb_maxrank;run;/*taking out those which are LOB conflicts and have no rank conflict*/data &ds._23_1;set &ds._23(where=(maxrank_count=1));Pri_reco="NA";if dt_maxrank=1 then do;pri_reco="DT";DT=-1;end;if nb_maxrank=1 then do;pri_reco="NB";NB=-1;end;if sv_maxrank=1 then do;pri_reco="SV";SV=-1;end;run;/*taking out those which are LOB conflcits and have rank conflict as well*/data &ds._23_23;set &ds._23(where=(maxrank_count>1));pri_Reco="NA";if sv_maxrank=1 then do;pri_reco="SV";SV=-1;end;if dt_maxrank=1 and sv_maxrank=0 then do;pri_Reco="DT";DT=-1;end;run;%mend recommend;/*-----------------------------------------------------------------------------------------------------------------------------*/%recommend (ds=&dataname.);data t_matrix_&PrevQtr._new(rename=(pri_reco=nlp1));set &dataname._1(keep=bucket dt sv nb pri_reco target_flag) &dataname._23_1(keep=bucket dt sv nb pri_Reco target_flag) &dataname._23_23(keep=bucket dt sv nb pri_reco target_flag);run;%recommend(ds=t_matrix_&PrevQtr._new);data t_matrix_&PrevQtr._new2(rename=(pri_reco=nlp2));set t_matrix_&PrevQtr._new_1(keep=bucket dt sv nb pri_reco nlp1 target_flag) t_matrix_&PrevQtr._new_23_1(keep=bucket dt sv nb pri_Reco nlp1 target_flag) t_matrix_&PrevQtr._new_23_23(keep=bucket dt sv nb pri_reco nlp1 target_flag);run;data datalib.nlp_reco_&PrevQtr.(keep=bucket nlp1 nlp2 target_flag);set t_matrix_&PrevQtr._new2;run;%mend recommend_process;/*--------------------------------------------------------------------------------------------------------------------------------*//*Calling the recommendation macro*/%recommend_process(dataname=transition_matrix_&PrevQtr.);/*---------------------------------------------------------------------------------------------------------------------------*//*Forming buckets in scored sitelist of response quarter*//*appending all the scored and ranked datasets*/%macro append_ds(lib, lob, qtr);data &lob._&qtr.;set &lib..&lob._&qtr._conv_check_scrd(keep=dim_plaT_Site_id &lob._c &lob._c_grp &lob._c_rank)&lib..&lob._&qtr._refresh_check_scrd(keep=dim_plaT_site_id &lob._r &lob._r_grp &lob._r_rank );&lob._score=sum(&lob._c,&lob._r);&lob._grp=sum(&lob._c_grp,&lob._r_grp);if &lob._c_rank='' then &lob._rank=&lob._r_rank;else if &lob._r_rank='' then &lob._rank=&lob._c_rank;run;data &lob._&qtr.(keep=dim_plat_site_id &lob._score &lob._grp &lob._rank);set &lob._&qtr.;run;proc sort data=&lob._&qtr out=&lib..&lob._&qtr;by dim_plaT_Site_id;run;%mend append_ds;/*The macro checks for both conv and refresh values being populated together*/%append_ds(datalib,DT,&RespQtr.);%append_ds(datalib,NB,&RespQtr.);%append_ds(datalib,SV,&RespQtr.);/*merging all the datasets together*/data site_bucket_&RespQtr.;merge datalib.dt_&RespQtr.(in=a) datalib.nb_&RespQtr.(in=b) datalib.sv_&RespQtr.(in=c);by dim_plaT_site_id;if a or b or c;bucket=cat(trim(dt_rank), trim(nb_rank), trim(sv_rank));run;/*Merging the recommendation(based on previous quarter) with sitelist of response quarter*/proc sort data=site_bucket_&RespQtr.;by bucket;run;proc sort data=datalib.nlp_reco_&PrevQtr.;by bucket;run;data datalib.site_bucket_&RespQtr._reco;merge site_bucket_&RespQtr.(in=a) datalib.nlp_reco_&PrevQtr.(in=b keep=bucket nlp1 nlp2 target_flag);by bucket;if a;run;

0 0
原创粉丝点击