/*------------------------------------------------------------------------------
 ChcAnalysis - variable binning macro.

 Bins the independent variable and, for methods 1 and 2, summarises the
 dependent variable inside every bin.

 Parameters
   DSin       input data set to analyse
   method     binning method:
                1 = equal frequency (bins are ranges of observation numbers
                    after sorting by IVar)
                2 = equal width (bins are ranges of IVar values)
                3 = quantile cut points taken from PROC UNIVARIATE
   Nbins      number of bins (methods 1 and 2 only; must be >= 1)
   IVar       independent variable that is binned (numeric)
   DVar       dependent variable that is summed and counted per bin;
              N_1 is its sum, so it is presumably a 0/1 flag - confirm with
              the callers
   DSout      output data set
   p_start, p_end, p_interval
              percentile grid for method 3, passed to PCTLPTS as
              "p_start to p_end by p_interval"

 Output
   methods 1 and 2: one row per bin with
       Bin, Lowerbound, upperbound, N_1 (sum of DVar), Bintotal (count of
       non-missing DVar), N_0 (Bintotal - N_1), percent_1, percent_0.
       percent_1 and percent_0 are missing when the bin is empty.
       Method 1 bins are (Lowerbound, upperbound] in observation numbers.
       Method 2 bins are [Lowerbound, upperbound) in IVar values, except the
       last bin, which also includes its upper bound (the maximum of IVar).
   method 3: one row per pair of consecutive percentile points with
       n (bin number), bin_l (lower cut point), bin_u (upper cut point).

 Side effects
   Sorts DSin in place by IVar and (re)creates the WORK data sets temp,
   temp_qt (method 3) and qt_dsout_trans (method 3).
------------------------------------------------------------------------------*/
%macro ChcAnalysis(DSin=,method=,Nbins=,IVar=,DVar=,DSout=,p_start=,p_end=,p_interval=);
    /* Keep every working macro variable local so the caller's symbols are never overwritten. */
    %local i j k Binsize Vmin Vmax nValid nPts lower upper upperOp value;

    /* Validate the arguments before any data set is touched. */
    %if &method = 1 or &method = 2 %then %do;
        %if %length(&Nbins) = 0 %then %do;
            %put ERROR: ChcAnalysis: Nbins is required when method=&method..;
            %return;
        %end;
        %if %sysevalf(&Nbins < 1) %then %do;
            %put ERROR: ChcAnalysis: Nbins must be at least 1 (got &Nbins).;
            %return;
        %end;
    %end;
    %else %if &method ne 3 %then %do;
        %put ERROR: ChcAnalysis: method must be 1, 2 or 3 (got "&method").;
        %return;
    %end;

    /* Sort by the independent variable so that the observation number reflects its rank. */
    proc sort data=&DSin;
        by &IVar;
    run;

    /* Working copy holding only the analysis columns plus the observation number. */
    data temp;
        set &DSin;
        by &IVar;
        _obs = _n_;
        keep &IVar &DVar _obs;
    run;

    %if &method = 1 %then %do;
        /* Equal frequency: split the observation numbers 1..Vmax into Nbins ranges. */
        /* PUT with BEST32. avoids the BEST8. rounding that INTO applies to numerics. */
        proc sql noprint;
            select put(count(*), best32.) into :Vmax from temp;
        quit;
        %let Vmax = &Vmax;  /* re-assignment strips the padding blanks */
        %if &Vmax = 0 %then %do;
            %put ERROR: ChcAnalysis: &DSin has no observations.;
            %return;
        %end;

        %let Binsize = %sysevalf(&Vmax / &Nbins);
        %let lower = 0;
        %do i = 1 %to &Nbins;
            %local LB_&i UB_&i sum_&i N_&i;
            /* The last edge is pinned to Vmax so rounding can never drop the final observation. */
            %if &i = &Nbins %then %let upper = &Vmax;
            %else %let upper = %sysevalf(&i * &Binsize);
            %let LB_&i = &lower;
            %let UB_&i = &upper;
            proc sql noprint;
                select put(sum(&DVar), best32.), put(count(&DVar), best32.)
                    into :sum_&i, :N_&i
                    from temp
                    /* (lower, upper] covers observations 1..Vmax exactly once */
                    where _obs > &lower and _obs <= &upper;
            quit;
            /* Reuse the same edge as the next lower bound: no gaps, no overlaps. */
            %let lower = &upper;
        %end;
    %end;
    %else %if &method = 2 %then %do;
        /* Equal width: split the value range [Vmin, Vmax] of IVar into Nbins intervals. */
        proc sql noprint;
            select put(count(&IVar), best32.),
                   put(min(&IVar), best32.),
                   put(max(&IVar), best32.)
                into :nValid, :Vmin, :Vmax
                from temp;
        quit;
        %let nValid = &nValid;
        %let Vmin = &Vmin;
        %let Vmax = &Vmax;
        %if &nValid = 0 %then %do;
            %put ERROR: ChcAnalysis: &IVar has no non-missing values in &DSin..;
            %return;
        %end;

        %let Binsize = %sysevalf((&Vmax - &Vmin) / &Nbins);
        /* Bins start at the observed minimum, not at zero. */
        %let lower = &Vmin;
        %do i = 1 %to &Nbins;
            %local LB_&i UB_&i sum_&i N_&i;
            %if &i = &Nbins %then %do;
                /* The last bin is closed on the right so the maximum value is counted. */
                %let upper = &Vmax;
                %let upperOp = <=;
            %end;
            %else %do;
                %let upper = %sysevalf(&Vmin + &i * &Binsize);
                %let upperOp = <;
            %end;
            %let LB_&i = &lower;
            %let UB_&i = &upper;
            proc sql noprint;
                select put(sum(&DVar), best32.), put(count(&DVar), best32.)
                    into :sum_&i, :N_&i
                    from temp
                    where &IVar >= &lower and &IVar &upperOp &upper;
            quit;
            %let lower = &upper;
        %end;
    %end;
    %else %do;
        /* Method 3: quantile cut points. */
        /* Store the requested percentiles of IVar as one row of P_ columns. */
        proc univariate data = &DSin;
            var &IVar;
            output out = temp_qt
                pctlpts = &p_start to &p_end by &p_interval
                pctlpre = P_;
        run;

        /* Turn the single row of percentiles into one row per percentile. */
        proc transpose data=temp_qt
                out=qt_dsout_trans(rename = (col1=value) drop=_LABEL_)
                name = Quantiles;
        run;

        /* Collect the cut points, at full precision, in one blank-separated list. */
        proc sql noprint;
            select strip(put(value, best32.)) into :value separated by ' '
                from qt_dsout_trans;
        quit;
        /* Count the points actually produced instead of deriving the count from */
        /* the percentile grid, which can yield a non-integer loop bound.        */
        %let nPts = &sqlobs;
        %if &nPts < 2 %then %do;
            %put ERROR: ChcAnalysis: at least two percentile points are needed to form a bin (got &nPts).;
            %return;
        %end;

        /* Split the list into var1..var<nPts>. */
        %do j = 1 %to &nPts;
            %local var&j;
            %let var&j = %scan(&value, &j, %str( ));
            %put &&var&j;
        %end;

        /* One output row per pair of consecutive cut points. */
        proc sql noprint;
            create table &DSout (n num, bin_l num, bin_u num);
            %do i = 1 %to %eval(&nPts - 1);
                %let k = %eval(&i + 1);
                insert into &DSout values (&i, &&var&i, &&var&k);
            %end;
        quit;
    %end;

    /* Map the per-bin totals into the output data set. Only methods 1 and 2 */
    /* define the LB_, UB_, sum_ and N_ variables used here.                 */
    %if &method = 1 or &method = 2 %then %do;
        data &DSout;
            %do i = 1 %to &Nbins;
                Bin = &i;
                Lowerbound = &&LB_&i;
                upperbound = &&UB_&i;
                /* SUM over an empty bin is missing; report it as zero. */
                if (&&sum_&i = .) then N_1 = 0; else N_1 = &&sum_&i;
                if (&&N_&i = .) then Bintotal = 0; else Bintotal = &&N_&i;
                N_0 = Bintotal - N_1;
                /* Leave the rates missing for an empty bin instead of dividing by zero. */
                if Bintotal > 0 then do;
                    percent_1 = N_1 / Bintotal;
                    percent_0 = N_0 / Bintotal;
                end;
                else call missing(percent_1, percent_0);
                output;
            %end;
        run;
    %end;
%mend;