而做逻辑回归时,也需要一种图形观看连续变量x与logit(y)的关系,可以SAS本身没有提供,需要我们自行绘制。以下就是做好的一个宏。
- /****************************************************************************/
- %Macro E_Logit(data,target,var,bins);
- /*第一步:对变量进行RANK分组*/
- proc rank data=&data groups=&bins out=out;
- var &var;
- ranks bin;
- run;
- /*第二步:对每一组计算该变量的平均值;响应事件数和总事件数**/
- /*数据集BINS 包含: */
- /* &target = 每个BIN里面响应事件数 */
- /* _FREQ_ =每个BIN里面总事件数 */
- /* &var =每个BIN里面&var平均值 */
- proc means data=out noprint nway;
- class bin;
- var &target &var;
- output out=bins sum(&target)=&target mean(&var)=&var;
- run;
- /*第三步:根据公式计算 empirical logit */
- data bins;
- set bins;
- elogit=log((&target+(sqrt(_FREQ_ )/2))/
- ( _FREQ_ -&target+(sqrt(_FREQ_ )/2)));
- run;
- /*第四步:画LOGIT与原变量平均值;LOGIT与BIN变量的线图*/
- /*proc sgplot data = bins;*/
- /*title "Empirical Logit against &var";*/
- /*series y=elogit x=&var;*/
- /*scatter y=elogit x=&var;*/
- /*run;*/
- proc sgplot data = bins;
- title "Empirical Logit against Binned &var";
- scatter y=elogit x=bin;
- series y=elogit x=bin;
- run;quit;
- /*第五步:用BIN变量替代原来的变量,并对BIN变量进行代码保存和改造*/
- title;
- proc means data = out noprint nway;
- class bin;
- var &var;
- output out=endpts max(&var)=max;
- run;
- filename rank "d:\rank.sas";
- /*编写BIN代码*/
- data _null_;
- file rank;
- set endpts end=last;
- if _n_ = 1 then put "select;";
- if not last then do;
- put " when (&var <= " max ") B_&var =" bin ";";
- end;
- else if last then do;
- put "otherwise B_&var =" bin ";";
- put "end;";
- end;
- run;
- /* Use the code. */
- data &data;
- set &data;
- %include rank /source2;
- run;
- /*proc means data = &data min max;*/
- /* class B_&var;*/
- /* var &var;*/
- /*run;*/
- proc delete data=out bins endpts;run;
- %Mend E_Logit;
- /****************************************************************************/
- /*%Macro E_Logit(数据集,二分目标变量,连续自变量,分箱个数);*/
- %E_Logit(gg,flag,score,5);
- run;
- %E_Logit(tel,churn,avPlan,4);
- run;


雷达卡




京公网安备 11010802022788号







