运行时总是提示“WHERE 子句运算符要求兼容的变量”。我做的是一个
10 折交叉验证(10-fold cross-validation),因变量是字符型,自变量是数值型,代码如下:
/*----------------------------------------------------------------------
  %xval - k-fold cross-validation of a PROC LOGISTIC model.

  Parameters
    dsn      input data set
    outcome  response variable (character or numeric); the modelled
             event level is "1", so the scored probability is read
             from variable P_1
    covars   blank-separated list of candidate predictors
    k        number of folds (default 10)
    sel      value passed to the MODEL statement SELECTION= option
    outdsn   output data set: all k scored hold-out folds stacked
    outdsn2  output data set: one row per fold with Training_AUC,
             Scoring_AUC and Scoring_KS
    quiet    non-zero (default) redirects procedure print output of the
             scoring/validation steps to junk.txt; 0 leaves it alone

  Work data sets created as side effects: _modif, _mod1.._modk,
  assoc1.., out1.., KS1.., Wil1.., AUC1.., training, ks, validation,
  WilcoxonScore and AUC (pooled AUC and Gini over all hold-out folds).
----------------------------------------------------------------------*/
%macro xval(dsn=,outcome=,covars=,k=10,sel=stepwise,outdsn=_xval_,outdsn2=comparison,quiet=1);
  /* Keep the loop counters private so a caller's i / j are not clobbered. */
  %local i j;

  /* Assign every observation to a fold xv in 1..k using a fixed seed. */
  data _modif;
    set &dsn;
    unif=&k*ranuni(20052905);
    xv=ceil(unif);
  run;

  %do i=1 %to &k;
    /* Fit on every fold except fold i; keep the model and the
       Association table (holds the training c statistic). */
    proc logistic data=_modif(where=(xv ne &i)) outmodel=_mod&i;
      model &outcome (event="1") =&covars / selection=&sel;
      ods output association=assoc&i;
    run;

    /* The original tested the literal text "print" (always true);
       the redirect is now driven by the quiet= parameter. */
    %if &quiet ne 0 %then %do;
      proc printto file='junk.txt';
      run;
    %end;

    /* Score the held-out fold i with the stored model. */
    proc logistic inmodel=_mod&i;
      score data=_modif(where=(xv=&i)) out=out&i;
    run;

    /* Kolmogorov-Smirnov statistics of P_1 between outcome classes.
       MISSING() is used instead of "^=." so the filter is valid for a
       character outcome as well as a numeric one. */
    ods select none;
    ods output KolSmir2Stats=KS&i;
    proc npar1way data= out&i edf;
      where not missing(&outcome);
      class &outcome;
      var P_1;
    run;
    ods select all;

    /* Wilcoxon rank sums of P_1 by outcome class, used for the AUC. */
    ods select none;
    ods output WilcoxonScores=Wil&i;
    proc npar1way wilcoxon data= out&i;
      where not missing(&outcome);
      class &outcome;
      var P_1;
    run;
    ods select all;

    /* Hold-out AUC = 0.5 + |ExpectedSum - SumOfScores| / product of the
       class sizes; v1 comes from the first class row, v2 accumulates
       the product of N over all class rows. */
    data AUC&i;
      set Wil&i end=eof;
      retain v1 v2 1;
      if _n_=1 then v1=abs(ExpectedSum - SumOfScores);
      v2=N*v2;
      if eof then do;
        d=v1/v2;
        /*Gini=d * 2; */
        Scoring_AUC= d + 0.5;
        put Scoring_AUC=;
        put "****Open work.results dataset to see results of training datasets....";
        keep Scoring_AUC;
        output;
      end;
    run;

    /* Restore the default print destination. */
    %if &quiet ne 0 %then %do;
      proc printto;
      run;
    %end;
  %end;

  /* Stack the k scored hold-out folds. */
  data &outdsn;
    set %do j=1 %to &k;out&j %end;;
  run;

  /* Training c statistic per fold (row label2='c' of Association). */
  data training (keep =label2 nvalue2 rename= (nvalue2=Training_AUC));
    set %do j=1 %to &k;assoc&j %end;;
    where label2= 'c';
    if label2='c' then label2 ='AUC';
  run;

  /* Hold-out KS statistic per fold (row label2='D' of KolSmir2Stats). */
  data ks (keep =label2 nvalue2 rename= (nvalue2=Scoring_KS));
    set %do j=1 %to &k;ks&j %end;;
    where label2= 'D';
    if label2='D' then label2 ='KS';
  run;

  /* Hold-out AUC per fold. */
  data validation;
    set %do j=1 %to &k;AUC&j %end;;
  run;

  /* One-to-one merge (no BY): row n of each input belongs to fold n. */
  data &outdsn2 (drop = label2);
    merge training validation ks;
  run;

  /* Pooled AUC / Gini over all stacked hold-out predictions. */
  ods select none;
  ods output WilcoxonScores=WilcoxonScore;
  proc npar1way wilcoxon data= &outdsn;
    where not missing(&outcome);
    class &outcome;
    var P_1;
  run;
  ods select all;

  data AUC;
    set WilcoxonScore end=eof;
    retain v1 v2 1;
    if _n_=1 then v1=abs(ExpectedSum - SumOfScores);
    v2=N*v2;
    if eof then do;
      d=v1/v2;
      Gini=d * 2;
      AUC= d + 0.5;
      put AUC= GINI=;
      put "****Open work.results dataset to see results of training datasets....";
      keep AUC Gini;
      output;
    end;
  run;
%mend;
/* 10-fold cross-validation of var_13 on five candidate predictors,
   with stepwise selection; stacked hold-out scores go to KFOLD and the
   per-fold summary goes to COMPARISON. */
%xval(
  dsn=data,
  outcome=var_13,
  covars=t569 t868 t725 t291 t323,
  k=10,
  sel=stepwise,
  outdsn=kfold,
  outdsn2=comparison
);
复制代码