/**********************************************************
Purpose: trim or winsorize variables of a SAS dataset to limit the
         impact of extreme values.
Input
  dsetin  : dataset to winsorize/trim
  dsetout : dataset to create; defaults to dsetin (overwritten in place)
  byvar   : BY variable(s) defining the groups within which the percentile
            cut-offs are computed, e.g. date. Use none (any case) or leave
            blank to treat the whole dataset as a single group
  vars    : space-separated list of numeric variables to winsorize/trim
  type    : winsor (any case) replaces values outside the cut-offs with the
            cut-off; delete removes the whole observation when any listed
            variable lies outside its cut-offs. Any other value is treated
            as delete and a note is written to the log
  pctl    : lower and upper percentile points, in that order (default 5 95)
Output
  dsetout : dataset with winsorized/trimmed values, sorted by byvar
Notes
  - Missing values are never replaced and never cause a deletion.
  - Work datasets XTEMP and XTEMP_PCTL are created and left in place.
  - Helper variables <var>L and <var>H hold the cut-offs during the final
    step and are dropped from dsetout.
************************************************************/
%macro winsor(dsetin=, dsetout=, byvar=none, vars=, type=winsor, pctl=5 95);
  /* Keep all working macro variables out of the caller scope. */
  %local varL varH xn token usedummy dowinsor;

  /* Nothing sensible can be done without a variable list. */
  %if %length(&vars) = 0 %then %do;
    %put ERROR: (winsor) vars= is required and must list at least one variable.;
    %return;
  %end;

  %if %length(&dsetout) = 0 %then %let dsetout = &dsetin;

  /* Build the lists of cut-off variable names: <var>L and <var>H. */
  %let varL = ;
  %let varH = ;
  %let xn = 1;
  %do %until (%scan(&vars, &xn) = );
    %let token = %scan(&vars, &xn);
    %let varL = &varL &token.L;
    %let varH = &varH &token.H;
    %let xn = %eval(&xn + 1);
  %end;
  /* The loop overshoots by one; xn is now the number of variables. */
  %let xn = %eval(&xn - 1);

  /* Decide case-insensitively whether a dummy BY variable is needed. */
  %let usedummy = 0;
  %if %length(&byvar) = 0 %then %let usedummy = 1;
  %else %if %qupcase(&byvar) = NONE %then %let usedummy = 1;
  %if &usedummy %then %let byvar = xbyvar;

  /* Decide case-insensitively between winsorizing and trimming. */
  %let dowinsor = 0;
  %if %qupcase(&type) = WINSOR %then %let dowinsor = 1;
  %else %if %qupcase(&type) ne DELETE %then
    %put NOTE: (winsor) type=&type is not winsor or delete, observations will be trimmed.;

  /* Work copy of the input, with a constant BY variable when no grouping
     was requested so that the remaining steps are identical. */
  data xtemp;
    set &dsetin;
    %if &usedummy %then %do;
    xbyvar = 1;
    %end;
  run;

  proc sort data = xtemp;
    by &byvar;
  run;

  /* One row per BY group holding <var>L and <var>H for every variable. */
  proc univariate data = xtemp noprint;
    by &byvar;
    var &vars;
    output out = xtemp_pctl PCTLPTS = &pctl PCTLPRE = &vars PCTLNAME = L H;
  run;

  data &dsetout;
    merge xtemp xtemp_pctl;
    by &byvar;
    array trimvars{&xn} &vars;
    array trimvarl{&xn} &varL;
    array trimvarh{&xn} &varH;
    do xi = 1 to dim(trimvars);
      /* Missing values are left untouched in both modes. */
      if not missing(trimvars{xi}) then do;
        %if &dowinsor %then %do;
        if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
        if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
        %end;
        %else %do;
        /* DELETE discards the whole observation immediately. */
        if (trimvars{xi} < trimvarl{xi}) then delete;
        if (trimvars{xi} > trimvarh{xi}) then delete;
        %end;
      end;
    end;
    /* Only drop xbyvar when this macro created it. */
    drop &varL &varH xi
      %if &usedummy %then %do;
      xbyvar
      %end;
    ;
  run;
%mend winsor;
/* Source: https://bbs.pinggu.org/thread-844003-1-1.html */