可以用上一个或下一个非缺失值填补缺失值
支持数值变量也支持字符变量
- %macro FillMissWithNonMiss(SourceTable,TargetTable,ByFactors,MissingVar,OrderVar,Type);
- /**********************************************************************/
- /* 此宏对原表中含有缺失值的变量进行填补,其方法是将缺失值用上一个或下 */
- /* 一个非缺失值来填补。其中SourceTable是原始表格;TargetTable是结果表 */
- /* 格;ByFactors是分组变量;MissingVar是可能含有缺失值的变量, */
- /* =_Numeric_表示选择全部数值型变量,=_Character_表示选择全部字符型变 */
- /* 量,=_All_表示选择全部变量,也可以选择多个变量,用空格分隔;Order_ */
- /* Var是排列变量,可设置为多个,用空格分隔,默认按升序排列,若要降序 */
- /* 排列,可在相应的变量后加DESCENDING;Type是缺失值的选取方式, */
- /* =Previous表示缺失值选取前一个非缺失值,=Next表示缺失值选取后一个非 */
- /* 缺失值,=Mix表示用线性插值的方法填补缺失值(该方法只适用于数值型变 */
- /* 量,对字符型变量若设为Mix,则自动用Previous的方法插值)。 */
- /* */
- /* 最终结果表格中指定的含有缺失值的变量的缺失值被填补。 */
- /* */
- /* Created on 2011.9.28 */
- /* Modified on 2013.3.22 */
- /**********************************************************************/
- /* 检查TargetTable的存在性,若不存在则设为&SourceTable */
- %if &TargetTable EQ %STR() %then %let TargetTable=&SourceTable;
- /* 若SourceTable与TargetTable相同,则设FMWNM_res为&SourceTable */
- %if %UPCASE(&SourceTable) EQ %UPCASE(&TargetTable) %then %let FMWNM_res=&SourceTable;
- %else %let FMWNM_res=FMWNM_res;
- data &FMWNM_res;
- set &SourceTable;
- run;
- /* 检查ByFactors的存在性 */
- %if &ByFactors EQ %STR() %then %do;
- %let ByFactors=FMWNM_ByFactors;
- data &FMWNM_res;
- set &FMWNM_res;
- FMWNM_ByFactors=1;
- run;
- %end;
- %ChkVar(SourceTable=&FMWNM_res,InputVar=&ByFactors,FlagVarExists=FMWNM_FlagVarExists1);
- %if %SYSFUNC(FIND(&FMWNM_FlagVarExists1,0)) NE 0 %then %do;
- %put ERROR: The ByFactors %SCAN(&ByFactors,%SYSFUNC(FIND(&FMWNM_FlagVarExists1,0))) does not exist in SourceTable, please check it again.;
- %goto exit;
- %end;
- %local LastByFactors;
- %let LastByFactors=%SYSFUNC(SCAN(&ByFactors,-1,' '));
- /* 逗号分隔的ByFactors_Comma用于SQL语句 */
- %local ByFactors_Comma;
- %if %SYSFUNC(FIND(&ByFactors,%STR( ))) NE 0 %then %do;
- %let ByFactors_Comma=%SYSFUNC(TRANWRD(%SYSFUNC(COMPBL(&ByFactors)),%STR( ),%STR(,)));
- %end;
- %else %let ByFactors_Comma=&ByFactors;
- /* 检查MissingVar的存在性 */
- %if (%UPCASE(&MissingVar) NE _NUMERIC_) AND (%UPCASE(&MissingVar) NE _CHARACTER_) AND (%UPCASE(&MissingVar) NE _ALL_) %then %do;
- %ChkVar(SourceTable=&FMWNM_res,InputVar=&MissingVar,FlagVarExists=FMWNM_FlagVarExists2);
- %if %SYSFUNC(FIND(&FMWNM_FlagVarExists2,0)) NE 0 %then %do;
- %put ERROR: The MissingVar %SCAN(&ByFactors,%SYSFUNC(FIND(&FMWNM_FlagVarExists2,0))) does not exist in SourceTable, please check it again.;
- %goto exit;
- %end;
- %end;
- /* 检查OrderVar的存在性 */
- %if &OrderVar EQ %STR() %then %do;
- %let OrderVar=FMWNM_OrderVar;
- data &FMWNM_res;
- set &FMWNM_res;
- FMWNM_OrderVar=_N_;
- run;
- %end;
- %ChkVar(SourceTable=&FMWNM_res,InputVar=&OrderVar,FlagVarExists=FMWNM_FlagVarExists3);
- %if %SYSFUNC(FIND(&FMWNM_FlagVarExists3,0)) NE 0 %then %do;
- %put ERROR: The OrderVar %SCAN(&OrderVar,%SYSFUNC(FIND(&FMWNM_FlagVarExists3,0))) does not exist in SourceTable, please check it again.;
- %goto exit;
- %end;
- /* 逗号分隔的OrderVar_Comma用于SQL语句 */
- %local OrderVar_Comma;
- %if &OrderVar NE %STR() %then %do;
- %if %SYSFUNC(FIND(&OrderVar,%STR( ))) NE 0 %then %do;
- %let OrderVar=%SYSFUNC(COMPBL(&OrderVar)); /* 压缩多余空格 */
- %let OrderVar_Comma=%PrxChange(InputString=&OrderVar,PrxString=s/ DESCENDING/DESCENDING/); /* 解决DESCENDING的格式问题 */
- %let OrderVar_Comma=%SYSFUNC(TRANWRD(&OrderVar_Comma,%STR( ),ANCBS_Space));
- %let OrderVar_Comma=%PrxChange(InputString=&OrderVar_Comma,PrxString=s/DESCENDING/ DESCENDING/);
- %let OrderVar_Comma=%SYSFUNC(TRANWRD(&OrderVar_Comma,ANCBS_Space,%STR(,)));
- %end;
- %else %let OrderVar_Comma=&OrderVar;
- %end;
- %else %let OrderVar_Comma=;
- /* 检查Type的存在性和合法性 */
- %if &Type EQ %STR() %then %let Type=PREVIOUS;
- %if (%UPCASE(&Type) NE PREVIOUS) AND (%UPCASE(&Type) NE NEXT) AND (%UPCASE(&Type) NE MIX) %then %do;
- %put ERROR: The Type should be PREVIOUS, NEXT or MIX, case insensitive and without quotes.;
- %goto exit;
- %end;
- /* 开始进行计算 */
- /* 首先,根据Type设置对原始表格进行排序 */
- %if %UPCASE(&Type) EQ PREVIOUS %then %do;
- proc sort data=&FMWNM_res;
- by &OrderVar;
- run;
- %end;
- %else %if (%UPCASE(&Type) EQ NEXT) OR (%UPCASE(&Type) EQ MIX) %then %do; /* 将原表按&ByFactors逆序排序 */
- proc sort data=&FMWNM_res;
- by DESCENDING %SYSFUNC(TRANWRD(&OrderVar,%STR( ),%STR( )DESCENDING%STR( )));
- run;
- %end;
- /* 其次,对含有缺失值的变量进行补足 */
- %if %UPCASE(&MissingVar) EQ _NUMERIC_ %then %do;
- %GetMissNum(SourceTable=&FMWNM_res,TargetTable=FMWNM_MissNumeric_temp,InputVar=_NUMERIC_);
- proc sql noprint;
- create table FMWNM_MissNumeric as
- select *,monotonic() as _N_ from FMWNM_MissNumeric_temp
- where MissNum GT 0;
- quit;
- proc sql noprint;
- select count(*) into :FMWNM_MissVarNum
- from FMWNM_MissNumeric;
- quit;
- %if %UPCASE(&Type) NE MIX %then %do;
- %do FMWNM_i=1 %to &FMWNM_MissVarNum;
- proc sql noprint;
- select VarName into :FMWNM_MissVarName
- from FMWNM_MissNumeric
- where _N_ EQ &FMWNM_i;
- quit;
- data &FMWNM_res(drop=var_temp);
- set &FMWNM_res;
- retain var_temp;
- %if %UPCASE(&Type) EQ PREVIOUS %then %do;
- by &ByFactors;
- %end;
- %else %if %UPCASE(&Type) EQ NEXT %then %do;
- by DESCENDING %SYSFUNC(TRANWRD(&ByFactors,%STR( ),%STR( )DESCENDING%STR( )));
- %end;
- if first.&LastByFactors then var_temp=&FMWNM_MissVarName;
- if &FMWNM_MissVarName NE . then var_temp=&FMWNM_MissVarName;
- else &FMWNM_MissVarName=var_temp;
- run;
- %end;
- %end;
- %else %do;
- %do FMWNM_i=1 %to &FMWNM_MissVarNum;
- proc sql noprint;
- select VarName into :FMWNM_MissVarName
- from FMWNM_MissNumeric
- where _N_ EQ &FMWNM_i;
- quit;
-
- %let FMWNM_MissVarName=%SYSFUNC(STRIP(&FMWNM_MissVarName));
- proc sort data=&FMWNM_res;
- by DESCENDING %SYSFUNC(TRANWRD(&OrderVar,%STR( ),%STR( )DESCENDING%STR( )));
- run;
- data &FMWNM_res;
- set &FMWNM_res;
- retain &FMWNM_MissVarName._Diff;
- by DESCENDING %SYSFUNC(TRANWRD(&ByFactors,%STR( ),%STR( )DESCENDING%STR( )));
- if first.&LastByFactors then &FMWNM_MissVarName._Diff=&FMWNM_MissVarName;
- if first.&LastByFactors AND &FMWNM_MissVarName EQ . then &FMWNM_MissVarName._No=1;
- else if &FMWNM_MissVarName NE . then do;
- &FMWNM_MissVarName._Diff=&FMWNM_MissVarName; /* 得到两个缺失值之间的差 */
- &FMWNM_MissVarName._No=0; /* 得到缺失值的序号 */
- end;
- else if &FMWNM_MissVarName EQ . then do;
- &FMWNM_MissVarName._No+1;
- end;
- run;
- proc sort data=&FMWNM_res;
- by &OrderVar;
- run;
- data &FMWNM_res(drop=&FMWNM_MissVarName._Diff &FMWNM_MissVarName._No var_temp);
- set &FMWNM_res;
- retain var_temp;
- by &ByFactors;
- if first.&LastByFactors then var_temp=&FMWNM_MissVarName;
- if &FMWNM_MissVarName NE . then var_temp=&FMWNM_MissVarName;
- else do;
- &FMWNM_MissVarName=var_temp+(&FMWNM_MissVarName._Diff-var_temp)/(&FMWNM_MissVarName._No+1); /* 利用两个缺失值之间的差和缺失值的序号得到需要插入的值 */
- var_temp=&FMWNM_MissVarName;
- end;
- run;
- %end;
- %end;
- %end;
- %else %if %UPCASE(&MissingVar) EQ _CHARACTER_ %then %do;
- %GetMissNum(SourceTable=&FMWNM_res,TargetTable=FMWNM_MissChar_temp,InputVar=_CHARACTER_);
- proc sql noprint;
- create table FMWNM_MissChar as
- select *,monotonic() as _N_ from FMWNM_MissChar_temp
- where MissNum GT 0;
- quit;
- proc sql noprint;
- select count(*) into :FMWNM_MissVarNum
- from FMWNM_MissChar;
- quit;
- %do FMWNM_j=1 %to &FMWNM_MissVarNum;
- proc sql noprint;
- select VarName into :FMWNM_MissVarName
- from FMWNM_MissChar
- where _N_ EQ &FMWNM_j;
- quit;
- data &FMWNM_res(drop=var_temp);
- set &FMWNM_res;
- retain var_temp;
- %if (%UPCASE(&Type) EQ PREVIOUS) OR (%UPCASE(&Type) EQ MIX) %then %do;
- by &ByFactors;
- %end;
- %else %if %UPCASE(&Type) EQ NEXT %then %do;
- by DESCENDING %SYSFUNC(TRANWRD(&ByFactors,%STR( ),%STR( )DESCENDING%STR( )));
- %end;
- if first.&LastByFactors then var_temp=&FMWNM_MissVarName;
- if &FMWNM_MissVarName NE '' then var_temp=&FMWNM_MissVarName;
- else &FMWNM_MissVarName=var_temp;
- run;
- %end;
- %end;
- %else %do;
- %if %UPCASE(&MissingVar) EQ _ALL_ %then %do;
- %GetMissNum(SourceTable=&FMWNM_res,TargetTable=FMWNM_MissAll_temp,InputVar=_ALL_);
- %end;
- %else %do; /* 若MissingVar为指定变量名称,则仅取指定变量的缺失值数量数据 */
- %GetMissNum(SourceTable=&FMWNM_res,TargetTable=FMWNM_MissAll_temp,InputVar=&MissingVar);
- %end;
- proc sql noprint;
- create table FMWNM_MissAll as
- select *,monotonic() as _N_ from FMWNM_MissAll_temp
- where MissNum GT 0;
- quit;
- proc sql noprint;
- select count(*) into :FMWNM_MissVarNum
- from FMWNM_MissAll;
- quit;
- %do FMWNM_k=1 %to &FMWNM_MissVarNum;
- proc sql noprint;
- select VarName,VarType into :FMWNM_MissVarName,:FMWNM_MissVarType
- from FMWNM_MissAll
- where _N_ EQ &FMWNM_k;
- quit;
-
- %if %UPCASE(&Type) NE MIX %then %do;
- data &FMWNM_res(drop=var_temp);
- set &FMWNM_res;
- retain var_temp;
- %if %UPCASE(&Type) EQ PREVIOUS %then %do;
- by &ByFactors;
- %end;
- %else %if %UPCASE(&Type) EQ NEXT %then %do;
- by DESCENDING %SYSFUNC(TRANWRD(&ByFactors,%STR( ),%STR( )DESCENDING%STR( )));
- %end;
- if first.&LastByFactors then var_temp=&FMWNM_MissVarName;
- %if &FMWNM_MissVarType EQ N %then %do;
- if &FMWNM_MissVarName NE . then var_temp=&FMWNM_MissVarName;
- else &FMWNM_MissVarName=var_temp;
- %end;
- %else %do;
- if &FMWNM_MissVarName NE '' then var_temp=&FMWNM_MissVarName;
- else &FMWNM_MissVarName=var_temp;
- %end;
- run;
- %end;
- %else %do;
- %let FMWNM_MissVarName=%SYSFUNC(STRIP(&FMWNM_MissVarName));
- %if &FMWNM_MissVarType EQ N %then %do;
- proc sort data=&FMWNM_res;
- by DESCENDING %SYSFUNC(TRANWRD(&OrderVar,%STR( ),%STR( )DESCENDING%STR( )));
- run;
- data &FMWNM_res;
- set &FMWNM_res;
- retain &FMWNM_MissVarName._Diff;
- by DESCENDING %SYSFUNC(TRANWRD(&ByFactors,%STR( ),%STR( )DESCENDING%STR( )));
- if first.&LastByFactors then &FMWNM_MissVarName._Diff=&FMWNM_MissVarName;
- if first.&LastByFactors AND &FMWNM_MissVarName EQ . then &FMWNM_MissVarName._No=1;
- else if &FMWNM_MissVarName NE . then do;
- &FMWNM_MissVarName._Diff=&FMWNM_MissVarName; /* 得到两个缺失值之间的差 */
- &FMWNM_MissVarName._No=0; /* 得到缺失值的序号 */
- end;
- else if &FMWNM_MissVarName EQ . then do;
- &FMWNM_MissVarName._No+1;
- end;
- run;
- proc sort data=&FMWNM_res;
- by &OrderVar;
- run;
- data &FMWNM_res(drop=&FMWNM_MissVarName._Diff &FMWNM_MissVarName._No var_temp);
- set &FMWNM_res;
- retain var_temp;
- by &ByFactors;
- if first.&LastByFactors then var_temp=&FMWNM_MissVarName;
- if &FMWNM_MissVarName NE . then var_temp=&FMWNM_MissVarName;
- else do;
- &FMWNM_MissVarName=var_temp+(&FMWNM_MissVarName._Diff-var_temp)/(&FMWNM_MissVarName._No+1); /* 利用两个缺失值之间的差和缺失值的序号得到需要插入的值 */
- var_temp=&FMWNM_MissVarName;
- end;
- run;
- %end;
- %else %do;
- data &FMWNM_res(drop=var_temp);
- set &FMWNM_res;
- retain var_temp;
- by &ByFactors;
- if first.&LastByFactors then var_temp=&FMWNM_MissVarName;
- if &FMWNM_MissVarName NE '' then var_temp=&FMWNM_MissVarName;
- else &FMWNM_MissVarName=var_temp;
- run;
- %end;
- %end;
- %end;
- %end;
- %if %UPCASE(&FMWNM_res) EQ %UPCASE(&TargetTable) %then %do;
- proc sort data=&TargetTable;
- by &OrderVar;
- run;
- %end;
- %else %do;
- proc sql noprint;
- create table &TargetTable as
- select * from &FMWNM_res
- order by &OrderVar_Comma;
- quit;
- %end;
- /* 删除临时生成的ByFactors和OrderVar */
- %if %UPCASE(&ByFactors) EQ FMWNM_BYFACTORS %then %do;
- data &TargetTable;
- set &TargetTable;
- drop FMWNM_ByFactors;
- run;
- %end;
- %if %UPCASE(&OrderVar) EQ FMWNM_ORDERVAR %then %do;
- data &TargetTable;
- set &TargetTable;
- drop FMWNM_OrderVar;
- run;
- %end;
- /* 删除不必要的表格 */
- proc datasets lib=work nolist;
- delete FMWNM_:;
- quit;
- %exit:
- %mend;
- %macro Demo();
- %let SourceTable=a;
- %let TargetTable=b;
- %let ByFactors=;
- %let MissingVar=_All_; /* =_Numeric_表示选择全部数值型变量,=_Character表示选择全部字符型变量,=_All_表示选择全部变量,也可以选择多个变量,用空格分隔 */
- %let OrderVar=;
- %let Type=MIX; /* 对缺失值进行处理的方法,=Previous、Next或MIX */
- %FillMissWithNonMiss(&SourceTable,&TargetTable,&ByFactors,&MissingVar,&OrderVar,&Type);
- %mend;