宏解释+源代码
1、宏解释
这个宏用来统计数据集中指定变量的缺失值数量,支持数值变量和字符变量
最后都汇到一张结果表格中
之前我用的是把表格转置后,用nmiss和cmiss求横向的数组的缺失值
这样代码比较简单,但是后来发现proc transpose实在是太慢了,而且耗资源
这下改了下,直接用sql对列进行计算了,不需要转置了
2、源代码
- %macro GetMissNum(SourceTable,TargetTable,InputVar);
- /**********************************************************************/
- /* 此宏的作用是统计原表中不同变量的缺失值数量。其中SourceTable是原始 */
- /* 表格,SourceTable是结果表格;TargetTable是结果表格;InputVar是原始 */
- /* 表格中的变量,可设多个变量,用空格分隔,也可如下设置:=_Numeric_表 */
- /* 示统计全部数值型变量,=_Character_表示统计全部字符型变量,=_All_表 */
- /* 示统计全部变量。 */
- /* */
- /* 最终结果表格中包含所有指定变量的名称、类型和相应的缺失值数量。 */
- /* */
- /* Created on 2013.5.8 */
- /* Modified on 2013.5.8 */
- /**********************************************************************/
- /* 检查TargetTable的存在性 */
- %if &TargetTable EQ %STR() %then %do;
- %put ERROR: The TargetTable should not be blank, please check it again.;
- %goto exit;
- %end;
- /* 检查InputVar的合法性 */
- %if %UPCASE(&InputVar) NE _NUMERIC_ AND %UPCASE(&InputVar) NE _CHARACTER_ AND %UPCASE(&InputVar) NE _ALL_ %then %do;
- %ChkVar(SourceTable=&SourceTable,InputVar=&InputVar,FlagVarExists=GMN_FlagVarExists);
- %if %SYSFUNC(FIND(&GMN_FlagVarExists,0)) NE 0 %then %do;
- %put ERROR: The InputVar should be _Numeric_, _Character_, _All_ or any variable name in SourceTable, case insensitive and without quotes.;
- %put ERROR: The InputVar "%SCAN(&InputVar,%SYSFUNC(FIND(&GMN_FlagVarExists,0)))" does not exist in SourceTable, please check it again.;
- %goto exit;
- %end;
- %end;
- %if %UPCASE(&InputVar) EQ _NUMERIC_ %then %do;
- proc contents data=&SourceTable position out=GMN_VarList(keep=name type varnum) noprint;
- run;
- /* 检查数值型变量是否存在 */
- %ChkValue(SourceTable=GMN_VarList,
- InputVar=type,
- Value=1,
- FlagValueExists=GMN_FlagNumVarExists);
- %if &GMN_FlagNumVarExists GT 0 %then %do;
- proc sql noprint;
- select NAME into :GMN_NumVarList separated by ' '
- from GMN_VarList
- where TYPE EQ 1;
- quit;
- /* 化简原始表格 */
- data &TargetTable(keep=&GMN_NumVarList drop=GMN_i);
- set &SourceTable;
- array VarList &GMN_NumVarList;
- do GMN_i=1 to dim(VarList);
- if VarList{GMN_i} NE . then VarList{GMN_i}=0;
- else VarList{GMN_i}=1;
- end;
- run;
- %GetStatsForTable(SourceTable=&TargetTable,
- TargetTable=&TargetTable,
- ByFactors=,
- InputVar=&GMN_NumVarList,
- InputVarType=,
- OutputVarType=,
- Weight=,
- Statistic=SUM);
- proc transpose data=&TargetTable out=&TargetTable;
- var &GMN_NumVarList;
- run;
- data &TargetTable(rename=(_NAME_=VarName _LABEL_=VarLabel COL1=MissNum));
- retain _NAME_ _LABEL_ VarType COL1;
- set &TargetTable;
- VarType='N';
- run;
- %end;
- %else %do;
- %put ERROR: There is no numeric variable existed in SourceTable, please check it again.;
- %goto exit;
- %end;
- %end;
- %else %if %UPCASE(&InputVar) EQ _CHARACTER_ %then %do;
- proc contents data=&SourceTable position out=GMN_VarList(keep=name type varnum) noprint;
- run;
- /* 检查字符型变量是否存在 */
- %ChkValue(SourceTable=GMN_VarList,
- InputVar=type,
- Value=2,
- FlagValueExists=GMN_FlagCharVarExists);
- %if &GMN_FlagCharVarExists GT 0 %then %do;
- proc sql noprint;
- select NAME,STRIP(NAME)||'_Temp' into :GMN_CharVarList separated by ' ',:GMN_CharVarList_Temp separated by ' '
- from GMN_VarList
- where TYPE EQ 2;
- quit;
- /* 化简原始表格 */
- data &TargetTable(keep=&GMN_CharVarList_Temp drop=GMN_j);
- set &SourceTable;
- array VarList &GMN_CharVarList;
- array VarList_Temp &GMN_CharVarList_Temp;
- do GMN_j=1 to dim(VarList);
- if VarList{GMN_j} NE "" then VarList_Temp{GMN_j}=0;
- else VarList_Temp{GMN_j}=1;
- end;
- run;
- %GetStatsForTable(SourceTable=&TargetTable,
- TargetTable=&TargetTable,
- ByFactors=,
- InputVar=&GMN_CharVarList_Temp,
- InputVarType=,
- OutputVarType=,
- Weight=,
- Statistic=SUM);
- proc transpose data=&TargetTable out=&TargetTable;
- var &GMN_CharVarList_Temp;
- run;
- data &TargetTable(rename=(_NAME_=VarName _LABEL_=VarLabel COL1=MissNum));
- retain _NAME_ _LABEL_ VarType COL1;
- set &TargetTable;
- VarType='C';
- _NAME_=SUBSTR(_NAME_,1,LENGTH(_NAME_)-5);
- run;
- %end;
- %else %do;
- %put ERROR: There is no character variable existed in SourceTable, please check it again.;
- %goto exit;
- %end;
- %end;
- %else %do;
- proc contents data=&SourceTable position out=GMN_VarList(keep=name type varnum) noprint;
- run;
- /* 筛选指定的变量 */
- %if %UPCASE(&InputVar) NE _ALL_ %then %do;
- %let InputVar_Comma=%PrxChange(InputString=&InputVar,PrxString=s/(\w+)/'$1'/); /* 给InputVar中的代码加引号 */
- %let InputVar_Comma=%SYSFUNC(TRANSLATE(&InputVar_Comma,%STR(,),%STR( ))); /* 替换InputVar中的空格为逗号 */
- proc sql noprint;
- create table GMN_VarList as
- select * from GMN_VarList
- where Name in (&InputVar_Comma)
- order by Name;
- quit;
- %end;
- /* 检查数值型变量是否存在 */
- %ChkValue(SourceTable=GMN_VarList,
- InputVar=type,
- Value=1,
- FlagValueExists=GMN_FlagNumVarExists);
- /* 检查字符型变量是否存在 */
- %ChkValue(SourceTable=GMN_VarList,
- InputVar=type,
- Value=2,
- FlagValueExists=GMN_FlagCharVarExists);
- %if &GMN_FlagNumVarExists GT 0 %then %do;
- proc sql noprint;
- select NAME into :GMN_NumVarList separated by ' '
- from GMN_VarList
- where TYPE EQ 1;
- quit;
- /* 化简原始表格 */
- data GMN_NumMiss(keep=&GMN_NumVarList drop=GMN_k);
- set &SourceTable;
- array VarList &GMN_NumVarList;
- do GMN_k=1 to dim(VarList);
- if VarList{GMN_k} NE . then VarList{GMN_k}=0;
- else VarList{GMN_k}=1;
- end;
- run;
- %GetStatsForTable(SourceTable=GMN_NumMiss,
- TargetTable=GMN_NumMiss,
- ByFactors=,
- InputVar=&GMN_NumVarList,
- InputVarType=,
- OutputVarType=,
- Weight=,
- Statistic=SUM);
- proc transpose data=GMN_NumMiss out=GMN_NumMiss;
- var &GMN_NumVarList;
- run;
- data GMN_NumMiss(rename=(_NAME_=VarName _LABEL_=VarLabel COL1=MissNum));
- retain _NAME_ _LABEL_ VarType COL1;
- set GMN_NumMiss;
- VarType='N';
- run;
- %end;
- %if &GMN_FlagCharVarExists GT 0 %then %do;
- proc sql noprint;
- select NAME,STRIP(NAME)||'_Temp' into :GMN_CharVarList separated by ' ',:GMN_CharVarList_Temp separated by ' '
- from GMN_VarList
- where TYPE EQ 2;
- quit;
- /* 化简原始表格 */
- data GMN_CharMiss(keep=&GMN_CharVarList_Temp drop=GMN_l);
- set &SourceTable;
- array VarList &GMN_CharVarList;
- array VarList_Temp &GMN_CharVarList_Temp;
- do GMN_l=1 to dim(VarList);
- if VarList{GMN_l} NE "" then VarList_Temp{GMN_l}=0;
- else VarList_Temp{GMN_l}=1;
- end;
- run;
- %GetStatsForTable(SourceTable=GMN_CharMiss,
- TargetTable=GMN_CharMiss,
- ByFactors=,
- InputVar=&GMN_CharVarList_Temp,
- InputVarType=,
- OutputVarType=,
- Weight=,
- Statistic=SUM);
- proc transpose data=GMN_CharMiss out=GMN_CharMiss;
- var &GMN_CharVarList_Temp;
- run;
- data GMN_CharMiss(rename=(_NAME_=VarName _LABEL_=VarLabel COL1=MissNum));
- retain _NAME_ _LABEL_ VarType COL1;
- set GMN_CharMiss;
- VarType='C';
- _NAME_=SUBSTR(_NAME_,1,LENGTH(_NAME_)-5);
- run;
- %end;
- data &TargetTable;
- set
- %if &GMN_FlagNumVarExists GT 0 %then %do;
- GMN_NumMiss
- %end;
- %if &GMN_FlagCharVarExists GT 0 %then %do;
- GMN_CharMiss
- %end;
- ;
- run;
- %end;
- /* 删除不必要的表格 */
- proc datasets lib=work nolist;
- delete GMN_:;
- quit;
- %exit:
- %mend;
- %macro Demo();
- %let SourceTable=Cars;
- %let TargetTable=MissNum;
- %let InputVar=Cylinders; /* =_Numeric_表示统计全部数值型变量,=_Character_表示统计全部字符型变量,=_All_表示统计全部变量,大小写不敏感 */
- %GetMissNum(&SourceTable,&TargetTable,&InputVar);
- %mend;



雷达卡



京公网安备 11010802022788号







