作用:对数据集中的分类变量进行检查(字符型变量可能有重编码)
实现过程:利用freq过程以及元数据表的属性。
参数1:逻辑库名 参数2:数据集名 参数3:指定的水平数
返回的数据集中创建了3个新的指示变量
1、id_indi 指示该变量可能为ID
2、ab_indi 指示该变量需要看一下(或者num型变量水平过少,或者char变量水平过多)
3、cls_indi 指示该变量按指定的水平数被归类为分类变量
/*----------------------------------------------------------------------
 set_des: screen the variables of one data set for categorical candidates.

 Parameters
   lib_n  libref of the library that holds the data set
   set_n  data set (member) name
   nlev   level-count threshold used by the indicator rules below

 Output
   WORK.SET_DES, one row per variable, with the columns name, type,
   nlevels, nobs, nmisslevels and three indicator flags. A flag is 1
   when raised and left missing otherwise:
     id_indi   nlevels/nobs >= 0.95 (the variable may be an identifier)
     ab_indi   variable deserves a closer look: char with nlevels >= nlev,
               or num with nlevels <= nlev
     cls_indi  nlevels <= nlev (classified as a categorical variable)
   The table is sorted (flagged variables first) and printed.
   WORK.T3, the captured PROC FREQ NLevels table, is left in WORK.
----------------------------------------------------------------------*/
%macro set_des(lib_n,set_n,nlev)/store;
    /* Capture the per-variable level counts. The data set is read from
       lib_n so that PROC FREQ profiles the same table that the
       dictionary lookups below describe. */
    ods output nlevels=t3;
    proc freq data=&lib_n..&set_n nlevels;
        tables _all_ / noprint;
    run;

    /* The NLevels table only carries NMissLevels when at least one
       variable has missing values. Create the column (as 0) when it is
       absent so the query below can always reference it. */
    data t3;
        set t3;
        if missing(nmisslevels) then nmisslevels = 0;
    run;

    /* Combine the row count (vtable), the variable attributes (vcolumn)
       and the level counts (t3) into one row per variable. */
    proc sql;
        create table set_des as
        select col.name,
               col.type,
               lev.nlevels,
               tab.nobs,
               lev.nmisslevels
        from (select memname, nobs
              from sashelp.vtable
              where libname = upper("&lib_n")
                and memname = upper("&set_n")) as tab
        left join
             (select memname, name, type
              from sashelp.vcolumn
              where libname = upper("&lib_n")
                and memname = upper("&set_n")) as col
            on tab.memname = col.memname
        left join t3 as lev
            /* compare case-insensitively so a casing difference between
               the two sources cannot drop the level counts */
            on upcase(col.name) = upcase(lev.tablevar)
        ;
    quit;

    /* Derive the indicator flags. */
    data set_des;
        set set_des;
        /* A missing nlevels sorts below every number in SAS and would
           otherwise satisfy the "le" tests, so skip such rows. */
        if not missing(nlevels) then do;
            /* Guard the ratio against an empty table (nobs = 0). */
            if nobs gt 0 then do;
                if nlevels / nobs ge 0.95 then id_indi = 1;
            end;
            if type eq 'char' and nlevels ge &nlev then ab_indi = 1;
            else if type eq 'num' and nlevels le &nlev then ab_indi = 1;
            if nlevels le &nlev then cls_indi = 1;
        end;
    run;

    /* Flagged variables first, then by ascending number of levels. */
    proc sort data=set_des;
        by descending id_indi descending ab_indi nlevels;
    run;

    proc print data=set_des;
    run;
%mend set_des;