tag 标签: 字符型变量 | 经管大学堂:名校名师名课

相关日志

分享 计算字符型变量的信息熵
intheangel 2014-6-24 21:21
/*
 * %entropy(row_data, destination)
 *
 * For every character column of &row_data (other than &destination), compute
 * the conditional entropy of &destination given that column, in bits:
 *
 *     H(destination | X) = - sum_x P(x) * sum_d P(d | x) * log2 P(d | x)
 *
 * Parameters
 *   row_data     name of the input data set
 *   destination  name of the target column in &row_data
 *
 * Output
 *   WORK.ENTROPY_VARIABLE with one row per character column:
 *     variable  (char 40)  the column name
 *     entropy   (num)      the conditional entropy value
 *
 * Scratch tables created in WORK: var_list, var_cha_list, pro, pro_temp,
 * pro_temp_2, final, _entropy_one.
 */
%macro entropy(row_data,destination);
    %local dsid rc nobs i varnume variable;

    /* List the columns of the input table, leaving out the target column. */
    proc contents data=&row_data(drop=&destination) noprint out=var_list;
    run;

    /* Keep only character columns (TYPE=2 in the PROC CONTENTS output);
       these are the variables whose entropy is calculated. */
    data var_cha_list;
        set var_list;
        where type=2;
        keep name;
    run;

    /* Start from an empty result table so repeated calls do not accumulate. */
    %if %sysfunc(exist(var_cha_list)) ne 0 %then %do;
        proc datasets lib=work nolist;
            delete entropy_variable;
        quit;

        data entropy_variable;
            length variable $40 entropy 8;
            stop;
        run;
    %end;

    %let dsid = %sysfunc(open(var_cha_list));
    %if &dsid gt 0 %then %do;
        %let nobs = %sysfunc(attrn(&dsid,nobs));
        /* The position of NAME is the same for every row: look it up once. */
        %let varnume = %sysfunc(varnum(&dsid,name));

        %do i=1 %to &nobs;
            /* Read the i-th column name into the macro variable &variable. */
            %let rc = %sysfunc(fetchobs(&dsid,&i));
            %let variable = %sysfunc(getvarc(&dsid,&varnume));

            /* Capture the cross-tabulation of &variable by &destination. */
            ods html close;
            ods output CrossTabFreqs=pro;
            proc freq data=&row_data;
                table &variable * &destination;
            run;
            ods output close;
            ods html;

            /* Row-total lines (target missing): PERCENT is the marginal
               share P(x) of each level of &variable. */
            data pro_temp(keep=&variable percent);
                set pro(keep=&variable percent &destination);
                where missing(&destination) and not missing(&variable);
            run;

            /* Cell lines (both present): ROWPERCENT is P(d | x). */
            data pro_temp_2(keep=&variable rowpercent);
                set pro(keep=&variable rowpercent &destination);
                where not missing(&destination) and not missing(&variable);
            run;

            /* Attach P(x) to every (x, d) cell. */
            proc sql;
                create table final as
                select a.&variable, a.percent, b.rowpercent
                from pro_temp as a
                     inner join pro_temp_2 as b
                     on a.&variable.=b.&variable.;
            quit;

            /* Running sum of P(x) * P(d|x) * log2 P(d|x). Cells at 0% or
               100% contribute nothing and are skipped (log2(0) is undefined). */
            data final;
                set final;
                retain entropy 0;
                if rowpercent^=0 and rowpercent^=100 then
                    entropy=entropy+rowpercent/100*log2(rowpercent/100)*percent/100;
            run;

            /* The last row holds the full sum; negate it to get the entropy.
               A fixed scratch name is used so the accumulated result table is
               not overwritten and long column names cannot overflow the
               32-character data set name limit. */
            data _entropy_one(keep=variable entropy);
                length variable $40;
                set final end=eof;
                variable="&variable";
                entropy=-entropy;
                if eof=1;
            run;

            /* Collect the result for this column. */
            proc append base=entropy_variable data=_entropy_one force;
            run;
        %end;

        %let rc = %sysfunc(close(&dsid));
    %end;
    /* Written by Albert.feng, if you use this code please note. */
%mend entropy;

%entropy(c,play_golf);
个人分类: macro|58 次阅读|0 个评论
GMT+8, 2026-1-17 23:50