多是icd编码,如A00,第一位是字母,后两位是数字,有很多重复。
一是分类的icd编码范围,如A00A09,表示从A00-A09共十个icd编码,范围为连续,无重叠(一个icd编码只属于一个分类)。
猜想如下:
用宏取得icd编码对应的范围,这减少data步的次数,用set语句的point=选项直接跳到未对比过的观测。
由宏返回编码范围,if语句查看,直到取得icd编码对应的范围为止。
if语句判断时字母用ascii码判断,数据直接运算。
问题:
宏调用未返回任何值。请问更位高手,这类问题应该用什么方法解决。
可能原因:
data步中,如果call execute产生sas language statements,或者macro language elements产生sas language statements,这些statements在当前data step执行后执行。
参考:
[url=http://sasor.feoh.net/viewtopic.php?f=1&t=1178http://tj.100xuexi.com/HP/20100121/OTD47955.shtml]http://sasor.feoh.net/viewtopic.php?f=1&t=1178http://tj.100xuexi.com/HP/20100121/OTD47955.shtml[/url]
note如下:
NOTE: 数值已转换为字符值,位置:(行:列)。
1499:42
NOTE: 变量 FIRST.icdtemp 未初始化。
程序如下:
data death;
input icdtemp $ @@;
cards;
A03 A03 A09 A09 A15 A15 A15 A15
A16 A16 A16 A16 A17 A18 A31 A35
A35 A39 A39 A40 A41 A41 A46 A46 A48
A86 A86 A86 A87 A87
;
run;
data icd;
input group subgroup icdsubcode $;
cards;
1 1 A00A09
1 2 A15A19
1 3 A20A28
1 4 A30A49
1 5 A50A64
1 6 A65A69
1 7 A70A74
1 8 A75A79
1 9 A80A89
1 10 A90A99
1 11 B00B09
1 12 B15B19
1 13 B20B24
1 14 B25B34
1 15 B35B49
1 16 B50B64
1 17 B65B83
1 18 B85B89
1 19 B90B94
1 20 B95B97
1 21 B99B99
2 1 C00C97
2 2 D00D09
2 3 D10D36
;
run;
proc sort data=death;by icdtemp;run;
proc sort data=icd;by icdsubcode;run;
%macro getsubcode(pointer);
%global stra strb numa numb icdsubcode;
%local pointer;
data deathtemp;
pointer=%eval(&pointer);
set icd point=pointer;
a=substr(left(upcase(icdsubcode)),1,1);
b=substr(left(upcase(icdsubcode)),4,1);
c=substr(left(upcase(icdsubcode)),2,2);
d=substr(left(upcase(icdsubcode)),5,2);
call symputx("stra",a);
call symputx("strb",b);
call symputx("numa",c);
call symputx("numb",d);
call symputx("icdsubcode",icdsubcode);
output;
stop;
run;
%mend getsubcode;
data death2;
set death;
retain pointer icdsubcode;
i=1;strtemp=rank(substr(left(icdtemp),1,1));numtemp=put(substr(left(icdtemp),2,2),2.);
if FIRST.icdtemp then do;
do while (i=1);
pointer+1;
call execute('%getsubcode('||pointer||')');
a='&stra';b='&strb';c='&numa';d='&numb';e='&icdsubcode';
m1=resolve(a);m2=resolve(b);m3=resolve(c);m4=resolve(d);m5=resolve(e);
if strtemp >= rank(m1) and strtemp <= rank(m2) and numtemp >=m3 and numtemp <=m4
then do;
icdsubcode=m5;
i=0;
end;
end;
end;
output;
run;
补充:
最后我用sql实现了,但是还是想问问如果不用sql怎么实现,因为担心数据量大的话sql效率的问题
sql的如下:
data death;
input icdtemp $ @@;
cards;
A03 A03 A09 A09 A15 A15 A15 A15
A16 A16 A16 A16 A17 A18 A31 A35
A35 A39 A39 A40 A41 A41 A46 A46 A48
A86 A86 A86 A87 A87
;
run;
data icd;
input group subgroup icdsubcode $;
cards;
1 1 A00A09
1 2 A15A19
1 3 A20A28
1 4 A30A49
1 5 A50A64
1 6 A65A69
1 7 A70A74
1 8 A75A79
1 9 A80A89
1 10 A90A99
1 11 B00B09
1 12 B15B19
1 13 B20B24
1 14 B25B34
1 15 B35B49
1 16 B50B64
1 17 B65B83
1 18 B85B89
1 19 B90B94
1 20 B95B97
1 21 B99B99
2 1 C00C97
2 2 D00D09
2 3 D10D36
;
run;
data death;
set death;
strtemp=rank(substr(left(icdtemp),1,1));
numtemp=put(substr(left(icdtemp),2,2),2.);
run;
data icd;
set icd;
a=rank(substr(left(upcase(icdsubcode)),1,1));
b=rank(substr(left(upcase(icdsubcode)),4,1));
c=put(substr(left(upcase(icdsubcode)),2,2),2.);
d=put(substr(left(upcase(icdsubcode)),5,2),2.);
run;
proc sort data=death;by icdtemp;run;
proc sort data=icd;by a b c d;run;
proc sql ;
create table deathtotal as select * from death join icd on strtemp>=a and strtemp<=b and numtemp>=c and numtemp<=d;
run;