/* Demo input: nine rows of (code, year1, year2) read with list input.
   year2 is 2004 on every row of this sample. */
data sample;
    infile datalines;
    input code
          year1
          year2;
    datalines;
1234 2004 2004
2345 2004 2004
3456 2004 2004
3456 2005 2004
4567 2004 2004
4567 2005 2004
5678 2004 2004
5678 2005 2004
5678 2006 2004
;
/* Two summary tables built in one PROC SQL session:
     sam1 - one row per code with the largest year1 seen for that code (as year);
     sam2 - for each such year, how many distinct codes have it as their
            largest year1 (as num). */
proc sql;
    create table sam1 as
        select s.code,
               max(s.year1) as year
        from sample as s
        group by s.code
        order by s.code;

    create table sam2 as
        select t.year,
               count(distinct t.code) as num
        from sam1 as t
        group by t.year
        order by t.year;
quit;
/* Number the rows of sam2:
     order  - position of the row (1, 2, 3, ...);
     lorder - position of the following row; the sam22 step renames it to
              order to line each row up with its successor. */
data sam21;
    set sam2;
    by year;              /* BY on SET: the step errors out if sam2 is not in year order */
    order  = _n_;
    lorder = order + 1;
run;
/* Self-merge sam21 against a copy shifted by one row, so each year also
   carries the count from the row before it:
     lastnum - num of the immediately preceding row (0 on the first row);
     cumnum  - running total of lastnum, i.e. the sum of num over all
               earlier rows. */
data sam22;
    merge sam21(in=in_current drop=lorder)
          sam21(drop=order year
                rename=(lorder=order num=lastnum));
    by order;
    retain cumnum;
    /* The shifted copy supplies one trailing key with no current row; discard it. */
    if not in_current then delete;
    lastnum = coalesce(lastnum, 0);   /* first row has no predecessor */
    cumnum + lastnum;                 /* sum statement: accumulates across rows */
run;
/* Attach the per-year counters from sam22 to every sample row whose year1
   equals a year present in sam22. This is an inner join, so sample rows
   without a matching year are not kept. */
proc sql;
    create table sample1 as
        select a.code, a.year1, a.year2,
               b.year, b.num, b.order, b.lastnum, b.cumnum
        from sample as a
        inner join sam22 as b
            on a.year1 = b.year;
quit;
/* Derive number for each row of sample1:
     year1 = year2 -> 0;
     year1 > year2 -> cumnum, the running total of num over all earlier
                      years in sam22.
   Rows with year1 < year2 match neither branch, so number stays missing. */
data sample2;
    set sample1;
    if year1 = year2 then number = 0;
    /* cumnum rather than lastnum: lastnum holds only the immediately
       preceding year's count, not the accumulated total. */
    else if year1 > year2 then number = cumnum;
run;
/* Reorder sample2 in place by ascending year1. */
proc sort data=sample2;
    by year1;
run;
/* Correction: one spot in the version above was wrong, sorry. With the version below, my result comes out the same. */
/* Re-creates the same nine-row demo input (code, year1, year2) using list
   input from in-stream data lines. */
data sample;
    infile datalines;
    input code
          year1
          year2;
    datalines;
1234 2004 2004
2345 2004 2004
3456 2004 2004
3456 2005 2004
4567 2004 2004
4567 2005 2004
5678 2004 2004
5678 2005 2004
5678 2006 2004
;
/* sam1: largest year1 per code (column year), ordered by code.
   sam2: number of distinct codes per year value of sam1 (column num),
         ordered by year. */
proc sql;
    create table sam1 as
        select src.code,
               max(src.year1) as year
        from sample as src
        group by src.code
        order by src.code;

    create table sam2 as
        select per_code.year,
               count(distinct per_code.code) as num
        from sam1 as per_code
        group by per_code.year
        order by per_code.year;
quit;
/* Add two row counters to sam2: order is the row's own position and lorder
   is the position of the next row (order + 1). */
data sam21;
    set sam2;
    by year;              /* requires sam2 to arrive in year order */
    order  = _n_;
    lorder = order + 1;
run;
/* Pair every sam21 row with the row before it by merging sam21 with a copy
   whose key is lorder renamed to order:
     lastnum - num of the preceding row, or 0 when there is none;
     cumnum  - accumulated sum of lastnum down the table. */
data sam22;
    merge sam21(in=in_current drop=lorder)
          sam21(drop=order year
                rename=(lorder=order num=lastnum));
    by order;
    retain cumnum;
    /* Keep only keys that exist in the unshifted copy. */
    if not in_current then delete;
    lastnum = coalesce(lastnum, 0);   /* no preceding row on the first key */
    cumnum + lastnum;                 /* sum statement keeps a running total */
run;
/* Inner-join sample to sam22 on year1 = year so each retained sample row
   carries num, order, lastnum and cumnum for its year. */
proc sql;
    create table sample1 as
        select a.code, a.year1, a.year2,
               b.year, b.num, b.order, b.lastnum, b.cumnum
        from sample as a
        inner join sam22 as b
            on a.year1 = b.year;
quit;
/* Derive number for each row of sample1:
     year1 = year2 -> 0;
     year1 > year2 -> cumnum, the running total of num over all earlier
                      years in sam22.
   Rows with year1 < year2 match neither branch, so number stays missing. */
data sample2;
    set sample1;
    if year1 = year2 then number = 0;
    /* Author's note: this is the spot that had not been changed before. */
    else if year1 > year2 then number = cumnum;
run;