Q: 原始数据集如下:
Obs | var1 | var2 |
1 | A | z |
2 | A | y |
3 | A | x |
4 | A | t |
5 | B | s |
6 | B | d |
7 | C | f |
8 | C | g |
9 | C | h |
A:想要得到一个分组排序的编号的变量flag, 期望的数据集如下:
Obs | var1 | var2 | flag |
1 | A | t | 1 |
2 | A | x | 2 |
3 | A | y | 3 |
4 | A | z | 4 |
5 | B | d | 1 |
6 | B | s | 2 |
7 | C | f | 1 |
8 | C | g | 2 |
9 | C | h | 3 |
DATA步实现(简单快捷):
- data test;
- input var1 $ var2 $;
- cards;
- A z
- A y
- A x
- A t
- B s
- B d
- C f
- C g
- C h
- ;
- run;
- proc sort data=test;by var1 var2;run;
- data final;
- set test;
- by var1;
- if first.var1 then flag=1;
- else flag+1;
- run;
- proc print;run;
- proc sql;
- create table test as
- select * from test order by var1,var2
- ;
- create table final as
- select a.var1,a.var2,(a.num-b.diff) as flag
- from
- (select var1,var2,monotonic() as num
- from test) as a,
- (select var1,var2,monotonic() as num,count(*) as obs,(calculated num-calculated obs) as diff
- from test
- group by var1
- having max(num)=num) as b
- where a.var1=b.var1
- ;
- quit;
- proc print;run;
SQL实现2(增加一个temp变量,借助该变量实现):
- data test;
- input var1 $ var2 $ temp;
- cards;
- A z 1
- A y 1
- A x 1
- A t 1
- B s 1
- B d 1
- C f 1
- C g 1
- C h 1
- ;
- run;
- proc sort data=test;by var1 var2;
- run;
- proc sql noprint;
- create table final as
- select A.var1,A.var2,SUM (B.temp) as flag
- from test as A inner join test as B ON A.var1 = B.var1
- where B.var2 <= A.var2
- group by A.var1,A.var2 ;
- quit;