/* Build the sample input table test1 from the inline records below.
   Each record supplies, in order: task_id, date, var1, var2, var3.
   task_id, date and var3 are read and formatted with YYMMDD10.;
   var1 and var2 are character variables of length 100. */
data test1;
  length var1 var2 $ 100;
  attrib task_id date var3 informat=yymmdd10. format=yymmdd10.;
  input task_id date var1 $ var2 $ var3;
  datalines;
2007/10/31 2008/12/31 366010 6506263N200700425 2009/5/29
2009/10/31 2009/12/31 2316010 HO650720000200900021 2012/5/13
2011/2/28 2011/4/30 5882654 544126 2015/12/21
2011/2/28 2011/4/30 5882654 6441040 2015/12/21
2007/5/31 2007/7/31 593722 1028129 2008/1/31
2007/9/30 2007/12/31 593722 1028129 2008/1/11
2007/5/31 2007/7/31 593722 1028129 2007/12/8
2007/5/31 2007/7/31 593722 1028236 2007/11/12
2007/5/31 2007/12/31 593722 2023509 2007/9/25
2007/5/31 2007/7/31 593722 6054504 2007/10/9
2007/5/31 2007/7/31 593722 6054721 2007/10/10
2007/5/31 2007/7/31 593722 6054808 2007/10/11
2007/5/31 2007/7/31 593722 6058092 2007/10/26
2007/5/31 2007/8/31 593722 6060651 2007/11/8
2009/11/30 2010/1/31 593722 9500660 2012/12/31
;
/* Order test1 in place by var1, var2 and then task_id. */
proc sort data=test1 out=test1;
  by var1 var2 task_id;
run;
/* Number each distinct (var1, var2) combination: group = 1, 2, 3, ...
   Input test1 must already be sorted by var1 var2 (required by the BY statement).
   FIRST.var2 is set whenever var1 OR var2 changes, so the counter also advances
   when a new var1 starts with the same var2 as the previous row, and on the very
   first row even if var2 is blank. A LAG(var2) comparison misses both cases. */
data test2;
  set test1;
  by var1 var2;
  /* Sum statement: group is retained automatically and starts from 0. */
  if first.var2 then group + 1;
run;
/* Build the month-end calendar test3: one row per (group, month end), for every
   group number from 1 to the largest group in test2 and for every month from the
   earliest to the latest task_id in test2.
   The bounds are read from test2 instead of being hard-coded, so the step keeps
   producing a complete calendar when the input data changes.
   Output variables: group, task_id (formatted YYMMDD10.). */
data test3;
  /* Single pass over test2 to collect the group count and the overall date range.
     MAX/MIN ignore missing arguments, so the first row initialises each bound. */
  do until (last_row);
    set test2(keep=group task_id rename=(group=_grp task_id=_tid)) end=last_row;
    n_groups    = max(n_groups, _grp);
    first_month = min(first_month, _tid);
    last_month  = max(last_month, _tid);
  end;

  /* Number of month boundaries between the first and last month (0 = same month). */
  n_months = intck('month', first_month, last_month);

  do group = 1 to n_groups;
    do month_offset = 0 to n_months;
      /* 'e' aligns the result to the last day of the target month. */
      task_id = intnx('month', first_month, month_offset, 'e');
      output;
    end;
  end;

  /* Everything has been written; do not loop back into the SET statement. */
  stop;

  format task_id yymmdd10.;
  keep group task_id;
run;
/* Combine the observed rows (test2) with the calendar rows (test3) by matching
   on group and task_id. Both inputs must be in group/task_id order. */
data test4;
  merge test2
        test3;
  by group task_id;
run;
/* For each group, take the earliest task_id in test2 and store it as task_id_old,
   so that rows earlier than the group's own first date can be filtered out later. */
proc sql noprint;
  create table test5 as
    select
      group,
      min(task_id) as task_id_old format=yymmdd10.
    from test2
    group by group;
quit;
/* Produce test6: for every group, one row per month from the group's first
   task_id onward, with date, var1, var2 and var3 carried forward from the most
   recent row that had a value.
   Inputs : test4 (calendar merged with observed rows) and test5 (task_id_old,
            the earliest task_id per group), both in group order.
   Output : task_id, group, var1, var2, date, var3. */
data test6;
  merge test4 test5;
  by group;

  length var1_new var2_new $ 100;
  retain date_new var1_new var2_new var3_new;

  /* Start every group with empty carry-forward values so that a missing value on
     a group's first row cannot inherit the previous group's value. This runs
     before the subsetting IF because the first row of a group may be a calendar
     row that is about to be discarded. */
  if first.group then call missing(date_new, var1_new, var2_new, var3_new);

  /* Drop rows that are earlier than the group's earliest observed task_id. */
  if task_id >= task_id_old;

  /* Carry forward: take the current value when present, else keep the retained one. */
  var1_new = coalescec(var1, var1_new);
  var2_new = coalescec(var2, var2_new);
  date_new = coalesce(date, date_new);
  var3_new = coalesce(var3, var3_new);

  drop date var1 var2 var3 task_id_old;
  format date_new yymmdd10. var3_new yymmdd10.;
  /* DROP is applied before RENAME, so the filled columns take over the original names. */
  rename date_new=date var1_new=var1 var2_new=var2 var3_new=var3;
run;


雷达卡

京公网安备 11010802022788号







