/* Sample input: one row per ID and service start date.                    */
/* Rows are entered in ascending id / start_date order.                    */
/* start_date is read from m/d/yyyy text and displayed as yyyy-mm-dd.      */
data dataset_1;
  input id            : $3.
        start_date    : mmddyy10.
        service_count;
  format start_date e8601da.;
  datalines;
001 7/4/2018 1
001 4/9/2019 2
002 4/1/2021 1
002 5/3/2022 3
003 1/2/2019 1
004 11/5/2023 2
005 6/4/2020 4
005 4/2/2021 1
005 4/19/2021 4
006 5/2/2021 3
006 5/31/2022 1
007 1/5/2021 1
007 9/16/2022 2
007 10/25/2023 3
008 4/8/2020 2
;
run;
/*
 * Summarise dataset_1 by 30-day "month" since each ID's first start_date.
 *
 * Input : dataset_1 (id, start_date, service_count), sorted by id start_date.
 * Output: final_output with three rows, identified by ITEM:
 *           '# of IDs'                - distinct IDs seen in each month bucket
 *           'Count of Services'       - sum of service_count in each bucket
 *           'Average Services Per ID' - sum / distinct IDs (.N when no IDs)
 *         Only buckets 1, 3, 6, 9 and 13 are kept (result1 ... result13);
 *         bucket 13 is the open-ended "Month 12+" bucket.
 *
 * Only IDs whose first start_date is later than 2020-01-01 contribute.
 */
data final_output;
  set dataset_1 end=_eof_;
  by id start_date;

  length item $ 42;
  array _count_[13] _temporary_ (13*0);  /* distinct IDs per bucket; 13 = "Month 12+" */
  array _sum_[13]   _temporary_ (13*0);  /* service_count total per bucket            */
  array _result_[13] result1-result13;

  /* Carry the ID's first start_date and the bucket of its previous row. */
  retain first_start_date _prev_categoryn;
  if first.id then do;
    first_start_date = start_date;
    _prev_categoryn  = .;   /* forces the first row of every ID to be counted */
  end;

  /*
   * Filter with a DO block rather than a subsetting IF: a subsetting IF would
   * also skip the end-of-file report below whenever the last observation of
   * dataset_1 belongs to an excluded ID, leaving final_output empty.
   */
  if first_start_date > '01JAN2020'd then do;
    /* Day number since the first start_date (first day = 1). */
    ady = start_date - first_start_date + 1;

    /* 30-day buckets; everything past day 360 falls in bucket 13. */
    categoryn = min(ceil(ady / 30), 13);

    /*
     * Rows are in start_date order within an ID, so buckets never decrease;
     * an ID is new to a bucket exactly when the bucket differs from that of
     * its previous row. A retained variable is used instead of LAG() so the
     * comparison does not depend on when the LAG queue is updated.
     */
    if categoryn ne _prev_categoryn then _count_[categoryn] + 1;
    _sum_[categoryn] + service_count;
    _prev_categoryn = categoryn;
  end;

  /* After the last input row, write the three summary rows. */
  if _eof_ then do;
    item = '# of IDs';
    do _n_ = 1 to dim(_result_);
      _result_[_n_] = _count_[_n_];
    end;
    output;

    item = 'Count of Services';
    do _n_ = 1 to dim(_result_);
      _result_[_n_] = _sum_[_n_];
    end;
    output;

    item = 'Average Services Per ID';
    do _n_ = 1 to dim(_result_);
      if _count_[_n_] then _result_[_n_] = _sum_[_n_] / _count_[_n_];
      else _result_[_n_] = .N;   /* special missing: no IDs in this bucket */
    end;
    output;
  end;

  keep item result1 result3 result6 result9 result13;
run;


雷达卡




京公网安备 11010802022788号







