参考楼上的回复,将程序整理了一下,以供参考。有更简便的方法希望大家能提出来,多多交流。
/*make preparation for dataset*/
%macro repeat;
%do i=1 to 100;
data a&i.;
set a&i;
source=&i.;
run;
%end;
%mend repeat;
data all;
set a1-a100;
run;
/*Create a dummy dataset for analyze */
data all;
input custid txndate txnamt mcc $ source;
datalines;
001 1 500 A 1
002 1 569 A 1
001 5 895 B 5
001 3 898 C 3
003 2 589 B 2
003 1 589 D 1
001 2 589 A 2
004 2 410 C 2
005 1 482 A 1
005 3 785 B 3
;
run;
/*Q1:count the number of customer who has pos transaction in the past 100 months*/
proc sql;
select COUNT(distinct custid) into: count_custid
from all;
quit;
%PUT the number of customer who has pos transaction in the past 100 months is &count_custid;
/*Q2: count the max, min transaction amount in the past ten months */
proc sql;
select min(txnamt), max(txnamt) into: min_txnamt, : max_txnamt
from all
where source le 10;
quit;
%put in the past ten months, the max transaction amount is: &max_txnamt.. the min is: &min_txnamt..;
/*Q3: count the number of customer who has more than three pos transaction records in the past half year*/
proc sql;
select count(distinct custid) into: count_3half_custid
from (select *, count(txndate) as count_txndate_half
from all
where txndate<6
group by custid)
where count_txndate_half>3;
quit;
%put the number of customer who has more than three pos transaction records in the past half year is: &count_3half_custid..;
/*Q4: count the number of customer who has transaction record in consecutive two months in the past three months*/
proc sort data=all nodupkey;
by custid source;
run; *in case of a cutomer who has multiple transaction records in a month;
data all1;
set all;
by custid source;
if first.custid then seq=1;
else seq+1;
run;
/*customers have transaction records at least in both past two months*/
data all_1;
set all1;
where source=2 and seq=2;
run;
/*customers have transaction record at the 2rd and 3rd month*/
data all_21;
set all1;
where source=2 and seq=1;
run;
data all_22;
set all1;
where source=3 and seq=2;
run;
proc sort data=all_21;
by custid;
run;
proc sort data=all_22;
by custid;
run;
data all2;
merge all_21(in=a) all_22(in=b);
by custid;
if a and b;
run;
data all3;
set all_1 all2;
run;
proc sql;
select count(distinct custid) into: custid_2month
from all3;
quit;
%put the number of customers who has at least two consecutive two-month transaction records in the past three month is :&custid_2month..;
/*Q5: count the transaction amount group by MCC*/
proc sql;
create table mcc as
select mcc, source, sum(txnamt) as sum
from all
where source<=20
group by mcc, source;
quit;


雷达卡




京公网安备 11010802022788号







