|
Data sets
(1)proc sql;
内外连接的实现的基本原理:首先生成两个数据表的迪卡尔积,然后再根据where语句来选择符合条件的数据作为输出结果
inner join: where a.XX=b.XX; 仅返回匹配的数据,最多可以有32个表同时进行内连接
left outer join: on a.XX=b.XX; 左表(a)的记录将会全部表示出来,而右表(b)只会显示符合搜索条件的记录(例子中为: A.aID = B.bID).
B表记录不足的地方均为NULL
right outer join: on a.XX=b.XX;
full outer join: on a.XX=b.XX;
(2)
One-to-One Reading:the new data set contains all the variables from all the input data sets. If the data sets contain same-named variables, the values that are read in from the last data set replace those that were read in from earlier ones
data one2one;
set a;
set b;
run;
Concatenating:the new data set contains all the variables and the total number of observations from all input data sets
data concat;
set a b;
run;
Interleaving:input data set must be sorted or indexed in ascending order based on the BY variable(s),new data set contains all the variables and the total number of observations from all input data sets.
data interlv;
set a b;
by num;
run;
Match-Merging: input data set must be sorted or indexed in ascending order based on the BY variable(s). During match-merging, SAS sequentially checks each observation of each data set to see whether the BY values match, then writes the combined observation to the new data set.
data merged;
merge a b;
by num;
run;
Data
Vertically:
Combining Raw Data Files Using a FILENAME Statement
filename qtr1 ('c:\data\month1.dat''c:\data\month2.dat'
'c:\data\month3.dat');
data work.firstqtr;
infile qtr1;
input Flight $ Origin $ Dest $
Date : date9. RevCargo : comma15.2;
run;
Combining Raw Data Files Using an INFILE Statement
data quarter (drop=monthnum midmon lastmon);
monthnum=month(today());
midmon=month(intnx('month',today(),-1));
lastmon=month(intnx('month',today(),-2));
do i = monthnum, midmon, lastmon;
nextfile="c:\sasuser\month"
!!compress(put(i,2.)!!".dat",' ');
do until (lastobs);
infile temp filevar=nextfile end=lastobs;
input Flight $ Origin $ Dest $ Date : date9.
RevCargo : comma15.2;
output;
end;
end;
stop;
run;
Combining SAS Data Sets Using PROC APPEND
proc append base=work.acities
data=work.airports force;
Horizontally:
Combining Data with the IF-THEN/ELSE Statement
data mylib.employees_new;
set mylib.employees;
if IDnum=1001 then Birthdate='01JAN1963'd;
else if IDnum=1002 then Birthdate='08AUG1946'd;
else if IDnum=1003 then Birthdate='23MAR1950'd;
else if IDnum=1004 then Birthdate='17JUN1973'd;
run;
Combining Data with the ARRAY Statement
data mylib.employees_new;
array birthdates{1001:1004} _temporary_ ('01JAN1963'd
'08AUG1946'd '23MAR1950'd '17JUN1973'd);
set mylib.employees;
Birthdate=birthdates(IDnum);
run;
Combining Data with the FORMAT Procedure
proc format;
value $birthdate '1001' = '01JAN1963'
'1002' = '08AUG1946'
'1003' = '23MAR1950'
'1004' = '17JUN1973';
run;
data mylib.employees_new;
set mylib.employees;
Birthdate=input(put(IDnum,$birthdate.),date9.);
run;
Performing a DATA Step Match-Merge
proc sort data=sasuser.expenses out=expenses;
by flightid date;
run;
proc sort data=sasuser.revenue out=revenue;
by flightid date;
run;
data revexpns (drop=rev1st revbusiness revecon
expenses);
merge expenses(in=e) revenue(in=r);
by flightid date;
if e and r;
Profit=sum(rev1st, revbusiness, revecon,
-expenses);
run;
proc sort data=revexpns;
by dest;
run;
proc sort data=sasuser.acities out=acities;
by code;
run;
data sasuser.alldata;
merge revexpns(in=r) acities
(in=a rename=(code=dest)
keep=city name code);
by dest;
if r and a;
run;
Performing a PROC SQL Join
proc sql;
create table sqljoin as
select revenue.flightid,
revenue.date format=date9.,
revenue.origin, revenue.dest,
sum(revenue.rev1st,
revenue.revbusiness,
revenue.revecon)
-expenses.expenses as Profit,
acities.city, acities.name
from sasuser.expenses, sasuser.revenue,
sasuser.acities
where expenses.flightid=revenue.flightid
and expenses.date=revenue.date
and acities.code=revenue.dest
order by revenue.dest, revenue.flightid,
revenue.date;
quit;
Working with a Many-to-Many Match
proc sql;
create table flightemp as
select flightschedule.*, firstname, lastname
from sasuser.flightschedule, sasuser.flightattendants
where flightschedule.empid=flightattendants.empid;
quit;
data fightemps3(drop=empnum jobcode)
set sasuser.flightschedule;
do i=1 to num;
set sasuser.flightattendants
(rename=(empid=empnum))
nob=num point=i;
if empid=empnum then output;
end;
run;
Combining Summary Data and Detail Data
proc means data=sasuser.monthsum noprint;
var revcargo;
output out=sasuser.summary sum=Cargosum;
run;
data sasuser.percent1;
if _n_=1 then set sasuser.summary
(keep=cargosum);
set sasuser.monthsum
(keep=salemon revcargo);
PctRev=revcargo/cargosum;
run;
data sasuser.percent2(drop=totalrev);
if _n_=1 then do until(lastobs);
set sasuser.monthsum(keep=revcargo)
end=lastobs;
totalrev+revcargo;
end;
set sasuser.monthsum (keep=salemon revcargo);
PctRev=revcargo/totalrev;
run;
Using an Index to Combine Data
data work.profit work.errors;
set sasuser.dnunder;
set sasuser.sale2000(keep=routeid
flightid date rev1st revbusiness
revecon revcargo)key=flightdate;
if _iorc_=0 then do;
Profit=sum(rev1st, revbusiness, revecon,
revcargo, -expenses);
output work.profit;
end;
else do;
_error_=0;
output work.errors;
end;
run;
|