playmore 发表于 2013-8-7 17:08
嗯,刚看了下
我的方法适合单向的关联
你要是做多向的话……
捣腾出来了,以前没用过 PROC IML……
/* Sample edge list: every record links the person in name1 to the   */
/* person in name2. dummy is a constant 1 carried on each record.    */
data have;
    length name1 name2 $8;
    input name1 $ name2 $;
    dummy = 1;
    datalines;
Tom Jack
Jack Mike
Tom Hellen
Dora Hellen
Ross Jack
Will Paul
Paul Howard
Paul Andy
Andy Kim
Andy Tom
;
/* Collect every name that occurs in either column of HAVE.          */
/* name1 and name2 need not contain the same set of names; UNION     */
/* de-duplicates across both, giving one row per name in name order. */
proc sql;
    create table unique_names as
        select name1 as name from have
        union
        select name2 as name from have
        order by name;
quit;
/* Pad HAVE so that every known name occurs at least once in name1. */
proc sort data=have out=have;
    by name1 name2;
run;

data have2;
    /* Match-merge on name1: a name that never appears on the left   */
    /* side contributes one extra row with name2 and dummy missing.  */
    merge have
          unique_names (rename=(name=name1));
    by name1;
    /* No subsetting IF is needed: every merged row comes from at    */
    /* least one of the two inputs, so all rows are kept.            */
run;
/* Pad HAVE2 so that every known name also occurs at least once in name2. */
proc sort data=have2 out=have2;
    by name2 name1;
run;

data havenew;
    /* Match-merge on name2: a name that never appears on the right  */
    /* side contributes one extra row with name1 and dummy missing.  */
    merge have2
          unique_names (rename=(name=name2));
    by name2;
    /* All rows are kept; each one originates from at least one input. */
run;
/* Return HAVENEW to name1/name2 order so it can be processed BY name1. */
proc sort data=havenew out=havenew;
    by name1 name2;
run;
/* Store all names, ascending and blank-separated, in &namelist.      */
/* The list is used later as a variable list to fix the column order  */
/* of the PROC TRANSPOSE output.                                      */
proc sql noprint;
    select name
        into :namelist separated by ' '
        from unique_names
        order by name;
quit;
/* Store the number of distinct names in &namecnt; it is used as the */
/* upper bound for array processing later on.                        */
proc sql noprint;
    select count(distinct name)
        into :namecnt
        from unique_names;
quit;

/* INTO keeps the leading blanks of a single formatted numeric value. */
/* Re-assigning through %LET strips them, so &namecnt resolves to a   */
/* bare integer wherever it is substituted.                           */
%let namecnt = &namecnt;
/* Pivot HAVENEW to one row per name1 with one column per name2 value. */
/* dummy is the only numeric variable in HAVENEW, so naming it in VAR  */
/* selects the same variable the procedure would pick by default.      */
proc transpose data=havenew out=want;
    by name1;
    id name2;
    var dummy;
run;
/* Reorder the PROC TRANSPOSE output: name1 first, then one column per */
/* name in &namelist order. Naming the variables in ATTRIB ahead of    */
/* SET is what fixes their position. _name_ is dropped and rows whose  */
/* name1 is blank are filtered out while reading.                      */
data want;
    attrib name1 &namelist label='';
    set want (drop=_name_ where=(name1 ne ' '));
run;
/* Fill the gaps in the name-by-name matrix held in WANT.              */
/* Every cell that is still missing becomes 1 when its column name     */
/* equals the row's name1 (the diagonal) and 0 otherwise. Cells that   */
/* already hold a value are left untouched.                            */
data havesorted (drop=i);
    set want;
    /* {*} lets SAS size the array from the variable list itself */
    array nm {*} &namelist;
    do i = 1 to dim(nm);
        /* vname() must be given the subscripted element so it returns */
        /* the name of the column currently being examined             */
        if missing(nm{i}) then nm{i} = (vname(nm{i}) = name1);
    end;
run;
/* Make the 0/1 matrix in HAVESORTED symmetric: whenever cell (j,i)   */
/* is 1, cell (i,j) is set to 1 as well. The result is printed and    */
/* written to the data set MyData.                                    */
proc iml;
    use havesorted;
    read all var _NUM_ into m;
    close havesorted;

    /* Mirroring across the diagonal only makes sense for a square    */
    /* matrix; stop instead of indexing outside the matrix.           */
    if nrow(m) ^= ncol(m) then do;
        print "ERROR: matrix read from HAVESORTED is not square; cannot symmetrise.";
        abort;
    end;

    /* Positions where the transposed matrix holds 1 are exactly the  */
    /* cells (i,j) whose mirror cell (j,i) is 1.                      */
    mirror = loc(t(m) = 1);
    if ncol(mirror) > 0 then m[mirror] = 1;

    print m;

    create MyData from m;
    append from m;
    close MyData;
quit;