yongyitian 发表于 2014-3-19 11:20
您好,上次问您的问题您没来得及给我解答,然后我自己写了一点程序,但是还是有点问题想要请教您。
我自己写了一段程序,产生了一个outlier的数据集,一共100000条数据。我想从这个outlier数据集中随机抽出一部分(比例为rate)数据,用来随机替代原始数据集(birth_data)中的一部分数据,然后计算在这样的替代rate之下数据的mean和std。以下是我的程序。
/*create outlier dataset*/
/*
 * %cond(cond1, cond2)
 *
 * Generates one WHEN clause for a DATA step SELECT group.
 *   cond1 - index into the quota array c for this band
 *   cond2 - boolean expression on x that defines the band
 *           (pass it through %str() so commas/operators survive)
 *
 * When the band still has quota left (c[cond1] > 0) and the current draw
 * satisfies cond2, the draw is written out and both the band quota and the
 * overall remaining count sampSize are decremented.
 *
 * Must be invoked inside a DATA step where array c and variable sampSize
 * already exist.
 */
%macro cond(cond1, cond2);
    /* cond2 is parenthesized so that an OR inside it cannot bypass the quota test. */
    when (c[&cond1] > 0 and (&cond2)) do;
        c[&cond1] + -1;   /* sum statement: subtract 1 from this band's quota */
        sampSize + -1;    /* one fewer observation left to generate overall   */
        output;
    end;
%mend cond;
/*
 * Build B1841039.outlier_weight: sampSize (100000) values x drawn from
 * Normal(mean, std) by rejection sampling, keeping only draws that fall in
 * one of four outlier bands, in fixed proportions given by p:
 *   band 1 ( 5%): x > mean + 3*std
 *   band 2 ( 5%): 0 < x < mean - 3*std
 *   band 3 (45%): mean - 3*std < x < mean - 2*std
 *   band 4 (45%): mean + 2*std < x < mean + 3*std
 * Every output row carries x plus the bookkeeping variables i, sampSize,
 * mean and std.
 */
data B1841039.outlier_weight;
    call streaminit(12345);               /* fixed seed => reproducible draws */
    sampSize = 100000;

    array p[4] _temporary_ (5 5 45 45);   /* percentage wanted per band */
    array c[4] _temporary_;               /* remaining quota per band   */

    /* Turn percentages into counts. The subscripts [i] were lost in the
       posted code; an unsubscripted array reference does not compile. */
    do i = 1 to dim(p);
        c[i] = ceil(sampSize * p[i] / 100);
    end;

    /* ceil() may overshoot; remove any excess from the last band so the
       quotas add up to exactly sampSize. */
    c[4] = c[4] - (sum(of c[*]) - sampSize);

    mean = 3283.95;
    std  = 563.1736630;

    /* Keep drawing until every band's quota is used up. */
    do until (sampSize <= 0);
        x = rand('normal', mean, std);
        select;
            %cond(1, %str(x>mean+3*std ) )
            %cond(2, %str(x>0 and x<mean-3*std) )
            %cond(3, %str(x>mean-3*std and x<mean-2*std) )
            %cond(4, %str(x>mean+2*std and x<mean+3*std) )
            otherwise;   /* not an outlier, or its band is already full: discard */
        end;
    end;
    stop;
run;
/*
 * Shuffle the outlier data: attach a uniform random sort key to every row
 * and sort by it. count(*) without GROUP BY is remerged onto each row, so
 * every row also carries the table's row count in "total".
 */
proc sql;
    create table temp as
        select src.*,
               ranuni(123) as key,
               count(*)    as total
        from B1841039.outlier_weight as src
        order by key;
quit;
/*
 * %sample1(rate1=)
 * Split the shuffled outlier table temp in two: the first int(total*rate1)
 * rows go to sample1, all remaining rows go to sample2. The helper columns
 * key and total are removed from both outputs.
 */
%macro sample1(rate1=0.003987);
    data sample1 sample2;
        set temp;
        drop key total;
        if _n_ > int(total*&rate1) then output sample2;
        else output sample1;
    run;
%mend sample1;
%sample1;
/*
 * Shuffle the original birth data in the same way: add a uniform random
 * sort key and the total row count (remerged onto every row), then sort by
 * the key. The result feeds the split into sample_1 / sample_2.
 */
proc sql;
    create table temp_ori as
        select src.*,
               ranuni(123) as key,
               count(*)    as total
        from b1841039.birth_data as src
        order by key;
quit;
/*
 * %sample2(rate2=)
 * Split the shuffled birth data temp_ori in two: the first int(total*rate2)
 * rows (the ones to be replaced) go to sample_1, the rest go to sample_2.
 * The helper columns key and total are removed from both outputs.
 */
%macro sample2(rate2=0.1);
    data sample_1 sample_2;
        set temp_ori;
        drop key total;
        if _n_ > int(total*&rate2) then output sample_2;
        else output sample_1;
    run;
%mend sample2;
%sample2;
/*
 * Replace birth_weight in the selected rows (sample_1) by outlier values x
 * taken from sample1, then put the untouched rows (sample_2) back.
 *
 * The earlier one-to-one MERGE without BY was only correct when sample_1
 * and sample1 had exactly the same number of rows: a shorter sample1 left
 * rows with a missing birth_weight, and a longer one appended extra rows
 * containing nothing but the outlier value. Here the row count of sample_1
 * is always preserved and only the first n_outlier rows are overwritten.
 */
data replace;
    set sample_1;
    /* NOBS= is filled in at compile time, so n_outlier is usable before the
       second SET executes; guarding the read also avoids hitting
       end-of-file on sample1 and stopping the step early. */
    if _n_ <= n_outlier then do;
        set sample1 (keep=x) nobs=n_outlier;
        birth_weight = x;
    end;
    drop x;
run;

/* Full data set after replacement: replaced rows followed by untouched rows. */
data rep_weight;
    set replace sample_2;
run;
/* Report the mean and standard deviation of birth_weight after the replacement. */
proc means data=rep_weight
           mean std;
    var birth_weight;
run;
但是我想要在每个rate之下,从outlier数据集里随机抽取1000次,每次都用抽到的数据随机替代源数据集里的一部分数据,最后得到如下样式的结果数据。可是我写不好这个循环,您可以指点我吗?
Simulation degree Simulation dataset order mean std
0.1 1 ….. ……
0.1 2 ……. …….
0.1 3 …….. …….
0.1 …. ….. ……
0.1 1000 …… …….