我对你的程序做了几处修改,下面是修改后的程序和 log。程序虽然可以运行,但结果肯定不是你想要的,因为用了不同的 sample_size 和 rate。
具体修改了哪些地方,请对比程序修改前后的 log 文件。
我这边运行你那段生成 outlier 的程序时出错,不知你那边运行是否也有错。
/*
 * cond: emits one WHEN branch for a SELECT group inside a DATA step.
 *
 *   cond1 - index into the array c (remaining quota for that band)
 *   cond2 - boolean expression on the current draw; pass it through %str()
 *           so embedded operators survive macro parsing
 *
 * The generated branch fires only while c[cond1] is still positive and
 * cond2 holds; it then decrements both c[cond1] and sampSize and writes
 * the current observation. The calling step must define c and sampSize.
 */
%macro cond(cond1, cond2);
    when (c[&cond1] > 0 and &cond2) do;
        /* sum statements with -1: count down the band quota and the total */
        c[&cond1] + (-1);
        sampSize + (-1);
        output;
    end;
%mend cond;
/* Centre and spread of the normal distribution the draws are taken from. */
%let mean_age = 3283.95;
%let std_age  = 563.1736630;

/*
 * Build work.outlier_weight: keep drawing x ~ N(mean, std) and write a draw
 * only when it lands in one of four bands whose quota is not yet used up:
 *   band 1:  x > mean + 3*std                     ( 5% of sampSize)
 *   band 2:  0 < x < mean - 3*std                 ( 5% of sampSize)
 *   band 3:  mean - 3*std < x < mean - 2*std      (45% of sampSize)
 *   band 4:  mean + 2*std < x < mean + 3*std      (45% of sampSize)
 * Draws outside every band, or in a band that is already full, are discarded.
 * The output keeps sampSize, i, mean, std and x as variables.
 */
data work.outlier_weight;
    call streaminit(12345);
    sampSize = 1000;

    /* percentage share per band and the matching remaining-count quota */
    array pct[4]   _temporary_ (5 5 45 45);
    array quota[4] _temporary_;
    do i = 1 to dim(pct);
        quota[i] = ceil(sampSize * pct[i] / 100);
    end;

    mean = &mean_age;
    std  = &std_age;

    /* sampSize counts down by one per written row; stop once it reaches 0 */
    do until (sampSize <= 0);
        x = rand('normal', mean, std);

        if quota[1] > 0 and (x > mean + 3*std) then do;
            quota[1] + (-1);
            sampSize + (-1);
            output;
        end;
        else if quota[2] > 0 and (x > 0 and x < mean - 3*std) then do;
            quota[2] + (-1);
            sampSize + (-1);
            output;
        end;
        else if quota[3] > 0 and (x > mean - 3*std and x < mean - 2*std) then do;
            quota[3] + (-1);
            sampSize + (-1);
            output;
        end;
        else if quota[4] > 0 and (x > mean + 2*std and x < mean + 3*std) then do;
            quota[4] + (-1);
            sampSize + (-1);
            output;
        end;
    end;

    stop;
run;
/*
 * simulation: contaminate work.birth_weight with rows from
 * work.outlier_weight and summarise each simulated sample.
 *
 *   rate1 - multiplier applied to the row count of work.outlier_weight to
 *           get the number of outlier rows drawn per run:
 *           sample_size = int(n1 * rate1)
 *   rate2 - value stored in the RATE column of every output row; it does
 *           not influence how many rows are replaced
 *
 * Per run (simu_num = 1, 2):
 *   1. draw sample_size rows of x at random from work.outlier_weight
 *      (stored as birth_weight in simu_weight);
 *   2. shuffle work.birth_weight and keep its first n2 - sample_size rows;
 *   3. stack both pieces into SAMPLE and append SAMPLE to ALL.
 * Afterwards FINAL holds one row per run with the mean and standard
 * deviation of birth_weight; ALL is removed again.
 *
 * Datasets left in WORK: simu_weight, birth_weight_new, weight_random,
 * sample, final.
 *
 * NOTE(review): the accompanying log reports 5 variables in WORK.SAMPLE
 * although WEIGHT_RANDOM has 4 and SIMU_WEIGHT has 3. That suggests
 * work.birth_weight has no column literally named birth_weight, in which
 * case avg/std in FINAL only cover the outlier rows - confirm the column
 * name and rename it if necessary.
 */
%macro simulation (rate1 ,rate2);
    /* keep all working macro variables out of the global symbol table */
    %local n1 n2 sample_size obs seed simu_num;

    /* A leftover ALL from an aborted earlier run would otherwise be
       appended to silently; NOWARN keeps the step quiet when it is absent. */
    proc datasets lib=work nolist nowarn;
        delete all;
    quit;

    /* count number of observations in both inputs: &n1 (outliers), &n2 (original) */
    proc sql noprint;
        select count(*) into :n1
        from work.outlier_weight;
        select count(*) into :n2
        from work.birth_weight;
    quit;
    /* INTO pads numeric results with leading blanks; re-assigning strips them */
    %let n1 = &n1;
    %let n2 = &n2;

    /* number of outlier rows to draw per run -> &sample_size */
    data _null_;
        sample_size = int(&n1 * &rate1);
        put sample_size=;
        call symputx("sample_size", sample_size);
    run;

    /* rows of the original data that survive in each run (loop-invariant) */
    %let obs = %eval(&n2 - &sample_size);
    %if &obs < 0 %then %do;
        %put ERROR: sample_size=&sample_size exceeds the &n2 observations in work.birth_weight.;
        %return;
    %end;

    %let seed = 12345;
    %do simu_num = 1 %to 2;
        /* a different seed for every run */
        %let seed = %eval(&seed + &simu_num);

        %put n1=&n1;
        %put n2=&n2;
        %put sample_size=&sample_size;
        %put seed=&seed;
        %put obs=&obs;

        /* random draw of &sample_size rows from the outlier dataset */
        proc sql noprint outobs=&sample_size;
            create table simu_weight as
            select x as birth_weight
            from work.outlier_weight
            order by ranuni(&seed);
        quit;

        /* tag the outlier rows with run number and rate */
        data simu_weight;
            set simu_weight;
            simu_num = &simu_num;
            rate     = &rate2;
        run;

        /* shuffle the original data: one random sort key per row is enough
           (the earlier version drew &n2 numbers per row and kept the last) */
        data birth_weight_new;
            set work.birth_weight;
            order = ranuni(&seed);
        run;

        proc sort data=birth_weight_new out=weight_random(drop=order);
            by order;
        run;

        data weight_random;
            set weight_random;
            simu_num = &simu_num;
            rate     = &rate2;
        run;

        /* replace the last &sample_size shuffled rows with simulated outliers */
        data sample;
            set weight_random(obs=&obs) simu_weight;
        run;

        proc append base=all data=sample force;
        run;

        %put loop=&simu_num;
    %end;

    /* one summary row per run; grouping by rate as well avoids the remerge
       that previously had to be hidden behind DISTINCT */
    proc sql;
        create table final as
        select rate,
               simu_num,
               avg(birth_weight) as mean_weight,
               std(birth_weight) as std_weight
        from all
        group by rate, simu_num
        order by simu_num;
    quit;

    proc datasets lib=work nolist;
        delete all;
    quit;
%mend simulation;
/* Run the simulation: rate1=1 makes sample_size equal to the full row count of
   work.outlier_weight; rate2=0.05 is the value written to the RATE column. */
%simulation (1, 0.05)
----- LOG -----
381 %macro cond(cond1, cond2);
382 when (c[&cond1] >0 and &cond2 ) do;
383 c[&cond1] +-1;
384 sampSize +-1;
385 output;
386 end;
387 %mend cond;
388
389
390 %let mean_age=3283.95;
391 %let std_age=563.1736630;
392 data work.outlier_weight;
393 call streaminit(12345);
394 sampSize =1000;
395 array p[4] _temporary_(5 5 45 45);
396 array c[4] _temporary_;
397 do i =1 to dim(p);
398 c [i] =ceil(sampSize *p[i]/100);
399 end;
400 * c[4] =c[4]-(sum(of c-sampSize);
401 mean = &mean_age;
402 std = &std_age;
403 do until (sampSize <=0);
404 x =rand('normal', mean, std);
405 select;
406 %cond(1,%str(x>mean+3*std ) )
407 %cond(2,%str(x>0 and x<mean-3*std) )
408 %cond(3,%str(x>mean-3*std and x<mean-2*std) )
409 %cond(4,%str(x>mean+2*std and x<mean+3*std) )
410 otherwise;
411 end;
412 end;
413 stop;
414 run;
NOTE: The data set WORK.OUTLIER_WEIGHT has 1000 observations and 5 variables.
NOTE: DATA statement used (Total process time):
real time 0.03 seconds
cpu time 0.03 seconds
415
416 %macro simulation (rate1 ,rate2);
417
418 /*count number of observations in outlier dataset and create macro variable &n1 &n2*/
419 proc sql noprint;
420 select count(*) into :n1
421 from work.outlier_weight;
422 quit;
423 proc sql noprint;
424 select count(*) into :n2
425 from work.birth_weight;
426 quit;
427
428 /* certian sample size and create macro variable &sample_size*/
429 data _null_;
430 sample_size=int(&n1*&rate1); put sample_size=;
431 call symputx("sample_size",sample_size);
432 run;
433
434 /*create random number and create a loop*/
435
436 %let seed=12345;
437 %do simu_num=1 %to 2;
438 %let seed=%eval(&seed+&simu_num);
439 %let obs=%eval(&n2-&sample_size);
440
441 %put n1=&n1;
442 %put n2=&n2;
443 %put sample_size=&sample_size;
444 %put seed=&seed;
445 %put obs=&obs;
446 /*sample numbers of data from outlier dataset randomly*/
447 proc sql noprint outobs=&sample_size;
448 create table simu_weight as
449 select x as birth_weight
450 from work.outlier_weight
451 order by ranuni(&seed);
452 quit;
453
454 /*create variable simu_num and rate in SAS dataset simu_weight*/
455 data simu_weight;
456 set simu_weight;
457 simu_num=&simu_num;
458 * put simu_num;
459 rate=&rate2;
460 run;
461
462 /*sample numbers of data from brith_weight dataset randomly*/
463
464 data birth_weight_new;
465
466 set work.birth_weight;
467 do i=1 to &n2;
468 order=ranuni(&seed);
469 end;
470 run;
471 proc sort data=birth_weight_new out=weight_random(drop=order i);
472 by order;
473 run;
474
475 data weight_random;
476 set weight_random;
477 simu_num=&simu_num;
478 * put simu_num;
479 rate=&rate2;
480 run;
481
482 /*replace oringnal data by simulation data*/
483 data sample;
484 set weight_random(obs=&obs) simu_weight;
485 run;
486 /* proc sql noprint;
487 create table all
488 like sample ;
489 quit; */
490
491 proc append base=all data=sample force;
492 run;
493 %put loop=&simu_num;
494 %end;
495
496 proc sql;
497 create table final as
498 select distinct rate, simu_num, avg(birth_weight) as mean_weight, std(birth_weight) as
498! std_weight
499 from all
500 group by simu_num
501 order by simu_num;
502 quit;
503
504 proc datasets lib=work nolist;
505 delete all;
506 quit;
507 %mend;
508 %simulation (1, 0.05)
NOTE: PROCEDURE SQL used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
NOTE: PROCEDURE SQL used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
sample_size=1000
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
n1= 1000
n2= 3987
sample_size=1000
seed=12346
obs=2987
NOTE: The query as specified involves ordering by an item that doesn't appear in its SELECT
clause.
NOTE: Table WORK.SIMU_WEIGHT created, with 1000 rows and 1 columns.
NOTE: PROCEDURE SQL used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
NOTE: There were 1000 observations read from the data set WORK.SIMU_WEIGHT.
NOTE: The data set WORK.SIMU_WEIGHT has 1000 observations and 3 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.00 seconds
NOTE: There were 3987 observations read from the data set WORK.BIRTH_WEIGHT.
NOTE: The data set WORK.BIRTH_WEIGHT_NEW has 3987 observations and 4 variables.
NOTE: DATA statement used (Total process time):
real time 2.25 seconds
cpu time 2.25 seconds
NOTE: There were 3987 observations read from the data set WORK.BIRTH_WEIGHT_NEW.
NOTE: The data set WORK.WEIGHT_RANDOM has 3987 observations and 2 variables.
NOTE: PROCEDURE SORT used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
NOTE: There were 3987 observations read from the data set WORK.WEIGHT_RANDOM.
NOTE: The data set WORK.WEIGHT_RANDOM has 3987 observations and 4 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
NOTE: There were 2987 observations read from the data set WORK.WEIGHT_RANDOM.
NOTE: There were 1000 observations read from the data set WORK.SIMU_WEIGHT.
NOTE: The data set WORK.SAMPLE has 3987 observations and 5 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
NOTE: Appending WORK.SAMPLE to WORK.ALL.
NOTE: BASE data set does not exist. DATA file is being copied to BASE file.
NOTE: There were 3987 observations read from the data set WORK.SAMPLE.
NOTE: The data set WORK.ALL has 3987 observations and 5 variables.
NOTE: PROCEDURE APPEND used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
loop=1
n1= 1000
n2= 3987
sample_size=1000
seed=12348
obs=2987
NOTE: The query as specified involves ordering by an item that doesn't appear in its SELECT
clause.
NOTE: Table WORK.SIMU_WEIGHT created, with 1000 rows and 1 columns.
NOTE: PROCEDURE SQL used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
NOTE: There were 1000 observations read from the data set WORK.SIMU_WEIGHT.
NOTE: The data set WORK.SIMU_WEIGHT has 1000 observations and 3 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
NOTE: There were 3987 observations read from the data set WORK.BIRTH_WEIGHT.
NOTE: The data set WORK.BIRTH_WEIGHT_NEW has 3987 observations and 4 variables.
NOTE: DATA statement used (Total process time):
real time 2.40 seconds
cpu time 2.20 seconds
NOTE: There were 3987 observations read from the data set WORK.BIRTH_WEIGHT_NEW.
NOTE: The data set WORK.WEIGHT_RANDOM has 3987 observations and 2 variables.
NOTE: PROCEDURE SORT used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
NOTE: There were 3987 observations read from the data set WORK.WEIGHT_RANDOM.
NOTE: The data set WORK.WEIGHT_RANDOM has 3987 observations and 4 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.00 seconds
NOTE: There were 2987 observations read from the data set WORK.WEIGHT_RANDOM.
NOTE: There were 1000 observations read from the data set WORK.SIMU_WEIGHT.
NOTE: The data set WORK.SAMPLE has 3987 observations and 5 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
NOTE: Appending WORK.SAMPLE to WORK.ALL.
NOTE: There were 3987 observations read from the data set WORK.SAMPLE.
NOTE: 3987 observations added.
NOTE: The data set WORK.ALL has 7974 observations and 5 variables.
NOTE: PROCEDURE APPEND used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
loop=2
NOTE: The query requires remerging summary statistics back with the original data.
NOTE: Table WORK.FINAL created, with 2 rows and 4 columns.
NOTE: PROCEDURE SQL used (Total process time):
real time 0.03 seconds
cpu time 0.01 seconds
NOTE: Deleting WORK.ALL (memtype=DATA).
NOTE: PROCEDURE DATASETS used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
|