# Simulate right-censored survival data with two covariates and plot the
# Kaplan-Meier estimate of the overall survival curve.
#
# Parameterisation note: rweibull(shape = 1, scale = s) draws from an
# exponential distribution with mean s, i.e. a constant hazard of 1 / s.
# lambdaT and lambdaC are passed as *scales*, so the corresponding hazards
# are their reciprocals. With scale = lambdaT * exp(-beta1*x1 - beta2*x2)
# the hazard is (1 / lambdaT) * exp(beta1*x1 + beta2*x2), so beta1 and beta2
# act as log hazard ratios.
library(survival)

n <- 100
beta1 <- 2
beta2 <- -1
lambdaT <- .002  # baseline scale of the event time (baseline hazard = 1 / lambdaT)
lambdaC <- .004  # scale of the censoring time (censoring hazard = 1 / lambdaC)

x1 <- rnorm(n, 0)
x2 <- rnorm(n, 0)

# True event time. NOTE: the name T masks R's built-in shorthand for TRUE
# for the rest of the session; the name is kept for compatibility.
T <- rweibull(n, shape = 1, scale = lambdaT * exp(-beta1 * x1 - beta2 * x2))

# Censoring time, independent of the covariates.
C <- rweibull(n, shape = 1, scale = lambdaC)

# Observed time is the minimum of the censoring time and the true event time.
time <- pmin(T, C)

# TRUE when the event itself was observed, FALSE when the subject was censored.
event <- time == T

# Kaplan-Meier fit without covariates (the original had `fit - survfit(...)`,
# a subtraction from an undefined object, instead of an assignment).
fit <- survfit(Surv(time, event) ~ 1)
plot(fit)
/*====================================================================
  Outline
    1. create sample
    2. create index
    3. combine data
  ====================================================================*/

/*--------------------------------------------------------------------
  1. create sample
  --------------------------------------------------------------------*/

/* (1) systematic sample of known # of obs
       Reads every 15th observation (1, 16, 31, ...) up to obs 142 by
       direct access. STOP is required because POINT= never hits
       end-of-file. */
data sasuser.subset;
   do pickit=1 to 142 by 15;
      set sasuser.revenue point=pickit;
      output;
   end;
   stop;
run;

/* (2) systematic sample of unknown # of obs
       NOBS= supplies the observation count, so the upper bound does
       not have to be hard-coded. */
data sasuser.subset;
   do pickit=1 to totobs by 10;
      set sasuser.revenue point=pickit nobs=totobs;
      output;
   end;
   stop;
run;

/* (3) random sample with replacement
       Draws sampsize observation numbers uniformly from 1..totobs; the
       same observation may be picked more than once.
       Fix: the loop counter i is dropped (the original had drop=1). */
data work.rsubset(drop=i sampsize);
   sampsize=10;
   do i=1 to sampsize;
      pickit=ceil(ranuni(0)*totobs);
      set sasuser.revenue point=pickit nobs=totobs;
      output;
   end;
   stop;
run;

proc print data=work.rsubset label;
   title 'A Random Sample with Replacement';
run;

/* (4) random sample without replacement
       Walks through the observations once; each one is selected with
       probability sampsize/obsleft (samples still needed / observations
       still available), so no observation is selected twice.
       Fix: the comparison operators > and < were missing. */
data work.rsubset(drop=obsleft sampsize);
   sampsize=10;
   obsleft=totobs;
   do while(sampsize>0);
      pickit+1;
      if ranuni(0)<sampsize/obsleft then do;
         set sasuser.revenue point=pickit nobs=totobs;
         output;
         sampsize=sampsize-1;
      end;
      obsleft=obsleft-1;
   end;
   stop;
run;

proc print data=work.rsubset label;
   title 'A Random Sample without Replacement';
run;

/*--------------------------------------------------------------------
  2. create index
       - in data step
       - manage index with proc datasets
       - manage index with proc sql
  --------------------------------------------------------------------*/

/*--------------------------------------------------------------------
  3. combine data
  --------------------------------------------------------------------*/

/* (1) filename statement
       One fileref that concatenates several raw files. */
filename qtr1('add1' 'add2' 'add3');

data work.firstqtr;
   infile qtr1;
   input Flight $ Origin $ Dest $ Date: date9. Revcargo: comma15.2;
run;

/* (2) infile statement
       FILEVAR= switches the input file at run time. The file name is
       built from the month number of the current month and the two
       preceding months (<month>.dat).
       Fixes: today -> today(), mnthnum -> monthnum, and the missing
       END for the outer DO loop. */
data quarter (drop=monthnum midmon lastmon);
   monthnum=month(today());
   midmon=month(intnx('month', today(), -1));
   lastmon=month(intnx('month', today(), -2));
   do i=monthnum, midmon, lastmon;
      nextfile=""!!compress(put(i,2.)!!".dat",' ');
      do until(lastobs);
         infile temp filevar=nextfile end=lastobs;
         input Flight $ Origin $ Dest $ Date: date9. Revcargo: comma15.2;
         output;
      end;
   end;
   stop;
run;

/* (3) proc append */
proc append base=work.acities data=work.airports force;
run;

/* (4) if-then/else statement
       Hard-coded lookup of Birthdate by IDnum.
       Fix: '17JUN11973'd is not a valid date literal -> '17JUN1973'd. */
data mylib.employees_new;
   set mylib.employees;
   if IDnum=1001 then Birthdate='01JAN1963'd;
   else if IDnum=1002 then Birthdate='08AUG1946'd;
   else if IDnum=1003 then Birthdate='23MAR1950'd;
   else if IDnum=1004 then Birthdate='17JUN1973'd;
run;

/* (5) array statement
       Same lookup through a temporary array indexed 1001..1004.
       Fixes: invalid date literal, and Bithdate -> Birthdate. */
data mylib.employees_new;
   array birthdates{1001:1004} _temporary_
         ('01JAN1963'd '08AUG1946'd '23MAR1950'd '17JUN1973'd);
   set mylib.employees;
   Birthdate=birthdates(IDnum);
run;

/* (6) format procedure
       Same lookup through a user-defined format. The labels must be
       plain text so INPUT(..., date9.) can read them; the original used
       date literals ('...'d) as labels and had the invalid 17JUN11973. */
proc format;
   value $birthdate '1001'='01JAN1963'
                    '1002'='08AUG1946'
                    '1003'='23MAR1950'
                    '1004'='17JUN1973';
run;

data mylib.employees_new;
   set mylib.employees;
   Birthdate=input(put(IDnum,$birthdate.),date9.);
run;

/* (7) match-merge
       Fixes: the second sort wrote to out=expenses (overwriting the first
       result) instead of out=revenue; "datarevexpns" lacked the space. */
proc sort data=sasuser.expenses out=expenses;
   by flightid date;
run;

proc sort data=sasuser.revenue out=revenue;
   by flightid date;
run;

data revexpns (drop=rev1st revbusiness revecon expenses);
   merge expenses(in=e) revenue(in=r);
   by flightid date;
   if e and r;
   Profit=sum(rev1st, revbusiness, revecon, -expenses);
run;

/* revexpns is ordered by flightid/date, so both inputs are sorted by the
   merge key before the BY dest merge (acities.code is renamed to dest). */
proc sort data=revexpns;
   by dest;
run;

proc sort data=acities;
   by code;
run;

data sasuser.alldata;
   merge revexpns (in=r)
         acities (in=a rename=(code=dest) keep=city name code);
   by dest;
   if r and a;
run;

/* (8) sql
       The same three-table combination as an inner join. */
proc sql;
   create table sqljoin as
   select revenue.flightid,
          revenue.date format=date9.,
          revenue.origin,
          revenue.dest,
          sum(revenue.rev1st, revenue.revbusiness, revenue.revecon)
             -expenses.expenses as Profit,
          acities.city,
          acities.name
   from sasuser.expenses, sasuser.revenue, sasuser.acities
   where expenses.flightid=revenue.flightid
         and expenses.date=revenue.date
         and acities.code=revenue.dest
   order by revenue.dest, revenue.flightid, revenue.date;
quit;

/* (9) many-to-many match */
proc sql;
   create table flightemp as
   select flightschedule.*, firstname, lastname
   from sasuser.flightschedule, sasuser.flightattendants
   where flightschedule.empid=flightattendants.empid;
quit;

/* DATA step version: for each schedule row, scan every attendant row by
   direct access and output the matches.
   Fixes: missing semicolon after the DATA statement, nob= -> nobs=, and
   point=1 -> point=i (POINT= needs a variable, here the loop counter). */
data fightemps3(drop=empnum jobcode);
   set sasuser.flightschedule;
   do i=1 to num;
      set sasuser.flightattendants(rename=(empid=empnum)) nobs=num point=i;
      if empid=empnum then output;
   end;
run;

/* (10) summary data and detail data */
proc means data=sasuser.monthsum noprint;
   var revcargo;
   output out=sasuser.summary sum=Cargosum;
run;

/* Summary row is read once (_n_=1); its value is retained for every
   detail row. */
data sasuser.percent1;
   if _n_=1 then set sasuser.summary(keep=cargosum);
   set sasuser.monthsum(keep=salemon revcargo);
   PctRev=revcargo/cargosum;
run;

/* Same result without PROC MEANS: the total is accumulated in a first
   pass over the data on the first iteration. */
data sasuser.percent2(drop=totalrev);
   if _n_=1 then do until(lastobs);
      set sasuser.monthsum(keep=revcargo) end=lastobs;
      totalrev+revcargo;
   end;
   set sasuser.monthsum (keep=salemon revcargo);
   PctRev=revcargo/totalrev;
run;

/* (11) index
        KEY= lookup through the flightdate index; _IORC_=0 means a match
        was found. _ERROR_ is reset on a miss so the log is not flooded. */
data work.profit work.errors;
   set sasuser.dnunder;
   set sasuser.sale200(keep=routeid flightid date rev1st revbusiness
                            revecon revcargo)
       key=flightdate;
   if _iorc_=0 then do;
      Profit=sum(rev1st, revbusiness, revecon, revcargo, -expenses);
      output work.profit;
   end;
   else do;
      _error_=0;
      output work.errors;
   end;
run;

/* (12) multidimensional array
        4x2 lookup table filled row by row.
        Fixes: last table entry 29 -> -29 (each row's second value is the
        first plus 6), and colunm -> column so the subscript is defined. */
data work.wndchill(drop=column row);
   array WC {4,2} _temporary_ (-22, -16, -28, -22, -32, -26, -35, -29);
   set sasuser.flights;
   row=round(wspeed,5)/5;
   column=(round(temp,5)/5)+3;
   WindChill=wc{row, column};
run;

/* (13) stored array values
        Loads the ctargets rows into a temporary array on the first
        iteration, then looks up each month's target.
        Fix: keep= list had salemin/revargo instead of salemon/revcargo. */
data work.lookup1;
   array Targets{1997:1999,12} _temporary_;
   if _n_=1 then do i=1 to 3;
      set sasuser.ctargets;
      array Mnth{*} Jan--Dec;
      do j=1 to dim(mnth);
         targets{year,j}=mnth{j};
      end;
   end;
   set sasuser.monthsum(keep=salemon revcargo monthno);
   year=input(substr(salemon,4),4.);
   Ctarget=targets{year,monthno};
   format ctarget dollar15.2;
run;

/* (14) transpose and merge */
proc transpose data=sasuser.ctargets out=work.ctarget2
               name=Month prefix=Ctarget;
   by year;
run;

proc sort data=work.ctarget2;
   by year month;
run;

/* Derive Year and a mixed-case Month from SaleMon so the keys match the
   transposed table. */
data work.mnthsum2;
   set sasuser.monthsum(keep=SaleMon RevCargo);
   length Month $ 8;
   Year=input(substr(SaleMon,4),4.);
   Month=substr(SaleMon,1,1)||lowcase(substr(SaleMon,2,2));
run;

proc sort data=work.mnthsum2;
   by year month;
run;

/* Fix: workmnthsum2 -> work.mnthsum2 (missing period). */
data work.merged;
   merge work.mnthsum2 work.ctarget2;
   by year month;
run;
What is macro data? The terms micro and macro data are often used to denote data used in social science research. The distinction between them is, however, not always obvious. Micro data Micro data can generally be described as individual-level data. These data have often been collected from each individual through a survey or interview. In such a dataset, each row typically represents an individual person and each column an attribute such as age, gender or job type. Some well-known surveys that collect this type of data include the European Social Survey (ESS), the General Social Survey (GSS) and the World Values Survey (WVS). 'Micro data' also denotes data on individuals collected from governmental administrative systems and registers. While the main distinction is most often drawn between micro and macro data, the term 'meso data' is also sometimes used. Meso data generally refers to data on collective and cooperative actors such as commercial companies, organizations or political parties. Macro data 'Macro data' is a term generally used to describe two main subtypes of data: aggregated data and system-level data. Aggregated macro data provide information constructed by combining information on the lower-level units of which the higher-level unit is composed (Diez-Roux 2002). Examples of aggregate data include summaries of the properties of individuals, unemployment statistics, demographics, GDP etc. Most often, aggregated macro data imply that the variables are summaries of the properties of lower-level units and not measures of inherent higher-level properties. System-level macro data yield information about properties of the state or the political system and cannot be disaggregated to lower-level units. This type of data forms political indicators, such as institutional variables and regime indices; it is not based on summaries of the properties of lower-level units, but measures characteristics of the higher-level units themselves. 
The MacroDataGuide provides links and qualitative information on a wide range of both aggregated and system-level macro data sources. References Diez-Roux, Ana V. 2002. “A glossary for multilevel analysis”. Journal of Epidemiology and Community Health 56 (August): 588-594. BTW, a good glossary of social science data terms: http://3stages.org/glossary/glossary.html#micro