其之 code 为:
- // ericmelse
- * Load example file:
- sysuse nlsw88, clear
- * The distribution underlying the box plot for inspection:
- histogram wage, width(0.1) start(0) lc(navy%50) horizontal yla(, ang(h))
- histogram hours, width(0.1) start(0) lc(navy%50) vertical
- * Graph the box plots for inspection:
- graph box wage, marker(1, ms(oh) mc(navy%50)) yla(, ang(h))
- graph box hours, marker(1, ms(oh) mc(navy%50)) yla(, ang(h)) hor
- * Compute required data for Boxplot of Wage:
- gen maxX = 84 // boxplot x-scale position
- * The median and quartiles are easiest:
- egen med_w = median(wage) //
- egen upq_w = pctile(wage), p(75) //
- egen loq_w = pctile(wage), p(25) //
- egen mean_w = mean(wage) //
- * We could now get the IQR by subtraction, upq - loq, which would be more efficient,
- * but we will mention that it has its own egen function.
- egen iqr_w = iqr(wage) //,
- * For upper limits of whiskers:
- egen upper_w = max(min(wage, upq_w + 1.5 * iqr_w)) //,
- * For lower limits of whiskers:
- egen lower_w = min(max(wage, loq_w - 1.5 * iqr_w)) //,
- * Compute required data for Boxplot of Wage:
- gen maxY = 43 // boxplot y-scale position
- * The median and quartiles are easiest:
- egen med_h = median(hours) //
- egen upq_h = pctile(hours), p(75) //
- egen loq_h = pctile(hours), p(25) //
- egen mean_h = mean(hours) //
- * We could now get the IQR by subtraction, upq - loq, which would be more efficient,
- * but we will mention that it has its own egen function.
- egen iqr_h = iqr(hours) //,
- * For upper limits of whiskers:
- egen upper_h = max(min(hours, upq_h + 1.5 * iqr_h)) //,
- * For lower limits of whiskers:
- egen lower_h = min(max(hours, loq_h - 1.5 * iqr_h)) //,
- * Boxplots and scatterplot in the same figure:
- twoway /// Box plot of Wage
- (rbar med_w upq_w maxX, blc(navy) bfc(white) barw(1.7) ) ///
- (rbar med_w loq_w maxX, blc(navy) bfc(white) barw(1.7) ) ///
- (rspike upq_w upper_w maxX , blc(navy) ) ///
- (rspike loq_w lower_w maxX , blc(navy) ) ///
- (rcap upper_w upper_w maxX , blc(navy) msize(*1) ) ///
- (rcap lower_w lower_w maxX , blc(navy) msize(*1) ) ///
- (scatter mean_w maxX, pstyle(p1) ms(dh) msize(*1) mls(*.8) mlc(gs9) ) /// Diamond for mean
- (scatter wage maxX if !inrange(wage, lower_w, upper_w), ms(oh) mc(navy%20) legend(off)) /// Outliers
- (rbar med_h upq_h maxY, blc(navy) bfc(white) barw(1.3) hor) /// Box plot of Hours
- (rbar med_h loq_h maxY, blc(navy) bfc(white) barw(1.3) hor) ///
- (rspike upq_h upper_h maxY , blc(navy) hor) ///
- (rspike loq_h lower_h maxY , blc(navy) hor) ///
- (rcap upper_h upper_h maxY , blc(navy) msize(*1) hor) ///
- (rcap lower_h lower_h maxY , blc(navy) msize(*1) hor) ///
- (scatter maxY mean_h , pstyle(p1) ms(dh) msize(*1) mls(*.8) mlc(gs9) ) /// Diamond for mean
- (scatter maxY hours if !inrange(hours, lower_h, upper_h), ms(oh) mc(navy%20) legend(off)) /// Outliers
- (scatter wage hours, msymbol(oh) /// Scatter plot of Wage & Hours
- legend(off) xscale(range(-1 80)) ///
- mcolor(navy%50)) ///
- , ytitle(Hourly wage) /// General plot controls
- xtitle(Usual hours worked)
- * The above final code can be edited to modify the plot using the regular twoway options etc.