|
# EAI data set: salary and training status, 2500 records.
x <- read.csv("EAI.csv", header = TRUE)
# Drop row 2501.
# NOTE(review): presumably a non-data trailer row in the CSV -- confirm.
x <- x[-2501, ]

# Population parameters.
mean(x$Salary) # population mean
# NOTE: sd() divides by n - 1; with about 2500 rows the difference from the
# population formula (divide by N) is negligible.
sd(x$Salary) # population standard deviation
# Population proportion of "Yes" in Training. The denominator is the actual
# row count rather than a hard-coded 2500, and missing Training values are
# not counted as "Yes".
p <- sum(x$Training == "Yes", na.rm = TRUE) / nrow(x)

# One point estimate: a simple random sample of 30 rows drawn without
# replacement, then its sample mean and sample proportion.
x1 <- x[sample(nrow(x), 30), ]
mean(x1$Salary)
p1 <- sum(x1$Training == "Yes", na.rm = TRUE) / nrow(x1)
# Sampling distributions of the sample mean and the sample proportion:
# 500 simple random samples of size 30, one histogram per statistic.
# Each replicate draws ONE sample and computes both statistics from it, so
# x.sal[i] and p.tra[i] always describe the same 30 rows.
op <- par(mfrow = c(2, 1))

# Draw a simple random sample (without replacement) of `size` rows of `x`.
draw.sample <- function(size = 30) {
  x[sample(nrow(x), size), ]
}

# Mean Salary of sample `a`. Called with no argument it draws a fresh sample
# of 30 rows, which keeps the original zero-argument call working.
x.salary <- function(a = draw.sample()) {
  mean(a$Salary)
}

# Proportion of rows in sample `a` whose Training is "Yes". Called with no
# argument it draws a fresh sample of 30 rows. Missing Training values are
# not counted as "Yes"; the denominator is the full sample size.
p.training <- function(a = draw.sample()) {
  sum(a$Training == "Yes", na.rm = TRUE) / nrow(a)
}

# replicate() simplifies the named length-2 results into a 2 x 500 matrix
# whose row names are "salary" and "training".
sample.stats <- replicate(500, {
  a <- draw.sample()
  c(salary = x.salary(a), training = p.training(a))
})
x.sal <- sample.stats["salary", ]
p.tra <- sample.stats["training", ]

# Histogram plus kernel density estimate for each statistic.
hist(x.sal, probability = TRUE, col = "lightblue")
lines(density(x.sal))
hist(p.tra, probability = TRUE, col = "lightblue")
lines(density(p.tra))

# Restore the graphics parameters saved above.
par(op)
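# Review note: there is an error. x.salary() and p.training() are not linked:
# each one draws its own random sample, so the sample drawn for the mean (x)
# is never used for the proportion (p), and the two statistics are independent
# of each other. Please revise and check again.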
|