经管之家送您一份
应届毕业生专属福利!
求职就业群
感谢您参与论坛问题回答
经管之家送您两个论坛币!
+2 论坛币
数据载入Boston,求cross-validation
我的代码是
library(ggplot2)
library(MASS)
#利用函数poly进行高次拟合,该函数可以将自变量自动进行高次变化,degree参数控制着最高次项的次数如下:
fit3=lm(nox~poly(dis,4),data=Boston)
yy3 = predict(fit3)
df <- transform(Boston,yy3 = predict(fit3))
ggplot(data=Boston,aes(x=dis,y=nox)) + geom_point()+geom_line(aes(x=dis,y=yy3),data=Boston,col='blue')
rmse <- function(nox,rnox)
{
return(sqrt(sum((nox-rnox)^ 2))/length(rnox))
}
split <- function(Boston,rate)
{
n <- length(Boston[,1])
index <- sample(1:n,round(rate * n))
train <- Boston[index,]
test <- Boston[-index,]
df <- list(train=train,test=test,data=Boston)
return(df)
}
performance_Gen <- function(Boston,n){
performance <- data.frame()
for(index in 1:n){
fit <- lm(y ~ poly(dis,degree=index),data = Boston$train)
performance <- rbind(performance,data.frame(degree =index,type='train',rmse=rmse(Boston$train['nox'],predict(fit))))
performance <- rbind(performance,data.frame(degree = index,type='test',rmse=rmse(Boston$test['nox'],predict(fit,newdata=Boston$test))))
}
return(performance)
}
df_split <- split(df,0.5)
performance<- performance_Gen(df_split,10)
ggplot(performance,aes(x=degree,y=rmse,linetype=type))+geom_point()+geom_line()
在运行到performance<- performance_Gen(df_split,10) 时报错变数的长度不一样('poly(dis, degree = index)')
完全看不懂什么意思,都运行到就差一步了,内心十分崩溃~求高人指点,万分感谢
扫码加我 拉你入群
请注明:姓名-公司-职位
以便审核进群资格,未注明则拒绝
|