For S-Plus
ridge.lm <- function(y,Xmat,k.vec,plot.it=T,crossvalid=F,original.scale=T,mycex=0.7) { #if crossvalid=T, exact PRESS statistic calculated #if original.scale=T, the ridge trace will plot coefficients based on unctr'd, unscaled Xmat n <- length(y) xmeans <- apply(Xmat,2,mean) xscale <- sqrt(apply(Xmat,2,var)*(n-1)) X.std <- cbind(1,scale(Xmat)/sqrt(n-1)) p <- dim(X.std)[[2]] if(sum(k.vec==0)==0) k.vec <- c(0,k.vec) #add 0 shrinkage if not included r <- length(k.vec) betas <- matrix(NA,r,p) VIF <- matrix(NA,r,p-1) C.k <- rep(NA,r) df.k <- rep(NA,r) PR.ridge.k <- rep(NA,r) GCV.k <- rep(NA,r) real.cv <- NA if(crossvalid) real.cv <- rep(NA,r) #--OLS model ols <- lm(y ~ X.std-1) sigma.2 <- (summary(ols)$sigma)^2 #---looping over the different shrinkage parameters #---don't shrink the intercept since centered data for(i in 1:r) { cat("iter=",i,"\n") k <- c(0,rep(k.vec,p-1)) X.k <- t(X.std) %*% X.std + k * diag(p) betas[i,] <- solve(qr(X.k), t(X.std) %*% cbind(y)) errs <- y-X.std%*%cbind(betas[i,]) SSres.k <- sum(errs^2) temp <- solve(t(X.k)) VIF[i,] <- diag(temp%*%t(X.std)%*%X.std%*%temp)[-1] H.k.ii <- diag(X.std[,-1]%*%solve(t(X.std[,-1])%*%X.std[,-1]+k[-1]*diag(p-1))%*%t(X.std[,-1])) C.k <- SSres.k/sigma.2-n+2+2*sum(H.k.ii) df.k <- sum(H.k.ii) PR.ridge.k <- sum((errs/(1-1/n-H.k.ii))^2) GCV.k <- SSres.k/(n-1-df.k)^2 if(crossvalid) { pred.err <- 0 for(j in 1:n) { X <- Xmat[-j,] X1.std <- cbind(1,scale(X)/sqrt(n-2)) X.k <- t(X1.std) %*% X1.std + k * diag(p) b <- solve(qr(X.k), t(X1.std) %*% cbind(y[-j])) xm <- apply(X,2,mean) xs <- sqrt(apply(X,2,var)*(n-2)) omit.x <- c(1,(Xmat[j,]-xm)/xs) errs <- y[j]-sum(omit.x*b) pred.err <- pred.err + errs^2 } real.cv <- pred.err } } #---backtransforming betas to original location and scale of Xmat beta.orig <- t(t(betas[,2:p])/xscale) intercept <- betas[,1]-apply(t(t(beta.orig)*xmeans),1,sum) beta.orig <- cbind(intercept,beta.orig) #---ridge trace plot if(plot.it) { orig.par <- par() par(mfrow=c(3,2),oma=c(0,0,3,0)) if(original.scale) { betas.ts <- as.ts(beta.orig[,-1]) } else { betas.ts <- as.ts(betas[,-1]) } ts.plot(betas.ts,xlab="k",ylab="Betas",main="Ridge trace", axes=F,type='b',lty=1:(p-1),pch=1:(p-1),cex=mycex) axis(side=2) # axis(side=1,at=1:r,labels=as.character(k.vec)) ???doesn't work?? mylab <- as.character(k.vec) axis(side=1,at=1:r,labels=mylab,cex=mycex) box() legend(round(r*.75),max(as.vector(betas.ts)),legend=paste("x",as.character(1:(p-1))), lty=1:(p-1),marks=1:(p-1),cex=mycex) plot(k.vec,C.k,type='l',xlab="k",ylab="C_k",main="C_k vs k",cex=mycex) plot(k.vec,PR.ridge.k,type='l',xlab="k",lty=1, ylab="PR(Ridge)_k",main="Pseudo-Press vs k",cex=mycex) if(crossvalid) { lines(k.vec,real.cv,type='l',lty=2) legend(k.vec[1],max(real.cv,PR.ridge.k),legend=c("PRESS","PR(Ridge)"),lty=2:1,cex=mycex) } plot(k.vec,GCV.k,type='l',xlab="k",ylab="GCV(k)",main="GCV(k) vs k",cex=mycex) plot(k.vec,df.k,type='l',xlab="k",ylab="df_k",main="df vs k",cex=mycex) par(mgp=orig.par$mgp) } dimnames(VIF) <- list(as.character(k.vec),paste("beta",as.character(1:(p-1)))) dimnames(beta.orig) <- list(as.character(k.vec),paste("beta",as.character(0:(p-1)))) return(betas,beta.orig,VIF,C.k,df.k,PR.ridge.k,real.cv,GCV.k) } #---farm data # temp <- read.table("Data/farmprod.dat",header=T,row.names="year") # # k.vec <- seq(0,0.02,by=0.01) # # out <- ridge.lm(temp[,1],temp[,-1],k.vec=k.vec,plot.it=T,crossvalid=T) # #---hospital data temp <- as.matrix(read.table("Data/hospital.dat",header=T,row.names=NULL)) k.vec <- seq(0,0.25,by=0.01) out <- ridge.lm(temp[,6],temp[,-6],k.vec=k.vec,plot.it=T,original.scale=F,crossvalid=T)