代码全是根据算法自编函数,可以自行设置参数,尤其是窗宽的的选择更加自由。压缩包里有这部分讲义、自编代码、练习题。
贴出部分源代码
- #------------------定义函数---------------------#<?xml:namespace prefix = "o" ns = "urn:schemas-microsoft-com:office:office" />
- kern.epan <- function(u){
- return(3/4*(1-u^2)*(abs(u)<=1));
- }
- LLS.fun <- function(x,X,Y,h)
- {
- z = 1/h*kern.epan((x-X)/h);
- s1 = sum(z*(x-X));
- s2 = sum(z*(x-X)^2);
- w = z*(s2-(x-X)*s1);
- return(sum(w*Y)/sum(w));
- }
- CV1.fun=function(X,Y,h)
- {
- y1=rep(0,length(Y));
- for(i in 1:length(Y)){
- y1[i]=LLS.fun(X[i],X[-i],Y[-i],h);
- }
- return(mean((Y-y1)^2));
- }
- CI.boot.fun=function(resids,xpoints,X,yfit,B,h)
- {
- # bootstrap方法求置信区间
- result=matrix(0,length(xpoints),B);
- for(i in 1:length(xpoints)){
- for(b in 1:B){
- epsilon=sample(resids,replace=TRUE);
- newY=yfit+epsilon;
- #X2=X^2; X3=X^3; X4=X^4;
- #fit1 <- lm(Y~X+X2+X3+X4);
- #coefE=c(fit1$coeff);
- #sigmaE=sqrt(var(fit1$residuals));
- #CK=1.719 # Epanechnikov kernel used.
- #temp=cbind(2,3*2*X,4*3*X^2)%*%as.vector(coefE[-(1:2)]);
- #den=sum(temp^2);
- #h.ROT=CK*(sigmaE^2/den)^(1/(2*1+3));
- #newfit=LLS.fun(xpoints[i],X,newY,h=h.ROT);
- newfit=LLS.fun(xpoints[i],X,newY,h=h);
-
- result[i,b]=newfit;
- }
- }
-
- return(result);
- }
局部多项式法
- #------------------局部多项式法---------------------#
- #------------------Homework.one---------------------#
- library(SemiPar);
- data(age.income);
- attach(age.income);
- X=age;
- Y=log.income;
- plot(X,Y); #画原始数据的散点图
- X2=X^2; X3=X^3; X4=X^4;
- fit1<-lm(Y~X)
- lines(X,fit1$fitted.values,lty=1,col=1,lwd=3)
- fit2<-lm(Y~X+X2)
- lines(X,fit2$fitted.values,lty=2,col=2,lwd=3)
- fit3<-lm(Y~X+X2+X3)
- lines(X,fit3$fitted.values,lty=3,col=3,lwd=3)
- fit4<-lm(Y~X+X2+X3+X4)
- lines(X,fit4$fitted.values,lty=4,col=4,lwd=3)
- legend(20,15.1,c("线性","二次","三次","四次"),lty=c(1,2,3,4),col=c(1,2,3,4))
- # Rule-of-Thumb Global Bandwidth Selector;
- X2=X^2; X3=X^3; X4=X^4;
- fit1 <- lm(Y~X+X2+X3+X4);
- coefE=c(fit1$coeff);
- resids=fit1$residuals;
- sigmaE=sqrt(var(resids));
- CK=1.719 # Epanechnikov kernel used.
- temp=cbind(2,3*2*X,4*3*X^2)%*%as.vector(coefE[-(1:2)]);
- den=sum(temp^2);
- h.ROT=CK*(sigmaE^2/den)^(1/(2*1+3));
- print(h.ROT) #选择窗宽
- #final fitting;
- xfine=seq(0,70,length=50);
- ypred1 <- rep(0,length(xfine));
- for(i in 1:length(xfine)){
- ypred1[i] <- LLS.fun(xfine[i],X,Y,h=8);
- }
- plot(X,Y);
- lines(xfine,ypred1,lty=1,col=3);#拟合数据
- #求g(x)的置信区间
- yfit=rep(0,length(X));
- for(i in 1:length(X)){
- yfit[i] <- LLS.fun(X[i],X,Y,h=8);
- }
- temp=Y-yfit;
- resids=temp-mean(temp);
- result1=CI.boot.fun(resids=resids,xpoints=xfine,X=X,yfit=yfit,B=1000,h=8)
- CI.upp=rep(0,length(xfine));
- CI.low=rep(0,length(xfine));
- for(i in 1:length(xfine)){
- temp=quantile(result1[i,],probs=c(0.025,0.975),na.rm = T);
- CI.low[i]=temp[1]; CI.upp[i]=temp[2];
- }
- plot(X,Y)
- lines(xfine,ypred1,lty=1,col=2);
- lines(xfine,CI.upp,lty=3,col=3);
- lines(xfine,CI.low,lty=4,col=4);
- legend(20,15.1,c("Estmate","CI.upp","CI.low"),lty=c(1,3,4),col=c(2,3,4))
资料下载:R软件做非参数回归拟合数据,有代码和数据