(1)原始模型的建立以及模型分析
data france;
input year y x1 x2 x3 @@;
n=_n_;
cards;
1949 15.9 149.3 4.2 108.1 1950 16.4 161.2 4.1 114.8
1951 19.0 171.5 3.1 123.2 1952 19.1 175.5 3.1 126.9
1953 18.8 180.8 1.1 132.1 1954 20.4 190.7 2.2 137.7
1955 22.7 202.1 2.1 146.0 1956 26.5 212.4 5.6 154.1
1957 28.1 226.1 5.0 162.3 1958 27.6 231.9 5.1 164.3
1959 26.3 239.0 0.7 167.6
;
proc reg data=france;
model y=x1 x2 x3/ r collin vif;
output out=france predicted=pred residual=resid;
run;
proc print data=france;
var n; run;
proc plot data=france hpct=0.4 vpct=0.4;
plot resid*pred='*' $n; run;
(2)利用leave-one-out选取岭回归参数k
%let n=11; %let p=3;
proc iml;
free Ss Kk yy;
use france;
read all var{x1 x2 x3} into x;
read all var{y} into y;
close france;
print x,y;
y=(i(&n)-1/&n*j(&n))*y;
D=t(x)*(i(&n)-1/&n*j(&n))*x;
Dx=diag(vecdiag(D)##(-0.5));
x=(i(&n)-1/&n*j(&n))*x*Dx;
print x,y,Dx;
do k=0 to 0.02 by 0.00001;
beta=t(inv(t(x)*x+k*i(&p))*t(x)*y);
yHAT=x*t(beta);
e=y-yHAT; /*计算*/
HHAT=x*inv(t(x)*x+k*i(&p))*t(x);/*计算H的估计*/
h=vecdiag(HHAT);/*取H对角线元素并使其变为列向量*/
Ee=e##(2);
Hh=(j(&n,1,1)-h)##(2);
S=Ee/Hh;
Ss=Ss//(t(s));
Kk=Kk//k;
yy=yy||yHAT;
Bb=Bb//beta;
B=Kk||Bb;
end;
w=t(Kk)//yy;
print w,e,Ee,B,Hh,S,Ss[colname={"deta1" "deta2" "deta3" "deta4" "deta5" "deta6" "deta7" "deta8" "deta9" "deta10" "deta11"}],Kk;
CV=Ss[,+];/*通过矩阵元素相加得到CV(k)*/
A=Kk||CV;
f=CV[>:<,];/*f为CV值最小时的行标*/
t=A[f,1];/*t为CV值最小时所对应的k*/
print A ,f,t;
create Z from A[colname={k CV}];
append from A;
close Z;
xy=x||y;
create Q from xy[colname={x1 x2 x3 y}] ;
append from xy ;
close Q;
quit;
proc print data=Z; run;
proc print data=Q; run;
proc sort data=Z;
by CV;
proc print data=Z;run;
proc gplot data=Z ;
plot CV*K='*' ;
run;
(3)利用选取的岭回归参数k 建立回归模型
proc reg data=Q outest=outest ridge=0.00133 outvif;
model y= x1 x2 x3/noint collin vif;
run;
proc print data=outest; run;