Cross-validation, ridge regression, and boot- strap

(1)

> par(mfrow=c(2,2))

> head(ironslag) chemical magnetic

1 24 25

2 16 22

3 24 17

4 18 21

5 18 20

6 10 13

> attach(ironslag)

> a=seq(min(chemical), max(chemical), length=50)

> #sequence for plotting fitted values

>

> plot(magnetic~chemical, main="Linear",pch=16)

> m1=lm(magnetic~chemical)

> b=coef(m1)

> fitted1=b[1]+b[2]*a

> lines(a, fitted1)

> plot(magnetic~chemical, main="Quadratic",pch=16)

> m2=lm(magnetic~chemical+I(chemical^2))

> b=coef(m2)

> fitted2=b[1]+b[2]*a+b[3]*a^2

> lines(a, fitted2)

> plot(magnetic~chemical, main="Exponential",pch=16)

> m3=lm(log(magnetic)~chemical)

> b=coef(m3)

> logfitted3=b[1]+b[2]*a

> fitted3=exp(logfitted3)

> lines(a, fitted3)

> plot(log(magnetic)~log(chemical), main="Log-log",pch=16)

> m4=lm(log(magnetic)~log(chemical))

> b=coef(m4)

> logfitted4=b[1]+b[2]*log(a)

> lines(log(a), logfitted4)

(2)

●

●●

●

●●

●

● ●

●

● ●● ●

●

● ●

●

● ●

●

● ●

●

10 15 20 25 30

10203040

Linear

chemical

magnetic

●

●●

●

●●

●

● ●

●

● ●● ●

●

● ●

●

● ●

●

● ●

●

10 15 20 25 30

10203040

Quadratic

chemical

magnetic

●

●●

●

●●

●

● ●

●

● ●● ●

●

● ●

●

● ●

●

● ●

●

10 15 20 25 30

10203040

Exponential

chemical

magnetic

●

●●

●

●●

●

● ●

●

● ●● ●

●

● ●

●

● ●

●

●●●

●

2.4 2.6 2.8 3.0 3.2 3.4

2.42.83.23.6

Log−log

log(chemical)

log(magnetic)

> #Cross-validation

> #leave one out CV (LOOCV); n-fold CV

>

> n=length(chemical)

> e1 <- e2 <- e3 <- e4 <- numeric(n)

> #n-fold CV

> #fit models on leave-one-out samples

>

> for (k in 1:n){

+ x = chemical[-k] #training sets + y = magnetic[-k] #training sets + ttx = chemical[k] #testing point + tty = magnetic[k]

+

+ J1=lm(y~x)

+ yhat1=J1$coef[1]+J1$coef[2]*ttx + e1[k]=tty-yhat1

+

+ J2=lm(y~x+I(x^2))

+ yhat2=J2$coef[1]+J2$coef[2]*ttx+J2$coef[3]*ttx^2 + e2[k]=tty-yhat2

+

+ J3=lm(log(y)~x)

+ logyhat3=J3$coef[1]+J3$coef[2]*ttx + yhat3=exp(logyhat3)

(3)

+ e3[k]=tty-yhat3 +

+ J4=lm(log(y)~log(x))

+ logyhat4=J4$coef[1]+J4$coef[2]*log(ttx) + yhat4=exp(logyhat4)

+ e4[k]=tty-yhat4 + }

> #estimate the mean of the squared prediction errors (MSPE)

> c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2)) [1] 19.55644 17.85248 18.44188 20.45424

> #According to MSPE Model 2 would be the best fit for the data

> library(car)

> J2 Call:

lm(formula = y ~ x + I(x^2)) Coefficients:

(Intercept) x I(x^2)

25.98525 -1.52948 0.05707

> f2=lm(magnetic~chemical+I(chemical^2))

> vif(f2)

chemical I(chemical^2) 62.14357 62.14357

> X=model.matrix(f2)

> crossprod(X)

(Intercept) chemical I(chemical^2)

(Intercept) 53 1120 24990

chemical 1120 24990 584086

I(chemical^2) 24990 584086 14193450

> summary(f2) Call:

lm(formula = magnetic ~ chemical + I(chemical^2)) Residuals:

Min 1Q Median 3Q Max

(4)

-8.4335 -2.7006 -0.2754 2.5446 12.2665 Coefficients:

Estimate Std. Error t value Pr(>|t|) (Intercept) 24.49262 9.14657 2.678 0.0100 chemical -1.39334 0.88852 -1.568 0.1232 I(chemical^2) 0.05452 0.02081 2.620 0.0116 Residual standard error: 4.098 on 50 degrees of freedom

Multiple R-squared: 0.5931, Adjusted R-squared: 0.5768 F-statistic: 36.44 on 2 and 50 DF, p-value: 1.728e-10

> s.c=chemical-mean(chemical)

> s.f2=lm(magnetic~s.c+I(s.c^2))

> vif(s.f2)

s.c I(s.c^2) 1.000276 1.000276

> X=model.matrix(s.f2)

> crossprod(X)

(Intercept) s.c I(s.c^2)

(Intercept) 5.300000e+01 6.039613e-14 1322.0755 s.c 6.039613e-14 1.322075e+03 119.0367 I(s.c^2) 1.322075e+03 1.190367e+02 71777.7234

> summary(s.f2) Call:

lm(formula = magnetic ~ s.c + I(s.c^2)) Residuals:

-8.4335 -2.7006 -0.2754 2.5446 12.2665 Coefficients:

Estimate Std. Error t value Pr(>|t|) (Intercept) 19.39475 0.76573 25.33 < 2e-16 s.c 0.91086 0.11273 8.08 1.25e-10 I(s.c^2) 0.05452 0.02081 2.62 0.0116

Residual standard error: 4.098 on 50 degrees of freedom

(5)

> oldpar <- par(mfrow = c(2,2))

> plot(s.f2)

> par(oldpar)

15 20 25 30 35

−100510

Fitted values

Residuals

●

●●

●

●● ●

●

●●

●

● ●

●

● ●

●

● ●

●

●● ●

●

● ●

●

Residuals vs Fitted

50 53

10

●

●●

●

●●●●

●

●●●

●

●●

●

●●

●

●●

●

●●

●

−2 −1 0 1 2

−20123

Theoretical Quantiles

Standardized residuals

Normal Q−Q

50 53

10

15 20 25 30 35

0.00.51.01.5

Fitted values

●

● ●

●

● ●

●

●●

●

●●

●

● ●

●

● ●

●

●●

●

● ●

●

Scale−Location

50 10 53

0.0 0.1 0.2 0.3

−20123

Leverage

●

●●

●

● ●●●

●

●●●

●

●●

●

●●

●

●●

●

●●●

●

● ●

●

Cook's distance 1 0.5 0.5 1

Residuals vs Leverage

51

6 50

> head(bodyfat)

SKIN THIGH ARM FAT 1 19.5 43.1 29.1 11.9 2 24.7 49.8 28.2 22.8 3 30.7 51.9 37.0 18.7 4 29.8 54.3 31.1 20.1 5 19.1 42.2 30.9 12.9 6 25.6 53.9 23.7 21.7

> pairs(bodyfat)

(6)

SKIN

45 50 55

●

● ●

●

●●

● ●

●

● ●

●

●●

● ●

●

15 20 25

15202530

●

●●

●

●●

● ●

●

455055

●

●●

●

● THIGH

●

● ●

●

●●

●

●●

●

● ●

●

● ●

●

● ●

●

● ●

●

● ● ARM

253035

● ●

●

● ●

●

● ●

15 20 25 30

152025

●

25 30 35

●

● FAT

> #variance inflation factor VIF

> cor(bodyfat)

SKIN THIGH ARM FAT

SKIN 1.0000000 0.9238425 0.4577772 0.8432654 THIGH 0.9238425 1.0000000 0.0846675 0.8780896 ARM 0.4577772 0.0846675 1.0000000 0.1424440 FAT 0.8432654 0.8780896 0.1424440 1.0000000

> fm1=lm(FAT~., data=bodyfat)

> summary(fm1) Call:

lm(formula = FAT ~ ., data = bodyfat) Residuals:

-3.7263 -1.6111 0.3923 1.4656 4.1277 Coefficients:

Estimate Std. Error t value Pr(>|t|) (Intercept) 117.085 99.782 1.173 0.258

SKIN 4.334 3.016 1.437 0.170

THIGH -2.857 2.582 -1.106 0.285

ARM -2.186 1.595 -1.370 0.190

(7)

Residual standard error: 2.48 on 16 degrees of freedom

> vif(fm1)

SKIN THIGH ARM

708.8429 564.3434 104.6060

> gs=summary(lm(SKIN~THIGH+ARM, data=bodyfat))

> vif.SKIN=1/(1-gs$r.squared)

> vif.SKIN [1] 708.8429

> gs=summary(lm(THIGH~SKIN+ARM, data=bodyfat))

> vif.THIGH=1/(1-gs$r.squared)

> vif.THIGH [1] 564.3434

> gs=summary(lm(ARM~SKIN+THIGH, data=bodyfat))

> vif.ARM=1/(1-gs$r.squared)

> vif.ARM [1] 104.606

> #ridge regression

> library(MASS)

> f2.ridge=lm.ridge(FAT~SKIN+THIGH+ARM, data=bodyfat, lambda=seq(0,0.2, 0.005))

> plot(f2.ridge)

> select(f2.ridge)

modified HKB estimator is 0.008505093 modified L-W estimator is 0.3098511 smallest value of GCV at 0.02

> lam=0.02

> abline(v=lam)

(8)

0.00 0.05 0.10 0.15 0.20

−15−10−505101520

x$lambda

t(x$coef)

> lam=0.02

> r=lm.ridge(FAT~SKIN+THIGH+ARM, data=bodyfat, lambda=lam)

> r$coef

SKIN THIGH ARM

10.126984 -4.682273 -3.527010

> coef(r)

SKIN THIGH ARM

42.2181995 2.0683914 -0.9177207 -0.9921824

> r$ym [1] 20.195

> r$xm

SKIN THIGH ARM 25.305 51.170 27.620

> r$scales

SKIN THIGH ARM

4.896067 5.102068 3.554800

(9)

> attach(bodyfat)

> cY <- scale(FAT, scale=FALSE)

> n <- length(cY)

> sX1 <- scale(SKIN) * sqrt(n/(n-1))

> sX2 <- scale(THIGH) * sqrt(n/(n-1))

> sX3 <- scale(ARM) * sqrt(n/(n-1))

> ans2 <- lm.ridge(cY ~ sX1 + sX2 + sX3, lambda = lam)

> ans2$coef

sX1 sX2 sX3

10.126984 -4.682273 -3.527010

> coef(ans2)

sX1 sX2 sX3

-2.386183e-15 1.012698e+01 -4.682273e+00 -3.527010e+00

> library(lars)

> xset=cbind(SKIN, THIGH, ARM)

> y=FAT

> g=lars(xset,y) #lasso least absolute shrinkage and selection operator Tibshirani 1996

> plot(g$lambda, type="l")

> g$lambda[1]

[1] 19.54395

1 2 3 4 5

05101520

Index

g$lambda

(10)

> plot(g)

* * *

**

*

0.0 0.2 0.4 0.6 0.8 1.0

−50050100

|beta|/max|beta|

Standardized Coefficients

*

* *

**

*

* * *

**

* LASSO

231

0 2 5

> cv.g=cv.lars(xset, y)

> which.min(cv.g$cv) [1] 10

> #bootstrap

>

> library(boot)

> Fun <- function(a, i) mean(a[i])

> set.seed(123)

> a=c(3,5,2,1,7)

> mean(a) [1] 3.6

> R=5 # number of bootstrap samples

> out=boot(a, Fun, R);out

ORDINARY NONPARAMETRIC BOOTSTRAP

Call:

boot(data = a, statistic = Fun, R = R)

(11)

Bootstrap Statistics :

original bias std. error

t1* 3.6 0.28 1.432480

> b=boot.array(out); b #indicate how many time ith obs appeared in the bth bootstrap sample [,1] [,2] [,3] [,4] [,5]

[1,] 1 1 0 0 3

[2,] 0 1 2 2 0

[3,] 1 0 1 2 1

[4,] 0 1 2 0 2

[5,] 1 0 1 1 2

> tb.theta=(b%*%a)/R; tb.theta [,1]

[1,] 5.8 [2,] 2.2 [3,] 2.8 [4,] 4.6 [5,] 4.0

> mean(tb.theta)-mean(a) [1] 0.28

> sd(tb.theta) [1] 1.432480

> out=boot(a, Fun, 2000); out ORDINARY NONPARAMETRIC BOOTSTRAP

Call:

boot(data = a, statistic = Fun, R = 2000)

original bias std. error t1* 3.6 -0.0322 0.9813484

(12)

> plot(out)

> head(litters) #pig litter data lsize bodywt brainwt

1 3 9.447 0.444 2 3 9.780 0.436 3 4 9.155 0.417 4 4 9.613 0.429 5 5 8.850 0.425 6 5 9.610 0.434

> y=litters$brainwt

> x1=litters$lsize

> x2=litters$bodywt

> f0=lm(y~x1+x2, data=litters)

> print(f0) Call:

lm(formula = y ~ x1 + x2, data = litters) Coefficients:

(Intercept) x1 x2

0.17825 0.00669 0.02431

> summary(f0) Call:

lm(formula = y ~ x1 + x2, data = litters) Residuals:

-0.0230005 -0.0098821 0.0004512 0.0092036 0.0180760 Coefficients:

Estimate Std. Error t value Pr(>|t|) (Intercept) 0.178247 0.075323 2.366 0.03010 x1 0.006690 0.003132 2.136 0.04751 x2 0.024306 0.006779 3.586 0.00228

Residual standard error: 0.01195 on 17 degrees of freedom Multiple R-squared: 0.6505, Adjusted R-squared: 0.6094 F-statistic: 15.82 on 2 and 17 DF, p-value: 0.0001315

(13)

> anova(f0)

Analysis of Variance Table Response: y

Df Sum Sq Mean Sq F value Pr(>F) x1 1 0.00268418 0.00268418 18.788 0.0004502 x2 1 0.00183687 0.00183687 12.857 0.0022784 Residuals 17 0.00242870 0.00014286

> # random x : case resampling

> boot.litters <- function(data, i){

+ b.data <- data[i,] # select obs. in bootstrap sample + y=b.data$brainwt

+ x1=b.data$lsize + x2=b.data$bodywt

+ mod <- lm(y ~ x1 + x2, data=b.data)

+ coefficients(mod) # return coefficient vector + }

> library(boot)

> out <- boot(litters, boot.litters, 200);out ORDINARY NONPARAMETRIC BOOTSTRAP

Call:

boot(data = litters, statistic = boot.litters, R = 200)

original bias std. error t1* 0.178246962 -5.958688e-04 0.067741874 t2* 0.006690331 6.984422e-05 0.002783938 t3* 0.024306344 3.580500e-05 0.006159927

> plot(out, index=2)

(14)

Histogram of t

t*

Density

0.000 0.010

050100150

●●

●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●

●●

●

●●

●

−2 0 1 2

0.0000.0050.0100.015

Quantiles of Standard Normal

t*

> plot(out, index=3)

Histogram of t

t*

Density

0.01 0.03 0.05

010203040506070

●●

●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●●

●

●●

●

−2 0 1 2

0.010.020.030.04

Quantiles of Standard Normal

t*

> # fixed x: model-based resampling

>

> fit <- fitted(f0)

> e <- residuals(f0)

> X <- model.matrix(f0)[,-1]

(15)

> boot.litters.fixed <- function(data, i){

+ y <- fit + e[i]

+ mod <- lm(y ~ X ) + coefficients(mod) + }

> out.fix <- boot( litters, boot.litters.fixed, 200); out.fix ORDINARY NONPARAMETRIC BOOTSTRAP

Call:

boot(data = litters, statistic = boot.litters.fixed, R = 200)

original bias std. error t1* 0.178246962 8.693893e-04 0.071398245 t2* 0.006690331 -3.196501e-05 0.002966800 t3* 0.024306344 -9.890197e-05 0.006400178