參考文獻
Appendix 1 SAS與R程式
【SAS Program】
dm'log;clear;output;clear;program;recall;';
options ps=55 ls=100 nodate pageno=1 center;
data smoking;/* y = cigar *x1 = bladder * x2 = lung * x3 = kidney * x4 = leukemia * x5 = area
*/;
INPUT state $ y x1 x2 x3 x4 x5;
IF y >23.77 THEN smoke=1; ELSE smoke=0;
IF x5 = 3 or x5 =4 THEN west=1; ELSE west=0;
/*transformations: ystar1=y^-0.22,ystar2=y^-0.3,x1star=x1^0.13, x2star=x2^1.5, x3star=x3^0.49*/
ystar1=y**-0.22;
ystar2=y**-0.3;
x1x2=x1*x2; x1x3=x1*x3; x1x4=x1*x4; x2x3=x2*x3; x2x4=x2*x4; x3x4=x3*x4;
xx1=x1**2; xx2=x2**2; xx3=x3**2;
x1star=x1**0.13; x2star=x2**1.5; x3star=x3**0.49;
lable y = 'cigar' x1 = 'bladder' x2 = 'lung' x3 = 'kidney' x4 = 'leukemia'
ystar1 = 'transformed cigar1' ystar2 = 'transformed cigar2' x1star='transformed bladder' x2star='transformed lung' x3star='transformed kidney'
x1x2 = 'bladder lung' x1x3 = 'bladder kidney' x1x4 = 'bladder leukemia'
/* Part1. Simple Regression */
output out=r1;
run;
proc reg data= smoking;title 'Model 2 : y on x2';
model y = x2/all;
output out=r2;
run;
proc reg data= smoking;title 'Model 3 : y on x3';
model y = x3/all;
output out=r3;
run;
proc reg data= smoking;title 'Model 4 : y on x4';
model y = x4/all;
output out=r4;
run;
/* Part2. Multiple Linear Regression – two variables */
proc reg corr data= smoking;title 'Model P1a : y on x1 x2';
model y = x1 x2/all;
output out=rP1a;
run;
proc reg corr data= smoking;title 'Model P1b : y on x1 x2 x1x2';
model y = x1 x2 x1x2/all;
output out=rP1b;
run;
proc reg corr data= smoking;title 'Model P2a : y on x1 x3';
model y = x1 x3/all;
output out=rP2a;
run;
proc reg corr data= smoking;title 'Model P2b : y on x1 x3 x1x3';
model y = x1 x3 x1x3/all;
output out=rP2b;
run;
proc reg corr data= smoking;title 'Model P3a : y on x1 x4';
model y = x1 x4/all;
output out=rP3a;
run;
proc reg corr data= smoking;title 'Model P3a : y on x1 x4 x1x4';
model y = x1 x4 x1x4/all;
output out=rP3b;
run;
model y = x2 x3/all;
output out=rP4a;
run;
proc reg corr data= smoking;title 'Model P4b : y on x2 x3 x2x3';
model y = x2 x3 x2x3/all;
output out=rP4b;
run;
proc reg corr data= smoking;title 'Model P5a : y on x2 x4';
model y = x2 x4/all;
output out=rP5a;
run;
proc reg corr data= smoking;title 'Model P5b : y on x2 x4 x2x4';
model y = x2 x4 x2x4/all;
output out=rP5b;
run;
proc reg corr data= smoking;title 'Model P6a : y on x3 x4';
model y = x3 x4/all;
output out=rP6a;
run;
proc reg corr data= smoking;title 'Model P6a : y on x3 x4 x3x4';
model y = x3 x4 x3x4/all;
output out=rP6b;
run;
/* Part3. Multiple Linear Regression – three variables */
proc reg corr data= smoking;title 'Model 5 : y on x1 x2 x3 x4';
model y = x1 x2 x3 x4/all;
output out=r5 p=p5 r=r5 student=Ri5 Rstudent=Ti5 H=H5 PRESS=PRESS5;
run;
/*Diagnostics for Leverage and Influence. Test multicollinearity */
proc reg corr data= smoking;title 'Model 6 : y on x1 x2 x3';
model y = x1 x2 x3/influence collin vif dw;
output out=resid1 p=predy1 r=resid1 student=Ri1 Rstudent=Ti1 H=H1 PRESS=PRESS1 COOKD=COOKD1;
run;
/* Residual analysis */
proc print data=resid1;
var y predy1 resid1 Ri1 Ti1 H1 PRESS1 COOKD1;
run;
model y = x1 x2 x3;
run;
/* stepwise1 y on x1 x2 x3 x1^2 x2^2 x3^2 check variable and model */
proc reg data= smoking;title 'Forward Selection';
model y = x1 x2 x3 xx1 xx2 xx3/selection=forward slentry=0.05;
run;
proc reg data= smoking;title 'Backward elimination';
model y = x1 x2 x3 xx1 xx2 xx3/selection=backward slstay=0.05;
run;
proc reg data= smoking;title 'Stepwise regression';
model y = x1 x2 x3 xx1 xx2 xx3/selection=stepwise slentry=0.05 slstay=0.05;
run;
proc reg data= smoking outest=est;title 'All possible regressions';
model y = x1 x2 x3 xx1 xx2 xx3/selection=rsquare adjrsq cp mse press aic;
run;
proc gplot data=est;title 'cp plot';
plot _cp_*_p_/vaxis=0 to 15 by 1.5 haxis=2 to 8 by 1;
run;
proc reg corr data= smoking;title 'Model 7 : y on x1 x2 x3 xx1';
model y = x1 x2 x3 xx1/influence collin vif dw; /*Diagnostics for Leverage and Influence.*/
output out=resid2 p=predy2 r=resid2 student=Ri2 Rstudent=Ti2 H=H2 PRESS=PRESS2 COOKD=COOKD2;
run;
/* Residual analysis */
proc print data=resid2;
var y predy2 resid2 Ri2 Ti2 H2 PRESS2 COOKD2;
run;
/* Part4. Transformation on the Response (ystar1=y^-0.22) */
proc reg corr data= smoking;title 'Model 8 : ystar1 on x1 x2 x3';
model ystar1 = x1 x2 x3/influence dw; /*Diagnostics for Leverage and Influence.*/
output out=resid3 p=predy3 r=resid3 student=Ri3 Rstudent=Ti3 H=H3 PRESS=PRESS3 COOKD=COOKD3;
run;
/* Residual analysis */
proc print data=resid3;
var ystar predy3 resid3 Ri3 Ti3 H3 PRESS3 COOKD3;
run;
/* stepwise2 ystar on x1 x2 x3 x1^2 x2^2 x3^2 check variable and model */
model ystar1 = x1 x2 x3 xx1 xx2 xx3/selection=forward slentry=0.05;
run;
proc reg data= smoking;title 'Backward elimination';
model ystar1 = x1 x2 x3 xx1 xx2 xx3/selection=backward slstay=0.05;
run;
proc reg data= smoking;title 'Stepwise regression';
model ystar1 = x1 x2 x3 xx1 xx2 xx3/selection=stepwise slentry=0.05 slstay=0.05;
run;
proc reg data= smoking outest=est2;title 'All possible regressions';
model ystar1 = x1 x2 x3 xx1 xx2 xx3/selection=rsquare adjrsq cp mse press aic;
run;
proc gplot data=est2;title 'cp plot';
plot _cp_*_p_/vaxis=0 to 15 by 1.5 haxis=2 to 8 by 1;
run;
/* Part5a. Transformation on the Regressor (x1star=x1^-0.13, x2star=x2^1.5, x3star=x3^0.49) */
proc reg corr data= smoking;title 'Model 8 : y on x1star x2star x3star';
model y = x1star x2star x3star/influence dw vif; /*Diagnostics for Leverage and Influence.*/
output out=resid p=predy r=resid student=Ri Rstudent=Ti H=H PRESS=PRESS COOKD=COOKD;
/* Part5b. Transformation on the Response and Regressor (ystar2=y^-0.3,x1star=x1^0.13, x2star=x2^1.5, x3star=x3^0.49) */
/*Diagnostics for Leverage and Influence.*/
proc reg corr data= smoking;title 'Model 9 : ystar2 on x1star x2star x3star';
model ystar2 = x1star x2star x3star;
proc reg corr data= smoking;title 'Model 10 : ystar2 on x2star x3star';
model ystar2 = x2star x3star/influence dw;
output out=resid4 p=predy4 r=resid4 student=Ri4 Rstudent=Ti4 H=H4 PRESS=PRESS4 COOKD=COOKD4;
run;
/* Residual analysis */
proc print data=resid4;
var ystar predy4 resid4 Ri4 Ti4 H4 PRESS4 COOKD4;
run;
y1=y;
IF y1 >23.77 then output;
data b; set smoking;
y2=y;
IF y2 <23.77 then output;
/*proc print data=a;
proc print data=b;
run;*/
proc reg data=a;title 'Model 11a : y1star on x1 x2 x3';
model ystar1=x1 x2 x3;
output out=resid11a;
run;
proc reg data=b;title 'Model 11b : y2star on x1 x2 x3';
model ystar1=x1 x2 x3;
output out=resid11b;
run;
/* Part7. Drop extreme points, obs 8, 26 (DC, NV) */
dm'log;clear;output;clear;program;recall;';
options ps=55 ls=100 nodate pageno=1 center;
data smoking2;/* y = cigar *x1 = bladder * x2 = lung * x3 = kidney * x4 = leukemia * x5 = area
*/;
INPUT state $ y x1 x2 x3 x4 x5;
IF y >23.77 THEN smoke=1; ELSE smoke=0;
IF x5 = 3 or x5 =4 THEN west=1; ELSE west=0;
model y = x1 x2 x3;
proc reg corr data= smoking2;title 'Model 13 : y on x2 x3 drop obs8, 26';
model y = x2 x3/influence collin vif dw; /*Diagnostics for Leverage and Influence. */
output out=resid5 p=predy5 r=resid5 student=Ri5 Rstudent=Ti5 H=H5 PRESS=PRESS5 COOKD=COOKD5;
run;
/* Residual analysis */
proc print data=resid5;
var y predy5 resid5 Ri5 Ti5 H5 PRESS5 COOKD5;
run;
/*test if need the second order term */
proc rsreg data= smoking2;title 'test if need the second order term' ; model y = x1 x2 x3;
run;
/* stepwise3 y on x1 x2 x3 x1^2 check variable and model drop obs8, 26 */
proc reg data= smoking2;title 'Forward Selection drop obs8, 26 ';
model y = x1 x2 x3 xx1/selection=forward slentry=0.05;
run;
proc reg data= smoking2;title 'Backward elimination drop obs8, 26';
model y = x1 x2 x3 xx1/selection=backward slstay=0.05;
run;
proc reg data= smoking2;title 'Stepwise regression drop obs8, 26';
model y = x1 x2 x3 xx1/selection=stepwise slentry=0.05 slstay=0.05;
run;
proc reg data= smoking2 outest=est3;title 'All possible regressions drop obs8, 26';
model y = x1 x2 x3 xx1/selection=rsquare adjrsq cp mse press aic;
run;
proc gplot data=est3;title 'cp plot drop obs8, 26';
plot _cp_*_p_/vaxis=0 to 15 by 1.5 haxis=2 to 8 by 1;
run;
/*Add Indicator variables*/
dm'log;clear;output;clear;program;recall;';
options ps=55 ls=100 nodate pageno=1 center;
data smoking;/* y = cigar *x1 = bladder * x2 = lung * x3 = kidney * x4 = leukemia * x5 = area
*/;
INPUT state $ y x1-x5 IndAREA1-IndAREA4 IndCIG1 IndCIG2 IndWEST1 IndWEST2;/*IndAREA1=0;IndCIG2=0;IndWEST2=0*/;
cards;
AZ 25.82 3.52 19.8 2.75 6.61 4 0 0 0 1 0 0 1 0
model y = x1-x3 IndAREA2-IndAREA4/influence collin vif dw;
run;
proc reg;title 'Model 15a : y on x1 x2 x3 smoke';
model y = x1-x3 IndCIG1/influence collin vif dw;
run;
proc reg;title 'Model 15b : y on x2 x3 smoke drop obs 8, 26';
model y = x2 x3 IndCIG1/influence collin vif dw;
output out=resid1 p=predy1 r=resid1 student=Ri1 Rstudent=Ti1 H=H1 PRESS=PRESS1 COOKD=COOKD1;
run;
/* Residual analysis */
proc print data=resid1;
var y predy1 resid1 Ri1 Ti1 H1 PRESS1 COOKD1;
run;
proc reg;title 'Model 16 : y on x1 x2 x3 WEST';
model y = x1-x3 IndWEST1/influence collin vif dw;
run;
proc reg;title 'Model 17 : y on x1 x2 x3 WEST';
model y = x1 x2 x3 IndWEST1/influence collin vif dw;
run;
proc reg;title 'Model 18 : y on x1 x2 x3 smoke Area';
model y = x1 x2 x3 IndCIG1 IndAREA2-IndAREA4/influence collin vif dw;
run;
proc reg;title 'Model 19a : y on x1 x2 x3 smoke west';
model y = x1 x2 x3 IndCIG1 IndWEST1/influence collin vif dw;
run;
proc reg;title 'Model 19b : y on x1 x3 smoke west';
model y = x1 x3 IndCIG1 IndWEST1/influence collin vif dw;
output out=resid2 p=predy2 r=resid2 student=Ri2 Rstudent=Ti2 H=H2 PRESS=PRESS2 COOKD=COOKD2;
/* Residual analysis */
proc print data=resid1;
var y predy2 resid2 Ri2 Ti2 H2 PRESS2 COOKD2;
run;
/* stepwise4 y on x1 x2 x3 AREA Smoke WEST check variable */
proc reg data= smoking;title 'Forward Selection with dummy variables';
model y = x1 x2 x3 IndAREA2-IndAREA4 IndCIG1 IndWEST1/selection=forward slentry=0.05;
run;
proc reg data= smoking;title 'Backward elimination with dummy variables';
model y = x1 x2 x3 IndAREA2-IndAREA4 IndCIG1 IndWEST1/selection=backward slstay=0.05;
run;
proc reg data= smoking;title 'Stepwise regression with dummy variables';
model y = x1 x2 x3 IndAREA2-IndAREA4 IndCIG1 IndWEST1/selection=stepwise slentry=0.05 slstay=0.05;
run;
proc reg data= smoking outest=est3;title 'All possible regressions with dummy';
model y = x1 x2 x3 IndAREA2-IndAREA4 IndCIG1 IndWEST1/selection=rsquare adjrsq cp mse press aic;
run;
/* drop obs 8, 26*/
dm'log;clear;output;clear;program;recall;';
options ps=55 ls=100 nodate pageno=1 center;
data smoking2;/* y = cigar *x1 = bladder * x2 = lung * x3 = kidney * x4 = leukemia * x5 = area
*/;
INPUT state $ y x1-x5 IndAREA1-IndAREA4 IndCIG1 IndCIG2 IndWEST1 IndWEST2;/*IndAREA1=0;IndCIG2=0;IndWEST2=0*/;
proc reg;title 'Model 21a : y on x1 x2 x3 smoke drop obs 8, 26';
model y = x1 x2 x3 IndCIG1/influence collin vif dw;
run;
proc reg;title 'Model 21b : y on x2 x3 smoke drop obs 8, 26';
model y = x2 x3 IndCIG1/influence collin vif dw;
output out=resid1 p=predy1 r=resid1 student=Ri1 Rstudent=Ti1 H=H1 PRESS=PRESS1 COOKD=COOKD1;
run;
/* Residual analysis */
proc print data=resid1;
var y predy1 resid1 Ri1 Ti1 H1 PRESS1 COOKD1;
run;
proc reg;title 'Model 22 : y on x1 x2 x3 WEST drop obs 8, 26';
model y = x1 x2 x3 IndWEST1/influence collin vif dw;
run;
proc reg;title 'Model 23 : y on x1 x2 x3 smoke Area drop obs 8, 26';
model y = x1 x2 x3 IndCIG1 IndAREA2-IndAREA4/influence collin vif dw;
run;
proc reg;title 'Model 24 : y on x1 x2 x3 smoke west drop obs 8, 26';
model y = x1 x2 x3 IndCIG1 IndWEST1/influence collin vif dw;
run;
/* stepwise5 y on x1 x2 x3 AREA Smoke WEST check variable and model drop obs8, 26 */
proc reg data= smoking2;title 'Forward Selection with dummy variables drop obs8, 26 ';
model y = x1 x2 x3 IndAREA2-IndAREA4 IndCIG1 IndWEST1/selection=forward slentry=0.05;
run;
proc reg data= smoking2;title 'Backward elimination with dummy variables drop obs8, 26';
model y = x1 x2 x3 IndAREA2-IndAREA4 IndCIG1 IndWEST1/selection=backward slstay=0.05;
run;
proc reg data= smoking2;title 'Stepwise regression with dummy variables drop obs8, 26';
model y = x1 x2 x3 IndAREA2-IndAREA4 IndCIG1 IndWEST1/selection=stepwise slentry=0.05 slstay=0.05;
run;
proc reg data= smoking2 outest=est3;title 'All possible regressions with dummy variables drop obs8, 26';
model y = x1 x2 x3 IndAREA2-IndAREA4 IndCIG1 IndWEST1/selection=rsquare adjrsq cp mse press aic;
run;
【R Program】
smokecor <- cbind(CIG, BLAD, LUNG, KID, LEUK) cor(smokecor) -> corr.mtsmokes
ord <- order(corr.mtsmokes [1,]) xc <- corr.mtsmokes [ord, ord]
colors <- c("#EFF3FF","#BDD7E7","#6BAED6","#3182BD","#08519C")
##simple regression model and plot y1 <- lm(CIG~BLAD)
summary(y1) anova(y1)
anova(y2)
plot(CIG~BLAD, pch=16, main="Scatter Plot", xlab="Bladder Cancer", ylab="Number of Cigarettes Smoked")
abline(y1, col=10 , lwd=2)
plot(CIG~LUNG, pch=16,main="Scatter Plot", xlab="Lung Cancer", ylab="Number of Cigarettes Smoked")
abline(y2, col=5 , lwd=2)
plot(CIG~KID, pch=16,main="Scatter Plot", xlab="Kidney Cancer", ylab="Number of Cigarettes Smoked")
abline(y3, col=7, lwd=2)
plot(CIG~LEUK, pch=16,main="Scatter Plot", xlab="Leukemia", ylab="Number of Cigarettes Smoked")
anova(y21)
main="Partial Regression", pch=16)
right.marginal <- seq(min(BLAD), max(BLAD), length = 50) left.marginal <- seq(min(LUNG), max(LUNG), length = 50)
yes.marginal <- list(BLAD=right.marginal, LUNG=left.marginal) grid <- expand.grid(yes.marginal)
grid[, "fit1"] <- c(predict(ylm11, grid))
contourplot(fit1~ BLAD*LUNG, cuts=10, region = TRUE, data=grid, main="Contour Plot")
##contour plot for BLAD*KID ylm2 <- lm(CIG~BLAD+KID) ylm21 <- lm(CIG~BLAD*KID)
right.marginal <- seq(min(BLAD), max(BLAD), length = 50) left.marginal <- seq(min(KID), max(KID), length = 50)
yes.marginal <- list(BLAD=right.marginal, KID=left.marginal) grid <- expand.grid(yes.marginal)
grid[, "fit1"] <- c(predict(ylm21, grid))
contourplot(fit1~ BLAD*KID, cuts=10, region = TRUE, data=grid, main="Contour Plot")
#contour plot for BLAD*LEUK ylm3 <- lm(CIG~BLAD+LEUK) ylm31 <- lm(CIG~BLAD*LEUK)
right.marginal <- seq(min(BLAD), max(BLAD), length = 50) left.marginal <- seq(min(LEUK), max(LEUK), length = 50)
yes.marginal <- list(BLAD=right.marginal, LEUK=left.marginal) grid <- expand.grid(yes.marginal)
grid[, "fit1"] <- c(predict(ylm31, grid))
contourplot(fit1~ BLAD*LEUK, cuts=10, region = TRUE, data=grid, main="Contour Plot")
#contour plot for LUNG*KID ylm4 <- lm(CIG~LUNG+KID) ylm41 <- lm(CIG~LUNG*KID)
right.marginal <- seq(min(LUNG), max(LUNG), length = 50) left.marginal <- seq(min(KID), max(KID), length = 50)
yes.marginal <- list(LUNG=right.marginal, KID=left.marginal) grid <- expand.grid(yes.marginal)
grid[, "fit1"] <- c(predict(ylm41, grid))
contourplot(fit1~ LUNG*KID, cuts=10, region = TRUE, data=grid, main="Contour Plot")
#contour plot for LUNG*LEUK
right.marginal <- seq(min(LUNG), max(LUNG), length = 50) left.marginal <- seq(min(LEUK), max(LEUK), length = 50)
yes.marginal <- list(LUNG=right.marginal, LEUK=left.marginal) grid <- expand.grid(yes.marginal)
grid[, "fit1"] <- c(predict(ylm51, grid))
contourplot(fit1~ LUNG*LEUK, cuts=10, region = TRUE, data=grid, main="Contour Plot")
## contour plot for KID*LEUK ylm6 <- lm(CIG~KID+LEUK) ylm61 <- lm(CIG~KID*LEUK)
right.marginal <- seq(min(KID), max(KID), length = 50) left.marginal <- seq(min(LEUK), max(LEUK), length = 50) yes.marginal <- list(KID=right.marginal, LEUK=left.marginal) grid <- expand.grid(yes.marginal)
grid[, "fit1"] <- c(predict(ylm61, grid))
contourplot(fit1~ KID*LEUK, cuts=10, region = TRUE, data=grid, main="Contour Plot")
##three regressor into model/Scatterplot matrix, model fitted and Residual plot par(mfrow=c(2,2))
plot(ylm7$residuals~ylm7$fitted, xlab="Fitted Value", ylab="Residual", pch=16, main="Fitted Value vs Residual Plot")
abline(h=0, col=2)
plot(ylm7$residuals~BLAD, xlab="BLAD", ylab="Residual",pch=16, main="BLAD vs Residual Plot")
abline(h=0, col=2)
plot(ylm7$residuals~LUNG, xlab="LUNG", ylab="Residual",pch=16, main="LUNG vs Residual Plot")
abline(h=0, col=2)
plot(ylm7$residuals~KID, xlab="KID", ylab="Residual",pch=16, main="KID vs Residual Plot")
abline(h=0, col=2) par(mfrow=c(1,1))
xlab = "Theoretical Quantiles", ylab = "Sample Quantiles",
text(-0.20, -135, c("Box-Cox Transformation"), col=3) text(-0.20, -137, expression(lambda== -0.22), col=3)
## Scatterplot matrix, model fitted and Residual plot for transformtion response tranG <- (CIG)^-0.22 plot(ylm8$residuals~ylm8$fitted, xlab="Fitted Value", ylab="Residual", pch=16,
main="Fitted Value vs Residual Plot")
abline(h=0, col=2)
## model fitted and Residual plot for add second-order polynomial plot(ylm9$residuals~ylm9$fitted, xlab="Fitted Value", ylab="Residual", pch=16,
main="Fitted Value vs Residual Plot")
abline(h=0, col=2)
qqnorml(ylm9$res, main = "Normal Q-Q Plot",
xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", plot.it = TRUE)
qqline(ylm9$res, col=4)
inf.index(ylm9,pch=16, col=3)
avp(ylm9,one.page=TRUE,ask=FALSE,identify.points=FALSE)
plot(ylm9,2) plot(ylm9,4)
## model fitted for transformtion response and add second-order polynomial ylm10 <- lm(tranG~BLAD+DBLAD+LUNG+KID)
PRESS(ylm10)
summary(ylm10) anova(ylm10)
## model fitted for transformtion response and transformation regressor summary(regressor <- bctrans(CIG~ BLAD+LUNG+KID))
lrt.bctrans(regressor,)
tranGa <- (CIG)^-0.3 tranBa <- (BLAD)^-0.13 tranLa <- (LUNG)^1.5 tranKa <- (KID)^0.5
## Scatterplot matrix, model fitted for transformtion response and transformation regressor pairs(tranGa~tranBa+tranLa+tranKa, pch=16)
scatterplot.matrix(~tranGa+tranBa+tranLa+tranKa, pch=16) ylm11 <- lm(tranGa~tranBa+tranLa+tranKa)
PRESS(ylm11)
summary(ylm11) anova(ylm11)
par(mfrow=c(2,2)) plot(ylm11$residuals~ylm9$fitted, xlab="Fitted Value", ylab="Residual", pch=16,
main="Fitted Value vs Residual Plot")
abline(h=0, col=2)
plot(CIG~BLAD, pch=unclass(indexx),col=unclass(indexx),main="CIG vs BLAD Scatter Plot") legend(3.5,40, c(expression(CIG<23.77), expression(CIG>23.77)),
col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(CIG~LUNG, pch=unclass(indexx),col=unclass(indexx), main="CIG vs LUNG Scatter Plot") legend(15,40, c(expression(CIG<23.77), expression(CIG>23.77)),
col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(CIG~KID, pch=unclass(indexx),col=unclass(indexx),, main="CIG vs KID Scatter Plot") legend(2.0, 40, c(expression(CIG<23.77), expression(CIG>23.77)),
col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(CIG~ LEUK, pch=unclass(indexx),col=unclass(indexx),, main="CIG vs LEUK Scatter Plot") legend(5.5,40, c(expression(CIG<23.77), expression(CIG>23.77)),
col=1:2, pch=1:2, bty="n", cex=0.8) factor(AREA) -> index2
par(mfrow=c(2,2)) plot(CIG~BLAD, pch=unclass(index2),col=unclass(index2), main="CIG vs BLAD Scatter Plot") legend(3.5,40, c(expression(AREA==1), expression(AREA==2), expression(AREA==3),
expression(AREA==4)),
col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(CIG~LUNG, pch=unclass(index2),col=unclass(index2), main="CIG vs LUNG Scatter Plot") legend(15,40, c(expression(AREA==1), expression(AREA==2), expression(AREA==3),
expression(AREA==4)),
col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(CIG~KID, pch=unclass(index2),col=unclass(index2),, main="CIG vs KID Scatter Plot") legend(2.0, 40, c(expression(AREA==1), expression(AREA==2), expression(AREA==3), expression(AREA==4)),
col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(CIG~ LEUK, pch=unclass(index2),col=unclass(index2),, main="CIG vs LEUK Scatter Plot") legend(5.3,40, c(expression(AREA==1), expression(AREA==2), expression(AREA==3),
expression(AREA==4)),
col=1:4, pch=1:4, bty="n", cex=0.8) factor(WEST) -> index3
par(mfrow=c(2,2)) plot(CIG~BLAD, pch=unclass(index3),col=unclass(index3), main="CIG vs BLAD Scatter Plot") legend(3.5,40, c(expression(WEST==0), expression(WEST==1)),
col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(CIG~LUNG, pch=unclass(index3),col=unclass(index3), main="CIG vs LUNG Scatter Plot") legend(15,40, c(expression(WEST==0), expression(WEST==1)),
col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(CIG~KID, pch=unclass(index3),col=unclass(index3),, main="CIG vs KID Scatter Plot")
col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(CIG~ LEUK, pch=unclass(index3),col=unclass(index3),, main="CIG vs LEUK Scatter Plot") legend(5.5,40, c(expression(WEST==0), expression(WEST==1)),
col=1:2, pch=1:2, bty="n", cex=0.8)
##CIG>23.77 Plot of Dummy Variable
hsmoke <- read.table("E:/reg/hsmoking.txt", header=TRUE) attach(hsmoke)
str(hsmoke)
par(mfrow=c(2,2)) plot(HCIG~HBLAD, pch=unclass(index2),col=unclass(index2), main="HCIG vs BLAD Scatter Plot")
legend(3.5,40, c(expression(AREA==1), expression(AREA==2), expression(AREA==3), expression(AREA==4)),
col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(HCIG~HLUNG, pch=unclass(index2),col=unclass(index2), main="HCIG vs LUNG Scatter Plot")
legend(15,40, c(expression(AREA==1), expression(AREA==2), expression(AREA==3), expression(AREA==4)),
col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(HCIG~HKID, pch=unclass(index2),col=unclass(index2),, main="HCIG vs KID Scatter Plot") legend(2.5, 40, c(expression(AREA==1), expression(AREA==2), expression(AREA==3),
expression(AREA==4)),
col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(HCIG~ HLEUK, pch=unclass(index2),col=unclass(index2),, main="HCIG vs LEUK Scatter Plot")
legend(5.3,40, c(expression(AREA==1), expression(AREA==2), expression(AREA==3), expression(AREA==4)),
col=1:4, pch=1:4, bty="n", cex=0.8)
par(mfrow=c(2,2)) plot(HCIG~HBLAD, pch=unclass(index3),col=unclass(index3), main="HCIG vs BLAD Scatter Plot")
legend(3.5,40, c(expression(WEST==0), expression(WEST==1)), col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(HCIG~HLUNG, pch=unclass(index3),col=unclass(index3), main="HCIG vs LUNG Scatter Plot")
legend(15,40, c(expression(WEST==0), expression(WEST==1)), col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(HCIG~HKID, pch=unclass(index3),col=unclass(index3),, main="HCIG vs KID Scatter Plot")
plot(HCIG~ HLEUK, pch=unclass(index3),col=unclass(index3),, main="HCIG vs LEUK Scatter Plot")
legend(5.5,40, c(expression(WEST==0), expression(WEST==1)), col=1:2, pch=1:2, bty="n", cex=0.8)
##CIG<23.77 Plot of Dummy Variable
lsmoke <- read.table("E:/reg/lsmoking.txt", header=TRUE) attach(lsmoke)
str(lsmoke)
par(mfrow=c(2,2)) plot(LCIG~LBLAD, pch=unclass(index2),col=unclass(index2), main="LCIG vs BLAD Scatter Plot")
legend(4.0,18, c(expression(AREA==1), expression(AREA==2), expression(AREA==3), expression(AREA==4)),col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(LCIG~LLUNG, pch=unclass(index2),col=unclass(index2), main="LCIG vs LUNG Scatter Plot")
legend(20,18, c(expression(AREA==1), expression(AREA==2), expression(AREA==3), expression(AREA==4)),col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(LCIG~LKID, pch=unclass(index2),col=unclass(index2),, main="LCIG vs KID Scatter Plot") legend(3.0, 18, c(expression(AREA==1), expression(AREA==2), expression(AREA==3),
expression(AREA==4)),col=1:4, pch=1:4, bty="n",cex = 0.8)
plot(LCIG~ LLEUK, pch=unclass(index2),col=unclass(index2),, main="LCIG vs LEUK Scatter Plot")
legend(7.5,18, c(expression(AREA==1), expression(AREA==2), expression(AREA==3), expression(AREA==4)), col=1:4, pch=1:4, bty="n", cex=0.8)
par(mfrow=c(2,2)) plot(LCIG~LBLAD, pch=unclass(index3),col=unclass(index3), main="LCIG vs BLAD Scatter Plot")
legend(4.0,18, c(expression(WEST==0), expression(WEST==1)), col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(LCIG~LLUNG, pch=unclass(index3),col=unclass(index3), main="LCIG vs LUNG Scatter Plot")
legend(20,18, c(expression(WEST==0), expression(WEST==1)), col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(LCIG~LKID, pch=unclass(index3),col=unclass(index3),, main="LCIG vs KID Scatter Plot") legend(3.0, 18, c(expression(WEST==0), expression(WEST==1)), col=1:2, pch=1:2, bty="n",cex = 0.8)
plot(LCIG~ LLEUK, pch=unclass(index3),col=unclass(index3),, main="LCIG vs LEUK Scatter Plot")
legend(7.5,18, c(expression(WEST==0), expression(WEST==1)), col=1:2, pch=1:2, bty="n",
## model for Dummy Variable CIG>23.77 and CIG<23.77
mt <- pod(CIG~BLAD+LUNG+KID, data=smoke, group=IndCIG, mean.function="parallel") lm(CIG~BLAD+LUNG+KID) -> fitt
summary(fitt) anova(fitt)
pod.lm(fitt, group=IndCIG, mean.function="parallel") -> fitt3 factor(IndCIG) -> indexx
plot(fitt3,pch=c("0", "1"),colors=c(2:3))
plot.pod(fitt3, colors=rainbow(nlevels(fitt3$group)), pch=1:nlevels(fitt3$group),key=TRUE,identify=TRUE,
xlab="Linear Predictor", ylab=as.character(c(formula(fitt3)[[2]]))) lm(CIG~BLAD+LUNG+KID+factor(IndCIG)) -> fitt22
plot(fitt22$residuals~fitt22$fitted, xlab="Fitted Value", ylab="Residual", pch=16, main="Fitted Value vs Residual Plot")
abline(h=0, col=2)
mt1 <- pod(CIG~BLAD+LUNG+KID, data=smoke, group=AREA, mean.function="parallel") pod.lm(fitt, group=AREA, mean.function="parallel") -> fitt4
factor(AREA) -> index2
plot(fitt4,pch=c("1","2","3","4"),colors=c(2:5))
plot.parallel(fitt4, colors=rainbow(nlevels(fitt4$group)), pch=1:nlevels(fitt4$group),key=TRUE,identify=TRUE,
xlab="Linear Predictor", ylab=as.character(c(formula(fitt4)[[2]]))) lm(CIG~BLAD+LUNG+KID+factor(AREA)) -> fitt33
PRESS(fitt33) summary(fitt33) anova(fitt33)
plot.it = TRUE) qqline(fitt33$res, col=4)
## model for Dummy Variable WEST 1 0
mt2 <- pod(CIG~BLAD+LUNG+KID, data=smoke, group=WEST, mean.function="parallel") pod.lm(fitt, group=WEST, mean.function="parallel") -> fitt5
plot(fitt5,pch=c("0", "1"),colors=c(2:3))
plot.pod(fitt5, colors=rainbow(nlevels(fitt5$group)),
pch=1:nlevels(fitt5$group),key=TRUE,identify=FALSE,
xlab="Linear Predictor", ylab=as.character(c(formula(fitt5)[[2]]))) lm(CIG~BLAD+LUNG+KID+factor(WEST)) -> fitt44
##Plot about the step regression
sel <- read.table("E:/reg/selection1.txt", header=TRUE) attach(sel)
str(sel)
plot(C.p.~P, xlab=expression(p), ylab=expression(C[p]), pch=16) text(4.1, 13, expression(X[1]),col=4,cex=0.8)
text(4.2, 13, expression(X[2]),col=4,cex=0.8) text(5.1, 9.8, expression(X[1]),col=4,cex=0.8) text(5.2, 9.8, expression(X[2]),col=4,cex=0.8) text(5.3, 9.8, expression(X[3]),col=4,cex=0.8) text(5.4, 9.8, expression(X[1]^2),col=4,cex=0.8)
mtext(expression(R[p]^2), side=2, at=.5, line=2.5) mtext(expression(p), side=1, at=4.5, line=2.5) text(4.1, 0.65, expression(X[1]),col=4,cex=0.8) text(4.2, 0.65, expression(X[2]),col=4,cex=0.8) text(4.3, 0.65, expression(X[3]),col=4,cex=0.8)
text(5.1, 0.71, expression(X[1]),col=4,cex=0.8) plot(C.p.~P, xlab=expression(p), ylab=expression(C[p]), pch=16) text(4.1, 12.2, expression(X[1]),col=4,cex=0.8)
plot(MSE~P, xlab=expression(p), ylab=expression(MS[Res](p)), pch=16) text(4.1, 0.00018, expression(X[1]),col=4,cex=0.8) text(5.4, 0.00018, expression(X[1]^2),col=4,cex=0.8) text(4.6, 0.000175, expression(X[2]),col=4,cex=0.8) text(4.7, 0.000175, expression(X[3]),col=4,cex=0.8) text(4.8, 0.000175, expression(X[2]^2),col=4,cex=0.8) text(4.9, 0.000175, expression(X[3]^2),col=4,cex=0.8) plot(R.Square~P, ann=FALSE, pch=16)
mtext(expression(R[p]^2), side=2, at=.5, line=2.5)
text(4.2, 0.68, expression(X[2]),col=4,cex=0.8) plot(C.p.~P, xlab=expression(p), ylab=expression(C[p]), pch=16)
text(2.1, 20, expression(X[2]),col=4,cex=0.8) text(3.2, 16, expression(X[2]),col=4,cex=0.8) text(3.1, 17, expression(X[1]^2),col=4,cex=0.8) text(3.2, 17, expression(X[2]),col=4,cex=0.8)
text(3.1, 20.5, expression(X[1]),col=4,cex=0.8) text(3.2, 20.6, expression(X[3]),col=4,cex=0.8)
text(3.1, 21.5, expression(X[1]^2),col=4,cex=0.8) text(3.2, 21.6, expression(X[3]),col=4,cex=0.8) text(3.1, 36, expression(X[1]),col=4,cex=0.8)
text(3.2, 36.1, expression(X[1]^2),col=4,cex=0.8) text(4.1, 2, expression(X[1]),col=4,cex=0.8) text(4.2, 2, expression(X[2]),col=4,cex=0.8) text(4.3, 2, expression(X[3]),col=4,cex=0.8)
text(4.1, 3.45, expression(X[2]),col=4,cex=0.8) text(4.2, 3.45, expression(X[3]),col=4,cex=0.8)
text(4.3, 3.45, expression(X[1]^2),col=4,cex=0.8) text(4.1, 17, expression(X[1]),col=4,cex=0.8) text(4.2, 17, expression(X[2]),col=4,cex=0.8)
text(4.1, 23, expression(X[1]),col=4,cex=0.8)
plot(MSE~P, xlab=expression(p), ylab=expression(MS[Res](p)), pch=16) text(2.1, 9.2, expression(X[2]),col=4,cex=0.8)
text(2.1, 12, expression(X[1]),col=4,cex=0.8) text(2.1, 12.3, expression(X[1]^2),col=4,cex=0.8) text(2.1, 12.6, expression(X[3]),col=4,cex=0.8) text(3.1, 6.3, expression(X[2]),col=4,cex=0.8) text(3.2, 6.3, expression(X[3]),col=4,cex=0.8) text(3.1, 8.8, expression(X[1]),col=4,cex=0.8) text(3.2, 8.8, expression(X[2]),col=4,cex=0.8) text(3.1, 9.0, expression(X[1]^2),col=4,cex=0.8) text(3.2, 9.0, expression(X[2]),col=4,cex=0.8) text(3.1, 9.5, expression(X[1]),col=4,cex=0.8) text(3.2, 9.5, expression(X[3]),col=4,cex=0.8) text(3.1, 9.8, expression(X[1]^2),col=4,cex=0.8) text(3.2, 9.8, expression(X[3]),col=4,cex=0.8) text(3.1, 12.1, expression(X[1]),col=4,cex=0.8) text(3.2, 12.1, expression(X[1]^2),col=4,cex=0.8) text(4.1, 6.4, expression(X[1]),col=4,cex=0.8) text(4.2, 6.4, expression(X[2]),col=4,cex=0.8) text(4.3, 6.4, expression(X[3]),col=4,cex=0.8) text(4.1, 6.6, expression(X[2]),col=4,cex=0.8) text(4.2, 6.6, expression(X[3]),col=4,cex=0.8) text(4.3, 6.6, expression(X[1]^2),col=4,cex=0.8) text(4.1, 8.7, expression(X[1]),col=4,cex=0.8) text(4.2, 8.7, expression(X[2]),col=4,cex=0.8) text(4.3, 8.7, expression(X[1]^2),col=4,cex=0.8) text(4.1, 9.7, expression(X[1]),col=4,cex=0.8) text(4.2, 9.7, expression(X[3]),col=4,cex=0.8) text(4.3, 9.7, expression(X[1]^2),col=4,cex=0.8) text(4.6, 6.5, expression(X[1]),col=4,cex=0.8) text(4.7, 6.5, expression(X[2]),col=4,cex=0.8)
plot(R.Square~P,ann=FALSE, pch=16)
mtext(expression(R[p]^2), side=2, at=.5, line=2.5) mtext(expression(p), side=1, at=3.5, line=2.5) text(4.9, 0.68, expression(X[1]^2),col=4,cex=0.8)