###-*- R -*- ### Examples from: "An Introduction to Statistical Modelling" ### By Annette Dobson ## Plant Weight Data (Page 9) ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14) trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69) group <- gl(2,10,20,labels=c("Ctl","Trt")) weight <- c(ctl,trt) anova(lm(weight~group)) summary(lm(weight~group-1)) ## Birth Weight Data (Page 14) tmp <- matrix(c( 40,2968,38,2795,40,3163,35,2925,36,2625,37,2847, 41,3292,40,3473,37,2628,38,3176,40,3421,38,2975, 40,3317,36,2729,40,2935,38,2754,42,3210,39,2817, 40,3126,37,2539,36,2412,38,2991,39,2875,40,3231 ),nc=2,byrow=T) age <- tmp[,1] birthw <- tmp[,2] sex <- gl(2,12,24,labels=c("M","F")) plot(age,birthw,col=as.integer(sex)) summary(lm(birthw ~ sex + age), cor=T) summary(lm(birthw ~ sex + age -1), cor=T) summary(lm(birthw ~ sex + sex:age -1), cor=T) summary(glm(birthw ~ sex + age, family=gaussian())) summary(z <- glm(birthw ~ sex + age - 1, family=gaussian())) ## Poisson Regression Data (Page 42) x <- c(-1,-1,0,0,0,0,1,1,1) y <- c(2,3,6,7,8,9,10,12,15) summary(glm(y~x,family=poisson(link="identity"))) ## Calorie Data (Page 45) tmp <- matrix(c( 33,33,100,14, 40,47, 92,15, 37,49,135,18, 27,35,144,12, 30,46,140,15, 43,52,101,15, 34,62, 95,14, 48,23,101,17, 30,32, 98,15, 38,42,105,14, 50,31,108,17, 51,61, 85,19, 30,63,130,19, 36,40,127,20, 41,50,109,15, 42,64,107,16, 46,56,117,18, 24,61,100,13, 35,48,118,18, 37,28,102,14),nc=4,byrow=T) carb <- tmp[,1] age <- tmp[,2] wgt <- tmp[,3] prot <- tmp[,4] summary(lm(carb~age+wgt+prot)) ## Extended Plant Data (Page 59) ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14) trtA <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69) trtB <- c(6.31,5.12,5.54,5.50,5.37,5.29,4.92,6.15,5.80,5.26) group <- gl(3,10,30,labels=c("Ctl","A","B")) weight <- c(ctl,trtA,trtB) anova(lm(weight~group)) summary(lm(weight~group)) ## Fictitious Anova Data (Page 64) y <- c(6.8,6.6,5.3,6.1,7.5,7.4,7.2,6.5,7.8,9.1,8.8,9.1) a <- gl(3,4,12) b <- gl(2,2,12) anova(z <- lm(y~a*b)) ## Achievement Scores (Page 70) y <- c(6,4,5,3,4,3,6,8,9,7,9,8,5,7,6,7,7,7,8,5,7) x <- c(3,1,3,1,2,1,4,4,5,5,4,3,1,2,3,2,2,3,4,1,4) m <- gl(3,7,21) print(anova(z <- lm(y~x+m))) ## Beetle Data (Page 78) tmp <- matrix(c( 1.6907,59, 6, 1.7242,60,13, 1.7552,62,18, 1.7842,56,28, 1.8113,63,52, 1.8369,59,53, 1.8610,62,61, 1.8839,60,60 ),nc=3,byrow=T) dose <- tmp[,1] n <- tmp[,2] x <- tmp[,3] dead <- cbind(x, n-x) summary(z <- glm(dead~dose,family=binomial(link=logit))) summary(z <- glm(dead~dose,family=binomial(link=probit))) summary(z <- glm(dead~dose,family=binomial(link=cloglog))) ## Anther Data (Page 84) ## Note that the proportions below are not exactly ## in accord with the sample sizes quoted below. ## In particular, the value 0.555 does not seem sensible. n <- c(102,99,108,76,81,90) p <- c(0.539,0.525,0.528,0.724,0.617,0.555) # x <- round(n*p) x <- n*p y <- cbind(x,n-x) f <- rep(c(40,150,350),2) g <- gl(2,3,6) summary(glm(y~g*f,family=binomial(link="logit"))) summary(glm(y~g+f,family=binomial(link="logit"))) summary(glm(y~f,family=binomial(link="logit"))) ## Tumour Data (Page 92) counts <- c(22,2,10,16,54,115,19,33,73,11,17,28) type <- gl(4,3,12,labels=c("freckle","superficial","nodular","indeterminate")) site <- gl(3,1,12,labels=c("head/neck","trunk","extremities")) data.frame(counts,type,site) summary(z <- glm(counts ~ type + site,family=poisson())) ## Randomized Controlled Trial (Page 93) counts <- c(18,17,15,20,10,20,25,13,12) outcome <- gl(3,1,9) treatment <- gl(3,3,9) summary(z <- glm(counts ~ outcome + treatment,family=poisson())) ## Peptic Ulcers and Blood Groups counts <- c(579,4219,911,4578,246,3775,361,4532,291,5261,396,6598) group <- gl(2,1,12,labels=c("cases","controls")) blood <- gl(2,2,12,labels=c("A","O")) city <- gl(3,4,12,labels=c("London","Manchester","Newcastle")) cbind(codes(group),codes(blood),codes(city),counts) summary(z1 <- glm(counts ~ group*city+blood+group:blood, family=poisson())) summary(z2 <- glm(counts ~ group*city+blood, family=poisson())) anova(z2,z1)