# # Exercise 11.4 # # Statistical Methods in Biology: Design and Analysis of Experiments and Regression # by S.J. Welham, S.A. Gezan, S.J. Clark & A. Mead (2014) # Chapman & Hall/CRC Press, Boca Raton, Florida. ISBN: 978-1-4398-0878-8 # Data from Cochran & Cox (1957, Table 4.1) # Version 1, 31/08/2014 # Set working directory - use setwd() function or from Session menu in RStudio # e.g. setwd("d:/stats4biol/data) # Set up packages to be used later - available from CRAN library(lsmeans) # for calculating predicted means library(ggplot2) # for plotting # Read data & assign factors scab <- read.table("scab.dat",sep="",header=T) scab$Row <- as.factor(scab$Row) scab$Col <- as.factor(scab$Col) scab$Treatment <- as.factor(scab$Treatment) scab$Sulphur <- as.factor(scab$Sulphur) scab$Timing <- as.factor(scab$Timing) summary(scab) # take logit transformation scab$logit <- log(scab$Scab/(100-scab$Scab)) # Plot logit(scab) - against suphur application, coloured by timing qplot(y=logit, x=Sulphur, colour=Timing, data=scab) # get control factor scab$Control <- as.factor(scab$Sulphur==0) # tabulate factor allocations table(scab$Control, scab$Sulphur, scab$Timing) # analysis of variance via lm, using nested explanatory structure scab.lm <- lm(logit~Control/(Sulphur*Timing), data=scab) anova(scab.lm) # residual plots plot(scab.lm, ask=FALSE) # get residuals and put into copy of data frame res <- residuals(scab.lm) scab.copy <- scab scab.copy$res <- res # Tile plot of residuals plot.2d <- ggplot(data=scab.copy, aes(y=Row, x=Col, z=res)) plot.2d + geom_tile(aes(fill=res)) # Plot residuals against row and column numbers - pattern across columns qplot(data=scab.copy, y=res, x=as.numeric(Col), colour=Row, geom=c("point","line")) qplot(data=scab.copy, y=res, x=as.numeric(Row), colour=Col, geom=c("point","line")) # add columns as extraneous factor scab.lm.2 <- lm(logit~Col+Control/(Sulphur*Timing), data=scab) anova(scab.lm.2) # residual plots plot(scab.lm.2, ask=FALSE) # get residuals and put into copy of data frame res.2 <- residuals(scab.lm.2) scab.copy$res.2 <- res.2 # Tile plot of residuals plot.2d <- ggplot(data=scab.copy, aes(y=Row, x=Col, z=res.2)) plot.2d + geom_tile(aes(fill=res.2)) # Plot residuals against row and column numbers - pattern across columns qplot(data=scab.copy, y=res.2, x=as.numeric(Col), colour=Row, geom=c("point","line")) qplot(data=scab.copy, y=res.2, x=as.numeric(Row), colour=Col, geom=c("point","line")) # Predictive model is additive model scab.lm.3 <- lm(logit~Col+Control/(Sulphur+Timing), data=scab) # Predictive model includes all explanatory terms scab.means <- lsmeans(scab.lm.3, c("Control","Sulphur","Timing")) scab.means scab.means.df <- summary(scab.means) # Remove missing predictions (combinations not available) scab.means.df <- subset(scab.means.df,!is.na(lsmean)) scab.means.df str(scab.means.df) # Add treatment labels to predictions data frame scab.means.df$lab <- c("300 A","600 A","1200 A","Control","300 S","600 S","1200 S") # Plot predictions with 95% CIs, labelled by treatment plot.means <- ggplot(data=scab.means.df, aes(y=lsmean, x=c(1:7))) plot.means <- plot.means + scale_x_continuous(breaks=c(1:7), labels=scab.means.df$lab) # Plot means with error bars plot.means + geom_point(shape=1, size=4) + geom_errorbar(aes(ymin=lower.CL, ymax=upper.CL, width=0.1)) + ggtitle("Predictions from additive model for treatments with 95% CI") # End of file