# # Example 18.8 # # Statistical Methods in Biology: Design and Analysis of Experiments and Regression # by S.J. Welham, S.A. Gezan, S.J. Clark & A. Mead (2014) # Chapman & Hall/CRC Press, Boca Raton, Florida. ISBN: 978-1-4398-0878-8 # Data from J Clarkson, University of Warwick # Version 1, 22/08/2015 # Set working directory - use setwd() function or from Session menu in RStudio # e.g. setwd("d:/stats4biol/data) # Set up packages to be used later - available from CRAN library(ggplot2) # for plotting library(lsmeans) # for getting predictions with averaging # Read data & assign factors bca <- read.table("bca.dat",sep="",header=T, na.strings="*") bca$Rep <- as.factor(bca$Rep) bca$Plot <- as.factor(bca$Plot) bca$Variety <- as.factor(bca$Variety) bca$Fungicide <- as.factor(bca$Fungicide) bca$BCA <- as.factor(bca$BCA) summary(bca) # Plot data qplot(data=bca, y=Emerged, x=Variety, facets=Fungicide~BCA, colour=Rep) qplot(data=bca, y=Disease/Emerged, x=Variety, facets=Fungicide~BCA, colour=Rep) # # Emergence data: as total unknown but large, treat as counts # # Fit preliminary model emerge.glm <- glm(Emerged~Rep+Variety*Fungicide*BCA, family=poisson(), data=bca) # Check for over-dispersion ResDev <- summary(emerge.glm)$deviance; ResDev ResDF <- summary(emerge.glm)$df.residual; ResDF p <- 1-pchisq(ResDev, df=ResDF); p # Re-fit with estimated dispersion parameter emerge.glm <- glm(Emerged~Rep+Variety*Fungicide*BCA, family=quasipoisson(), data=bca) # Check residual plots plot(emerge.glm, ask=FALSE) # Get marginal F-tests drop1(emerge.glm, test="F") # Drop 3-way interaction emerge.glm.2 <- glm(Emerged~Rep+Variety*Fungicide*BCA-Variety:Fungicide:BCA, family=quasipoisson(), data=bca) drop1(emerge.glm.2, test="F") # Drop Fungicide:BCA emerge.glm.3 <- glm(Emerged~Rep+Variety*Fungicide+BCA+BCA:Variety, family=quasipoisson(), data=bca) drop1(emerge.glm.3, test="F") # Drop Variety:BCA emerge.glm.4 <- glm(Emerged~Rep+Variety*Fungicide+BCA, family=quasipoisson(), data=bca) drop1(emerge.glm.4, test="F") # Drop Variety:Fungicide emerge.glm.5 <- glm(Emerged~Rep+Variety+Fungicide+BCA, family=quasipoisson(), data=bca) drop1(emerge.glm.5, test="F") # Predictive model has 3 main effects - examine coefficients summary(emerge.glm.5) # Get predictions on linear prediction scale emerge.glm.5.pv <- lsmeans(emerge.glm.5, ~Variety+Fungicide+BCA) emerge.glm.5.pv.df <- summary(emerge.glm.5.pv) # Back-transformed predictions emerge.glm.5.pv.df$back <- exp(emerge.glm.5.pv.df$lsmean) emerge.glm.5.pv.df # # Disease incidence # # Fit preliminary model disease.glm <- glm(cbind(Disease,Emerged-Disease)~Rep+Variety*Fungicide*BCA, family=binomial(), data=bca) # Check for over-dispersion ResDev <- summary(disease.glm)$deviance; ResDev ResDF <- summary(disease.glm)$df.residual; ResDF p <- 1-pchisq(ResDev, df=ResDF); p # Re-fit with estimated dispersion parameter disease.glm <- glm(cbind(Disease,Emerged-Disease)~Rep+Variety*Fungicide*BCA, family=quasibinomial(), data=bca) # Check residual plots plot(disease.glm, ask=FALSE) # Get marginal F-tests drop1(disease.glm, test="F") # Drop 3-way interaction disease.glm.2 <- glm(cbind(Disease,Emerged-Disease)~Rep+Variety*Fungicide*BCA-Variety:Fungicide:BCA, family=quasibinomial(), data=bca) drop1(disease.glm.2, test="F") # Drop Variety:BCA disease.glm.3 <- glm(cbind(Disease,Emerged-Disease)~Rep+Variety*Fungicide+BCA+BCA:Fungicide, family=quasibinomial(), data=bca) drop1(disease.glm.3, test="F") # Drop Variety:Fungicide disease.glm.4 <- glm(cbind(Disease,Emerged-Disease)~Rep+Variety+Fungicide+BCA+BCA:Fungicide, family=quasibinomial(), data=bca) drop1(disease.glm.4, test="F") # Predictive model has 3 main effects + BCA:Fungicide interaction - examine coefficients summary(disease.glm.4) # Get predictions on linear prediction scale disease.glm.4.pv <- lsmeans(disease.glm.4, ~Variety+Fungicide*BCA) disease.glm.4.pv.df <- summary(disease.glm.4.pv) # Back-transformed predictions disease.glm.4.pv.df$back <- exp(disease.glm.4.pv.df$lsmean)/(1+exp(disease.glm.4.pv.df$lsmean)) disease.glm.4.pv.df # Could estimate number of healthy plants by multiplying proportion healthy * emergence disease.glm.4.pv.df$healthy <- (1-disease.glm.4.pv.df$back) * emerge.glm.5.pv.df$back disease.glm.4.pv.df # Or we could model number of healthy plants directly bca$Healthy <- bca$healthyd - bca$Disease # Fit preliminary model healthy.glm <- glm(Healthy~Rep+Variety*Fungicide*BCA, family=poisson(), data=bca) # Check for over-dispersion ResDev <- summary(healthy.glm)$deviance; ResDev ResDF <- summary(healthy.glm)$df.residual; ResDF p <- 1-pchisq(ResDev, df=ResDF); p # Re-fit with estimated dispersion parameter healthy.glm <- glm(Healthy~Rep+Variety*Fungicide*BCA, family=quasipoisson(), data=bca) # Check residual plots plot(healthy.glm, ask=FALSE) # Get marginal F-tests drop1(healthy.glm, test="F") # Drop 3-way interaction healthy.glm.2 <- glm(Healthy~Rep+Variety*Fungicide*BCA-Variety:Fungicide:BCA, family=quasipoisson(), data=bca) drop1(healthy.glm.2, test="F") # Drop Variety:BCA healthy.glm.3 <- glm(Healthy~Rep+Variety*Fungicide+BCA+BCA:Fungicide, family=quasipoisson(), data=bca) drop1(healthy.glm.3, test="F") # Drop Variety:Fungicide healthy.glm.4 <- glm(Healthy~Rep+Variety+Fungicide+BCA+BCA:Fungicide, family=quasipoisson(), data=bca) drop1(healthy.glm.4, test="F") # Predictive model has 3 main effects plus BCA:Fungicide - examine coefficients summary(healthy.glm.4) # Get predictions on linear prediction scale healthy.glm.4.pv <- lsmeans(healthy.glm.4, ~Variety+Fungicide*BCA) healthy.glm.4.pv.df <- summary(healthy.glm.4.pv) # Back-transformed predictions healthy.glm.4.pv.df$back <- exp(healthy.glm.4.pv.df$lsmean) healthy.glm.4.pv.df # End of file