# # Exercise 13.2 # # Statistical Methods in Biology: Design and Analysis of Experiments and Regression # by S.J. Welham, S.A. Gezan, S.J. Clark & A. Mead (2014) # Chapman & Hall/CRC Press, Boca Raton, Florida. ISBN: 978-1-4398-0878-8 # Version 1, 06/12/2015 # Set working directory - use setwd() function or from Session menu in RStudio # e.g. setwd("d:/stats4biol/data) # Set up packages to be used later - available from CRAN library(ggplot2) # for plotting library(msm) # Read data cabbage <- read.table("cabbage.dat",sep="",header=T) summary(cabbage) # Plot NLeaves against Days qplot(y=NLeaves, x=Days, data=cabbage) # Fit SLR model cabbage.slr <- lm(NLeaves~Days, data=cabbage) # Plot fitted model # Set up basic plot using model object plot.obs <- ggplot(data=cabbage.slr, aes(y=NLeaves, x=Days) ) # Plot observations and fitted line plot.obs + geom_point() + geom_line(aes(y=fitted(cabbage.slr))) + ggtitle("Observed data with fitted SLR") # Plot residuals - variance appears to increase with fitted value plot(cabbage.slr, ask=FALSE) # Plot of residuals against explanatory variate qplot(y=rstandard(cabbage.slr), x=Days, data=cabbage.slr) # Take natural log transformation of number of leaves cabbage$logNL <- log(cabbage$NLeaves) # Re-fit SLR model cabbage.slr.2 <- lm(logNL~Days, data=cabbage) # Plot residuals - variance appears to increase with fitted value plot(cabbage.slr.2, ask=FALSE) # Plot of residuals against explanatory variate qplot(y=rstandard(cabbage.slr.2), x=Days, data=cabbage.slr) # Get fitted values and plot with data # Set up basic plot using model object plot.obs <- ggplot(data=cabbage.slr.2, aes(y=logNL, x=Days) ) # Plot observations and fitted line plot.obs + geom_point() + geom_line(aes(y=fitted(cabbage.slr.2))) + ggtitle("Observed data with fitted SLR") # SLR model with lack of fit # Create factor copy of variate Days cabbage$fDays <- as.factor(cabbage$Days) # Fit model with lack of fit cabbage.lof <- lm(logNL~Days+fDays, data=cabbage) # ANOVA table indicates no evidence of lack of fit present anova(cabbage.lof) # Go back to SLR # Get predictions on fine grid newD <- seq(from=0, to=46, by=1) cabbage.slr.pred <- predict(cabbage.slr.2, new=data.frame(Days=newD), interval="confidence", se.fit=TRUE) cabbage.slr.pred$fit cabbage.slr.pred.df <- data.frame(cabbage.slr.pred$fit) cabbage.slr.pred.df cabbage.slr.pred.df$back <- exp(cabbage.slr.pred.df$fit) cabbage.slr.pred.df$blwr <- exp(cabbage.slr.pred.df$lwr) cabbage.slr.pred.df$bupr <- exp(cabbage.slr.pred.df$upr) # Plot data with fitted model and 95% CI against logOP plot.obs <- ggplot(data=cabbage, aes(y=NLeaves, x=Days) ) plot.obs + geom_point() + geom_line(data=cabbage.slr.pred.df, aes(y=back, x=newD)) + geom_ribbon(data=cabbage.slr.pred.df, aes(y=back, x=newD, ymin=blwr, ymax=bupr, alpha=0.5)) + ggtitle("Predictions with data and 95% CI") # Compare with fitted line from SLR for mean numbers cabbage$meanSLRpred <- 7.299 + 0.261*cabbage$Days plot.obs + geom_point() + geom_line(data=cabbage.slr.pred.df, aes(y=back, x=newD)) + geom_line(data=cabbage, aes(y=meanSLRpred, x=Days), colour="Red") + ggtitle("Predictions from SLRs on numbers (black) and mean numbers (red)") # Save estimates and their variance matrix coef <- coef(cabbage.slr.2) vcov <- vcov(cabbage.slr.2) # calculate estimates of growth rate EstRate <- coef[2]*exp(coef[1]+coef[2]*newD) # Use delta method to get estimates of growthrate SE # Eg. get SE of growth rate at Days=0 deltamethod(~x2*exp(x1+0*x2), coef, vcov) # Loop to get SE for Days in range 0-46 SERate <- rep(0, 47) for (i in 1:47) { form <- sprintf("~x2*exp(x1+%f*x2)", i-1) SERate[i] <- deltamethod(as.formula(form), coef, vcov) } # calculate approx 95% CI for growth rate resdf <- df.residual(cabbage.slr.2) Lower <- EstRate - SERate*qt(0.975, resdf) Upper <- EstRate + SERate*qt(0.975, resdf) # Put index, estimated growth rate and approx CI into data frame estrate.df <- data.frame(newD,Lower,EstRate,Upper) estrate.df # End of file - see also Exercise 18.2