# # Exercise 17.2 # # Statistical Methods in Biology: Design and Analysis of Experiments and Regression # by S.J. Welham, S.A. Gezan, S.J. Clark & A. Mead (2014) # Chapman & Hall/CRC Press, Boca Raton, Florida. ISBN: 978-1-4398-0878-8 # Data from V. Buchanan-Wollaston (PRESTA), University of Warwick # Version 1, 11/10/2015 # Set working directory - use setwd() function or from Session menu in RStudio # e.g. setwd("d:/stats4biol/data) # Set up packages to be used later - available from CRAN library(ggplot2) # for plotting # Read data & create factor senescence <- read.table("senescence.dat",sep="",header=T) senescence$fDay <- as.factor(senescence$Day) summary(senescence) # Plot CATMA2A31585 against Day qplot(y=CATMA2A31585, x=Day, data=senescence) # Plot CATMA1A09000 against Day qplot(y=CATMA1A09000, x=Day, data=senescence) # # Part a: use polynomial models to predict trends # # CATMA2A31585 - shape suggests order 3/4 required, start with cubic # exclude unit 32 senescence$CATMA2Aex32 <- senescence$CATMA2A31585 senescence$CATMA2Aex32[32] <- NA # Cubic model C2A.cubic <- lm(CATMA2Aex32~Day + I(Day^2) + I(Day^3) , data=senescence) summary(C2A.cubic) anova(C2A.cubic) plot(C2A.cubic, ask=FALSE) # Get fitted values on finer grid & plot C2A.pred.cubic <- predict(C2A.cubic, new=data.frame(Day=c(19:39)), interval="confidence") C2A.pred.cubic.df <- data.frame(C2A.pred.cubic) C2A.pred.cubic.df$newDay <- c(19:39) # plot data with fitted cubic model plot.obs <- ggplot(data=senescence, aes(y=CATMA2A31585, x=Day) ) plot.obs + geom_point() + geom_line(data=C2A.pred.cubic.df, aes(y=fit, x=newDay)) # Check plot of residuals vs Day qplot(data=C2A.cubic, y=rstandard(C2A.cubic), x=Day) # Quartic model - no need for additional term C2A.qtic <- lm(CATMA2Aex32~Day + I(Day^2) + I(Day^3) + I(Day^4), data=senescence) summary(C2A.qtic) anova(C2A.qtic) # Formal test of lack of fit for cubic model C2A.lof <- lm(CATMA2Aex32~Day + I(Day^2) + I(Day^3) + fDay, data=senescence) anova(C2A.lof) # Add group means to cubic model plot plot.obs <- ggplot(data=senescence, aes(y=CATMA2A31585, x=Day) ) plot.obs + geom_point() + geom_line(data=C2A.pred.cubic.df, aes(y=fit, x=newDay)) + geom_point(data=C2A.lof, aes(y=fitted(C2A.lof), x=Day), colour="darkorange") # Get coefficients from final model summary(C2A.cubic)$coefficients # CATMA1A09000 - again shape suggests order 3/4 required, start with cubic # Cubic model C1A.cubic <- lm(CATMA1A09000~Day + I(Day^2) + I(Day^3) , data=senescence) summary(C1A.cubic) anova(C1A.cubic) plot(C1A.cubic, ask=FALSE) # Get fitted values on finer grid & plot C1A.pred.cubic <- predict(C1A.cubic, new=data.frame(Day=c(19:39)), interval="confidence") C1A.pred.cubic.df <- data.frame(C1A.pred.cubic) C1A.pred.cubic.df$newDay <- c(19:39) # plot data with fitted cubic model plot.obs <- ggplot(data=senescence, aes(y=CATMA1A09000, x=Day) ) plot.obs + geom_point() + geom_line(data=C1A.pred.cubic.df, aes(y=fit, x=newDay)) # Check plot of residuals vs Day qplot(data=C1A.cubic, y=rstandard(C1A.cubic), x=Day) # Quartic model - no need for additional term C1A.qtic <- lm(CATMA1A09000~Day + I(Day^2) + I(Day^3) + I(Day^4), data=senescence) summary(C1A.qtic) anova(C1A.qtic) # Formal test of lack of fit for cubic model C1A.lof <- lm(CATMA1A09000~Day + I(Day^2) + I(Day^3) + fDay, data=senescence) anova(C1A.lof) # Add group means to plot of cubic fit plot.obs <- ggplot(data=senescence, aes(y=CATMA1A09000, x=Day) ) plot.obs + geom_point() + geom_line(data=C1A.pred.cubic.df, aes(y=fit, x=newDay)) + geom_point(data=C1A.lof, aes(y=fitted(C1A.lof), x=Day), colour="darkorange") # Get coefficients from final model summary(C1A.cubic)$coefficients # # Part b: try non-linear models # # CATMA2A31585 - no model with this shape available! # CATMA1A09000 - try logistic and Gompertz models # Cubic model C1A.cubic <- lm(CATMA1A09000~Day + I(Day^2) + I(Day^3) , data=senescence) summary(C1A.cubic) anova(C1A.cubic) plot(C1A.cubic, ask=FALSE) # Fit 4-parameter logistic model of form a + (b-a)/(1+exp((d-x)/c) C1A.lgt <- nls(CATMA1A09000~SSfpl(Day,a,b,c,d), data=senescence) summary(C1A.lgt) C1A.lgt.coef <- data.frame(summary(C1A.lgt)$coefficients) C1A.lgt.coef # get fitted curve on fine scale grid - CIs not available C1A.lgt.pred <- predict(C1A.lgt, new=data.frame(Day=c(19:39))) C1A.lgt.pred.df <- data.frame(C1A.lgt.pred) C1A.lgt.pred.df$newDay <- c(19:39) C1A.lgt.pred.df # plot data with fitted cubic model & logistic model plot.obs <- ggplot(data=senescence, aes(y=CATMA1A09000, x=Day) ) plot.obs + geom_point() + geom_line(data=C1A.pred.cubic.df, aes(y=fit, x=newDay), colour="red") + geom_line(data=C1A.lgt.pred.df, aes(y=C1A.lgt.pred, x=newDay), colour="blue") + geom_point(data=C1A.lof, aes(y=fitted(C1A.lof), x=Day), colour="darkorange") + ggtitle("Predictions with data for cubic poly (red) and logistic (blue) models") # Fit Gompertz model of form alpha+beta*exp(-exp(-gamma*(Day-delta))) # No pre-initialised version of 4-parameter Gompertz available # So it is necessary to give starting values # Lower asymptote = alpha ~ 9 => alpha = 9 # Upper asymptote = alpha+beta ~ 11 => beta = 2 # Point at which direction of curvature switches ~ 25 => delta = 25 # Curve should go through point (C1A=10,Day=27) => gamma = 0.18 C1A.gpz <- nls(CATMA1A09000~alpha+beta*exp(-exp(-gamma*(Day-delta))), start=list(alpha=9, beta=2, gamma=0.18, delta=25),data=senescence) summary(C1A.gpz) C1A.gpz.coef <- data.frame(summary(C1A.gpz)$coefficients) C1A.gpz.coef # get fitted curve on fine scale grid - CIs not available C1A.gpz.pred <- predict(C1A.gpz, new=data.frame(Day=c(19:39))) C1A.gpz.pred.df <- data.frame(C1A.gpz.pred) C1A.gpz.pred.df$newDay <- c(19:39) C1A.gpz.pred.df # plot data with fitted cubic model plot.obs <- ggplot(data=senescence, aes(y=CATMA1A09000, x=Day) ) plot.obs + geom_point() + geom_line(data=C1A.pred.cubic.df, aes(y=fit, x=newDay), colour="red") + geom_line(data=C1A.lgt.pred.df, aes(y=C1A.lgt.pred, x=newDay), colour="blue") + geom_line(data=C1A.gpz.pred.df, aes(y=C1A.gpz.pred, x=newDay), colour="green") + geom_point(data=C1A.lof, aes(y=fitted(C1A.lof), x=Day), colour="darkorange") + ggtitle("Cubic polynomial (red), logistic (blue) & Gompertz (green) models") # Compare sqrt(ResMS) from the three models summary(C1A.gpz)$sigma summary(C1A.lgt)$sigma summary(C1A.cubic)$sigma # End of file