# Exercise 6.6 # Statistical Methods in Biology: Design and Analysis of Experiments and Regression # by S.J. Welham, S.A. Gezan, S.J. Clark & A. Mead (2014) # Chapman & Hall/CRC Press, Boca Raton, Florida. ISBN: 978-1-4398-0878-8 # Data from Rothamsted Research (A. Karp) # Version 1, 13/08/2014 # Set working directory - use setwd() function or from Session menu in RStudio # e.g. setwd("d:/stats4biol/data) # load external packages - availabel from CRAN library(lsmeans) # tables of predicted means & contrasts # Read data & assign factors metals <- read.table('metals.dat', sep="", header=TRUE, na.strings=c("NA","*")) metals$LandUse <- as.factor(metals$LandUse) summary(metals) # Check allocation of samples to land use categories table(metals$LandUse) ###### Analysis of Cadmium ###### # Analysis of variance of untransformed data metals.Cd.aov <- aov(Cd ~ LandUse, data=metals) summary(metals.Cd.aov) # Residual plots plot(metals.Cd.aov, ask=FALSE) hist(residuals(metals.Cd.aov)) # Boxplot of distribution of residuals boxplot(residuals(metals.Cd.aov)~LandUse, data=metals) # Check within groups means and variances " t.mean <- aggregate(Cd~LandUse, data=metals, FUN=mean) t.var <- aggregate(Cd~LandUse, data=metals, FUN=var) cbind(t.mean,t.var) # Bartlett test for homogeneity of variances b.test <- bartlett.test(Cd~LandUse, data=metals); b.test # Take log10-transformation metals$logCd <- log10(metals$Cd) # One-way ANOVA of log10-transformed data metals.Cd.aov.2 <- aov(logCd ~ LandUse, data=metals) summary(metals.Cd.aov.2) # Residual plots plot(metals.Cd.aov.2, ask=FALSE) hist(residuals(metals.Cd.aov.2)) boxplot(residuals(metals.Cd.aov.2)~LandUse, data=metals) # Take square-root-transformation metals$sqrtCd <- sqrt(metals$Cd) # One-way ANOVA of sqrt-transformed data metals.Cd.aov.3 <- aov(sqrtCd ~ LandUse, data=metals) summary(metals.Cd.aov.3) # Residual plots plot(metals.Cd.aov.3, ask=FALSE) hist(residuals(metals.Cd.aov.3)) boxplot(residuals(metals.Cd.aov.3)~LandUse, data=metals) # Check within groups means and variances " t.mean <- aggregate(sqrtCd~LandUse, data=metals, FUN=mean) t.var <- aggregate(sqrtCd~LandUse, data=metals, FUN=var) cbind(t.mean,t.var) # Bartlett test for homogeneity of variances b.test <- bartlett.test(sqrtCd~LandUse, data=metals); b.test # Take cube-root-transformation metals$cbrtCd <- (metals$Cd)^(1/3) # One-way ANOVA of Znberoot-transformed data metals.Cd.aov.4 <- aov(cbrtCd ~ LandUse, data=metals) summary(metals.Cd.aov.4) # Residual plots plot(metals.Cd.aov.4, ask=FALSE) hist(residuals(metals.Cd.aov.4)) boxplot(residuals(metals.Cd.aov.4)~LandUse, data=metals) # Check within groups means and variances " t.mean <- aggregate(cbrtCd~LandUse, data=metals, FUN=mean) t.var <- aggregate(cbrtCd~LandUse, data=metals, FUN=var) cbind(t.mean,t.var) # Bartlett test for homogeneity of variances b.test <- bartlett.test(cbrtCd~LandUse, data=metals); b.test # Save residuals from this model for use later " metals$ResidcubeCd <- residuals(metals.Cd.aov.4) # Get table of predicted means with back-transform metals.Cd.lsm <- lsmeans(metals.Cd.aov.4, ~LandUse) metals.Cd.lsm.df <- summary(metals.Cd.lsm) metals.Cd.lsm.df$bt <- (metals.Cd.lsm.df$lsmean)^3 metals.Cd.lsm.df ###### Analysis of Chromium ###### # Analysis of variance of untransformed data metals.Cr.aov <- aov(Cr ~ LandUse, data=metals) summary(metals.Cr.aov) # Residual plots plot(metals.Cr.aov, ask=FALSE) hist(residuals(metals.Cr.aov)) # Boxplot of distribution of residuals boxplot(residuals(metals.Cr.aov)~LandUse, data=metals) # Check within groups means and variances " t.mean <- aggregate(Cr~LandUse, data=metals, FUN=mean) t.var <- aggregate(Cr~LandUse, data=metals, FUN=var) cbind(t.mean,t.var) # Bartlett test for homogeneity of variances b.test <- bartlett.test(Cr~LandUse, data=metals); b.test # Get table of predicted means with back-transform metals.Cr.lsm <- lsmeans(metals.Cr.aov, ~LandUse) metals.Cr.lsm.df <- summary(metals.Cr.lsm) metals.Cr.lsm.df$bt <- (metals.Cr.lsm.df$lsmean)^3 metals.Cr.lsm.df ###### Analysis of Copper ###### # Analysis of variance of untransformed data metals.Cu.aov <- aov(Cu ~ LandUse, data=metals) summary(metals.Cu.aov) # Residual plots plot(metals.Cu.aov, ask=FALSE) hist(residuals(metals.Cu.aov)) # Boxplot of distribution of residuals boxplot(residuals(metals.Cu.aov)~LandUse[!is.na(Cu)], data=metals) # Check within groups means and variances " t.mean <- aggregate(Cu~LandUse, data=metals, FUN=mean) t.var <- aggregate(Cu~LandUse, data=metals, FUN=var) cbind(t.mean,t.var) # Bartlett test for homogeneity of variances b.test <- bartlett.test(Cu~LandUse, data=metals); b.test # Take log10-transformation metals$logCu <- log10(metals$Cu) # One-way ANOVA of log10-transformed data metals.Cu.aov.2 <- aov(logCu ~ LandUse, data=metals) summary(metals.Cu.aov.2) # Residual plots plot(metals.Cu.aov.2, ask=FALSE) hist(residuals(metals.Cu.aov.2)) boxplot(residuals(metals.Cu.aov.2)~LandUse[!is.na(Cu)], data=metals) # Try inverse-transformation metals$invCu <- 1/(metals$Cu) # One-way ANOVA of inverse-transformed data metals.Cu.aov.3 <- aov(invCu ~ LandUse, data=metals) summary(metals.Cu.aov.3) # Residual plots plot(metals.Cu.aov.3, ask=FALSE) hist(residuals(metals.Cu.aov.3)) boxplot(residuals(metals.Cu.aov.3)~LandUse[!is.na(Cu)], data=metals) # Get table of predicted means with back-transform from log10-transform metals.Cu.lsm <- lsmeans(metals.Cu.aov.2, ~LandUse) metals.Cu.lsm.df <- summary(metals.Cu.lsm) metals.Cu.lsm.df$bt <- 10^(metals.Cu.lsm.df$lsmean) metals.Cu.lsm.df ###### Analysis of Zinc ###### # Analysis of variance of untransformed data metals.Zn.aov <- aov(Zn ~ LandUse, data=metals) summary(metals.Zn.aov) # Residual plots plot(metals.Zn.aov, ask=FALSE) hist(residuals(metals.Zn.aov)) # Boxplot of distribution of residuals boxplot(residuals(metals.Zn.aov)~LandUse[!is.na(Zn)], data=metals) # Check within groups means and variances " t.mean <- aggregate(Zn~LandUse, data=metals, FUN=mean) t.var <- aggregate(Zn~LandUse, data=metals, FUN=var) cbind(t.mean,t.var) # Bartlett test for homogeneity of variances b.test <- bartlett.test(Zn~LandUse, data=metals); b.test # Take log10-transformation metals$logZn <- log10(metals$Zn) # One-way ANOVA of log10-transformed data metals.Zn.aov.2 <- aov(logZn ~ LandUse, data=metals) summary(metals.Zn.aov.2) # Residual plots plot(metals.Zn.aov.2, ask=FALSE) hist(residuals(metals.Zn.aov.2)) boxplot(residuals(metals.Zn.aov.2)~LandUse[!is.na(Zn)], data=metals) # Try inverse-transformation metals$invZn <- 1/(metals$Zn) # One-way ANOVA of inverse-transformed data metals.Zn.aov.3 <- aov(invZn ~ LandUse, data=metals) summary(metals.Zn.aov.3) # Residual plots plot(metals.Zn.aov.3, ask=FALSE) hist(residuals(metals.Zn.aov.3)) boxplot(residuals(metals.Zn.aov.3)~LandUse[!is.na(Zn)], data=metals) # Get table of predicted means with back-transform from log10-transform metals.Zn.lsm <- lsmeans(metals.Zn.aov.2, ~LandUse) metals.Zn.lsm.df <- summary(metals.Zn.lsm) metals.Zn.lsm.df$bt <- 10^(metals.Zn.lsm.df$lsmean) metals.Zn.lsm.df #### Investigate spatial corrleation in Cadmium measurements # Plot the co-ordinates of the spatial locations plot(metals$X, metals$Y, main="Co-ordinates of sampling locations") # Form factor versions of X and Y # Use limits to take into account that sample points are not on a perfect grid limits <- seq(from=-1,to=39,by=2)/8 metals$fy <- cut(metals$Y, breaks=limits) metals$fx <- cut(metals$X, breaks=limits) # Map data onto full grid Matresid <- matrix(nrow=length(levels(metals$fx)), ncol=length(levels(metals$fy))) metals$vy <- as.numeric(metals$fy) metals$vx <- as.numeric(metals$fx) for (i in 1:length(metals$vx)) { Matresid[metals$vx[i],metals$vy[i]] <- metals$ResidcubeCd[i] } # Heat map of residuals image(Matresid) # Save columns and plot neighbouring points (within columns) against each other df.col <- as.data.frame(Matresid) df.col.ext <- stack(df.col, select=-V20) df.col.prev <- stack(df.col, select=-V1) plot(y=df.col.ext$values, x=df.col.prev$values, main="Residuals vs column neighbour") # Save rows and plot neighbouring points (within rows) against each other tMatresid <- t(Matresid) df.row <- as.data.frame(tMatresid) df.row.ext <- stack(df.row, select=-V20) df.row.prev <- stack(df.row, select=-V1) plot(y=df.row.ext$values, x=df.row.prev$values, main="Residuals vs row neighbour")