#####====================================== SCRIPT Pigment Androuin ================================ #####===================================== Warm up ====== rm(list=ls()) if(!require(vegan)){install.packages("vegan")} if(!require(ade4)){install.packages("ade4")} if(!require(reshape)){install.packages("reshape")} if(!require(plyr)){install.packages("plyr")} if(!require(dplyr)){install.packages("dplyr")} if(!require(ggplot2)){install.packages("ggplot2")} if(!require(FactoMineR)){install.packages("FactoMineR")} if(!require(factoextra)){install.packages("factoextra")} if(!require(ggrepel)){install.packages("ggrepel")} if(!require(data.table)){install.packages("data.table")} # change working directory setwd(dir = "C:/Users/Thibault Androuin/Documents/Projet/Thèse/new results/Article 3/Open Access data") # attention au sens des slash #====================================================================================== # Pigment concentrations #====================================================================================== # import data from https://www.seanoe.org/data/00646/75809/ or with DOI:10.17882/75809 source = read.csv2("source_pigments.csv", dec = ".", h=T) str(source) # plot chl a chla <- ddply(source, .(Date,Source), summarize, mean = round(mean(chlorophyll.A), 5), sd = round(sd(chlorophyll.A), 5)); chla ggplot(data=chla, aes(y=mean, x=Date, fill = Source)) + geom_bar(stat="identity", position=position_dodge(), color="black") + geom_errorbar(data=chla, aes(ymin=mean-sd, ymax=mean+sd), width=.2, position=position_dodge(.9)) + scale_fill_manual(values= c("#E41A1C", "#4DAF4A", "#377EB8")) + theme_bw() + scale_x_discrete(name="Date") + scale_y_continuous(name="Chlorophyll a concentration (µg/L or mg/m²)") + theme(axis.text.x = element_text(size=20), axis.text.y = element_text(size=20), axis.title.x = element_text(size=20), axis.title.y = element_text(size=20), panel.background = element_blank(), plot.background = element_blank()) + labs(fill = "Source") # test kruskal.test(source$chlorophyll.A[source$Source == "Biofilm"], source$Date[source$Source == "Biofilm"]) kruskal.test(source$chlorophyll.A[source$Source == "SPOM"], source$Date[source$Source == "SPOM"]) kruskal.test(source$chlorophyll.A[source$Source == "SSOM"], source$Date[source$Source == "SSOM"]) wilcox.test(source$chlorophyll.A[source$Source == "Biofilm"], source$chlorophyll.A[source$Source == "SSOM"]) # plot fuco fuco <- ddply(source, .(Date,Source), summarize, mean = round(mean(fucoxanthin), 5), sd = round(sd(fucoxanthin), 5)); fuco ggplot(data=fuco, aes(y=mean, x=Date, fill = Source)) + geom_bar(stat="identity", position=position_dodge(), color="black") + geom_errorbar(data=fuco, aes(ymin=mean-sd, ymax=mean+sd), width=.2, position=position_dodge(.9)) + scale_fill_manual(values= c("#E41A1C", "#4DAF4A", "#377EB8")) + theme_bw() + scale_x_discrete(name="Date") + scale_y_continuous(name="Fucoxanthin concentration (µg/L or mg/m²)") + theme(axis.text.x = element_text(size=20), axis.text.y = element_text(size=20), axis.title.x = element_text(size=20), axis.title.y = element_text(size=20), panel.background = element_blank(), plot.background = element_blank()) + labs(fill = "Source") # Anova test kruskal.test(source$fucoxanthin[source$Source == "Biofilm"], source$Date[source$Source == "Biofilm"]) kruskal.test(source$fucoxanthin[source$Source == "SPOM"], source$Date[source$Source == "SPOM"]) kruskal.test(source$fucoxanthin[source$Source == "SSOM"], source$Date[source$Source == "SSOM"]) wilcox.test(source$fucoxanthin[source$Source == "Biofilm"], source$fucoxanthin[source$Source == "SSOM"]) # chl b / chl a ratio source$ratio.b = source$chlorophyll.B/source$chlorophyll.A ratio.b <- ddply(source, .(Date,Source), summarize, mean = round(mean(ratio.b), 5), sd = round(sd(ratio.b), 5)); ratio.b ggplot(data=ratio.b, aes(y=mean, x=Date, fill = Source)) + geom_bar(stat="identity", position=position_dodge(), color="black") + geom_errorbar(data=ratio.b, aes(ymin=mean-sd, ymax=mean+sd), width=.2, position=position_dodge(.9)) + scale_fill_manual(values= c("#E41A1C", "#4DAF4A", "#377EB8")) + theme_bw() + scale_x_discrete(name="Date") + scale_y_continuous(name="Chlorophyll b / Chlorophyll a ratio") + theme(axis.text.x = element_text(size=20), axis.text.y = element_text(size=20), axis.title.x = element_text(size=20), axis.title.y = element_text(size=20), panel.background = element_blank(), plot.background = element_blank()) + labs(fill = "Source") # Anova test m <- aov(source$ratio.b ~ source$Source * source$Date) summary(m) TukeyHSD(m) hist(residuals(m)) plot(fitted(m),residuals(m)) # fuco / chl a ratio source$ratio.fuco = source$fucoxanthin / source$chlorophyll.A ratio.fuco <- ddply(source, .(Date, Source), summarize, mean = round(mean(ratio.fuco), 5), sd = round(sd(ratio.fuco), 5)); ratio.fuco ggplot(data=ratio.fuco, aes(y=mean, x=Date, fill = Source)) + geom_bar(stat="identity", position=position_dodge(), color="black") + geom_errorbar(data=ratio.fuco, aes(ymin=mean-sd, ymax=mean+sd), width=.2, position=position_dodge(.9)) + scale_fill_manual(values= c("#E41A1C", "#4DAF4A", "#377EB8")) + theme_bw() + scale_x_discrete(name="Date") + scale_y_continuous(name="Fucoxanthin / Chlorophyll a ratio") + theme(axis.text.x = element_text(size=20), axis.text.y = element_text(size=20), axis.title.x = element_text(size=20), axis.title.y = element_text(size=20), panel.background = element_blank(), plot.background = element_blank()) + labs(fill = "Source") # Anova test m <- aov(source$ratio.fuco ~ source$Source * source$Date) summary(m) TukeyHSD(m) hist(residuals(m)) plot(fitted(m),residuals(m)) # not homogeneous kruskal.test(source$ratio.fuco, source$Date) kruskal.test(source$ratio.fuco, source$Source) pairwise.wilcox.test(source$ratio.fuco, source$Source, p.adjust.method = "bonferroni") # chl c / chl a ratio source$ratio.c = source$chlorophyll.C/source$chlorophyll.A ratio.c <- ddply(source, .(Date,Source), summarize, mean = round(mean(ratio.c), 5), sd = round(sd(ratio.c), 5)); ratio.c ggplot(data=ratio.c, aes(y=mean, x=Date, fill = Source)) + geom_bar(stat="identity", position=position_dodge(), color="black") + geom_errorbar(data=ratio.c, aes(ymin=mean-sd, ymax=mean+sd), width=.2, position=position_dodge(.9)) + scale_fill_manual(values= c("#E41A1C", "#4DAF4A", "#377EB8")) + theme_bw() + scale_x_discrete(name="Date") + scale_y_continuous(name="Chlorophyll c / Chlorophyll a ratio") + theme(axis.text.x = element_text(size=20), axis.text.y = element_text(size=20), axis.title.x = element_text(size=20), axis.title.y = element_text(size=20), panel.background = element_blank(), plot.background = element_blank()) + labs(fill = "Source") # Anova test m <- aov(source$ratio.c ~ source$Source * source$Date) summary(m) TukeyHSD(m) hist(residuals(m)) plot(fitted(m),residuals(m)) # not homogeneous kruskal.test(source$ratio.c, source$Date) kruskal.test(source$ratio.c, source$Source) pairwise.wilcox.test(source$ratio.c, source$Source, p.adjust.method = "bonferroni") #====================================================================================== # Pigment Percentages #====================================================================================== # Get relative percentages source.per = data.frame(source[,1:3], source[,4:24]/rowSums(source[,4:24])*100) # mean and standard deviation tab.mean = aggregate(source.per[, 4:24], list(source.per$Source, source.per$Date), mean, na.rm=T) tab.mean <- tab.mean[, colSums(tab.mean != 0, na.rm=T) > 0] tab.sd = aggregate(source.per[, 4:24], list(source.per$Source, source.per$Date), sd, na.rm=T) tab.sd <- tab.sd[, colSums(tab.sd != 0, na.rm=T) > 0] # nMDS NMDS.source=metaMDS(source.per[,4:24], distance = "bray", k=2, trymax = 200, autotransform = F) stressplot(NMDS.source) NMDS.source$stress data.scores.source <- as.data.frame(scores(NMDS.source)) #Using the scores function from vegan to extract the site scores and convert to a data.frame data.scores.source$site <- rownames(data.scores.source) data.scores.source$Species <- source.per$Source # create a column of site names, from the rownames of data.scores data.scores.source$Date <- source.per$Date species.scores.source <- as.data.frame(scores(NMDS.source, "species")) #Using the scores function from vegan to extract the species scores and convert to a data.frame species.scores.source$species <- rownames(species.scores.source) # create a column of species, from the rownames of species.scores # convex hull for each sources grp.a <- data.scores.source[data.scores.source$Species == "Biofilm", ][chull(data.scores.source[data.scores.source$Species == "Biofilm", c("NMDS1", "NMDS2")]), ] grp.b <- data.scores.source[data.scores.source$Species == "SPOM", ][chull(data.scores.source[data.scores.source$Species == "SPOM", c("NMDS1", "NMDS2")]), ] grp.c <- data.scores.source[data.scores.source$Species == "SSOM", ][chull(data.scores.source[data.scores.source$Species == "SSOM", c("NMDS1", "NMDS2")]), ] hull.data.source <- rbind(grp.a, grp.b, grp.c) hull.data.source # plot ggplot ggplot() + geom_polygon(data=hull.data.source,aes(x=NMDS1,y=NMDS2,fill=Species,colour=Species, group=Species),alpha=0.30) + # add the convex hulls geom_point(data=data.scores.source,aes(x=NMDS1,y=NMDS2,colour=Species, fill = Species, shape = Date),size=4) + # add the point markers scale_colour_manual(values= c("red", "#4DAF4A", "#0072B2")) + scale_fill_manual(values= c("red", "#4DAF4A", "#0072B2")) + scale_shape_manual(values=c(22,23,24,25,21)) + theme_bw() + annotate("text", x=0.49, y=0.49, label= paste("Stress =",round(NMDS.source$stress,2)), size = 7) + labs(color = "Source") + labs(shape = "Date") + labs(fill = "Source") + theme(axis.text.x = element_text(size=20), axis.text.y = element_text(size=20), legend.text = element_text(colour="black", size=20), legend.title = element_text(colour="black", size=20), axis.title.x = element_text(size=20), axis.title.y = element_text(size=20), panel.background = element_blank(), plot.background = element_blank()) ## Calculate multivariate dispersions dis.source <- vegdist(source.per[,4:24], distance = "bray") mod.source <- betadisper(dis.source, source.per$Date:source.per$Source) ## Perform test anova(mod.source) # permanova adonis(source.per[,4:24] ~ source.per$Date * source.per$Source, permutations = 999) # interaction significative pairwise.adonis <- function(x,factors, sim.function = 'vegdist', sim.method = 'bray', p.adjust.m ='bonferroni') { library(vegan) co = combn(unique(as.character(factors)),2) pairs = c() F.Model =c() R2 = c() p.value = c() for(elem in 1:ncol(co)){ if(sim.function == 'daisy'){ library(cluster); x1 = daisy(x[factors %in% c(co[1,elem],co[2,elem]),],metric=sim.method) } else{x1 = vegdist(x[factors %in% c(co[1,elem],co[2,elem]),],method=sim.method)} ad = adonis(x1 ~ factors[factors %in% c(co[1,elem],co[2,elem])] ); pairs = c(pairs,paste(co[1,elem],'vs',co[2,elem])); F.Model =c(F.Model,ad$aov.tab[1,4]); R2 = c(R2,ad$aov.tab[1,5]); p.value = c(p.value,ad$aov.tab[1,6]) } p.adjusted = p.adjust(p.value,method=p.adjust.m) sig = c(rep('',length(p.adjusted))) sig[p.adjusted <= 0.05] <-'.' sig[p.adjusted <= 0.01] <-'*' sig[p.adjusted <= 0.001] <-'**' sig[p.adjusted <= 0.0001] <-'***' pairw.res = data.frame(pairs,F.Model,R2,p.value,p.adjusted,sig) print("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1") return(pairw.res) } # post hoc tests pair.onto = pairwise.adonis(source.per[,4:24], source.per$Date:source.per$Source , sim.method = "bray", p.adjust.m = "bonferroni") pairwise.adonis(source.per[,4:24], source.per$Date, sim.method = "bray", p.adjust.m = "bonferroni") pairwise.adonis(source.per[,4:24], source.per$Source , sim.method = "bray", p.adjust.m = "bonferroni") # simper simper = simper(source.per[,4:24], source.per$Source) sim = summary(simper) # table Biofilm_SPOM = data.frame(cbind(as.data.frame(round(sim$Biofilm_SPOM[1:15,6],2))), row.names(sim$Biofilm_SPOM)[1:15], row.names = NULL) ; colnames(Biofilm_SPOM) = c("Percentage : Biofilm_SPOM ", "Pigments") Biofilm_SSOM = data.frame(cbind(as.data.frame(round(sim$Biofilm_SSOM[1:15,6],2))), row.names(sim$Biofilm_SSOM)[1:15], row.names = NULL) ; colnames(Biofilm_SSOM) = c("Percentage : Biofilm_SSOM ", "Pigments") SPOM_SSOM = data.frame(cbind(as.data.frame(round(sim$SPOM_SSOM[1:15,6],2))), row.names(sim$SPOM_SSOM)[1:15], row.names = NULL) ; colnames(SPOM_SSOM) = c("Percentage : SPOM_SSOM ", "Pigments") SIMPER_results_pig = as.data.frame(cbind(Biofilm_SPOM, Biofilm_SSOM, SPOM_SSOM)) head(SIMPER_results_pig)