diff --git a/code/07_buildCWMs.Rmd b/code/07_buildCWMs.Rmd index 952f33fdef73a67d4782a40d2384ca73817f282e..2bdad6c569dd22fb470c3f1551afb7568d3c5226 100644 --- a/code/07_buildCWMs.Rmd +++ b/code/07_buildCWMs.Rmd @@ -15,3 +15,370 @@ output: html_document **Drafted:** Francesco Maria Sabatini **Revised:** **version:** 1.0 + +This reports documents the construction of Community Weighted Means (CWMs) and Variance (CWVs), complementing species composition data from sPlot 3.0 and Plant functional traits from TRY 5.0. +Functional Traits were received by [Jens Kattge](jkattge@bgc-jena.mpg.de) on Jan 21, 2020. + +```{r results="hide", message=F, warning=F} +library(reshape2) +library(tidyverse) +library(readr) +library(data.table) +library(knitr) +library(kableExtra) +library(stringr) +``` + +!!!! NO INFO ON TRAIT X18 !!!! + +# Import data +```{r} +load("../_output/DT_sPlot3.0.RData") +load("../_output/Backbone3.0.RData") +``` + +Import TRY data +```{r} +try.species <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv", + locale = locale(encoding = "latin1")) +try.allinfo <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv", + locale = locale(encoding = "latin1"), + col_types = paste0(c("dddccccc",rep("c", 84)), collapse="")) +try.individuals0 <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv", + locale = locale(encoding = "latin1")) + +#Ancillary function to change to lower case +firstup <- function(x) { + substr(x, 1, 1) <- toupper(substr(x, 1, 1)) + x +} + +try.species <- try.species %>% + mutate(Species=tolower(Species)) %>% + mutate(Species=firstup(Species)) %>% + +try.allinfo <- try.allinfo %>% + mutate(Species=tolower(Species)) %>% + mutate(Species=firstup(Species)) %>% + + + +#calculate statistics of data from TRY +n.indiv <- try.species %>% + group_by(Species) %>% + summarize(n=n()) %>% + pull(n) +``` +There are `r nrow(try.species)` individual observations from `r nrow(try.species %>% distinct(Species))` distinct species in `r nrow(try.species %>% distinct(Genus))` distinct genera. + +\newline \newline + + +## Attach resolved names from Backbone +```{r} +try.species.names <- try.allinfo %>% + dplyr::select(Species, Genus) %>% + left_join(Backbone %>% + dplyr::select(Name_sPlot_TRY, Name_short) %>% + rename(Species=Name_sPlot_TRY), + by="Species") %>% + dplyr::select(Species, Name_short, Genus) +``` +After attaching resolved names, TRY data contains information on `r try.species.names %>% distinct(Name_short) %>% nrow()`. +\newline +Check for how many of the species in sPlot, trait information is available in TRY. +```{r} +sPlot.species <- DT2 %>% + distinct(species) + +sPlot.in.TRY <- sPlot.species %>% + filter(species %in% (try.species.names %>% + distinct(Name_short) %>% + pull(Name_short))) + +##check match when considering taxa resolved ONLY at genus level + + +``` +Out of the `r nrow(sPlot.species)` standardizes species names in sPlot 3.0, `r nrow(sPlot.in.TRY)` (`r round(nrow(sPlot.in.TRY)/nrow(sPlot.species)*100,1)`%) also occur in TRY 5.0. + + + +## Create legend of trait names +```{r, warning=F} +trait.legend <- data.frame(full=try.allinfo %>% + dplyr::select(starts_with("StdValue_")) %>% + colnames() %>% + gsub("StdValue_", "", .) %>% + sort()) %>% + mutate(full=as.character(full)) %>% + mutate(traitcode=parse_number(full)) %>% + arrange(traitcode) %>% + dplyr::select(traitcode, everything()) %>% + mutate(full=gsub(pattern = "^[0-9]+_", replacement="", full)) %>% + mutate(short=c("StemDens", "RootingDepth","LeafC.perdrymass", "LeafN","LeafP", + "StemDiam","SeedMass", "Seed.length","LeafThickness","LDMC", + "LeafNperArea","LeafDryMass.single","Leaf.delta.15N","SeedGerminationRate", + "Seed.num.rep.unit","LeafLength","LeafWidth","LeafCN.ratio","Leaffreshmass", + "Stem.cond.dens","Chromosome.n","Chromosome.cDNAcont", + "Disp.unit.leng","StemConduitDiameter","Wood.vessel.length", + "WoodFiberLength","SpecificRootLength.fine","SpecificRootLength", + "PlantHeight.veg","PlantHeight.generative","LeafArea.leaf.noPet", + "LeafArea.leaflet.noPet","LeafArea.leaf.wPet","LeafArea.leaflet.wPet", + "LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef", + "SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>% + ## Add SLA missing from allinfo file + bind_rows(data.frame(traitcode=11, full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)", short="SLA")) %>% + arrange(traitcode) %>% + #create a column to mark traits for which gap filled data is available. + mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0)) +``` + +```{r, echo=F} +knitr::kable(trait.legend, + caption="Legend of trait from TRY") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") +``` + +## Calculate species and genus level trait means and sd +```{r} +#create string to rename traits +col.to <- trait.legend %>% + filter(available==T) %>% + pull(short) +col.from <- trait.legend %>% + filter(available==T) %>% + mutate(traitcode=paste0("X", traitcode)) %>% + pull(traitcode) + +## Calculate species level trait means and sd. +try.species.means <- try.species.names %>% + dplyr::select(Species) %>% + bind_cols(try.individuals0 %>% + rename_at(col.from, .funs=function(x) col.to) %>% + dplyr::select(-X1)) %>% + group_by(Species) %>% + left_join(x={.} %>% + summarize(n=n()), + y={.} %>% + summarize_at(.vars=vars(StemDens:LeafWaterCont ), + .funs=list(mean=~mean(.), sd=~sd(.))), + by="Species") %>% + dplyr::select(Species, n, everything()) + + +## Calculate genus level trait means and sd. +try.genus.means <- try.species.names %>% + dplyr::select(Genus) %>% + bind_cols(try.individuals0 %>% + rename_at(col.from, .funs=function(x) col.to) %>% + dplyr::select(-X1)) %>% + group_by(Genus) %>% + left_join(x={.} %>% + summarize(n=n()), + y={.} %>% + summarize_at(.vars=vars(StemDens:LeafWaterCont ), + .funs=list(mean=~mean(.), sd=~sd(.))), + by="Genus") %>% + dplyr::select(Genus, n, everything()) + +``` +The average number of observations per species and genus is `r round(mean(try.species.means$n),1)` and `r round(mean(try.genus.means$n),1)`, respectively. As many as `r sum(try.species.means$n==1)` species have only one observation (`r sum(try.genus.means$n==1)` at the genus level). + +Match taxa based on species, if available, or Genus (when rank_correct==Genus) +```{r} +try.combined.means <- try.genus.means %>% + rename(Taxon_name=Genus) %>% + mutate(Rank_correct="genus") %>% + bind_rows(try.species.means %>% + rename(Taxon_name=Species) %>% + mutate(Rank_correct="species")) %>% + dplyr::select(Taxon_name, Rank_correct, everything()) + +total.matches <- DT2 %>% + distinct(species, Rank_correct) %>% + left_join(try.combined.means %>% + dplyr::rename(species=Taxon_name), + by=c("species", "Rank_correct")) %>% + filter(!is.na(SLA_mean)) %>% + nrow() +``` + +Calculate summary statistics for species- and genus-level mean traits +```{r} +mysummary <- try.combined.means %>% + group_by(Rank_correct) %>% + summarize_at(.vars=vars(StemDens_mean:LeafWaterCont_sd), + .funs=list(min=~min(., na.rm=T), + q025=~quantile(., 0.25, na.rm=T), + q50=~quantile(., 0.50, na.rm=T), + q75=~quantile(., .75, na.rm=T), + max=~max(., na.rm=T), + mean=~mean(., na.rm=T), + sd=~sd(., na.rm=T))) %>% + gather(variable, value, -Rank_correct) %>% + separate(variable, sep="_", into=c("variable", "mean.sd", "stat")) %>% + mutate(stat=factor(stat, levels=c("min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% + spread(key=stat, value=value) %>% + arrange(desc(Rank_correct)) + +``` + +```{r, echo=F} +knitr::kable(mysummary, + caption="Summary statistics for each trait, when summarized across species or genera") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") +``` + + + +## Calculate CWMs and CWVs for each plot +Merge vegetation layers, where necessary. Combine cover values across layers +```{r} +#Ancillary function +# Combine cover accounting for layers +combine.cover <- function(x, datatype){ + while (length(x)>1){ + x[2] <- x[1]+(100-x[1])*x[2]/100 + x <- x[-1] + } + return(x) +} + +DT2t <- DT2 %>% + ## temporary! + filter(PlotObservationID %in% sample(unique(DT2$PlotObservationID), 100, replace=F)) %>% + group_by(PlotObservationID, species, Rank_correct) %>% + summarize(Relative.cover=combine.cover(Relative.cover, cover_code)) %>% + ungroup() + +``` + +Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info) + +```{r, cache=T, cache.lazy=F} +# Merge species data table with traits +CWM0 <- DT2t %>% + left_join(try.combined.means %>% + dplyr::rename(species=Taxon_name) %>% + dplyr::select(species, Rank_correct, ends_with("_mean")), + by=c("species", "Rank_correct")) + +# Calculate CWM for each trait in each plot +CWM1 <- CWM0 %>% + group_by(PlotObservationID) %>% + summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean), + .funs = list(~weighted.mean(., Relative.cover, na.rm=T))) %>% + dplyr::select(PlotObservationID, order(colnames(.))) %>% + gather(key=variable, value=CWM, -PlotObservationID) + +# Calculate coverage for each trait in each plot +CWM2 <- CWM0 %>% + mutate_at(.funs = list(~if_else(is.na(.),0,1) * Relative.cover), + .vars = vars(StemDens_mean:LeafWaterCont_mean)) %>% + group_by(PlotObservationID) %>% + summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean), + .funs = list(~sum(., na.rm=T))) %>% + dplyr::select(PlotObservationID, order(colnames(.))) %>% + gather(key=variable, value=trait.coverage, -PlotObservationID) + + +# Calculate CWV +variance2.fun <- function(trait, abu){ + res <- as.double(NA) + #nam <- nam[!is.na(trait)] + abu <- abu[!is.na(trait)] + trait <- trait[!is.na(trait)] + abu <- abu/sum(abu) + if (length(trait)>1){ + # you need more than 1 observation to calculate + # skewness and kurtosis + # for calculation see + # http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html + m.trait <- weighted.mean(trait,abu) + res <- sum(abu*(trait-m.trait)^2) + } + res +} + +CWM3 <- CWM0 %>% + group_by(PlotObservationID) %>% + summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean), + .funs = list(~variance2.fun(., Relative.cover))) %>% + dplyr::select(PlotObservationID, order(colnames(.))) %>% + gather(key=variable, value=CWV, -PlotObservationID) + +## Calculate proportion of species having traits +CWM4 <- CWM0 %>% + group_by(PlotObservationID) %>% + summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean), + .funs = list(~sum(!is.na(.)))) %>% + dplyr::select(PlotObservationID, order(colnames(.))) %>% + gather(key=variable, value=n.sp.with.trait, -PlotObservationID) + +# Join together +CWM <- CWM1 %>% + left_join(CWM2, by=c("PlotObservationID", "variable")) %>% + left_join(CWM3, by=c("PlotObservationID", "variable")) %>% + left_join(CWM4, by=c("PlotObservationID", "variable")) %>% + left_join(CWM0 %>% + group_by(PlotObservationID) %>% + summarize(sp.richness=n()), by=c("PlotObservationID")) %>% + mutate(sp.with.trait.prop=n.sp.with.trait/sp.richness) %>% + dplyr::select(PlotObservationID, variable, sp.richness, sp.with.trait.prop, trait.coverage, CWM, CWV) %>% + arrange(PlotObservationID) + +``` +## Explore CWM output + +```{r, echo=F} +knitr::kable(CWM %>% + filter(PlotObservationID %in% + sample(unique(DT2$PlotObservationID), 3, replace=F)), + caption="Community weighted means of 3 randomly selected plots") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") +``` + +Calculate summary statistics for CWMs and CWVs +```{r} +CWM.summary <- CWM %>% + rename(myvar=variable) %>% + group_by(myvar) %>% + summarize_at(.vars=vars(CWM:CWV), + .funs=list(num.NAs=~sum(is.na(.)), + min=~min(., na.rm=T), + q025=~quantile(., 0.25, na.rm=T), + q50=~quantile(., 0.50, na.rm=T), + q75=~quantile(., .75, na.rm=T), + max=~max(., na.rm=T), + mean=~mean(., na.rm=T), + sd=~sd(., na.rm=T))) %>% + gather(key=variable, value=value, -myvar) %>% + separate(variable, sep="_", into=c("metric", "stat")) %>% + mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% + spread(key=stat, value=value) %>% + arrange(metric, myvar) + +``` + +```{r, echo=F} +knitr::kable(CWM.summary, + caption="Summary of CWMs and CWVs across all plots") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") +``` + + +## Export CWM and species mean trait values +```{r} +save(try.combined.means, CWM, file="../_output/Traits_CWMs.RData") +``` + + + + + +