Skip to content
Snippets Groups Projects
Select Git revision
  • 8138532765444fa815e6033ab53c68c836d237cd
  • master default protected
2 results

07_buildCWMs.Rmd

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    title: "sPlot3.0 - Build CWMs"
    author: "Francesco Maria Sabatini"
    date: "3/4/2020"
    output: html_document
    ![](/data/sPlot/users/Francesco/_sPlot_Management/splot-long-rgb.png "sPlot Logo")

    Timestamp: r date()
    Drafted: Francesco Maria Sabatini
    Revised:
    version: 1.0

    This reports documents the construction of Community Weighted Means (CWMs) and Variance (CWVs), complementing species composition data from sPlot 3.0 and Plant functional traits from TRY 5.0. Functional Traits were received by Jens Kattge on Jan 21, 2020.

    library(tidyverse)
    library(readr)
    library(data.table)
    library(knitr)
    library(kableExtra)
    library(stringr)

    !!!! NO INFO ON TRAIT X18 !!!!

    Import data

    load("../_output/DT_sPlot3.0.RData")
    load("../_output/Backbone3.0.RData")

    Import TRY data

    try.species <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv", 
                             locale = locale(encoding = "latin1")) 
    try.allinfo <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv", 
                            locale = locale(encoding = "latin1"), 
                            col_types = paste0(c("dddccccc",rep("c", 84)), collapse=""))
    try.individuals0 <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv", 
             locale = locale(encoding = "latin1"))
    
    #Ancillary function to change species names to lower case
    firstup <- function(x) {
      substr(x, 1, 1) <- toupper(substr(x, 1, 1))
      x
    }
    
    try.species <- try.species %>% 
      mutate(Species=tolower(Species)) %>%
      mutate(Species=firstup(Species)) 
    
    try.allinfo <- try.allinfo %>% 
      mutate(Species=tolower(Species)) %>%
      mutate(Species=firstup(Species)) 
    
    #calculate statistics of data from TRY
    n.indiv <- try.species %>% 
      group_by(Species) %>% 
      summarize(n=n()) %>% 
      pull(n)

    There are r nrow(try.species) individual observations from r nrow(try.species %>% distinct(Species)) distinct species in r nrow(try.species %>% distinct(Genus)) distinct genera.

    \newline \newline

    Attach resolved names from Backbone

    try.species.names <- try.allinfo %>% 
      dplyr::select(Species, Genus) %>% 
      left_join(Backbone %>% 
                  dplyr::select(Name_sPlot_TRY, Name_short) %>% 
                  rename(Species=Name_sPlot_TRY), 
                by="Species") %>% 
      dplyr::select(Species, Name_short, Genus)

    After attaching resolved names, TRY data contains information on r try.species.names %>% distinct(Name_short) %>% nrow().
    \newline Check for how many of the species in sPlot, trait information is available in TRY.

    sPlot.species <- DT2 %>% 
      distinct(species) 
    
    sPlot.in.TRY <- sPlot.species %>% 
      filter(species %in% (try.species.names %>% 
                                      distinct(Name_short) %>% 
                                      pull(Name_short))) %>% 

    Out of the r nrow(sPlot.species) standardizes species names in sPlot 3.0, r nrow(sPlot.in.TRY) (r round(nrow(sPlot.in.TRY)/nrow(sPlot.species)*100,1)%) also occur in TRY 5.0.

    Create legend of trait names

    trait.legend <- data.frame(full=try.allinfo %>% 
                                 dplyr::select(starts_with("StdValue_")) %>% 
                                 colnames() %>% 
                                 gsub("StdValue_", "", .) %>% 
                                 sort()) %>%
      mutate(full=as.character(full)) %>% 
      mutate(traitcode=parse_number(full)) %>% 
      arrange(traitcode) %>% 
      dplyr::select(traitcode, everything()) %>% 
      mutate(full=gsub(pattern = "^[0-9]+_", replacement="", full)) %>% 
      mutate(short=c("StemDens", "RootingDepth","LeafC.perdrymass", "LeafN","LeafP",
                     "StemDiam","SeedMass", "Seed.length","LeafThickness","LDMC",
                     "LeafNperArea","LeafDryMass.single","Leaf.delta.15N","SeedGerminationRate",
                     "Seed.num.rep.unit","LeafLength","LeafWidth","LeafCN.ratio","Leaffreshmass",
                     "Stem.cond.dens","Chromosome.n","Chromosome.cDNAcont", 
                     "Disp.unit.leng","StemConduitDiameter","Wood.vessel.length",
                     "WoodFiberLength","SpecificRootLength.fine","SpecificRootLength",
                     "PlantHeight.veg","PlantHeight.generative","LeafArea.leaf.noPet",
                     "LeafArea.leaflet.noPet","LeafArea.leaf.wPet","LeafArea.leaflet.wPet",
                     "LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef",
                     "SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>% 
      ## Add SLA missing from allinfo file
      bind_rows(data.frame(traitcode=11, full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)", short="SLA")) %>% 
      arrange(traitcode) %>% 
      #create a column to mark traits for which gap filled data is available.
      mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0))
    knitr::kable(trait.legend, 
      caption="Legend of trait from TRY") %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")

    There are some traits with unexpected negative entries:

    try.species.names %>% 
        dplyr::select(Species) %>% 
        bind_cols(try.individuals0 %>% 
                      rename_at(col.from, .funs=function(x) col.to) %>% 
                      dplyr::select(-X1)) %>% 
      gather(variable, value, -Species) %>% 
      filter(value<0) %>% 
      group_by(variable) %>% 
      summarize(n=n())

    I asked Jens Kattget for clarifications.

    Calculate species and genus level trait means and sd

    #create string to rename traits
    col.to <- trait.legend %>% 
      filter(available==T) %>% 
      pull(short) 
    col.from <- trait.legend %>% 
      filter(available==T) %>% 
      mutate(traitcode=paste0("X", traitcode))  %>% 
      pull(traitcode) 
    
    ## Calculate species level trait means and sd.
    try.species.means <- try.species.names %>% 
      dplyr::select(Species) %>% 
      bind_cols(try.individuals0 %>% 
                  rename_at(col.from, .funs=function(x) col.to) %>% 
                  dplyr::select(-X1)) %>% 
      group_by(Species) %>% 
      left_join(x={.} %>% 
                  summarize(n=n()), 
                y={.} %>% 
                  summarize_at(.vars=vars(StemDens:LeafWaterCont ),
                               .funs=list(mean=~mean(.), sd=~sd(.))),
                by="Species") %>% 
      dplyr::select(Species, n, everything())
      
    
    ## Calculate genus level trait means and sd.
    try.genus.means <- try.species.names %>% 
      dplyr::select(Genus) %>% 
      bind_cols(try.individuals0 %>% 
                  rename_at(col.from, .funs=function(x) col.to) %>% 
                  dplyr::select(-X1)) %>% 
      group_by(Genus) %>% 
      left_join(x={.} %>% 
                  summarize(n=n()), 
                y={.} %>% 
                  summarize_at(.vars=vars(StemDens:LeafWaterCont ),
                               .funs=list(mean=~mean(.), sd=~sd(.))),
                by="Genus") %>% 
      dplyr::select(Genus, n, everything())
    

    The average number of observations per species and genus is r round(mean(try.species.means$n),1) and r round(mean(try.genus.means$n),1), respectively. As many as r sum(try.species.means$n==1) species have only one observation (r sum(try.genus.means$n==1) at the genus level).

    Match taxa based on species, if available, or Genus (when rank_correct==Genus)

    try.combined.means <- try.genus.means %>% 
      rename(Taxon_name=Genus) %>% 
      mutate(Rank_correct="genus") %>% 
      bind_rows(try.species.means %>% 
                  rename(Taxon_name=Species) %>% 
                  mutate(Rank_correct="species")) %>% 
      dplyr::select(Taxon_name, Rank_correct, everything())
    
    total.matches <- DT2 %>%
      distinct(species, Rank_correct) %>%
      left_join(try.combined.means %>%
                  dplyr::rename(species=Taxon_name), 
                by=c("species", "Rank_correct")) %>% 
      filter(!is.na(SLA_mean)) %>% 
      nrow()

    The total number of matched taxa (either at species, or genus level) is r total.matches.

    Calculate summary statistics for species- and genus-level mean traits

    mysummary <- try.combined.means %>% 
                   group_by(Rank_correct) %>% 
                   summarize_at(.vars=vars(StemDens_mean:LeafWaterCont_sd),
                                .funs=list(min=~min(., na.rm=T),
                                           q025=~quantile(., 0.25, na.rm=T), 
                                           q50=~quantile(., 0.50, na.rm=T), 
                                           q75=~quantile(., .75, na.rm=T), 
                                           max=~max(., na.rm=T), 
                                           mean=~mean(., na.rm=T), 
                                           sd=~sd(., na.rm=T))) %>% 
      gather(variable, value, -Rank_correct) %>% 
      separate(variable, sep="_", into=c("variable", "mean.sd", "stat")) %>% 
      mutate(stat=factor(stat, levels=c("min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
      spread(key=stat, value=value) %>% 
      arrange(desc(Rank_correct))
      
    knitr::kable(mysummary, 
      caption="Summary statistics for each trait, when summarized across species or genera") %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")

    Calculate CWMs and CWVs for each plot

    Merge vegetation layers, where necessary. Combine cover values across layers

    #Ancillary function
    # Combine cover accounting for layers
    combine.cover <- function(x, datatype){
        while (length(x)>1){
          x[2] <- x[1]+(100-x[1])*x[2]/100
          x <- x[-1]
        }
      return(x)
    }
    
    DT2 <- DT2 %>% 
      #temporary
      filter(PlotObservationID %in% sample(unique(DT2$PlotObservationID), 1000, replace=F)) %>% 
      group_by(PlotObservationID, species, Rank_correct) %>%
      summarize(Relative.cover=combine.cover(Relative.cover, cover_code)) %>%
      ungroup()
    

    Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading result, CWM is calculated ONLY for plots for which we have some abundance information. All plots where Ab_scale=="pa" in ANY of the layers are therefore excluded.

    # Tag plots where at least one layer only has p\a information 
    any_pa <- DT2 %>% 
      distinct(PlotObservationID, Ab_scale) %>% 
      group_by(PlotObservationID) %>% 
      summarize(any.pa=any(Ab_scale=="pa")) %>% 
      filter(any.pa==T) %>% 
      pull(PlotObservationID)
    length(any_pa)
    
    # Exclude plots above and merge species data table with traits
    CWM0 <- DT2 %>%
      filter(!PlotObservationID %in% any_pa) %>% 
      left_join(try.combined.means %>%
                  dplyr::rename(species=Taxon_name) %>% 
                  dplyr::select(species, Rank_correct, ends_with("_mean")), 
                by=c("species", "Rank_correct"))
    
    # Calculate CWM for each trait in each plot
    CWM1 <- CWM0 %>%
      group_by(PlotObservationID) %>%
      summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
                   .funs = list(~weighted.mean(., Relative.cover, na.rm=T))) %>%
      dplyr::select(PlotObservationID, order(colnames(.))) %>%
      gather(key=variable, value=CWM, -PlotObservationID)
    
    # Calculate coverage for each trait in each plot
    CWM2 <- CWM0 %>%
      mutate_at(.funs = list(~if_else(is.na(.),0,1) * Relative.cover), 
                .vars = vars(StemDens_mean:LeafWaterCont_mean)) %>%
      group_by(PlotObservationID) %>%
      summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
                   .funs = list(~sum(., na.rm=T))) %>%
      dplyr::select(PlotObservationID, order(colnames(.))) %>%
      gather(key=variable, value=trait.coverage, -PlotObservationID)
      
    
    # Calculate CWV
    variance2.fun <- function(trait, abu){
      res <- as.double(NA)
      #nam <- nam[!is.na(trait)]
      abu <- abu[!is.na(trait)]
      trait <- trait[!is.na(trait)]
      abu <- abu/sum(abu)
      if (length(trait)>1){
        # you need more than 1 observation to calculate
        # skewness and kurtosis
        # for calculation see 
        # http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
        m.trait <- weighted.mean(trait,abu)
        res <- sum(abu*(trait-m.trait)^2)
      }
      res
    }
    
    CWM3 <- CWM0 %>%
      group_by(PlotObservationID) %>%
      summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
                   .funs = list(~variance2.fun(., Relative.cover))) %>%
      dplyr::select(PlotObservationID, order(colnames(.))) %>%
      gather(key=variable, value=CWV, -PlotObservationID)
    
    ## Calculate proportion of species having traits
    CWM4 <- CWM0 %>%
      group_by(PlotObservationID) %>%
      summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
                   .funs = list(~sum(!is.na(.)))) %>%
      dplyr::select(PlotObservationID, order(colnames(.))) %>%
      gather(key=variable, value=n.sp.with.trait, -PlotObservationID)
    
    # Join together
    CWM <- CWM1 %>%
      left_join(CWM2, by=c("PlotObservationID", "variable")) %>%
      left_join(CWM3, by=c("PlotObservationID", "variable")) %>%
      left_join(CWM4, by=c("PlotObservationID", "variable")) %>%
      left_join(CWM0 %>% 
                  group_by(PlotObservationID) %>%
                  summarize(sp.richness=n()), by=c("PlotObservationID")) %>%
      mutate(prop.sp.with.trait=n.sp.with.trait/sp.richness) %>%
      dplyr::select(PlotObservationID, variable, sp.richness, prop.sp.with.trait, trait.coverage, CWM, CWV) %>% 
      arrange(PlotObservationID)
    

    Explore CWM output

    knitr::kable(CWM %>% 
                   filter(PlotObservationID %in%
                            sample(unique(DT2$PlotObservationID), 3, replace=F)), 
      caption="Community weighted means of 3 randomly selected plots") %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")

    Calculate summary statistics for CWMs and CWVs

    CWM.summary <- CWM %>% 
      rename(myvar=variable) %>% 
      group_by(myvar) %>% 
      summarize_at(.vars=vars(CWM:CWV),
                    .funs=list(num.NAs=~sum(is.na(.)), 
                               min=~min(., na.rm=T),
                               q025=~quantile(., 0.25, na.rm=T), 
                               q50=~quantile(., 0.50, na.rm=T), 
                               q75=~quantile(., .75, na.rm=T), 
                               max=~max(., na.rm=T), 
                               mean=~mean(., na.rm=T), 
                               sd=~sd(., na.rm=T))) %>% 
      gather(key=variable, value=value, -myvar) %>% 
      separate(variable, sep="_", into=c("metric", "stat")) %>% 
      mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
      spread(key=stat, value=value) %>% 
      arrange(metric, myvar)
      
    knitr::kable(CWM.summary, 
      caption="Summary of CWMs and CWVs across all plots") %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")

    Export CWM and species mean trait values

    save(try.combined.means, CWM, file="../_output/Traits_CWMs.RData")

    ##SessionInfo

    sessionInfo()