Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
title: "sPlot3.0 - Build CWMs"
author: "Francesco Maria Sabatini"
date: "3/4/2020"
output: html_document
![](/data/sPlot/users/Francesco/_sPlot_Management/splot-long-rgb.png "sPlot Logo")

Timestamp: r date()
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This reports documents the construction of Community Weighted Means (CWMs) and Variance (CWVs), complementing species composition data from sPlot 3.0 and Plant functional traits from TRY 5.0. Functional Traits were received by Jens Kattge on Jan 21, 2020.

library(tidyverse)
library(readr)
library(data.table)
library(knitr)
library(kableExtra)
library(stringr)

!!!! NO INFO ON TRAIT X18 !!!!

Import data

load("../_output/DT_sPlot3.0.RData")
load("../_output/Backbone3.0.RData")

Import TRY data

try.species <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv", 
                         locale = locale(encoding = "latin1")) 
try.allinfo <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv", 
                        locale = locale(encoding = "latin1"), 
                        col_types = paste0(c("dddccccc",rep("c", 84)), collapse=""))
try.individuals0 <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv", 
         locale = locale(encoding = "latin1"))

#Ancillary function to change species names to lower case
firstup <- function(x) {
  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
  x
}

try.species <- try.species %>% 
  mutate(Species=tolower(Species)) %>%
  mutate(Species=firstup(Species)) 

try.allinfo <- try.allinfo %>% 
  mutate(Species=tolower(Species)) %>%
  mutate(Species=firstup(Species)) 

#calculate statistics of data from TRY
n.indiv <- try.species %>% 
  group_by(Species) %>% 
  summarize(n=n()) %>% 
  pull(n)

There are r nrow(try.species) individual observations from r nrow(try.species %>% distinct(Species)) distinct species in r nrow(try.species %>% distinct(Genus)) distinct genera.

\newline \newline

Attach resolved names from Backbone

try.species.names <- try.allinfo %>% 
  dplyr::select(Species, Genus) %>% 
  left_join(Backbone %>% 
              dplyr::select(Name_sPlot_TRY, Name_short) %>% 
              rename(Species=Name_sPlot_TRY), 
            by="Species") %>% 
  dplyr::select(Species, Name_short, Genus)

After attaching resolved names, TRY data contains information on r try.species.names %>% distinct(Name_short) %>% nrow().
\newline Check for how many of the species in sPlot, trait information is available in TRY.

sPlot.species <- DT2 %>% 
  distinct(species) 

sPlot.in.TRY <- sPlot.species %>% 
  filter(species %in% (try.species.names %>% 
                                  distinct(Name_short) %>% 
                                  pull(Name_short))) %>% 

Out of the r nrow(sPlot.species) standardizes species names in sPlot 3.0, r nrow(sPlot.in.TRY) (r round(nrow(sPlot.in.TRY)/nrow(sPlot.species)*100,1)%) also occur in TRY 5.0.

Create legend of trait names

trait.legend <- data.frame(full=try.allinfo %>% 
                             dplyr::select(starts_with("StdValue_")) %>% 
                             colnames() %>% 
                             gsub("StdValue_", "", .) %>% 
                             sort()) %>%
  mutate(full=as.character(full)) %>% 
  mutate(traitcode=parse_number(full)) %>% 
  arrange(traitcode) %>% 
  dplyr::select(traitcode, everything()) %>% 
  mutate(full=gsub(pattern = "^[0-9]+_", replacement="", full)) %>% 
  mutate(short=c("StemDens", "RootingDepth","LeafC.perdrymass", "LeafN","LeafP",
                 "StemDiam","SeedMass", "Seed.length","LeafThickness","LDMC",
                 "LeafNperArea","LeafDryMass.single","Leaf.delta.15N","SeedGerminationRate",
                 "Seed.num.rep.unit","LeafLength","LeafWidth","LeafCN.ratio","Leaffreshmass",
                 "Stem.cond.dens","Chromosome.n","Chromosome.cDNAcont", 
                 "Disp.unit.leng","StemConduitDiameter","Wood.vessel.length",
                 "WoodFiberLength","SpecificRootLength.fine","SpecificRootLength",
                 "PlantHeight.veg","PlantHeight.generative","LeafArea.leaf.noPet",
                 "LeafArea.leaflet.noPet","LeafArea.leaf.wPet","LeafArea.leaflet.wPet",
                 "LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef",
                 "SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>% 
  ## Add SLA missing from allinfo file
  bind_rows(data.frame(traitcode=11, full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)", short="SLA")) %>% 
  arrange(traitcode) %>% 
  #create a column to mark traits for which gap filled data is available.
  mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0))
knitr::kable(trait.legend, 
  caption="Legend of trait from TRY") %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")

There are some traits with unexpected negative entries:

try.species.names %>% 
    dplyr::select(Species) %>% 
    bind_cols(try.individuals0 %>% 
                  rename_at(col.from, .funs=function(x) col.to) %>% 
                  dplyr::select(-X1)) %>% 
  gather(variable, value, -Species) %>% 
  filter(value<0) %>% 
  group_by(variable) %>% 
  summarize(n=n())

I asked Jens Kattget for clarifications.

Calculate species and genus level trait means and sd

#create string to rename traits
col.to <- trait.legend %>% 
  filter(available==T) %>% 
  pull(short) 
col.from <- trait.legend %>% 
  filter(available==T) %>% 
  mutate(traitcode=paste0("X", traitcode))  %>% 
  pull(traitcode) 

## Calculate species level trait means and sd.
try.species.means <- try.species.names %>% 
  dplyr::select(Species) %>% 
  bind_cols(try.individuals0 %>% 
              rename_at(col.from, .funs=function(x) col.to) %>% 
              dplyr::select(-X1)) %>% 
  group_by(Species) %>% 
  left_join(x={.} %>% 
              summarize(n=n()), 
            y={.} %>% 
              summarize_at(.vars=vars(StemDens:LeafWaterCont ),
                           .funs=list(mean=~mean(.), sd=~sd(.))),
            by="Species") %>% 
  dplyr::select(Species, n, everything())
  

## Calculate genus level trait means and sd.
try.genus.means <- try.species.names %>% 
  dplyr::select(Genus) %>% 
  bind_cols(try.individuals0 %>% 
              rename_at(col.from, .funs=function(x) col.to) %>% 
              dplyr::select(-X1)) %>% 
  group_by(Genus) %>% 
  left_join(x={.} %>% 
              summarize(n=n()), 
            y={.} %>% 
              summarize_at(.vars=vars(StemDens:LeafWaterCont ),
                           .funs=list(mean=~mean(.), sd=~sd(.))),
            by="Genus") %>% 
  dplyr::select(Genus, n, everything())

The average number of observations per species and genus is r round(mean(try.species.means$n),1) and r round(mean(try.genus.means$n),1), respectively. As many as r sum(try.species.means$n==1) species have only one observation (r sum(try.genus.means$n==1) at the genus level).

Match taxa based on species, if available, or Genus (when rank_correct==Genus)

try.combined.means <- try.genus.means %>% 
  rename(Taxon_name=Genus) %>% 
  mutate(Rank_correct="genus") %>% 
  bind_rows(try.species.means %>% 
              rename(Taxon_name=Species) %>% 
              mutate(Rank_correct="species")) %>% 
  dplyr::select(Taxon_name, Rank_correct, everything())

total.matches <- DT2 %>%
  distinct(species, Rank_correct) %>%
  left_join(try.combined.means %>%
              dplyr::rename(species=Taxon_name), 
            by=c("species", "Rank_correct")) %>% 
  filter(!is.na(SLA_mean)) %>% 
  nrow()

The total number of matched taxa (either at species, or genus level) is r total.matches.

Calculate summary statistics for species- and genus-level mean traits

mysummary <- try.combined.means %>% 
               group_by(Rank_correct) %>% 
               summarize_at(.vars=vars(StemDens_mean:LeafWaterCont_sd),
                            .funs=list(min=~min(., na.rm=T),
                                       q025=~quantile(., 0.25, na.rm=T), 
                                       q50=~quantile(., 0.50, na.rm=T), 
                                       q75=~quantile(., .75, na.rm=T), 
                                       max=~max(., na.rm=T), 
                                       mean=~mean(., na.rm=T), 
                                       sd=~sd(., na.rm=T))) %>% 
  gather(variable, value, -Rank_correct) %>% 
  separate(variable, sep="_", into=c("variable", "mean.sd", "stat")) %>% 
  mutate(stat=factor(stat, levels=c("min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
  spread(key=stat, value=value) %>% 
  arrange(desc(Rank_correct))
  
knitr::kable(mysummary, 
  caption="Summary statistics for each trait, when summarized across species or genera") %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")

Calculate CWMs and CWVs for each plot

Merge vegetation layers, where necessary. Combine cover values across layers

#Ancillary function
# Combine cover accounting for layers
combine.cover <- function(x, datatype){
    while (length(x)>1){
      x[2] <- x[1]+(100-x[1])*x[2]/100
      x <- x[-1]
    }
  return(x)
}

DT2 <- DT2 %>% 
  #temporary
  filter(PlotObservationID %in% sample(unique(DT2$PlotObservationID), 1000, replace=F)) %>% 
  group_by(PlotObservationID, species, Rank_correct) %>%
  summarize(Relative.cover=combine.cover(Relative.cover, cover_code)) %>%
  ungroup()

Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading result, CWM is calculated ONLY for plots for which we have some abundance information. All plots where Ab_scale=="pa" in ANY of the layers are therefore excluded.

# Tag plots where at least one layer only has p\a information 
any_pa <- DT2 %>% 
  distinct(PlotObservationID, Ab_scale) %>% 
  group_by(PlotObservationID) %>% 
  summarize(any.pa=any(Ab_scale=="pa")) %>% 
  filter(any.pa==T) %>% 
  pull(PlotObservationID)
length(any_pa)

# Exclude plots above and merge species data table with traits
CWM0 <- DT2 %>%
  filter(!PlotObservationID %in% any_pa) %>% 
  left_join(try.combined.means %>%
              dplyr::rename(species=Taxon_name) %>% 
              dplyr::select(species, Rank_correct, ends_with("_mean")), 
            by=c("species", "Rank_correct"))

# Calculate CWM for each trait in each plot
CWM1 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
               .funs = list(~weighted.mean(., Relative.cover, na.rm=T))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key=variable, value=CWM, -PlotObservationID)

# Calculate coverage for each trait in each plot
CWM2 <- CWM0 %>%
  mutate_at(.funs = list(~if_else(is.na(.),0,1) * Relative.cover), 
            .vars = vars(StemDens_mean:LeafWaterCont_mean)) %>%
  group_by(PlotObservationID) %>%
  summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
               .funs = list(~sum(., na.rm=T))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key=variable, value=trait.coverage, -PlotObservationID)
  

# Calculate CWV
variance2.fun <- function(trait, abu){
  res <- as.double(NA)
  #nam <- nam[!is.na(trait)]
  abu <- abu[!is.na(trait)]
  trait <- trait[!is.na(trait)]
  abu <- abu/sum(abu)
  if (length(trait)>1){
    # you need more than 1 observation to calculate
    # skewness and kurtosis
    # for calculation see 
    # http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
    m.trait <- weighted.mean(trait,abu)
    res <- sum(abu*(trait-m.trait)^2)
  }
  res
}

CWM3 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
               .funs = list(~variance2.fun(., Relative.cover))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key=variable, value=CWV, -PlotObservationID)

## Calculate proportion of species having traits
CWM4 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
               .funs = list(~sum(!is.na(.)))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key=variable, value=n.sp.with.trait, -PlotObservationID)

# Join together
CWM <- CWM1 %>%
  left_join(CWM2, by=c("PlotObservationID", "variable")) %>%
  left_join(CWM3, by=c("PlotObservationID", "variable")) %>%
  left_join(CWM4, by=c("PlotObservationID", "variable")) %>%
  left_join(CWM0 %>% 
              group_by(PlotObservationID) %>%
              summarize(sp.richness=n()), by=c("PlotObservationID")) %>%
  mutate(prop.sp.with.trait=n.sp.with.trait/sp.richness) %>%
  dplyr::select(PlotObservationID, variable, sp.richness, prop.sp.with.trait, trait.coverage, CWM, CWV) %>% 
  arrange(PlotObservationID)

Explore CWM output

knitr::kable(CWM %>% 
               filter(PlotObservationID %in%
                        sample(unique(DT2$PlotObservationID), 3, replace=F)), 
  caption="Community weighted means of 3 randomly selected plots") %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")

Calculate summary statistics for CWMs and CWVs

CWM.summary <- CWM %>% 
  rename(myvar=variable) %>% 
  group_by(myvar) %>% 
  summarize_at(.vars=vars(CWM:CWV),
                .funs=list(num.NAs=~sum(is.na(.)), 
                           min=~min(., na.rm=T),
                           q025=~quantile(., 0.25, na.rm=T), 
                           q50=~quantile(., 0.50, na.rm=T), 
                           q75=~quantile(., .75, na.rm=T), 
                           max=~max(., na.rm=T), 
                           mean=~mean(., na.rm=T), 
                           sd=~sd(., na.rm=T))) %>% 
  gather(key=variable, value=value, -myvar) %>% 
  separate(variable, sep="_", into=c("metric", "stat")) %>% 
  mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
  spread(key=stat, value=value) %>% 
  arrange(metric, myvar)
  
knitr::kable(CWM.summary, 
  caption="Summary of CWMs and CWVs across all plots") %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")

Export CWM and species mean trait values

save(try.combined.means, CWM, file="../_output/Traits_CWMs.RData")

##SessionInfo

sessionInfo()