-
Francesco Sabatini authoredFrancesco Sabatini authored
- Import data
- Attach resolved names from Backbone
- Create legend of trait names
- Calculate species and genus level trait means and sd
- Match taxa based on species, if available, or Genus (when rank_correct==Genus)
- Calculate summary statistics for species- and genus-level mean traits
- Calculate CWMs and CWVs for each plot
- Explore CWM output
- Export CWM and species mean trait values
title: "sPlot3.0 - Build CWMs"
author: "Francesco Maria Sabatini"
date: "3/4/2020"
output: html_document
Timestamp: r date()
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0
This reports documents the construction of Community Weighted Means (CWMs) and Variance (CWVs), complementing species composition data from sPlot 3.0 and Plant functional traits from TRY 5.0. Functional Traits were received by Jens Kattge on Jan 21, 2020.
library(tidyverse)
library(readr)
library(data.table)
library(knitr)
library(kableExtra)
library(stringr)
!!!! NO INFO ON TRAIT X18 !!!!
Import data
load("../_output/DT_sPlot3.0.RData")
load("../_output/Backbone3.0.RData")
Import TRY data
try.species <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv",
locale = locale(encoding = "latin1"))
try.allinfo <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv",
locale = locale(encoding = "latin1"),
col_types = paste0(c("dddccccc",rep("c", 84)), collapse=""))
try.individuals0 <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv",
locale = locale(encoding = "latin1"))
#Ancillary function to change species names to lower case
firstup <- function(x) {
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
x
}
try.species <- try.species %>%
mutate(Species=tolower(Species)) %>%
mutate(Species=firstup(Species))
try.allinfo <- try.allinfo %>%
mutate(Species=tolower(Species)) %>%
mutate(Species=firstup(Species))
#calculate statistics of data from TRY
n.indiv <- try.species %>%
group_by(Species) %>%
summarize(n=n()) %>%
pull(n)
There are r nrow(try.species)
individual observations from r nrow(try.species %>% distinct(Species))
distinct species in r nrow(try.species %>% distinct(Genus))
distinct genera.
\newline \newline
Attach resolved names from Backbone
try.species.names <- try.allinfo %>%
dplyr::select(Species, Genus) %>%
left_join(Backbone %>%
dplyr::select(Name_sPlot_TRY, Name_short) %>%
rename(Species=Name_sPlot_TRY),
by="Species") %>%
dplyr::select(Species, Name_short, Genus)
After attaching resolved names, TRY data contains information on r try.species.names %>% distinct(Name_short) %>% nrow()
.
\newline
Check for how many of the species in sPlot, trait information is available in TRY.
sPlot.species <- DT2 %>%
distinct(species)
sPlot.in.TRY <- sPlot.species %>%
filter(species %in% (try.species.names %>%
distinct(Name_short) %>%
pull(Name_short))) %>%
Out of the r nrow(sPlot.species)
standardizes species names in sPlot 3.0, r nrow(sPlot.in.TRY)
(r round(nrow(sPlot.in.TRY)/nrow(sPlot.species)*100,1)
%) also occur in TRY 5.0.
Create legend of trait names
trait.legend <- data.frame(full=try.allinfo %>%
dplyr::select(starts_with("StdValue_")) %>%
colnames() %>%
gsub("StdValue_", "", .) %>%
sort()) %>%
mutate(full=as.character(full)) %>%
mutate(traitcode=parse_number(full)) %>%
arrange(traitcode) %>%
dplyr::select(traitcode, everything()) %>%
mutate(full=gsub(pattern = "^[0-9]+_", replacement="", full)) %>%
mutate(short=c("StemDens", "RootingDepth","LeafC.perdrymass", "LeafN","LeafP",
"StemDiam","SeedMass", "Seed.length","LeafThickness","LDMC",
"LeafNperArea","LeafDryMass.single","Leaf.delta.15N","SeedGerminationRate",
"Seed.num.rep.unit","LeafLength","LeafWidth","LeafCN.ratio","Leaffreshmass",
"Stem.cond.dens","Chromosome.n","Chromosome.cDNAcont",
"Disp.unit.leng","StemConduitDiameter","Wood.vessel.length",
"WoodFiberLength","SpecificRootLength.fine","SpecificRootLength",
"PlantHeight.veg","PlantHeight.generative","LeafArea.leaf.noPet",
"LeafArea.leaflet.noPet","LeafArea.leaf.wPet","LeafArea.leaflet.wPet",
"LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef",
"SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>%
## Add SLA missing from allinfo file
bind_rows(data.frame(traitcode=11, full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)", short="SLA")) %>%
arrange(traitcode) %>%
#create a column to mark traits for which gap filled data is available.
mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0))
knitr::kable(trait.legend,
caption="Legend of trait from TRY") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
There are some traits with unexpected negative entries:
try.species.names %>%
dplyr::select(Species) %>%
bind_cols(try.individuals0 %>%
rename_at(col.from, .funs=function(x) col.to) %>%
dplyr::select(-X1)) %>%
gather(variable, value, -Species) %>%
filter(value<0) %>%
group_by(variable) %>%
summarize(n=n())
I asked Jens Kattget for clarifications.
Calculate species and genus level trait means and sd
#create string to rename traits
col.to <- trait.legend %>%
filter(available==T) %>%
pull(short)
col.from <- trait.legend %>%
filter(available==T) %>%
mutate(traitcode=paste0("X", traitcode)) %>%
pull(traitcode)
## Calculate species level trait means and sd.
try.species.means <- try.species.names %>%
dplyr::select(Species) %>%
bind_cols(try.individuals0 %>%
rename_at(col.from, .funs=function(x) col.to) %>%
dplyr::select(-X1)) %>%
group_by(Species) %>%
left_join(x={.} %>%
summarize(n=n()),
y={.} %>%
summarize_at(.vars=vars(StemDens:LeafWaterCont ),
.funs=list(mean=~mean(.), sd=~sd(.))),
by="Species") %>%
dplyr::select(Species, n, everything())
## Calculate genus level trait means and sd.
try.genus.means <- try.species.names %>%
dplyr::select(Genus) %>%
bind_cols(try.individuals0 %>%
rename_at(col.from, .funs=function(x) col.to) %>%
dplyr::select(-X1)) %>%
group_by(Genus) %>%
left_join(x={.} %>%
summarize(n=n()),
y={.} %>%
summarize_at(.vars=vars(StemDens:LeafWaterCont ),
.funs=list(mean=~mean(.), sd=~sd(.))),
by="Genus") %>%
dplyr::select(Genus, n, everything())
The average number of observations per species and genus is r round(mean(try.species.means$n),1)
and r round(mean(try.genus.means$n),1)
, respectively. As many as r sum(try.species.means$n==1)
species have only one observation (r sum(try.genus.means$n==1)
at the genus level).
Match taxa based on species, if available, or Genus (when rank_correct==Genus)
try.combined.means <- try.genus.means %>%
rename(Taxon_name=Genus) %>%
mutate(Rank_correct="genus") %>%
bind_rows(try.species.means %>%
rename(Taxon_name=Species) %>%
mutate(Rank_correct="species")) %>%
dplyr::select(Taxon_name, Rank_correct, everything())
total.matches <- DT2 %>%
distinct(species, Rank_correct) %>%
left_join(try.combined.means %>%
dplyr::rename(species=Taxon_name),
by=c("species", "Rank_correct")) %>%
filter(!is.na(SLA_mean)) %>%
nrow()
The total number of matched taxa (either at species, or genus level) is r total.matches
.
Calculate summary statistics for species- and genus-level mean traits
mysummary <- try.combined.means %>%
group_by(Rank_correct) %>%
summarize_at(.vars=vars(StemDens_mean:LeafWaterCont_sd),
.funs=list(min=~min(., na.rm=T),
q025=~quantile(., 0.25, na.rm=T),
q50=~quantile(., 0.50, na.rm=T),
q75=~quantile(., .75, na.rm=T),
max=~max(., na.rm=T),
mean=~mean(., na.rm=T),
sd=~sd(., na.rm=T))) %>%
gather(variable, value, -Rank_correct) %>%
separate(variable, sep="_", into=c("variable", "mean.sd", "stat")) %>%
mutate(stat=factor(stat, levels=c("min", "q025", "q50", "q75", "max", "mean", "sd"))) %>%
spread(key=stat, value=value) %>%
arrange(desc(Rank_correct))
knitr::kable(mysummary,
caption="Summary statistics for each trait, when summarized across species or genera") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
Calculate CWMs and CWVs for each plot
Merge vegetation layers, where necessary. Combine cover values across layers
#Ancillary function
# Combine cover accounting for layers
combine.cover <- function(x, datatype){
while (length(x)>1){
x[2] <- x[1]+(100-x[1])*x[2]/100
x <- x[-1]
}
return(x)
}
DT2 <- DT2 %>%
#temporary
filter(PlotObservationID %in% sample(unique(DT2$PlotObservationID), 1000, replace=F)) %>%
group_by(PlotObservationID, species, Rank_correct) %>%
summarize(Relative.cover=combine.cover(Relative.cover, cover_code)) %>%
ungroup()
Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading result, CWM is calculated ONLY for plots for which we have some abundance information. All plots where Ab_scale
=="pa" in ANY of the layers are therefore excluded.
# Tag plots where at least one layer only has p\a information
any_pa <- DT2 %>%
distinct(PlotObservationID, Ab_scale) %>%
group_by(PlotObservationID) %>%
summarize(any.pa=any(Ab_scale=="pa")) %>%
filter(any.pa==T) %>%
pull(PlotObservationID)
length(any_pa)
# Exclude plots above and merge species data table with traits
CWM0 <- DT2 %>%
filter(!PlotObservationID %in% any_pa) %>%
left_join(try.combined.means %>%
dplyr::rename(species=Taxon_name) %>%
dplyr::select(species, Rank_correct, ends_with("_mean")),
by=c("species", "Rank_correct"))
# Calculate CWM for each trait in each plot
CWM1 <- CWM0 %>%
group_by(PlotObservationID) %>%
summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
.funs = list(~weighted.mean(., Relative.cover, na.rm=T))) %>%
dplyr::select(PlotObservationID, order(colnames(.))) %>%
gather(key=variable, value=CWM, -PlotObservationID)
# Calculate coverage for each trait in each plot
CWM2 <- CWM0 %>%
mutate_at(.funs = list(~if_else(is.na(.),0,1) * Relative.cover),
.vars = vars(StemDens_mean:LeafWaterCont_mean)) %>%
group_by(PlotObservationID) %>%
summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
.funs = list(~sum(., na.rm=T))) %>%
dplyr::select(PlotObservationID, order(colnames(.))) %>%
gather(key=variable, value=trait.coverage, -PlotObservationID)
# Calculate CWV
variance2.fun <- function(trait, abu){
res <- as.double(NA)
#nam <- nam[!is.na(trait)]
abu <- abu[!is.na(trait)]
trait <- trait[!is.na(trait)]
abu <- abu/sum(abu)
if (length(trait)>1){
# you need more than 1 observation to calculate
# skewness and kurtosis
# for calculation see
# http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
m.trait <- weighted.mean(trait,abu)
res <- sum(abu*(trait-m.trait)^2)
}
res
}
CWM3 <- CWM0 %>%
group_by(PlotObservationID) %>%
summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
.funs = list(~variance2.fun(., Relative.cover))) %>%
dplyr::select(PlotObservationID, order(colnames(.))) %>%
gather(key=variable, value=CWV, -PlotObservationID)
## Calculate proportion of species having traits
CWM4 <- CWM0 %>%
group_by(PlotObservationID) %>%
summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
.funs = list(~sum(!is.na(.)))) %>%
dplyr::select(PlotObservationID, order(colnames(.))) %>%
gather(key=variable, value=n.sp.with.trait, -PlotObservationID)
# Join together
CWM <- CWM1 %>%
left_join(CWM2, by=c("PlotObservationID", "variable")) %>%
left_join(CWM3, by=c("PlotObservationID", "variable")) %>%
left_join(CWM4, by=c("PlotObservationID", "variable")) %>%
left_join(CWM0 %>%
group_by(PlotObservationID) %>%
summarize(sp.richness=n()), by=c("PlotObservationID")) %>%
mutate(prop.sp.with.trait=n.sp.with.trait/sp.richness) %>%
dplyr::select(PlotObservationID, variable, sp.richness, prop.sp.with.trait, trait.coverage, CWM, CWV) %>%
arrange(PlotObservationID)
Explore CWM output
knitr::kable(CWM %>%
filter(PlotObservationID %in%
sample(unique(DT2$PlotObservationID), 3, replace=F)),
caption="Community weighted means of 3 randomly selected plots") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
Calculate summary statistics for CWMs and CWVs
CWM.summary <- CWM %>%
rename(myvar=variable) %>%
group_by(myvar) %>%
summarize_at(.vars=vars(CWM:CWV),
.funs=list(num.NAs=~sum(is.na(.)),
min=~min(., na.rm=T),
q025=~quantile(., 0.25, na.rm=T),
q50=~quantile(., 0.50, na.rm=T),
q75=~quantile(., .75, na.rm=T),
max=~max(., na.rm=T),
mean=~mean(., na.rm=T),
sd=~sd(., na.rm=T))) %>%
gather(key=variable, value=value, -myvar) %>%
separate(variable, sep="_", into=c("metric", "stat")) %>%
mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>%
spread(key=stat, value=value) %>%
arrange(metric, myvar)
knitr::kable(CWM.summary,
caption="Summary of CWMs and CWVs across all plots") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
Export CWM and species mean trait values
save(try.combined.means, CWM, file="../_output/Traits_CWMs.RData")
##SessionInfo
sessionInfo()