07_buildCWMs functioning. Excluded pa

8f8ab7f1 · Francesco Sabatini · a84d6ee1 · 8f8ab7f1
Commit 8f8ab7f1 authored 5 years ago by Francesco Sabatini
--- a/code/07_buildCWMs.Rmd
+++ b/code/07_buildCWMs.Rmd
@@ -28,8 +28,6 @@ library(kableExtra)
 library(stringr)
 ```
-!!!! NO INFO ON TRAIT X18 !!!!
 # Import data
 ```{r}
 load("../_output/DT_sPlot3.0.RData")
@@ -38,27 +36,16 @@ load("../_output/Backbone3.0.RData")
 Import TRY data
 ```{r, warning=F, message=F}
-try.species <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv", 
+try.species <- read_csv(
-                         locale = locale(encoding = "latin1")) 
+  "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv",
-try.allinfo <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv", 
+  locale = locale(encoding = "latin1")) 
-                        locale = locale(encoding = "latin1"), 
+try.allinfo <- read_csv(
+  "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv", 
+  locale = locale(encoding = "latin1"), 
                        col_types = paste0(c("dddccccc",rep("c", 84)), collapse=""))
-try.individuals0 <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv", 
+try.individuals0 <- read_csv(
-         locale = locale(encoding = "latin1"))
+  "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv", 
+  locale = locale(encoding = "latin1"))
-#Ancillary function to change species names to lower case
-firstup <- function(x) {
-  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
-  x
-}
-try.species <- try.species %>% 
-  mutate(Species=tolower(Species)) %>%
-  mutate(Species=firstup(Species)) 
-try.allinfo <- try.allinfo %>% 
-  mutate(Species=tolower(Species)) %>%
-  mutate(Species=firstup(Species)) 
 #calculate statistics of data from TRY
 n.indiv <- try.species %>% 
@@ -66,10 +53,26 @@ n.indiv <- try.species %>%
  summarize(n=n()) %>% 
  pull(n)
 ```
-There are `r nrow(try.species)` individual observations from `r nrow(try.species %>% distinct(Species))` distinct species in `r nrow(try.species %>% distinct(Genus))` distinct genera. 
+There are `r nrow(try.species)` individual observations from `r nrow(try.species %>% distinct(Species))` distinct (unresolved) species in `r nrow(try.species %>% distinct(Genus))` distinct (unresolved) genera. 
 \newline \newline
+```{r deprecated, echo=F, eval=F}
+# #Ancillary function to change species names to lower case
+# firstup <- function(x) {
+#   substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+#   x
+# }
+# 
+# try.species <- try.species %>% 
+#   mutate(Species=tolower(Species)) %>%
+#   mutate(Species=firstup(Species)) 
+# 
+# try.allinfo <- try.allinfo %>% 
+#   mutate(Species=tolower(Species)) %>%
+#   mutate(Species=firstup(Species)) 
+```
 ## Attach resolved names from Backbone
 ```{r}
@@ -81,8 +84,8 @@ try.species.names <- try.allinfo %>%
            by="Species") %>% 
  dplyr::select(Species, Name_short, Genus)
 ```
-After attaching resolved names, TRY data contains information on `r try.species.names %>% distinct(Name_short) %>% nrow()`.  
+After attaching resolved names, TRY data contains information on `r try.species.names %>% distinct(Name_short) %>% nrow()` species.  
-\newline
+\newline \newline
 Check for how many of the species in sPlot, trait information is available in TRY.
 ```{r}
 sPlot.species <- DT2 %>% 
@@ -91,9 +94,9 @@ sPlot.species <- DT2 %>%
 sPlot.in.TRY <- sPlot.species %>% 
  filter(species %in% (try.species.names %>% 
                                  distinct(Name_short) %>% 
-                                  pull(Name_short))) %>% 
+                                  pull(Name_short))) 
 ```
-Out of the `r nrow(sPlot.species)` standardizes species names in sPlot 3.0, `r nrow(sPlot.in.TRY)` (`r round(nrow(sPlot.in.TRY)/nrow(sPlot.species)*100,1)`%) also occur in TRY 5.0.
+Out of the `r nrow(sPlot.species)` standardizes species names in sPlot 3.0, `r nrow(sPlot.in.TRY)` (`r round(nrow(sPlot.in.TRY)/nrow(sPlot.species)*100,1)`%) also occur in TRY 5.0. This number does not account for matches at the genus level. 
@@ -121,7 +124,12 @@ trait.legend <- data.frame(full=try.allinfo %>%
                 "LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef",
                 "SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>% 
  ## Add SLA missing from allinfo file
-  bind_rows(data.frame(traitcode=11, full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)", short="SLA")) %>% 
+  bind_rows(data.frame(traitcode=11, 
+                       full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)",
+                       short="SLA")) %>% 
+  bind_rows(data.frame(traitcode=18, 
+                       full="Plant height (vegetative + generative)", 
+                       short="PlantHeight")) %>%
  arrange(traitcode) %>% 
  #create a column to mark traits for which gap filled data is available.
  mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0))
@@ -133,56 +141,70 @@ knitr::kable(trait.legend,
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")
 ```
+Use trait legend to change naming of `try.individuals0` data.frame of traits
+```{r}
+#create string to rename traits
+col.to <- trait.legend %>% 
+  filter(available==T) %>% 
+  pull(short) 
+col.from <- trait.legend %>% 
+  filter(available==T) %>% 
+  mutate(traitcode=paste0("X", traitcode))  %>% 
+  pull(traitcode) 
+try.individuals <- try.individuals0 %>% 
+              rename_at(col.from, .funs=function(x) col.to)
+```
-There are some traits with unexpected negative entries:
+Check traits at the individual level. There are some traits with unexpected negative entries:
 ```{r}
 try.species.names %>% 
-    dplyr::select(Species) %>% 
+    dplyr::select(Name_short) %>% 
-    bind_cols(try.individuals0 %>% 
+    bind_cols(try.individuals %>% 
-                  rename_at(col.from, .funs=function(x) col.to) %>% 
                  dplyr::select(-X1)) %>% 
-  gather(variable, value, -Species) %>% 
+  gather(variable, value, -Name_short) %>% 
  filter(value<0) %>% 
  group_by(variable) %>% 
  summarize(n=n())
 ```
-I asked Jens Kattget for clarifications.
+According to Jens Kattge, the entries for `Leaf.delta.15N` are legitimate, while in the other cases, it may be due to bad predictions. He suggested to delete these negative records.
+```{r}
+toexclude <- try.individuals %>% 
+  gather(variable, value, -X1) %>% 
+  filter(variable != "Leaf.delta.15N") %>% 
+  filter(value<0) %>% 
+  pull(X1)
+try.individuals <- try.species.names %>% 
+  dplyr::select(Name_short) %>% 
+  bind_cols(try.individuals) %>% 
+  filter(!X1 %in% toexclude) %>% 
+  dplyr::select(-X1)
+```
+This results in the exclusion of `r length(toexclude)` individuals. In this way the total number of species included in TRY reduces to `r try.individuals %>% distinct(Name_short) %>% nrow()`
 ## Calculate species and genus level trait means and sd
 ```{r}
-#create string to rename traits
+## Calculate species level trait means and sd. 
-col.to <- trait.legend %>% 
-  filter(available==T) %>% 
-  pull(short) 
-col.from <- trait.legend %>% 
-  filter(available==T) %>% 
-  mutate(traitcode=paste0("X", traitcode))  %>% 
-  pull(traitcode) 
-## Calculate species level trait means and sd.
+try.species.means <- try.individuals %>% 
-try.species.means <- try.species.names %>% 
+  group_by(Name_short) %>% 
-  dplyr::select(Species) %>% 
+  #Add a field to indivate the number of observation per taxon
-  bind_cols(try.individuals0 %>% 
-              rename_at(col.from, .funs=function(x) col.to) %>% 
-              dplyr::select(-X1)) %>% 
-  group_by(Species) %>% 
  left_join(x={.} %>% 
              summarize(n=n()), 
            y={.} %>% 
              summarize_at(.vars=vars(StemDens:LeafWaterCont ),
                           .funs=list(mean=~mean(.), sd=~sd(.))),
-            by="Species") %>% 
+            by="Name_short") %>% 
-  dplyr::select(Species, n, everything())
+  dplyr::select(Name_short, n, everything())
 ## Calculate genus level trait means and sd.
-try.genus.means <- try.species.names %>% 
+try.genus.means <- try.individuals %>% 
-  dplyr::select(Genus) %>% 
+  mutate(Genus=word(Name_short, 1)) %>% 
-  bind_cols(try.individuals0 %>% 
-              rename_at(col.from, .funs=function(x) col.to) %>% 
-              dplyr::select(-X1)) %>% 
  group_by(Genus) %>% 
  left_join(x={.} %>% 
              summarize(n=n()), 
@@ -191,17 +213,17 @@ try.genus.means <- try.species.names %>%
                           .funs=list(mean=~mean(.), sd=~sd(.))),
            by="Genus") %>% 
  dplyr::select(Genus, n, everything())
 ```
 The average number of observations per species and genus is `r round(mean(try.species.means$n),1)` and `r round(mean(try.genus.means$n),1)`, respectively. As many as `r sum(try.species.means$n==1)` species have only one observation (`r sum(try.genus.means$n==1)` at the genus level).  
-## Match taxa based on species, if available, or Genus (when rank_correct==Genus)
+## Match taxa based on species, if available, or Genus
+Combined the trait means based on species and genera into a single object, and check how many of these taxa match to the (resolved) species names in `DT2`.
 ```{r, warning=F}
 try.combined.means <- try.genus.means %>% 
  rename(Taxon_name=Genus) %>% 
  mutate(Rank_correct="genus") %>% 
  bind_rows(try.species.means %>% 
-              rename(Taxon_name=Species) %>% 
+              rename(Taxon_name=Name_short) %>% 
              mutate(Rank_correct="species")) %>% 
  dplyr::select(Taxon_name, Rank_correct, everything())
@@ -243,7 +265,6 @@ knitr::kable(mysummary,
 ```
 ## Calculate CWMs and CWVs for each plot
 Merge vegetation layers, where necessary. Combine cover values across layers
 ```{r}
@@ -257,19 +278,17 @@ combine.cover <- function(x, datatype){
  return(x)
 }
-DT2 <- DT2 %>% 
+DT2.comb <- DT2 %>% 
  #temporary
-  filter(PlotObservationID %in% sample(unique(DT2$PlotObservationID), 1000, replace=F)) %>% 
  group_by(PlotObservationID, species, Rank_correct) %>%
  summarize(Relative.cover=combine.cover(Relative.cover, cover_code)) %>%
  ungroup()
 ```
-Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading result, CWM is calculated ONLY for plots for which we have some abundance information. All plots where `Ab_scale`=="pa" in **ANY** of the layers are therefore excluded. 
+Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading results, CWM is calculated ONLY for plots for which we have some abundance information. All plots where `Ab_scale`=="pa" in **ANY** of the layers are therefore excluded. 
-```{r, cache=T,  cache.lazy=F}
+```{r, cache=T,  cache.lazy=F, warning=F}
-# Tag plots where at least one layer only has p\a information 
+# Tag plots where at least one layer has only p\a information 
 any_pa <- DT2 %>% 
  distinct(PlotObservationID, Ab_scale) %>% 
  group_by(PlotObservationID) %>% 
@@ -279,7 +298,7 @@ any_pa <- DT2 %>%
 length(any_pa)
 # Exclude plots above and merge species data table with traits
-CWM0 <- DT2 %>%
+CWM0 <- DT2.comb %>%
  filter(!PlotObservationID %in% any_pa) %>% 
  left_join(try.combined.means %>%
              dplyr::rename(species=Taxon_name) %>% 
@@ -304,17 +323,15 @@ CWM2 <- CWM0 %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key=variable, value=trait.coverage, -PlotObservationID)
 # Calculate CWV
+# Ancillary function
 variance2.fun <- function(trait, abu){
  res <- as.double(NA)
-  #nam <- nam[!is.na(trait)]
  abu <- abu[!is.na(trait)]
  trait <- trait[!is.na(trait)]
  abu <- abu/sum(abu)
  if (length(trait)>1){
-    # you need more than 1 observation to calculate
+    # you need more than 1 observation to calculate variance
-    # skewness and kurtosis
    # for calculation see 
    # http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
    m.trait <- weighted.mean(trait,abu)
@@ -392,12 +409,39 @@ knitr::kable(CWM.summary,
 ```
+Calculate summary statistics for trait coverage in plots
+```{r}
+coverage.summary <- CWM %>% 
+  separate(variable, sep="_", into=c("metric", "stat")) %>% 
+  filter(metric=="SLA") %>% 
+  summarize_at(.vars=vars(prop.sp.with.trait:trait.coverage),
+                .funs=list(num.NAs=~sum(is.na(.)), 
+                           min=~min(., na.rm=T),
+                           q025=~quantile(., 0.25, na.rm=T), 
+                           q50=~quantile(., 0.50, na.rm=T), 
+                           q75=~quantile(., .75, na.rm=T), 
+                           max=~max(., na.rm=T), 
+                           mean=~mean(., na.rm=T), 
+                           sd=~sd(., na.rm=T))) %>% 
+  gather(variable, value) %>% 
+  separate(variable, sep="_", into=c("metric", "stat")) %>% 
+  mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
+  spread(key=stat, value=value)
+```
+```{r, echo=F}
+knitr::kable(coverage.summary, 
+  caption="Summary of CWMs and CWVs across all plots") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
 ## Export CWM and species mean trait values
 ```{r}
 save(try.combined.means, CWM, file="../_output/Traits_CWMs.RData")
 ```
-##SessionInfo
+## SessionInfo
 ```{r}
 sessionInfo()
 ```