Skip to content
Snippets Groups Projects
Commit 8f8ab7f1 authored by Francesco Sabatini's avatar Francesco Sabatini
Browse files

07_buildCWMs functioning. Excluded pa

parent a84d6ee1
No related branches found
No related tags found
No related merge requests found
...@@ -28,8 +28,6 @@ library(kableExtra) ...@@ -28,8 +28,6 @@ library(kableExtra)
library(stringr) library(stringr)
``` ```
!!!! NO INFO ON TRAIT X18 !!!!
# Import data # Import data
```{r} ```{r}
load("../_output/DT_sPlot3.0.RData") load("../_output/DT_sPlot3.0.RData")
...@@ -38,27 +36,16 @@ load("../_output/Backbone3.0.RData") ...@@ -38,27 +36,16 @@ load("../_output/Backbone3.0.RData")
Import TRY data Import TRY data
```{r, warning=F, message=F} ```{r, warning=F, message=F}
try.species <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv", try.species <- read_csv(
locale = locale(encoding = "latin1")) "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv",
try.allinfo <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv", locale = locale(encoding = "latin1"))
locale = locale(encoding = "latin1"), try.allinfo <- read_csv(
"../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv",
locale = locale(encoding = "latin1"),
col_types = paste0(c("dddccccc",rep("c", 84)), collapse="")) col_types = paste0(c("dddccccc",rep("c", 84)), collapse=""))
try.individuals0 <- read_csv("../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv", try.individuals0 <- read_csv(
locale = locale(encoding = "latin1")) "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv",
locale = locale(encoding = "latin1"))
#Ancillary function to change species names to lower case
firstup <- function(x) {
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
x
}
try.species <- try.species %>%
mutate(Species=tolower(Species)) %>%
mutate(Species=firstup(Species))
try.allinfo <- try.allinfo %>%
mutate(Species=tolower(Species)) %>%
mutate(Species=firstup(Species))
#calculate statistics of data from TRY #calculate statistics of data from TRY
n.indiv <- try.species %>% n.indiv <- try.species %>%
...@@ -66,10 +53,26 @@ n.indiv <- try.species %>% ...@@ -66,10 +53,26 @@ n.indiv <- try.species %>%
summarize(n=n()) %>% summarize(n=n()) %>%
pull(n) pull(n)
``` ```
There are `r nrow(try.species)` individual observations from `r nrow(try.species %>% distinct(Species))` distinct species in `r nrow(try.species %>% distinct(Genus))` distinct genera. There are `r nrow(try.species)` individual observations from `r nrow(try.species %>% distinct(Species))` distinct (unresolved) species in `r nrow(try.species %>% distinct(Genus))` distinct (unresolved) genera.
\newline \newline \newline \newline
```{r deprecated, echo=F, eval=F}
# #Ancillary function to change species names to lower case
# firstup <- function(x) {
# substr(x, 1, 1) <- toupper(substr(x, 1, 1))
# x
# }
#
# try.species <- try.species %>%
# mutate(Species=tolower(Species)) %>%
# mutate(Species=firstup(Species))
#
# try.allinfo <- try.allinfo %>%
# mutate(Species=tolower(Species)) %>%
# mutate(Species=firstup(Species))
```
## Attach resolved names from Backbone ## Attach resolved names from Backbone
```{r} ```{r}
...@@ -81,8 +84,8 @@ try.species.names <- try.allinfo %>% ...@@ -81,8 +84,8 @@ try.species.names <- try.allinfo %>%
by="Species") %>% by="Species") %>%
dplyr::select(Species, Name_short, Genus) dplyr::select(Species, Name_short, Genus)
``` ```
After attaching resolved names, TRY data contains information on `r try.species.names %>% distinct(Name_short) %>% nrow()`. After attaching resolved names, TRY data contains information on `r try.species.names %>% distinct(Name_short) %>% nrow()` species.
\newline \newline \newline
Check for how many of the species in sPlot, trait information is available in TRY. Check for how many of the species in sPlot, trait information is available in TRY.
```{r} ```{r}
sPlot.species <- DT2 %>% sPlot.species <- DT2 %>%
...@@ -91,9 +94,9 @@ sPlot.species <- DT2 %>% ...@@ -91,9 +94,9 @@ sPlot.species <- DT2 %>%
sPlot.in.TRY <- sPlot.species %>% sPlot.in.TRY <- sPlot.species %>%
filter(species %in% (try.species.names %>% filter(species %in% (try.species.names %>%
distinct(Name_short) %>% distinct(Name_short) %>%
pull(Name_short))) %>% pull(Name_short)))
``` ```
Out of the `r nrow(sPlot.species)` standardizes species names in sPlot 3.0, `r nrow(sPlot.in.TRY)` (`r round(nrow(sPlot.in.TRY)/nrow(sPlot.species)*100,1)`%) also occur in TRY 5.0. Out of the `r nrow(sPlot.species)` standardizes species names in sPlot 3.0, `r nrow(sPlot.in.TRY)` (`r round(nrow(sPlot.in.TRY)/nrow(sPlot.species)*100,1)`%) also occur in TRY 5.0. This number does not account for matches at the genus level.
...@@ -121,7 +124,12 @@ trait.legend <- data.frame(full=try.allinfo %>% ...@@ -121,7 +124,12 @@ trait.legend <- data.frame(full=try.allinfo %>%
"LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef", "LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef",
"SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>% "SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>%
## Add SLA missing from allinfo file ## Add SLA missing from allinfo file
bind_rows(data.frame(traitcode=11, full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)", short="SLA")) %>% bind_rows(data.frame(traitcode=11,
full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)",
short="SLA")) %>%
bind_rows(data.frame(traitcode=18,
full="Plant height (vegetative + generative)",
short="PlantHeight")) %>%
arrange(traitcode) %>% arrange(traitcode) %>%
#create a column to mark traits for which gap filled data is available. #create a column to mark traits for which gap filled data is available.
mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0)) mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0))
...@@ -133,56 +141,70 @@ knitr::kable(trait.legend, ...@@ -133,56 +141,70 @@ knitr::kable(trait.legend,
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center") full_width = F, position = "center")
``` ```
Use trait legend to change naming of `try.individuals0` data.frame of traits
```{r}
#create string to rename traits
col.to <- trait.legend %>%
filter(available==T) %>%
pull(short)
col.from <- trait.legend %>%
filter(available==T) %>%
mutate(traitcode=paste0("X", traitcode)) %>%
pull(traitcode)
try.individuals <- try.individuals0 %>%
rename_at(col.from, .funs=function(x) col.to)
```
There are some traits with unexpected negative entries:
Check traits at the individual level. There are some traits with unexpected negative entries:
```{r} ```{r}
try.species.names %>% try.species.names %>%
dplyr::select(Species) %>% dplyr::select(Name_short) %>%
bind_cols(try.individuals0 %>% bind_cols(try.individuals %>%
rename_at(col.from, .funs=function(x) col.to) %>%
dplyr::select(-X1)) %>% dplyr::select(-X1)) %>%
gather(variable, value, -Species) %>% gather(variable, value, -Name_short) %>%
filter(value<0) %>% filter(value<0) %>%
group_by(variable) %>% group_by(variable) %>%
summarize(n=n()) summarize(n=n())
``` ```
I asked Jens Kattget for clarifications. According to Jens Kattge, the entries for `Leaf.delta.15N` are legitimate, while in the other cases, it may be due to bad predictions. He suggested to delete these negative records.
```{r}
toexclude <- try.individuals %>%
gather(variable, value, -X1) %>%
filter(variable != "Leaf.delta.15N") %>%
filter(value<0) %>%
pull(X1)
try.individuals <- try.species.names %>%
dplyr::select(Name_short) %>%
bind_cols(try.individuals) %>%
filter(!X1 %in% toexclude) %>%
dplyr::select(-X1)
```
This results in the exclusion of `r length(toexclude)` individuals. In this way the total number of species included in TRY reduces to `r try.individuals %>% distinct(Name_short) %>% nrow()`
## Calculate species and genus level trait means and sd ## Calculate species and genus level trait means and sd
```{r} ```{r}
#create string to rename traits ## Calculate species level trait means and sd.
col.to <- trait.legend %>%
filter(available==T) %>%
pull(short)
col.from <- trait.legend %>%
filter(available==T) %>%
mutate(traitcode=paste0("X", traitcode)) %>%
pull(traitcode)
## Calculate species level trait means and sd. try.species.means <- try.individuals %>%
try.species.means <- try.species.names %>% group_by(Name_short) %>%
dplyr::select(Species) %>% #Add a field to indivate the number of observation per taxon
bind_cols(try.individuals0 %>%
rename_at(col.from, .funs=function(x) col.to) %>%
dplyr::select(-X1)) %>%
group_by(Species) %>%
left_join(x={.} %>% left_join(x={.} %>%
summarize(n=n()), summarize(n=n()),
y={.} %>% y={.} %>%
summarize_at(.vars=vars(StemDens:LeafWaterCont ), summarize_at(.vars=vars(StemDens:LeafWaterCont ),
.funs=list(mean=~mean(.), sd=~sd(.))), .funs=list(mean=~mean(.), sd=~sd(.))),
by="Species") %>% by="Name_short") %>%
dplyr::select(Species, n, everything()) dplyr::select(Name_short, n, everything())
## Calculate genus level trait means and sd. ## Calculate genus level trait means and sd.
try.genus.means <- try.species.names %>% try.genus.means <- try.individuals %>%
dplyr::select(Genus) %>% mutate(Genus=word(Name_short, 1)) %>%
bind_cols(try.individuals0 %>%
rename_at(col.from, .funs=function(x) col.to) %>%
dplyr::select(-X1)) %>%
group_by(Genus) %>% group_by(Genus) %>%
left_join(x={.} %>% left_join(x={.} %>%
summarize(n=n()), summarize(n=n()),
...@@ -191,17 +213,17 @@ try.genus.means <- try.species.names %>% ...@@ -191,17 +213,17 @@ try.genus.means <- try.species.names %>%
.funs=list(mean=~mean(.), sd=~sd(.))), .funs=list(mean=~mean(.), sd=~sd(.))),
by="Genus") %>% by="Genus") %>%
dplyr::select(Genus, n, everything()) dplyr::select(Genus, n, everything())
``` ```
The average number of observations per species and genus is `r round(mean(try.species.means$n),1)` and `r round(mean(try.genus.means$n),1)`, respectively. As many as `r sum(try.species.means$n==1)` species have only one observation (`r sum(try.genus.means$n==1)` at the genus level). The average number of observations per species and genus is `r round(mean(try.species.means$n),1)` and `r round(mean(try.genus.means$n),1)`, respectively. As many as `r sum(try.species.means$n==1)` species have only one observation (`r sum(try.genus.means$n==1)` at the genus level).
## Match taxa based on species, if available, or Genus (when rank_correct==Genus) ## Match taxa based on species, if available, or Genus
Combined the trait means based on species and genera into a single object, and check how many of these taxa match to the (resolved) species names in `DT2`.
```{r, warning=F} ```{r, warning=F}
try.combined.means <- try.genus.means %>% try.combined.means <- try.genus.means %>%
rename(Taxon_name=Genus) %>% rename(Taxon_name=Genus) %>%
mutate(Rank_correct="genus") %>% mutate(Rank_correct="genus") %>%
bind_rows(try.species.means %>% bind_rows(try.species.means %>%
rename(Taxon_name=Species) %>% rename(Taxon_name=Name_short) %>%
mutate(Rank_correct="species")) %>% mutate(Rank_correct="species")) %>%
dplyr::select(Taxon_name, Rank_correct, everything()) dplyr::select(Taxon_name, Rank_correct, everything())
...@@ -243,7 +265,6 @@ knitr::kable(mysummary, ...@@ -243,7 +265,6 @@ knitr::kable(mysummary,
``` ```
## Calculate CWMs and CWVs for each plot ## Calculate CWMs and CWVs for each plot
Merge vegetation layers, where necessary. Combine cover values across layers Merge vegetation layers, where necessary. Combine cover values across layers
```{r} ```{r}
...@@ -257,19 +278,17 @@ combine.cover <- function(x, datatype){ ...@@ -257,19 +278,17 @@ combine.cover <- function(x, datatype){
return(x) return(x)
} }
DT2 <- DT2 %>% DT2.comb <- DT2 %>%
#temporary #temporary
filter(PlotObservationID %in% sample(unique(DT2$PlotObservationID), 1000, replace=F)) %>%
group_by(PlotObservationID, species, Rank_correct) %>% group_by(PlotObservationID, species, Rank_correct) %>%
summarize(Relative.cover=combine.cover(Relative.cover, cover_code)) %>% summarize(Relative.cover=combine.cover(Relative.cover, cover_code)) %>%
ungroup() ungroup()
``` ```
Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading result, CWM is calculated ONLY for plots for which we have some abundance information. All plots where `Ab_scale`=="pa" in **ANY** of the layers are therefore excluded. Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading results, CWM is calculated ONLY for plots for which we have some abundance information. All plots where `Ab_scale`=="pa" in **ANY** of the layers are therefore excluded.
```{r, cache=T, cache.lazy=F} ```{r, cache=T, cache.lazy=F, warning=F}
# Tag plots where at least one layer only has p\a information # Tag plots where at least one layer has only p\a information
any_pa <- DT2 %>% any_pa <- DT2 %>%
distinct(PlotObservationID, Ab_scale) %>% distinct(PlotObservationID, Ab_scale) %>%
group_by(PlotObservationID) %>% group_by(PlotObservationID) %>%
...@@ -279,7 +298,7 @@ any_pa <- DT2 %>% ...@@ -279,7 +298,7 @@ any_pa <- DT2 %>%
length(any_pa) length(any_pa)
# Exclude plots above and merge species data table with traits # Exclude plots above and merge species data table with traits
CWM0 <- DT2 %>% CWM0 <- DT2.comb %>%
filter(!PlotObservationID %in% any_pa) %>% filter(!PlotObservationID %in% any_pa) %>%
left_join(try.combined.means %>% left_join(try.combined.means %>%
dplyr::rename(species=Taxon_name) %>% dplyr::rename(species=Taxon_name) %>%
...@@ -304,17 +323,15 @@ CWM2 <- CWM0 %>% ...@@ -304,17 +323,15 @@ CWM2 <- CWM0 %>%
dplyr::select(PlotObservationID, order(colnames(.))) %>% dplyr::select(PlotObservationID, order(colnames(.))) %>%
gather(key=variable, value=trait.coverage, -PlotObservationID) gather(key=variable, value=trait.coverage, -PlotObservationID)
# Calculate CWV # Calculate CWV
# Ancillary function
variance2.fun <- function(trait, abu){ variance2.fun <- function(trait, abu){
res <- as.double(NA) res <- as.double(NA)
#nam <- nam[!is.na(trait)]
abu <- abu[!is.na(trait)] abu <- abu[!is.na(trait)]
trait <- trait[!is.na(trait)] trait <- trait[!is.na(trait)]
abu <- abu/sum(abu) abu <- abu/sum(abu)
if (length(trait)>1){ if (length(trait)>1){
# you need more than 1 observation to calculate # you need more than 1 observation to calculate variance
# skewness and kurtosis
# for calculation see # for calculation see
# http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html # http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
m.trait <- weighted.mean(trait,abu) m.trait <- weighted.mean(trait,abu)
...@@ -392,12 +409,39 @@ knitr::kable(CWM.summary, ...@@ -392,12 +409,39 @@ knitr::kable(CWM.summary,
``` ```
Calculate summary statistics for trait coverage in plots
```{r}
coverage.summary <- CWM %>%
separate(variable, sep="_", into=c("metric", "stat")) %>%
filter(metric=="SLA") %>%
summarize_at(.vars=vars(prop.sp.with.trait:trait.coverage),
.funs=list(num.NAs=~sum(is.na(.)),
min=~min(., na.rm=T),
q025=~quantile(., 0.25, na.rm=T),
q50=~quantile(., 0.50, na.rm=T),
q75=~quantile(., .75, na.rm=T),
max=~max(., na.rm=T),
mean=~mean(., na.rm=T),
sd=~sd(., na.rm=T))) %>%
gather(variable, value) %>%
separate(variable, sep="_", into=c("metric", "stat")) %>%
mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>%
spread(key=stat, value=value)
```
```{r, echo=F}
knitr::kable(coverage.summary,
caption="Summary of CWMs and CWVs across all plots") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
```
## Export CWM and species mean trait values ## Export CWM and species mean trait values
```{r} ```{r}
save(try.combined.means, CWM, file="../_output/Traits_CWMs.RData") save(try.combined.means, CWM, file="../_output/Traits_CWMs.RData")
``` ```
##SessionInfo ## SessionInfo
```{r} ```{r}
sessionInfo() sessionInfo()
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment