From f51252471f91f02d71a9f34f6d5fb51c9bc5dbec Mon Sep 17 00:00:00 2001 From: Francesco Sabatini <francesco.sabatini@idiv.de> Date: Thu, 12 Mar 2020 16:26:33 +0100 Subject: [PATCH] Complement families in Backbone using TRY --- code/03_TaxonomicBackbone.Rmd | 56 ++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/code/03_TaxonomicBackbone.Rmd b/code/03_TaxonomicBackbone.Rmd index 58a3076..26eb62f 100644 --- a/code/03_TaxonomicBackbone.Rmd +++ b/code/03_TaxonomicBackbone.Rmd @@ -74,7 +74,7 @@ DT0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_species_test.csv", ## Import lists of species classified into groups These objects are defined in the appendix ```{r} -load("../derived/taxa_manual.RData") +load("../_derived/taxa_manual.RData") ``` @@ -916,6 +916,7 @@ save(tnrs.res.certain, tnrs.res.uncertain, file="../_derived/TNRS_submit/tnrs.it Many unmatched records do contain subspecies information which could not be retrieved in TNRS, although genus and species seem to be spelled correctly. Also, sometimes the mismatch derives from having the word 'species' or 'sp' at the end of the name. ```{r} tnrs.submit.iter2 <- data.frame(old=tnrs.res.uncertain$Name_submitted) %>% + mutate(old=as.character(old)) %>% mutate(new=old) # delete remaining records of mushroom species @@ -1199,10 +1200,17 @@ After iteration 3, there are still `r nrow(tnrs.ncbi.uncertain)` unresolved taxa Generate names list from `tnrs.ncbi.uncertain` to be matched against `The Plant List`, using `Taxonstand::TPL`. Add to this list, also all those species that in the first iterations did not return an accepted name. 
```{r, eval = F} -tpl.submit <- tnrs.res.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted) %>% - bind_rows(tnrs.res.iter2.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted)) %>% - bind_rows(tnrs.ncbi.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted)) %>% - bind_rows(tnrs.ncbi.uncertain %>% dplyr::select(Name_submitted)) %>% +tpl.submit <- tnrs.res.certain %>% + filter(is.na(Accepted_name)) %>% + dplyr::select(Name_submitted) %>% + bind_rows(tnrs.res.iter2.certain %>% + filter(is.na(Accepted_name)) %>% + dplyr::select(Name_submitted)) %>% + bind_rows(tnrs.ncbi.certain %>% + filter(is.na(Accepted_name)) %>% + dplyr::select(Name_submitted)) %>% + bind_rows(tnrs.ncbi.uncertain %>% + dplyr::select(Name_submitted)) %>% distinct() nrow(tpl.submit) write_csv(tpl.submit, path="../_derived/TPL/tpl.submit.csv") @@ -1213,8 +1221,8 @@ chunks <- split(indices, sort(indices%%99)) library(doParallel) library(parallel) -cl <- makeForkCluster(3, outfile="") -registerDoParallel(3) +cl <- makeForkCluster(5, outfile="") +registerDoParallel(cl) tpl.ncbi <- foreach(i=1:length(chunks), .combine=rbind) %dopar% { tmp <- (TPL(tpl.submit$Name_submitted[chunks[[i]]])) @@ -1370,7 +1378,6 @@ sum((is.na(Backbone$Family_correct))) ``` ### Resolve genera with missing family info with `TNRS` ```{r, eval=F} - Genera_submit <- Backbone %>% filter(is.na(Family_correct)) %>% dplyr::select(Genus_correct) %>% @@ -1413,8 +1420,31 @@ Backbone <- Backbone %>% #Records with missing family info sum(is.na(Backbone$Family_correct)) ``` +### Complement with data from `TRY 5.0` +Data from try were received by [Jens Kattge](jkattge@bgc-jena.mpg.de) on Jan 21, 2020. 
+```{r, warning=F, message=F} +# Species, Genus, Family from try +try.species <- read_csv( + "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv", + locale = locale(encoding = "latin1")) + +Backbone <- Backbone %>% + left_join(try.species %>% + dplyr::select(Genus_correct=Genus, family=Family) %>% + filter(family != "") %>% + # keep a single family per genus: a genus listed under several families in TRY would otherwise duplicate Backbone rows in the join + distinct(Genus_correct, .keep_all = TRUE), + by="Genus_correct") %>% + mutate(Family_correct=coalesce(Family_correct, family)) %>% + dplyr::select(-family) + +# Remaining records with missing family info +sum((is.na(Backbone$Family_correct))) +``` -### Complement with data from `The Catalogue of Life`. + + +### Complement with data from `The Catalogue of Life` ```{r, eval=F} #Download data from Catalogue of Life - 2019 download.file("http://www.catalogueoflife.org/DCA_Export/zip/archive-kingdom-plantae-bl3.zip", @@ -1459,9 +1489,7 @@ Backbone <- Backbone %>% filter(genus %in% Genera_missing$Genus_correct) %>% rename(Genus_correct=genus), by="Genus_correct") %>% - mutate(Family_correct=ifelse(is.na(Family_correct) & !is.na(family), - family, - Family_correct)) %>% + mutate(Family_correct=coalesce(Family_correct, family)) %>% dplyr::select(-family) #Records with missing family info @@ -1652,7 +1680,7 @@ table(Backbone$is_vascular_species, exclude=NULL) ```{r echo=F} knitr::kable(Backbone %>% sample_n(20), - caption="Example of Backbone (only 20 randomly selected taxa shown") %>% + caption="Example of Backbone (only 20 randomly selected taxa shown)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), latex_options = "basic", full_width = F, position = "center") @@ -1987,7 +2015,7 @@ algae_diatoms <- c("Sargassaceae", "Chordaceae", "Cocconeidaceae", "Desmarestiac #diatoms below "Thalassiosiraceae", "Cymbellaceae", "Naviculaceae","Bacillariaceae") -save(mushroom, vascular, lichens, lichen.genera, mosses, algae_diatoms, file="../derived/taxa_manual.RData") +save(mushroom, vascular, 
lichens, lichen.genera, mosses, algae_diatoms, file="../_derived/taxa_manual.RData") ``` -- GitLab