diff --git a/code/03_TaxonomicBackbone.Rmd b/code/03_TaxonomicBackbone.Rmd index 58a30762ffead76feabc4665da96b2bae65debfd..26eb62f9382e6a2df211b80ec358611e619deb7d 100644 --- a/code/03_TaxonomicBackbone.Rmd +++ b/code/03_TaxonomicBackbone.Rmd @@ -74,7 +74,7 @@ DT0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_species_test.csv", ## Import lists of species classified into groups These objects are defined in the appendix ```{r} -load("../derived/taxa_manual.RData") +load("../_derived/taxa_manual.RData") ``` @@ -916,6 +916,7 @@ save(tnrs.res.certain, tnrs.res.uncertain, file="../_derived/TNRS_submit/tnrs.it Many unmatched records do contain subspecies information which could not be retrieved in TNRS, although genus and species seem to be spelled correctly. Also, sometimes the mismatch derives from having the word 'species' or 'sp' at the end of the name. ```{r} tnrs.submit.iter2 <- data.frame(old=tnrs.res.uncertain$Name_submitted) %>% + mutate(old=as.character(old)) %>% mutate(new=old) # delete remaining records of mushroom species @@ -1199,10 +1200,17 @@ After iteration 3, there are still `r nrow(tnrs.ncbi.uncertain)` unresolved taxa Generate names list from `tnrs.ncbi.uncertain` to be matched against `The Plant List`, using `Taxonstand::TPL`. Add to this list, also all those species that in the first iterations did not return an accepted name. 
```{r, eval = F} -tpl.submit <- tnrs.res.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted) %>% - bind_rows(tnrs.res.iter2.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted)) %>% - bind_rows(tnrs.ncbi.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted)) %>% - bind_rows(tnrs.ncbi.uncertain %>% dplyr::select(Name_submitted)) %>% +tpl.submit <- tnrs.res.certain %>% + filter(is.na(Accepted_name)) %>% + dplyr::select(Name_submitted) %>% + bind_rows(tnrs.res.iter2.certain %>% + filter(is.na(Accepted_name)) %>% + dplyr::select(Name_submitted)) %>% + bind_rows(tnrs.ncbi.certain %>% + filter(is.na(Accepted_name)) %>% + dplyr::select(Name_submitted)) %>% + bind_rows(tnrs.ncbi.uncertain %>% + dplyr::select(Name_submitted)) %>% distinct() nrow(tpl.submit) write_csv(tpl.submit, path="../_derived/TPL/tpl.submit.csv") @@ -1213,8 +1221,8 @@ chunks <- split(indices, sort(indices%%99)) library(doParallel) library(parallel) -cl <- makeForkCluster(3, outfile="") -registerDoParallel(3) +cl <- makeForkCluster(5, outfile="") +registerDoParallel(cl) tpl.ncbi <- foreach(i=1:length(chunks), .combine=rbind) %dopar% { tmp <- (TPL(tpl.submit$Name_submitted[chunks[[i]]])) @@ -1370,7 +1378,6 @@ sum((is.na(Backbone$Family_correct))) ``` ### Resolve genera with missing family info with `TNRS` ```{r, eval=F} - Genera_submit <- Backbone %>% filter(is.na(Family_correct)) %>% dplyr::select(Genus_correct) %>% @@ -1413,8 +1420,31 @@ Backbone <- Backbone %>% #Records with missing family info sum(is.na(Backbone$Family_correct)) ``` +### Complement with data from `TRY 5.0` +Data from TRY were received from [Jens Kattge](mailto:jkattge@bgc-jena.mpg.de) on Jan 21, 2020. 
+```{r, warning=FALSE, message=FALSE} +# Species, Genus, Family hierarchy from TRY 5.0 (file read as latin1) +try.species <- read_csv( + "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv", + locale = locale(encoding = "latin1")) + +Backbone <- Backbone %>% + left_join(try.species %>% + dplyr::select(Genus_correct=Genus, family=Family) %>% + distinct() %>% + filter(family != "") %>% + distinct(Genus_correct, .keep_all = TRUE), # keep one family per genus so the join cannot duplicate Backbone rows + by="Genus_correct") %>% + mutate(Family_correct=coalesce(Family_correct, family)) %>% + dplyr::select(-family) + +# Remaining records with missing family info +sum(is.na(Backbone$Family_correct)) +``` -### Complement with data from `The Catalogue of Life`. + + +### Complement with data from `The Catalogue of Life` ```{r, eval=F} #Download data from Catalogue of Life - 2019 download.file("http://www.catalogueoflife.org/DCA_Export/zip/archive-kingdom-plantae-bl3.zip", @@ -1459,9 +1489,7 @@ Backbone <- Backbone %>% filter(genus %in% Genera_missing$Genus_correct) %>% rename(Genus_correct=genus), by="Genus_correct") %>% - mutate(Family_correct=ifelse(is.na(Family_correct) & !is.na(family), - family, - Family_correct)) %>% + mutate(Family_correct=coalesce(Family_correct, family)) %>% dplyr::select(-family) #Records with missing family info @@ -1652,7 +1680,7 @@ table(Backbone$is_vascular_species, exclude=NULL) ```{r echo=F} knitr::kable(Backbone %>% sample_n(20), - caption="Example of Backbone (only 20 randomly selected taxa shown") %>% + caption="Example of Backbone (only 20 randomly selected taxa shown)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), latex_options = "basic", full_width = F, position = "center") @@ -1987,7 +2015,7 @@ algae_diatoms <- c("Sargassaceae", "Chordaceae", "Cocconeidaceae", "Desmarestiac #diatoms below "Thalassiosiraceae", "Cymbellaceae", "Naviculaceae","Bacillariaceae") -save(mushroom, vascular, lichens, lichen.genera, mosses, algae_diatoms, file="../derived/taxa_manual.RData") +save(mushroom, vascular, 
lichens, lichen.genera, mosses, algae_diatoms, file="../_derived/taxa_manual.RData") ```