Skip to content
Snippets Groups Projects
Commit f5125247 authored by Francesco Sabatini's avatar Francesco Sabatini
Browse files

Complement families in Backbone using TRY

parent 96ac9040
No related branches found
No related tags found
No related merge requests found
...@@ -74,7 +74,7 @@ DT0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_species_test.csv", ...@@ -74,7 +74,7 @@ DT0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_species_test.csv",
## Import lists of species classified into groups ## Import lists of species classified into groups
These objects are defined in the appendix These objects are defined in the appendix
```{r} ```{r}
load("../derived/taxa_manual.RData") load("../_derived/taxa_manual.RData")
``` ```
...@@ -916,6 +916,7 @@ save(tnrs.res.certain, tnrs.res.uncertain, file="../_derived/TNRS_submit/tnrs.it ...@@ -916,6 +916,7 @@ save(tnrs.res.certain, tnrs.res.uncertain, file="../_derived/TNRS_submit/tnrs.it
Many unmatched records do contain subspecies information which could not be retrieved in TNRS, although genus and species seem to be spelled correctly. Also, sometimes the mismatch derives from having the word 'species' or 'sp' at the end of the name. Many unmatched records do contain subspecies information which could not be retrieved in TNRS, although genus and species seem to be spelled correctly. Also, sometimes the mismatch derives from having the word 'species' or 'sp' at the end of the name.
```{r} ```{r}
tnrs.submit.iter2 <- data.frame(old=tnrs.res.uncertain$Name_submitted) %>% tnrs.submit.iter2 <- data.frame(old=tnrs.res.uncertain$Name_submitted) %>%
mutate(old=as.character(old)) %>%
mutate(new=old) mutate(new=old)
# delete remaining records of mushroom species # delete remaining records of mushroom species
...@@ -1199,10 +1200,17 @@ After iteration 3, there are still `r nrow(tnrs.ncbi.uncertain)` unresolved taxa ...@@ -1199,10 +1200,17 @@ After iteration 3, there are still `r nrow(tnrs.ncbi.uncertain)` unresolved taxa
Generate names list from `tnrs.ncbi.uncertain` to be matched against `The Plant List`, using `Taxonstand::TPL`. Add to this list, also all those species that in the first iterations did not return an accepted name. Generate names list from `tnrs.ncbi.uncertain` to be matched against `The Plant List`, using `Taxonstand::TPL`. Add to this list, also all those species that in the first iterations did not return an accepted name.
```{r, eval = F} ```{r, eval = F}
tpl.submit <- tnrs.res.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted) %>% tpl.submit <- tnrs.res.certain %>%
bind_rows(tnrs.res.iter2.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted)) %>% filter(is.na(Accepted_name)) %>%
bind_rows(tnrs.ncbi.certain %>% filter(is.na(Accepted_name)) %>% dplyr::select(Name_submitted)) %>% dplyr::select(Name_submitted) %>%
bind_rows(tnrs.ncbi.uncertain %>% dplyr::select(Name_submitted)) %>% bind_rows(tnrs.res.iter2.certain %>%
filter(is.na(Accepted_name)) %>%
dplyr::select(Name_submitted)) %>%
bind_rows(tnrs.ncbi.certain %>%
filter(is.na(Accepted_name)) %>%
dplyr::select(Name_submitted)) %>%
bind_rows(tnrs.ncbi.uncertain %>%
dplyr::select(Name_submitted)) %>%
distinct() distinct()
nrow(tpl.submit) nrow(tpl.submit)
write_csv(tpl.submit, path="../_derived/TPL/tpl.submit.csv") write_csv(tpl.submit, path="../_derived/TPL/tpl.submit.csv")
...@@ -1213,8 +1221,8 @@ chunks <- split(indices, sort(indices%%99)) ...@@ -1213,8 +1221,8 @@ chunks <- split(indices, sort(indices%%99))
library(doParallel) library(doParallel)
library(parallel) library(parallel)
cl <- makeForkCluster(3, outfile="") cl <- makeForkCluster(5, outfile="")
registerDoParallel(3) registerDoParallel(cl)
tpl.ncbi <- foreach(i=1:length(chunks), .combine=rbind) %dopar% { tpl.ncbi <- foreach(i=1:length(chunks), .combine=rbind) %dopar% {
tmp <- (TPL(tpl.submit$Name_submitted[chunks[[i]]])) tmp <- (TPL(tpl.submit$Name_submitted[chunks[[i]]]))
...@@ -1370,7 +1378,6 @@ sum((is.na(Backbone$Family_correct))) ...@@ -1370,7 +1378,6 @@ sum((is.na(Backbone$Family_correct)))
``` ```
### Resolve genera with missing family info with `TNRS` ### Resolve genera with missing family info with `TNRS`
```{r, eval=F} ```{r, eval=F}
Genera_submit <- Backbone %>% Genera_submit <- Backbone %>%
filter(is.na(Family_correct)) %>% filter(is.na(Family_correct)) %>%
dplyr::select(Genus_correct) %>% dplyr::select(Genus_correct) %>%
...@@ -1413,8 +1420,31 @@ Backbone <- Backbone %>% ...@@ -1413,8 +1420,31 @@ Backbone <- Backbone %>%
#Records with missing family info #Records with missing family info
sum(is.na(Backbone$Family_correct)) sum(is.na(Backbone$Family_correct))
``` ```
### Complement with data from `TRY 5.0`
Data from TRY were received from [Jens Kattge](mailto:jkattge@bgc-jena.mpg.de) on Jan 21, 2020.
```{r, warning=F, message=F}
# Species / Genus / Family hierarchy shipped with the TRY gap-filled data
try.species <- read_csv(
  "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv",
  locale = locale(encoding = "latin1"))
# Complement Family_correct in Backbone with the family reported in TRY for
# the same genus; values already present in Family_correct are never
# overwritten (coalesce keeps the first non-missing value).
Backbone <- Backbone %>%
  left_join(try.species %>%
              dplyr::select(Genus_correct=Genus, family=Family) %>%
              # discard genera without a usable family
              filter(!is.na(family), family != "") %>%
              # Keep exactly one family per genus (the first one listed).
              # A genus assigned to several families in TRY would otherwise
              # match several rows and duplicate Backbone records in the join.
              distinct(Genus_correct, .keep_all=TRUE),
            by="Genus_correct") %>%
  mutate(Family_correct=coalesce(Family_correct, family)) %>%
  dplyr::select(-family)
# Remaining records with missing family info
sum(is.na(Backbone$Family_correct))
```
### Complement with data from `The Catalogue of Life`.
### Complement with data from `The Catalogue of Life`
```{r, eval=F} ```{r, eval=F}
#Download data from Catalogue of Life - 2019 #Download data from Catalogue of Life - 2019
download.file("http://www.catalogueoflife.org/DCA_Export/zip/archive-kingdom-plantae-bl3.zip", download.file("http://www.catalogueoflife.org/DCA_Export/zip/archive-kingdom-plantae-bl3.zip",
...@@ -1459,9 +1489,7 @@ Backbone <- Backbone %>% ...@@ -1459,9 +1489,7 @@ Backbone <- Backbone %>%
filter(genus %in% Genera_missing$Genus_correct) %>% filter(genus %in% Genera_missing$Genus_correct) %>%
rename(Genus_correct=genus), rename(Genus_correct=genus),
by="Genus_correct") %>% by="Genus_correct") %>%
mutate(Family_correct=ifelse(is.na(Family_correct) & !is.na(family), mutate(Family_correct=coalesce(Family_correct, family)) %>%
family,
Family_correct)) %>%
dplyr::select(-family) dplyr::select(-family)
#Records with missing family info #Records with missing family info
...@@ -1652,7 +1680,7 @@ table(Backbone$is_vascular_species, exclude=NULL) ...@@ -1652,7 +1680,7 @@ table(Backbone$is_vascular_species, exclude=NULL)
```{r echo=F} ```{r echo=F}
knitr::kable(Backbone %>% knitr::kable(Backbone %>%
sample_n(20), sample_n(20),
caption="Example of Backbone (only 20 randomly selected taxa shown") %>% caption="Example of Backbone (only 20 randomly selected taxa shown)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
latex_options = "basic", latex_options = "basic",
full_width = F, position = "center") full_width = F, position = "center")
...@@ -1987,7 +2015,7 @@ algae_diatoms <- c("Sargassaceae", "Chordaceae", "Cocconeidaceae", "Desmarestiac ...@@ -1987,7 +2015,7 @@ algae_diatoms <- c("Sargassaceae", "Chordaceae", "Cocconeidaceae", "Desmarestiac
#diatoms below #diatoms below
"Thalassiosiraceae", "Cymbellaceae", "Naviculaceae","Bacillariaceae") "Thalassiosiraceae", "Cymbellaceae", "Naviculaceae","Bacillariaceae")
save(mushroom, vascular, lichens, lichen.genera, mosses, algae_diatoms, file="../derived/taxa_manual.RData") save(mushroom, vascular, lichens, lichen.genera, mosses, algae_diatoms, file="../_derived/taxa_manual.RData")
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment