06_buildDT.Rmd
title: "sPlot 3.0 - Build DT"
author: "Francesco Maria Sabatini"
date: "2/24/2020"
output: html_document
MEMO CHECK Field cover code! It seems to have species characters
Timestamp: r date()
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0
This report documents the construction of the DT table for sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.
# Chunk option applied to the whole report: echo = TRUE
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)
# Save temporary files: write a one-line .Renviron file into the directories
# currently named by the TMPDIR and R_USER environment variables
writeLines("TMPDIR = /data/sPlot/users/Francesco/_tmp",
           con = file.path(Sys.getenv("TMPDIR"), ".Renviron"))
writeLines("R_USER = /data/sPlot/users/Francesco/_tmp",
           con = file.path(Sys.getenv("R_USER"), ".Renviron"))
#rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
Search and replace unclosed quotation marks and escape them. Run in Linux terminal
# escape all double quotation marks. Run in Linux terminal
# sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv
Import data Table
DT table is the species x plot matrix, in long format.
# Read the species x plot table (long format, tab-delimited) and declare the
# type of every column explicitly instead of letting readr guess it.
DT0 <- readr::read_delim(
  "../sPlot_data_export/sPlot_3_0_2_species_test.csv",
  delim = "\t",
  # `col_types` is spelled out in full: do not rely on partial argument matching
  col_types = cols(
    PlotObservationID = col_double(),
    Taxonomy = col_character(),
    `Taxon group` = col_character(),
    `Taxon group ID` = col_double(),
    `Turboveg2 concept` = col_character(),
    `Matched concept` = col_character(),
    Match = col_double(),
    Layer = col_double(),
    `Cover %` = col_double(),
    `Cover code` = col_character(),
    x_ = col_double()
  )
)
# Number of distinct plots and of distinct matched taxon names in the raw table
nplots <- length(unique(DT0[["PlotObservationID"]]))
nspecies <- length(unique(DT0[["Matched concept"]]))
Species data include r nrow(DT0)
species * plot records, across r nplots
plots and including r nspecies
non-resolved species.
\newline
# Seed the random number generator so that the same plots are drawn on every
# run. set.seed() must be CALLED: assigning to a variable named `set.seed`
# leaves the generator unseeded.
set.seed(1984)
# Draw 10 plot IDs without replacement; only the first three are displayed below
sampled <- sample(unique(DT0$PlotObservationID), 10, replace = FALSE)
DT0 %>%
  filter(PlotObservationID %in% sampled[1:3]) %>%
  knitr::kable(caption = "Example of initial DT table (3 randomly selected plots shown)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = FALSE, position = "center")
Import taxonomic backbone
# Restore the objects saved in the backbone .RData file into the workspace.
# The code below uses `Backbone` (columns Name_sPlot_TRY, Name_short, `Taxon group`).
# NOTE(review): `lichen.genera` and `mushroom`, used further down, are not
# defined in the visible part of this script - presumably they are also
# restored from this file; confirm.
load("../_output/Backbone3.0.RData")
Match species names from DT0 to those in Backbone
# Attach to every record the backbone's short name and taxon group, matching
# the record's `Matched concept` against the backbone's Name_sPlot_TRY
DT1 <- left_join(
  DT0,
  Backbone %>%
    dplyr::select(`Matched concept` = Name_sPlot_TRY,
                  Name_short,
                  Taxongroup_BB = `Taxon group`),
  by = "Matched concept"
)
Explore name matching based on Backbone v1.2
Select species entries that changed after taxonomic standardization, as a way to check the backbone.
# Distinct name combinations for which the backbone name (Name_TNRS) is missing
# or differs from the first two words of the matched concept
name.check <- DT1 %>%
  dplyr::select(`Turboveg2 concept`:`Matched concept`, Name_TNRS = Name_short) %>%
  distinct() %>%
  mutate(Matched_short = word(`Matched concept`, start = 1L, end = 2L)) %>%
  filter(is.na(Name_TNRS) | Matched_short != Name_TNRS) %>%
  dplyr::select(-Matched_short) %>%
  arrange(Name_TNRS)
# Show a random subset of 30 of these names
name.check %>%
  sample_n(30) %>%
  knitr::kable(caption = "Check 30 random species names from DT after matching to backbone") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = FALSE, position = "center")
Check the most common species names from DT after matching to backbone
# Same check as above, but counting the records of each name combination so
# that the most frequent changed names can be listed first
name.check.freq <- DT1 %>%
  dplyr::select(`Turboveg2 concept`:`Matched concept`, Name_TNRS = Name_short) %>%
  group_by(`Turboveg2 concept`, `Matched concept`, Name_TNRS) %>%
  summarize(n = n()) %>%
  mutate(Matched_short = word(`Matched concept`, start = 1L, end = 2L)) %>%
  filter(is.na(Name_TNRS) | Matched_short != Name_TNRS) %>%
  dplyr::select(-Matched_short) %>%
  ungroup() %>%
  arrange(desc(n))
# Show the 40 combinations with the most records
name.check.freq %>%
  slice(1:40) %>%
  knitr::kable(caption = "Check 40 most common species names from DT after matching to backbone") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = FALSE, position = "center")
taxon group
Complete field Coalesce Taxon group
info from Backbone
# Frequency of taxon groups (NA included) before completing the field
table(DT1$`Taxon group`, exclude=NULL)
# Turn "Unknown" into NA in both sources, then fill the gaps in `Taxon group`
# with the value from the backbone; the helper column is dropped afterwards
DT1 <- DT1 %>%
  mutate(
    `Taxon group` = ifelse(`Taxon group` == "Unknown", NA, `Taxon group`),
    Taxongroup_BB = ifelse(Taxongroup_BB == "Unknown", NA, Taxongroup_BB),
    `Taxon group` = coalesce(`Taxon group`, Taxongroup_BB)
  ) %>%
  dplyr::select(-Taxongroup_BB)
# Frequency of taxon groups (NA included) after completing the field
table(DT1$`Taxon group`, exclude=NULL)
Cross-complement
# For every short name keep the first non-missing taxon group found in DT1 and
# use it to fill records of the same name whose taxon group is still missing
DT1 <- left_join(
  DT1,
  DT1 %>%
    filter(!is.na(Name_short), `Taxon group` != "Unknown") %>%
    dplyr::select(Name_short, TaxonGroup_compl = `Taxon group`) %>%
    distinct(Name_short, .keep_all = TRUE),
  by = "Name_short"
) %>%
  mutate(`Taxon group` = coalesce(`Taxon group`, TaxonGroup_compl)) %>%
  dplyr::select(-TaxonGroup_compl)
table(DT1$`Taxon group`, exclude=NULL)
Check species with conflicting Taxon group
information and fix manually.
# Attach genus info: the genus is the first word of the backbone short name.
# "Alga" and "Stonewort" are merged into the single level "Alga_Stonewort".
DT1 <- DT1 %>%
  left_join(Backbone %>%
              transmute(`Matched concept` = Name_sPlot_TRY,
                        Genus = word(Name_short, 1, 1)),
            by = "Matched concept") %>%
  mutate(`Taxon group` = fct_collapse(`Taxon group`,
                                      Alga_Stonewort = c("Alga", "Stonewort")))

# Manually fix some known problems: genera (or higher-level labels) whose
# taxon group is assigned by hand
mosses.gen <- c("Hypnum", "Brachytheciastrum",
                "Brachythecium","Hypnum", "Zygodon", "Oxymitra", "Bryophyta", "Musci", '\\\"Moos\\\"')
vascular.gen <- c("Polystichum", "Hypericum", "Peltaria", "Pancovia", "Calythrix", "Ripogonum",
                  "Notogrammitis", "Fuscospora", "Lophozonia", "Rostellularia",
                  "Hesperostipa", "Microsorium",
                  "Angiosperm","Dicotyledonae", "Spermatophy")
alga.gen <- c("Chara", "Characeae", "Tonina", "Nostoc", "Entermorpha", "Hydrocoleum" )

# The overrides are applied in sequence, so a genus present in several lists
# ends up with the group assigned last
DT1 <- DT1 %>%
  mutate(`Taxon group` = replace(`Taxon group`, Genus %in% mosses.gen, "Moss")) %>%
  mutate(`Taxon group` = replace(`Taxon group`, Genus %in% vascular.gen, "Vascular plant")) %>%
  mutate(`Taxon group` = replace(`Taxon group`, Genus %in% alga.gen, "Alga_Stonewort")) %>%
  mutate(`Taxon group` = replace(`Taxon group`, Genus %in% c(lichen.genera, "Lichenes"), "Lichen")) %>%
  mutate(`Taxon group` = replace(`Taxon group`, Genus %in% mushroom, "Mushroom"))

table(DT1$`Taxon group`, exclude=NULL)
# Check for conflicts in the attribution of genera to taxon groups: genera
# that appear with more than one (non-missing) taxon group, most groups first
conflict <- DT1 %>%
  filter(!is.na(Name_short), !is.na(`Taxon group`)) %>%
  distinct(Genus, `Taxon group`) %>%
  group_by(Genus) %>%
  summarize(n = n()) %>%
  filter(n > 1) %>%
  arrange(desc(n)) %>%
  pull(Genus)
Delete all records of fungi
# Recompute Genus as the first word of `Matched concept` (instead of the
# backbone short name), re-apply the manual taxon group overrides, label the
# remaining missing groups "Unknown" and drop every record classified as "Mushroom".
# Genus is deliberately kept in DT1: it is used in the check that follows.
DT1 <- left_join(
  DT1 %>% dplyr::select(-Genus),
  DT1 %>%
    distinct(`Matched concept`) %>%
    mutate(Genus = word(`Matched concept`, 1)),
  by = "Matched concept"
) %>%
  mutate(
    `Taxon group` = replace(`Taxon group`, Genus %in% mushroom, "Mushroom"),
    `Taxon group` = replace(`Taxon group`, Genus %in% lichen.genera, "Lichen"),
    `Taxon group` = replace(`Taxon group`, Genus %in% mosses.gen, "Moss"),
    `Taxon group` = replace(`Taxon group`, Genus %in% vascular.gen, "Vascular plant"),
    `Taxon group` = fct_explicit_na(`Taxon group`, "Unknown")
  ) %>%
  filter(`Taxon group` != "Mushroom")
table(DT1$`Taxon group`, exclude=NULL)
Check the most frequent species for which we don't have taxon group info
# Number of records per genus among the records whose taxon group is
# "Unknown"; the 40 genera with the most records are printed
DT1 %>%
  filter(`Taxon group` == "Unknown") %>%
  group_by(Genus) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>%
  head(40)
Calculate relative cover per layer per species in each plot
# Cover value used for each record: the value in `x_` for the listed cover
# codes, the value in `Cover %` for every other record
DT1 <- DT1 %>%
  mutate(tmp.cover = ifelse(`Cover code` %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF"),
                            x_, `Cover %`))
# Total cover of each layer of each plot, joined back to the records so that
# every record can be expressed as a share of its plot x layer total.
# NOTE(review): sum() is called without na.rm, so one missing cover value makes
# the total (and every relative cover) of that plot x layer NA - confirm intended.
DT1 <- DT1 %>%
  left_join(DT1 %>%
              group_by(PlotObservationID, Layer) %>%
              summarize(tot.cover = sum(tmp.cover)),
            by = c("PlotObservationID", "Layer")) %>%
  mutate(Relative.cover = tmp.cover / tot.cover)
Clean DT and export
# Keep only the fields that go into the exported table (helper columns such as
# Genus, tmp.cover and tot.cover are left out) and give them their final names
DT2 <- DT1 %>%
  dplyr::select(PlotObservationID,
                Name_short,
                `Turboveg2 concept`,
                `Taxon group`,
                Layer:x_,
                Relative.cover) %>%
  rename(species = Name_short,
         species_original = `Turboveg2 concept`,
         taxon_group = `Taxon group`,
         cover_perc = `Cover %`,
         cover_code = `Cover code`)
The output of the DT table contains r nrow(DT2)
records, over r length(unique(DT2$PlotObservationID))
plots. The total number of taxa is r length(unique(DT2$species_original))
and r length(unique(DT2$species))
, before and after standardization, respectively. Information on the Taxon group
is available for r DT2 %>% filter(taxon_group!="Unknown") %>% distinct(species) %>% nrow()
standardized species.
# Export the cleaned DT table: the object DT2 is written to an .RData file in
# the output folder (path relative to the working directory)
save(DT2, file = "../_output/DT_sPlot3.0.RData")
!!! ADD Explanation of fields!!!