diff --git a/01_Extract_data_Project_31.Rmd b/01_Extract_data_Project_31.Rmd new file mode 100755 index 0000000000000000000000000000000000000000..de59e4fb2735f66450fd26b0cc9ca84514054ac2 --- /dev/null +++ b/01_Extract_data_Project_31.Rmd @@ -0,0 +1,363 @@ +--- +title: 'Project #31 - Data Extraction' +output: + html_document: default + toc: TRUE +always_allow_html: yes +--- + + +<center> +  +</center> + + + + +**Timestamp:** `r date()` +**Drafted:** Francesco Maria Sabatini +**Version:** 1.0 + + +This report documents the data extraction for **sPlot project proposal #31** - *The adaptive value of xylem physiology within and across global ecoregions* as requested by Daniel Laughlin and Jesse Robert Fleri + + + + +```{r results="hide", message=F} +library(tidyverse) + +library(knitr) +library(kableExtra) +library(viridis) +library(grid) +library(gridExtra) + +#library(vegan) +#library(xlsx) +#library(caret) +#library(foreign) +#library(raster) + +library(downloader) +library(sp) +library(sf) +library(rgdal) +library(rgeos) + +#save temporary files +write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron')) +write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron')) +#rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp") +``` + +Import data from sPlot 3.0 +```{r} +#Import sPlot data +load("/data/sPlot/releases/sPlot3.0/header_sPlot3.0.RData") +load("/data/sPlot/releases/sPlot3.0/DT_sPlot3.0.RData") +load("/data/sPlot/releases/sPlot3.0/Traits_CWMs_sPlot3.RData") +load("/data/sPlot/releases/sPlot3.0/SoilClim_sPlot3.RData") +``` +Import data on xylem traits, provided by Jesse Robert Fleri on October 26th, 2020. +```{r} +load("xylem_data.RData") +``` + +# 1 Extract plots from sPlot based on species with xylem traits +Extract all plots containing at least one species in the xylem list. +```{r, message=F, results=F, warning=F} +species_list <- xylem_data$Species +plot.sel <- DT2 %>% + filter(DT2$species %in% species_list) %>% + dplyr::select(PlotObservationID) %>% + distinct() %>% + pull(PlotObservationID) + +#exclude plots without geographic information +header.xylem <- header %>% + filter(PlotObservationID %in% plot.sel) %>% + filter(!is.na(Latitude)) +#refine plot.sel +plot.sel <- header.xylem$PlotObservationID + +DT.xylem <- DT2 %>% + filter(taxon_group %in% c("Vascular plant", "Unknown")) %>% + filter(PlotObservationID %in% plot.sel) + +``` + +Out of the `r length(species_list)` species in the sRoot list, `r sum(unique(DT2$species) %in% species_list)` species are present in sPlot, for a total of `r nrow(DT.xylem %>% filter(species %in% species_list))` plots., across `r length(plot.sel)` plots. + +# 2 Extract woody species +This is partial selection, as we don't have information on the growth form of all species in sPlot +```{r} +#load list of woody species, as provided to me by Alexander Zizka, within sPlot project #21 +load("../Project_21/_input/evowood_species_list.rda") + +#Select all woody species, as well as those without GrowthForm information +#extract relevant traits from TRY +woody_species_traits <- sPlot.traits %>% + dplyr::select(species, GrowthForm, is.tree.or.tall.shrub, n, + starts_with("StemDens"), + starts_with("Stem.cond.dens"), + starts_with("StemConduitDiameter"), + starts_with("LDCM"), + starts_with("SLA"), + starts_with("PlantHeight"), + starts_with("Wood"), + starts_with("SpecificRootLength_mean")) %>% + filter( (species %in% species_list) | + (species %in% synonyms$name_binomial) | + grepl(pattern = "tree|shrub", x = GrowthForm) | + is.tree.or.tall.shrub==T + ) + +table(woody_species_traits$GrowthForm, exclude=NULL) +# MEMO: some standardization needed in sPlot 3.0 +# Including the data from A.Zizka in the selection +# increases the number of selected species only marginally (from ~21k to ~22k) +``` + +```{r, echo=F} +knitr::kable(woody_species_traits %>% + sample_n(20), caption="Example of gap-filled trait data from TRY (20 randomly selected species") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") +``` + +Selected traits are: + +- StemDens - 4 - Stem specific density (SSD) or wood density (stem dry mass per stem fresh volume) (g/cm^3) +- SLA - 11 - Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA) +- PlantHeight - 18 - Plant height (vegetative + generative) +- StemDiam - 21 - Stem diameter (m) +- Stem.cond.dens - 169 - Stem conduit density (vessels and tracheids) (mm^-2) +- StemConduitDiameter - 281 - Stem conduit diameter (vessels, tracheids)_micro (m) +- Wood.vessel.length - 282 - Wood vessel element length; stem conduit (vessel and tracheids) element length_micro (m) +- WoodFiberLength - 289 - Wood fiber lengths_micro (m) +- SpecificRootLength - 1080 - Root length per root dry mass (specific root length, SRL) (cm/g) + +Codes correspond to those reported in [TRY](https://www.try-db.org/TryWeb/Home.php) + +```{r} +#subset DT.xylem to only retain woody species +DT.xylem <- DT.xylem %>% + filter(species %in% (woody_species_traits$species)) +nrow(DT.xylem) +``` + + +# 3 Calculate CWMs and trait coverage +Calculate CWM and trait coverage for each trait and each plot. Select plots having more than 80% coverage for at least one trait. +```{r, cache=T, cache.lazy=F} +# Merge species data table with traits +CWM.xylem0 <- DT.xylem %>% + as.tbl() %>% + dplyr::select(PlotObservationID, species, Relative.cover) %>% + left_join(xylem_data %>% + dplyr::rename(species=Species) %>% + dplyr::select(species, P50, Ks), + by="species") + +# Calculate CWM for each trait in each plot +CWM.xylem1 <- CWM.xylem0 %>% + group_by(PlotObservationID) %>% + summarize_at(.vars= vars(P50:Ks), + .funs = list(~weighted.mean(., Relative.cover, na.rm=T))) %>% + dplyr::select(PlotObservationID, order(colnames(.))) %>% + pivot_longer(-PlotObservationID, names_to="trait", values_to="trait.value") + +# Calculate coverage for each trait in each plot +CWM.xylem2 <- CWM.xylem0 %>% + mutate_at(.funs = list(~if_else(is.na(.),0,1) * Relative.cover), + .vars = vars(P50:Ks)) %>% + group_by(PlotObservationID) %>% + summarize_at(.vars= vars(P50:Ks), + .funs = list(~sum(., na.rm=T)), + .groups="drop") %>% + dplyr::select(PlotObservationID, order(colnames(.))) %>% + pivot_longer(-PlotObservationID, names_to="trait", values_to="trait.coverage") + +# Calculate CWV +variance2.fun <- function(trait, abu){ + res <- as.double(NA) + #nam <- nam[!is.na(trait)] + abu <- abu[!is.na(trait)] + trait <- trait[!is.na(trait)] + abu <- abu/sum(abu) + if (length(trait)>1){ + # you need more than 1 observation to calculate + # skewness and kurtosis + # for calculation see + # http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html + m.trait <- weighted.mean(trait,abu) + res <- sum(abu*(trait-m.trait)^2) + } + res +} + +CWM.xylem3 <- CWM.xylem0 %>% + group_by(PlotObservationID) %>% + summarize_at(.vars= vars(P50:Ks), + .funs = list(~variance2.fun(., Relative.cover))) %>% + dplyr::select(PlotObservationID, order(colnames(.))) %>% + pivot_longer(-PlotObservationID, names_to="trait", values_to="trait.variance") + +## Calculate proportion of species having traits +CWM.xylem4 <- CWM.xylem0 %>% + group_by(PlotObservationID) %>% + summarize_at(.vars= vars(P50:Ks), + .funs = list(~sum(!is.na(.)))) %>% + dplyr::select(PlotObservationID, order(colnames(.))) %>% + pivot_longer(-PlotObservationID, names_to="trait", values_to="trait.nspecies") + +# Join together +CWM.xylem <- CWM.xylem1 %>% + left_join(CWM.xylem2, by=c("PlotObservationID", "trait")) %>% + left_join(CWM.xylem3, by=c("PlotObservationID", "trait")) %>% + left_join(CWM.xylem4, by=c("PlotObservationID", "trait")) %>% + left_join(CWM.xylem0 %>% + group_by(PlotObservationID) %>% + summarize(sp.richness=n()), by=c("PlotObservationID"), + .groups="drop") %>% + mutate(trait.coverage.nspecies=trait.nspecies/sp.richness) %>% + #filter(trait.coverage>=0.8) %>% + arrange(PlotObservationID) +``` + + + +```{r, echo=F} +knitr::kable(CWM.xylem[1:20,], caption="Example of CWM data file") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") +``` + +```{r} +CWM.xylem08 <- CWM.xylem %>% + filter(trait.coverage>=0.8) +``` + +```{r, echo=F} +knitr::kable(CWM.xylem08 %>% + group_by(variable) %>% + summarize("num.plots"=n()), caption="Number of plots with >=.8 coverage per trait") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") +``` + + +```{r} +#Create list of plots having at least one trait with >=.8 coverage and extract header data + +#plot80perc <- (CWM.xylem %>% +# dplyr::select(PlotObservationID) %>% +# distinct())$PlotObservationID + +#DT.xylem08 <- DT.xylem %>% +# filter(PlotObservationID %in% header.xylem$PlotID) + +#CWM.xylem <- CWM.xylem %>% +# filter(PlotObservationID %in% header.xylem$PlotID) +``` + +Completeness of header data +```{r, echo=F} +knitr::kable(data.frame(Completeness_perc=colSums(!is.na(header.xylem))/ + nrow(header.xylem)*100)[-c(1,2),,drop=F], + caption="Header file - Columns present and % completeness") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") +``` +The process results in `r nrow(header.xylem)` plots selected, for a total of `r nrow(CWM.xylem)` trait * plot combinations. + + +Geographical distribution of plots +```{r, fig.align="center", fig.height=4, fig.width=6, message=F, warning=F} +countries <- map_data("world") +ggworld <- ggplot(countries, aes(x=long, y=lat, group = group)) + + geom_polygon(col=NA, lwd=3, fill = gray(0.9)) + + geom_point(data=header.xylem, aes(x=Longitude, y=Latitude, group=1), col="red", alpha=0.5, cex=0.7, shape="+") + + theme_bw() +ggworld +``` + + +Summarize data across data sets in sPlot, and create list of data custodians +```{r, warning=F, message=F} +db.out <- fread("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv") %>% + dplyr::select(`GIVD ID`, Custodian) +data.origin <- header.xylem %>% + group_by(`GIVD ID`) %>% + summarize(Num.plot=n()) %>% + left_join(db.out) +``` + +```{r, echo=F} +knitr::kable(data.origin, caption="Data Origin") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") +``` + +The data derive from `r nrow(data.origin)` datasets. + +# 4 Extract climate and soils data + +```{r} +soilclim.xylem <- soilclim %>% + filter(PlotObservationID %in% plot.sel) %>% + rename(Elevation=Elevation_median, -Elevation_q2.5, -Elevation_q97.5) +``` + +```{r, echo=F} +knitr::kable(soilclim.xylem %>% + sample_n(20), caption="Example of climatic and soil variables for 20 randomly selected plots. All values represent the median in a circle centered on the plot coordinates, having a radius equal to the plot's location uncertainty (capped to 50 km for computing reasons)") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") +``` + +The procedure used to obtain these environmental predictors is described [here](https://git.idiv.de/fs40gaho/splot3_build/-/raw/master/public/05_ExtractEnvironment.html?inline=false) (Click to download the report) + + +Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/) + +Codes: + +Bio1 = Annual Mean Temperature +Bio2 = Mean Diurnal Range +Bio3 = Isothermality +Bio4 = Temperature Seasonality +Bio5 = Max Temperature of Warmest Month +Bio6 = Min Temperature of Coldest Month +Bio7 = Temperature Annual Range +Bio8 = Mean Temperature of Wettest Quarter +Bio9 = Mean Temperature of Driest Quarter +Bio10 = Mean Temperature of Warmest Quarter +Bio11 = Mean Temperature of Coldest Quarter +Bio12 = Annual Precipitation +Bio13 = Precipitation of Wettest Month +Bio14 = Precipitation of Driest Month +Bio15 = Precipitation Seasonality +Bio16 = Precipitation of Wettest Quarter +Bio17 = Precipitation of Driest Quarter +Bio18 = Precipitation of Warmest Quarter +Bio19 = Precipitation of Coldest Quarter + +\newline \newline + +Soil variables (5 cm depth) derive from the [ISRIC dataset](https://www.isric.org/), downloaded at 250-m resolution + +CECSOL Cation Exchange capacity of soil +CLYPPT Clay mass fraction in % +CRFVOL Coarse fragments volumetric in % +ORCDRC Soil Organic Carbon Content in g/kg +PHIHOX Soil pH x 10 in H20 +SLTPPT Silt mass fraction in % +SNDPPT Sand mass fraction in % +BLDFIE Bulk Density (fine earth) in kg/m3 +P.ret.cat Phosphorous Retention - Categorical value, see [ISRIC 2011-06](https://www.isric.org/documents/document-type/isric-report-201106-global-distribution-soil-phosphorus-retention-potential) +\newline \newline + + +# 4 Export & SessionInfo + +```{r, eval=T} +save( woody_species_traits, DT.xylem, CWM.xylem, header.xylem, + file="_derived/Xylem_sPlot.RData" ) +sessionInfo() +```