From 3a47b40e269713bbae67cc6d383fdf395f82c219 Mon Sep 17 00:00:00 2001 From: Francesco Sabatini <francesco.sabatini@idiv.de> Date: Sat, 28 Nov 2020 00:23:39 +0100 Subject: [PATCH] Aligned 05 and 06 --- code/05_ExtractEnvironment.Rmd | 23 ++++++++++------ code/06_buildDT.Rmd | 48 +++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/code/05_ExtractEnvironment.Rmd b/code/05_ExtractEnvironment.Rmd index 787843a..3038aca 100644 --- a/code/05_ExtractEnvironment.Rmd +++ b/code/05_ExtractEnvironment.Rmd @@ -33,9 +33,6 @@ library(sf) library(rgeos) library(raster) library(rworldmap) -#library(elevatr) -#library(rnaturalearth) -#library(dggridR) source("A98_PredictorsExtract.R") @@ -44,7 +41,7 @@ write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TM write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron')) rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp") -#Ancillary variables +#Ancillary functions get.summary <- function(x){x %>% summarize_all(.funs=list(num.NAs=~sum(is.na(.)), min=~min(., na.rm=T), @@ -67,7 +64,7 @@ get.summary <- function(x){x %>% load("../_output/header_sPlot3.0.RData") ``` -Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels `Location uncertainty` to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty. +Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fields `Location uncertainty` to allow for fuzzy matching. 
Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty. ```{r, eval=F} header.shp <- header %>% filter(!is.na(Longitude) | !is.na(Latitude)) @@ -124,7 +121,16 @@ for(i in 1:19){ } ``` Intersect `header.shp` with each raster of the CHELSA collection. -Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks. +Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. ~~Divided in 99 chunks.~~ +```{shell, eval=F} +#make sure to setup the right function (robust.mean\robust.sd) and the correct file type (raster) in submit-A98.sh +for pred in ls /data/splot/_data/Predictors/*.tif +do +qsub submit-A98.sh $pred +done +``` + + ```{r, warning=F, message=F, eval=F} header.shp.path <- "../_derived/header.shp.shp" @@ -214,7 +220,7 @@ download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2 ``` Intersect `header.shp` with each raster of the ISRIC collection. -Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks. +Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. ~~Divided in 99 chunks.~~ ```{r, message=F, eval=F} for(i in 1:8){ ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", @@ -276,7 +282,8 @@ knitr::kable(tmp.sum, ``` ## 4 Elevation variability -Reimport output (calculated in `04_buildHeader`) +Reimport output (calculated in `04_buildHeader`). +Extract elevation for each plot: mean and 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). 
The DEM is derived from the package [elevatr](https://cran.r-project.org/web/packages/elevatr/vignettes/introduction_to_elevatr.html#get_raster_elevation_data), which uses the [Terrain Tiles on Amazon Web Services](https://registry.opendata.aws/terrain-tiles/). Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described [here](https://github.com/tilezen/joerd/blob/master/docs/data-sources.md). ```{r, message=F} elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv") ``` diff --git a/code/06_buildDT.Rmd b/code/06_buildDT.Rmd index 876ab29..605b102 100644 --- a/code/06_buildDT.Rmd +++ b/code/06_buildDT.Rmd @@ -10,19 +10,18 @@ output: html_document  </center> - -MEMO!! WHAT TO DO WITH LAYER WHEN IS CONSISTENTLY ZERO IN A PLOT? CHANGE TO NA? -WHAT TO DO INSTEAD WHEN LAYER==0 IN A PLOT WHERE LAYER INFO IS OTHERWISE AVAILABLE? -!!! ADD Explanation of fields!!! - - **Timestamp:** `r date()` **Drafted:** Francesco Maria Sabatini -**Revised:** -**version:** 1.0 +**Revised:** Helge Bruelheide +**Version:** 1.1 This report documents the construction of the DT table for sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens. - + +Caution: Layer information is not available for each species in each plot. In case of missing information Layer is set to zero. 
+ +*Changes in version 1.1* - +1) Added explanation of fields +2) Fixed `taxon_group` of Friesodielsia ```{r results="hide", message=F, warning=F} knitr::opts_chunk$set(echo = TRUE) @@ -173,7 +172,7 @@ DT1 <- DT1 %>% table(DT1$`Taxon group`, exclude=NULL) ``` -Those taxa for which a measuress of Basal Area exists can be safely assumed to belong to vascular plants +Those taxa for which a measure of Basal Area exists can be safely assumed to belong to vascular plants ```{r} DT1 <- DT1 %>% @@ -234,7 +233,8 @@ mosses.gen <- c("Hypnum", "Brachytheciastrum","Brachythecium","Hypnum", "Zygodon", "Oxymitra", "Bryophyta", "Musci", '\\\"Moos\\\"') vascular.gen <- c("Polystichum", "Hypericum", "Peltaria", "Pancovia", "Calythrix", "Ripogonum", "Notogrammitis", "Fuscospora", "Lophozonia", "Rostellularia", - "Hesperostipa", "Microsorium", "Angiosperm","Dicotyledonae", "Spermatophy") + "Hesperostipa", "Microsorium", "Angiosperm","Dicotyledonae", "Spermatophy", + "Oxymitra", "Friesodielsia") alga.gen <- c("Chara", "Characeae", "Tonina", "Nostoc", "Entermorpha", "Hydrocoleum" ) DT1 <- DT1 %>% @@ -327,7 +327,6 @@ DT1 <- DT1 %>% ``` Fix some errors. There are some plots where all species have zeros in the field `Cover %`. Some of them are marked as p\\a (`Cover code=="x"`), but other not. Consider all this plots as presence\\absence and transform `Cover %` to 1. -!! There are some other plots having layers with all zeros. This should be double-checked, but are not being transformed here !! 
```{r} allzeroes <- DT1 %>% group_by(PlotObservationID) %>% @@ -439,14 +438,15 @@ DT1 %>% ```{r} DT2 <- DT1 %>% dplyr::select(PlotObservationID, Name_short, `Turboveg2 concept`, Rank_correct, `Taxon group`, Layer:x_, Ab_scale, Abundance, Relative.cover ) %>% - rename(species_original=`Turboveg2 concept`, - species=Name_short, - taxon_group=`Taxon group`, - cover_perc=`Cover %`, - cover_code=`Cover code`) + rename(Species_original=`Turboveg2 concept`, + Species=Name_short, + Taxon_group=`Taxon group`, + Cover_perc=`Cover %`, + Cover_code=`Cover code`, + Relative_cover=Relative.cover) ``` -The output of the DT table contains `r nrow(DT2)` records, over `r length(unique(DT2$PlotObservationID))` plots. The total number of taxa is `r length(unique(DT2$species_original))` and `r length(unique(DT2$species))`, before and after standardization, respectively. Information on the `Taxon group` is available for `r DT2 %>% filter(taxon_group!="Unknown") %>% distinct(species) %>% nrow()` standardized species. +The output of the DT table contains `r nrow(DT2)` records, over `r length(unique(DT2$PlotObservationID))` plots. The total number of taxa is `r length(unique(DT2$Species_original))` and `r length(unique(DT2$Species))`, before and after standardization, respectively. Information on the `Taxon group` is available for `r DT2 %>% filter(Taxon_group!="Unknown") %>% distinct(Species) %>% nrow()` standardized species. 
```{r, echo=F} knitr::kable(DT2 %>% @@ -455,6 +455,18 @@ knitr::kable(DT2 %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") ``` +## Field List + +- PlotObservationID - Plot ID, as in `header` +- Species - Resolved species name, based on taxonomic backbone +- Species_original - Original species name, as provided by data contributor +- Rank_correct - Taxonomic rank at which the `Species_original` was matched +- Taxon_group - Possible entries are: "Alga_Stonewort", "Lichen", "Moss", "Vascular plant","Unknown" +- Layer - Vegetation layer, as specified in Turboveg: 0 - No layer specified, 1 - Upper tree layer, 2 - Middle tree layer, 3 - Lower tree layer, 4 - Upper shrub layer, 5 - Lower shrub layer, 6 - Herb layer, 7 - Juvenile, 8 - Seedling, 9 - Moss layer. +- Cover_code - Cover value in original data, before transformation to percentage cover +- Ab_scale - Abundance scale in original data. Possible values are: CoverPerc: Cover Percentage, pa: Presence absence, x_BA: Basal Area, x_IC: Individual count, x_SC: Stem count, x_IV: Relative Importance, x_RF: Relative Frequency. +- Abundance - Abundance value, in original value, or as transformed from original `Cover code` to quantitative values. +- Relative_cover - Abundance of each species after being normalized to 1 in each plot. ```{r} -- GitLab