diff --git a/code/05_ExtractEnvironment.Rmd b/code/05_ExtractEnvironment.Rmd index 6f23e0994368530a51001bc55ccd1ebf4c93901a..466af8349f43a839cb368e52815b14e4ebc116bd 100644 --- a/code/05_ExtractEnvironment.Rmd +++ b/code/05_ExtractEnvironment.Rmd @@ -44,17 +44,34 @@ source("A98_PredictorsExtract.R") write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron')) write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron')) rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp") + +#Ancillary variables +get.summary <- function(x){x %>% + summarize_all(.funs=list(no.NAs=~sum(is.na(.)), + min=~min(., na.rm=T), + q025=~quantile(., 0.25, na.rm=T), + q50=~quantile(., 0.50, na.rm=T), + q75=~quantile(., .75, na.rm=T), + max=~max(., na.rm=T), + mean=~mean(., na.rm=T), + sd=~sd(., na.rm=T))) %>% + gather(variable, value) %>% + separate(variable, sep="_", into=c("variable", "stat")) %>% + mutate(stat=factor(stat, levels=c("no.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% + spread(key=stat, value=value) +} + + ``` -## 1.1 Import header data +## 1 Import header data ```{r} load("../_output/header_splot3.0.RData") -header <- header.out ``` -Create spatial point dataframe for sPlot data to intersect with spatial layers -```{r} +Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels `Location uncertainty` to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty. +```{r, eval=F} header.shp <- header %>% filter(!is.na(Longitude) | !is.na(Latitude)) header.shp <- SpatialPointsDataFrame(coords= header.shp %>% @@ -64,24 +81,16 @@ header.shp <- SpatialPointsDataFrame(coords= header.shp %>% loc.uncert=header.shp$`Location uncertainty (m)`, `GIVD ID`=header.shp$`GIVD ID`)) ``` - -## 1.2 Vector predictors -### 1.2.1 Ecoregions -```{r, message=F, eval=F} -##Import spatial data on Ecoregions -ecoreg <- readOGR("../sPlot/_Ancillary/official", layer="wwf_terr_ecos") - -ecoreg@data <- ecoreg@data %>% - dplyr::select(OBJECTID, ECO_NAME, REALM, BIOME, ECO_NUM, ECO_ID, eco_code) - +```{r, echo=F} +header.shp <- readOGR(dsn="../_derived", layer="header.shp") +colnames(header.shp@data) <- c("PlotObservationID", "loc.uncert", "GIVD ID") ``` -## 1.3 Raster data -Download, import and reproject all raster data to same extent, CRS and resolution. Stack them and save derived files for easier future work -### 1.3.1 Climate -Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/) -Codes: +## 2 Climate +Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/) + +Codes: Bio1 = Annual Mean Temperature Bio2 = Mean Diurnal Range @@ -92,7 +101,7 @@ Bio6 = Min Temperature of Coldest Month Bio7 = Temperature Annual Range Bio8 = Mean Temperature of Wettest Quarter Bio9 = Mean Temperature of Driest Quarter -Bio10 = Mean Temperature of Warmest Quarter +Bio10 = Mean Temperature of Warmest Quarter Bio11 = Mean Temperature of Coldest Quarter Bio12 = Annual Precipitation Bio13 = Precipitation of Wettest Month @@ -135,18 +144,43 @@ for(i in 1:19){ ncores = 4, typp = "raster", output).ff2 } ``` -Reimport and assemble output, and join it to header. +Reimport and assemble output. ```{r, message=F, warning=F} chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T) -chelsa.out <- do.call(cbind, lapply(chelsa.files, function(x) {read_csv(x) %>% - column_to_rownames("X1")})) +chelsa.out <- do.call(cbind, lapply(chelsa.files, + function(x) {read_csv(x,col_types = cols(X1 = col_character(), + V1 = col_double())) %>% + column_to_rownames("X1")})) +colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0")) +#same for sd values +chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T) +chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, + function(x) {read_csv(x, col_types = cols(X1 = col_character(), + V1 = col_double())) %>% + column_to_rownames("X1")})) +colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "_sd") +``` +Show summaries +```{r, echo=F} +tmp.sum <- get.summary(chelsa.out) +knitr::kable(tmp.sum, + caption="Summary statistics for chelsa mean statistics") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") ``` +```{r, echo=F} +tmp.sum <- get.summary(chelsa.sd.out) +knitr::kable(tmp.sum, + caption="Summary statistics for chelsa s.d. statistics") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") +``` -### 1.3.2 Soil +## 3 Soil Soil variables (5 cm depth) derive from the [ISRIC dataset](https://www.isric.org/), downloaded at 250-m resolution CECSOL Cation Exchange capacity of soil @@ -155,7 +189,7 @@ CRFVOL Coarse fragments volumetric in % ORCDRC Soil Organic Carbon Content in g/kg PHIHOX Soil pH x 10 in H20 SLTPPT Silt mass fraction in % -SNDPTT Sand mass fraction in % +SNDPPT Sand mass fraction in % BLDFIE Bulk Density (fine earth) in kg/m3 P.ret.cat Phosphorous Retention - Categorical value, see [ISRIC 2011-06](https://www.isric.org/documents/document-type/isric-report-201106-global-distribution-soil-phosphorus-retention-potential) \newline \newline @@ -200,6 +234,66 @@ for(i in 1:8){ } ``` +Reimport and assemble output. +```{r, message=F, warning=F} +ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT") +ISRIC.layer.names <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll") +isric.files <- list.files(path="../_derived/output_pred/", + pattern=paste0(paste0(ISRIC.layer.names, ".csv$"), collapse="|"), + full.names=T) + +isric.out <- do.call(cbind, lapply(isric.files, + function(x) {read_csv(x, col_types = cols(X1 = col_character(), + V1 = col_double())) %>% + column_to_rownames("X1")})) +colnames(isric.out) <- ISRIC.layer.names +#same for sd values +isric.sd.files <- list.files(path="../_derived/output_pred/", + pattern=paste0(paste0(ISRIC.layer.names, "_sd.csv$"), collapse="|"), + full.names=T) +isric.sd.out <- do.call(cbind, lapply(isric.sd.files, + function(x) {read_csv(x, col_types = cols(X1 = col_character(), + V1 = col_double())) %>% + column_to_rownames("X1")})) +colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "_sd") +``` +Show summaries +```{r, echo=F} +tmp.sum <- get.summary(isric.out) + +knitr::kable(tmp.sum, + caption="Summary statistics for chelsa mean statistics") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") +``` + +```{r, echo=F} +tmp.sum <- get.summary(chelsa.sd.out) + +knitr::kable(tmp.sum, + caption="Summary statistics for chelsa s.d. statistics") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") +``` + + +## 4 Create output and export + +```{r} +soilclim <- header %>% + dplyr::select(PlotObservationID) %>% + left_join(header.shp@data %>% + dplyr::select(PlotObservationID) %>% + bind_cols(chelsa.out) %>% + bind_cols(chelsa.sd.out) %>% + bind_cols(isric.out) %>% + bind_cols(isric.sd.out) %>% + distinct(), + by="PlotObservationID") + +save(soilclim, file = "../_output/SoilClim_sPlot3.RData") +``` +