-
Francesco Sabatini authoredFrancesco Sabatini authored
title: "sPlot 3.0 - Environmental Data"
author: "Francesco Maria Sabatini"
date: "2/14/2020"
output: html_document
Timestamp: r date()
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0
This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)
## Spatial packages
library(rgdal)
library(sp)
library(sf)
library(rgeos)
library(raster)
library(rworldmap)
#library(elevatr)
#library(rnaturalearth)
#library(dggridR)
source("A98_PredictorsExtract.R")
#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
#Ancillary variables
get.summary <- function(x){x %>%
summarize_all(.funs=list(num.NAs=~sum(is.na(.)),
min=~min(., na.rm=T),
q025=~quantile(., 0.25, na.rm=T),
q50=~quantile(., 0.50, na.rm=T),
q75=~quantile(., .75, na.rm=T),
max=~max(., na.rm=T),
mean=~mean(., na.rm=T),
sd=~sd(., na.rm=T))) %>%
gather(variable, value) %>%
separate(variable, sep="_", into=c("variable", "stat")) %>%
mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>%
spread(key=stat, value=value)
}
1 Import header data
load("../_output/header_sPlot3.0.RData")
Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels Location uncertainty
to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.
header.shp <- header %>%
filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>%
dplyr::select(Longitude, Latitude),
proj4string = CRS("+init=epsg:4326"),
data=data.frame(PlotObservationID= header.shp$PlotObservationID,
loc.uncert=header.shp$`Location uncertainty (m)`,
`GIVD ID`=header.shp$`GIVD ID`))
header.shp <- readOGR(dsn="../_derived", layer="header.shp")
colnames(header.shp@data) <- c("PlotObservationID", "loc.uncert", "GIVD ID")
2 Climate
Bioclimatic variables (bio01-bio19) derive from CHELSA
Codes:
Bio1 = Annual Mean Temperature
Bio2 = Mean Diurnal Range
Bio3 = Isothermality
Bio4 = Temperature Seasonality
Bio5 = Max Temperature of Warmest Month
Bio6 = Min Temperature of Coldest Month
Bio7 = Temperature Annual Range
Bio8 = Mean Temperature of Wettest Quarter
Bio9 = Mean Temperature of Driest Quarter
Bio10 = Mean Temperature of Warmest Quarter
Bio11 = Mean Temperature of Coldest Quarter
Bio12 = Annual Precipitation
Bio13 = Precipitation of Wettest Month
Bio14 = Precipitation of Driest Month
Bio15 = Precipitation Seasonality
Bio16 = Precipitation of Wettest Quarter
Bio17 = Precipitation of Driest Quarter
Bio18 = Precipitation of Warmest Quarter
Bio19 = Precipitation of Coldest Quarter
\newline \newline Download raster files:
library(downloader)
url.chelsa <- list()
for(i in 1:19){
ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
download(url.chelsa[[i]],
paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""),
mode = "wb")
}
Intersect header.shp
with each raster of the CHELSA collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R
. Divided in 99 chunks.
header.shp.path <- "../_derived/header.shp.shp"
for(i in 1:19){
ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_",
stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
#define output paths
output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_",
stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10",
stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
# Run PredExtr function - sink to file
PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.mean,
ncores = 4, typp = "raster", output.ff1)
PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.sd,
ncores = 4, typp = "raster", output.ff2)
}
Reimport and assemble output.
chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
chelsa.out <- do.call(cbind, lapply(chelsa.files,
function(x) {read_csv(x,col_types = cols(X1 = col_character(),
V1 = col_double())) %>%
column_to_rownames("X1")}))
colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
#same for sd values
chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files,
function(x) {read_csv(x, col_types = cols(X1 = col_character(),
V1 = col_double())) %>%
column_to_rownames("X1")}))
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")
Show summaries
tmp.sum <- get.summary(chelsa.out)
knitr::kable(tmp.sum,
caption="Summary statistics for chelsa mean statistics", digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
tmp.sum <- get.summary(chelsa.sd.out)
knitr::kable(tmp.sum,
caption="Summary statistics for chelsa s.d. statistics") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
3 Soil
Soil variables (5 cm depth) derive from the ISRIC dataset, downloaded at 250-m resolution
CECSOL Cation Exchange capacity of soil
CLYPPT Clay mass fraction in %
CRFVOL Coarse fragments volumetric in %
ORCDRC Soil Organic Carbon Content in g/kg
PHIHOX Soil pH x 10 in H20
SLTPPT Silt mass fraction in %
SNDPPT Sand mass fraction in %
BLDFIE Bulk Density (fine earth) in kg/m3
P.ret.cat Phosphorous Retention - Categorical value, see ISRIC 2011-06
\newline \newline
Download ISRIC soil data
url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"
download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")
Intersect header.shp
with each raster of the ISRIC collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R
. Divided in 99 chunks.
for(i in 1:8){
ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/",
pattern = "^[^(Gene)]", full.names = T)[i]
isric.i <- raster(ff)
#define output paths
filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/",
pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
# Run PredExtr function - sink to file
PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.mean,
ncores = 4, typp = "raster", output.ff1)
PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.sd,
ncores = 4, typp = "raster", output.ff2)
}
Reimport and assemble output.
ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
isric.files <- list.files(path="../_derived/output_pred/",
pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"),
full.names=T)
isric.out <- do.call(cbind, lapply(isric.files,
function(x) {read_csv(x, col_types = cols(X1 = col_character(),
V1 = col_double())) %>%
column_to_rownames("X1")}))
colnames(isric.out) <- ISRIC.layer.names
#same for sd values
isric.sd.files <- list.files(path="../_derived/output_pred/",
pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"),
full.names=T)
isric.sd.out <- do.call(cbind, lapply(isric.sd.files,
function(x) {read_csv(x, col_types = cols(X1 = col_character(),
V1 = col_double())) %>%
column_to_rownames("X1")}))
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")
Show summaries
tmp.sum <- get.summary(isric.out)
knitr::kable(tmp.sum,
caption="Summary statistics for isric mean statistics", digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
tmp.sum <- get.summary(isric.sd.out)
knitr::kable(tmp.sum,
caption="Summary statistics for isric s.d. statistics", digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
4 Create output and export
soilclim <- header %>%
dplyr::select(PlotObservationID) %>%
left_join(header.shp@data %>%
dplyr::select(PlotObservationID) %>%
bind_cols(chelsa.out) %>%
bind_cols(chelsa.sd.out) %>%
bind_cols(isric.out) %>%
bind_cols(isric.sd.out) %>%
distinct(),
by="PlotObservationID")
knitr::kable(soilclim %>%
sample_n(20),
caption="Show environmenal info for 20 randomly selected plots ", digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
save(soilclim, file = "../_output/SoilClim_sPlot3.RData")
ANNEX 1 - Code to Extract predictors