Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
title: "sPlot 3.0 - Environmental Data"
author: "Francesco Maria Sabatini"
date: "2/14/2020"
output: html_document
![](/data/sPlot/users/Francesco/_sPlot_Management/splot-long-rgb.png "sPlot Logo")

Timestamp: r date()
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)

## Spatial packages
library(rgdal)
library(sp)
library(sf)
library(rgeos)
library(raster)
library(rworldmap)
#library(elevatr)
#library(rnaturalearth)
#library(dggridR)

source("A98_PredictorsExtract.R")

#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")

#Ancillary variables
get.summary <- function(x){x %>% 
    summarize_all(.funs=list(num.NAs=~sum(is.na(.)), 
                             min=~min(., na.rm=T),
                             q025=~quantile(., 0.25, na.rm=T), 
                             q50=~quantile(., 0.50, na.rm=T), 
                             q75=~quantile(., .75, na.rm=T), 
                             max=~max(., na.rm=T), 
                             mean=~mean(., na.rm=T), 
                             sd=~sd(., na.rm=T))) %>% 
    gather(variable, value) %>% 
    separate(variable, sep="_", into=c("variable", "stat")) %>% 
    mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
    spread(key=stat, value=value)
}

1 Import header data

load("../_output/header_sPlot3.0.RData")

Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels Location uncertainty to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.

header.shp <- header %>%
  filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
                                        dplyr::select(Longitude, Latitude),
                               proj4string = CRS("+init=epsg:4326"), 
                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
                                               `GIVD ID`=header.shp$`GIVD ID`))
header.shp <- readOGR(dsn="../_derived", layer="header.shp")
colnames(header.shp@data) <- c("PlotObservationID", "loc.uncert", "GIVD ID")

2 Climate

Bioclimatic variables (bio01-bio19) derive from CHELSA

Codes:

Bio1 = Annual Mean Temperature
Bio2 = Mean Diurnal Range
Bio3 = Isothermality
Bio4 = Temperature Seasonality
Bio5 = Max Temperature of Warmest Month
Bio6 = Min Temperature of Coldest Month
Bio7 = Temperature Annual Range
Bio8 = Mean Temperature of Wettest Quarter
Bio9 = Mean Temperature of Driest Quarter
Bio10 = Mean Temperature of Warmest Quarter
Bio11 = Mean Temperature of Coldest Quarter
Bio12 = Annual Precipitation
Bio13 = Precipitation of Wettest Month
Bio14 = Precipitation of Driest Month
Bio15 = Precipitation Seasonality
Bio16 = Precipitation of Wettest Quarter
Bio17 = Precipitation of Driest Quarter
Bio18 = Precipitation of Warmest Quarter
Bio19 = Precipitation of Coldest Quarter

\newline \newline Download raster files:

library(downloader)
url.chelsa <- list()
for(i in 1:19){
  ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
  url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
  download(url.chelsa[[i]],
           paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""), 
           mode = "wb")
}

Intersect header.shp with each raster of the CHELSA collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

header.shp.path <- "../_derived/header.shp.shp"

for(i in 1:19){
  ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
  #define output paths
  output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
  output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10", 
              stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
chelsa.out <- do.call(cbind, lapply(chelsa.files, 
                                    function(x) {read_csv(x,col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                        column_to_rownames("X1")}))
colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
#same for sd values
chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")

Show summaries

tmp.sum <- get.summary(chelsa.out)

knitr::kable(tmp.sum, 
  caption="Summary statistics for chelsa mean statistics", digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")
tmp.sum <- get.summary(chelsa.sd.out)

knitr::kable(tmp.sum, 
  caption="Summary statistics for chelsa s.d. statistics") %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")

3 Soil

Soil variables (5 cm depth) derive from the ISRIC dataset, downloaded at 250-m resolution

CECSOL Cation Exchange capacity of soil
CLYPPT Clay mass fraction in %
CRFVOL Coarse fragments volumetric in %
ORCDRC Soil Organic Carbon Content in g/kg
PHIHOX Soil pH x 10 in H20
SLTPPT Silt mass fraction in %
SNDPPT Sand mass fraction in %
BLDFIE Bulk Density (fine earth) in kg/m3
P.ret.cat Phosphorous Retention - Categorical value, see ISRIC 2011-06
\newline \newline Download ISRIC soil data

url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"

download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")

Intersect header.shp with each raster of the ISRIC collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

for(i in 1:8){
  ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
                                pattern = "^[^(Gene)]", full.names = T)[i]
  isric.i <- raster(ff)
  #define output paths
  filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
           pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
  output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
  output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
isric.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"), 
                          full.names=T)

isric.out <- do.call(cbind, lapply(isric.files,  
                                   function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                       column_to_rownames("X1")}))
colnames(isric.out) <- ISRIC.layer.names
#same for sd values
isric.sd.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"), 
                          full.names=T)
isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
                                      function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")

Show summaries

tmp.sum <- get.summary(isric.out)

knitr::kable(tmp.sum, 
  caption="Summary statistics for isric mean statistics", digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")
tmp.sum <- get.summary(isric.sd.out)

knitr::kable(tmp.sum, 
  caption="Summary statistics for isric s.d. statistics", digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")

4 Create output and export

soilclim <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  left_join(header.shp@data %>% 
              dplyr::select(PlotObservationID) %>% 
              bind_cols(chelsa.out) %>%
              bind_cols(chelsa.sd.out) %>% 
              bind_cols(isric.out) %>% 
              bind_cols(isric.sd.out) %>% 
              distinct(), 
            by="PlotObservationID")
knitr::kable(soilclim %>% 
               sample_n(20), 
  caption="Show environmenal info for 20 randomly selected plots ", digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                  full_width = F, position = "center")
save(soilclim, file = "../_output/SoilClim_sPlot3.RData")

ANNEX 1 - Code to Extract predictors