Timestamp: Mon Nov 30 19:33:09 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)

## Spatial packages
library(rgdal)
library(sp)
library(sf)
library(rgeos)
library(raster)
library(rworldmap)

source("A98_PredictorsExtract.R")

#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")

#Ancillary functions
get.summary <- function(x){x %>% 
    summarize_all(.funs=list(num.NAs=~sum(is.na(.)), 
                             min=~min(., na.rm=T),
                             q025=~quantile(., 0.25, na.rm=T), 
                             q50=~quantile(., 0.50, na.rm=T), 
                             q75=~quantile(., .75, na.rm=T), 
                             max=~max(., na.rm=T), 
                             mean=~mean(., na.rm=T), 
                             sd=~sd(., na.rm=T))) %>% 
    gather(variable, value) %>% 
    separate(variable, sep="_", into=c("variable", "stat")) %>% 
    mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
    spread(key=stat, value=value)
}

1 Import header data

load("../_output/header_sPlot3.0.RData")

Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fields Location uncertainty to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.

header.shp <- header %>%
  filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
                                        dplyr::select(Longitude, Latitude),
                               proj4string = CRS("+init=epsg:4326"), 
                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
                                               `GIVD ID`=header.shp$`GIVD ID`))
## OGR data source with driver: ESRI Shapefile 
## Source: "/data/sPlot/users/Francesco/sPlot3/_derived", layer: "header.shp"
## with 1976235 features
## It has 3 fields

2 Climate

Bioclimatic variables (bio01-bio19) derive from CHELSA

Codes:

Bio1 = Annual Mean Temperature
Bio2 = Mean Diurnal Range
Bio3 = Isothermality
Bio4 = Temperature Seasonality
Bio5 = Max Temperature of Warmest Month
Bio6 = Min Temperature of Coldest Month
Bio7 = Temperature Annual Range
Bio8 = Mean Temperature of Wettest Quarter
Bio9 = Mean Temperature of Driest Quarter
Bio10 = Mean Temperature of Warmest Quarter
Bio11 = Mean Temperature of Coldest Quarter
Bio12 = Annual Precipitation
Bio13 = Precipitation of Wettest Month
Bio14 = Precipitation of Driest Month
Bio15 = Precipitation Seasonality
Bio16 = Precipitation of Wettest Quarter
Bio17 = Precipitation of Driest Quarter
Bio18 = Precipitation of Warmest Quarter
Bio19 = Precipitation of Coldest Quarter

Download raster files:

library(downloader)
url.chelsa <- list()
for(i in 1:19){
  ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
  url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
  download(url.chelsa[[i]],
           paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""), 
           mode = "wb")
}

Intersect header.shp with each raster of the CHELSA collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

#make sure to setup the right function (robust.mean\robust.sd) and the correct file type (raster) in submit-A98.sh 
for pred in ls /data/splot/_data/Predictors/*.tif
do
qsub submit-A98.sh $pred
done 
header.shp.path <- "../_derived/header.shp.shp"

for(i in 1:19){
  ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
  #define output paths
  output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
  output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10", 
              stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
chelsa.out <- do.call(cbind, lapply(chelsa.files, 
                                    function(x) {read_csv(x,col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                        column_to_rownames("X1")}))
colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
#same for sd values
chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa mean statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01 4040 -270.578 82.000 96.000 110.000 307.00 97.023 43.401
bio02 4040 6.000 54.000 71.000 80.000 144.00 66.982 21.456
bio03 4040 67.000 233.000 275.000 300.000 832.00 270.146 70.353
bio04 4040 88.000 5397.000 6164.000 7339.000 21703.00 6373.841 1793.695
bio05 4040 -77.642 201.407 224.000 252.000 452.00 229.713 43.397
bio06 4040 -431.000 -50.449 -12.000 12.575 252.00 -19.443 61.260
bio07 4040 26.000 206.000 248.709 285.000 675.00 249.163 63.721
bio08 4040 -208.000 86.000 144.013 177.000 358.00 132.390 62.851
bio09 4040 -377.000 -1.000 57.000 98.333 373.00 63.304 87.406
bio10 4040 -111.628 167.000 178.000 201.264 375.00 186.105 38.709
bio11 4040 -400.000 -14.000 11.000 39.763 279.00 12.339 58.380
bio12 4040 0.000 604.000 761.000 907.000 6554.00 849.872 472.599
bio13 4040 0.000 74.000 85.000 113.000 993.75 104.780 64.680
bio14 4040 0.000 29.848 41.000 51.000 411.00 43.612 27.856
bio15 4040 5.000 17.000 24.000 35.000 331.00 28.480 16.999
bio16 4040 0.000 216.620 247.000 325.000 2832.50 302.450 182.994
bio17 4040 0.000 94.000 131.000 163.000 1251.00 139.050 86.495
bio18 4040 0.000 176.000 216.000 253.000 2285.00 236.268 146.904
bio19 4040 0.000 112.000 163.000 210.511 2645.00 183.224 120.097
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01sd 1384369 0 0.7800182 2.0310466 5.9857381 51.162192 4.846720 6.7012074
bio02sd 1384369 0 0.0000000 0.1793201 0.5003541 4.392056 0.397326 0.5871731
bio03sd 1384369 0 0.4435312 0.7527727 1.7079089 23.952856 1.391100 1.5943478
bio04sd 1384369 0 7.7781746 15.0614901 31.6960500 242.616057 25.970859 27.2246007
bio05sd 1384369 0 0.9082952 2.2158230 5.6568542 53.383878 5.095573 6.9402301
bio06sd 1384369 0 0.7996481 1.8866058 5.7739680 52.067653 4.665683 6.3305406
bio07sd 1384369 0 0.5001703 0.6595651 1.3733545 8.507203 1.085018 1.0246997
bio08sd 1384369 0 1.0725245 5.6493818 16.9136010 148.421139 12.643420 16.6528157
bio09sd 1384369 0 0.9536438 2.7201027 8.7700028 144.249783 7.687753 12.0662592
bio10sd 1384369 0 0.7754730 2.1454694 5.6568542 53.720906 5.023930 6.9683276
bio11sd 1384369 0 0.7806987 1.8978051 5.8134709 52.062384 4.608413 6.3346350
bio12sd 1384369 0 17.4683532 42.9959747 71.1728646 713.159827 62.144824 64.5121259
bio13sd 1384369 0 2.2862699 5.9902205 10.5262733 99.739245 8.186887 8.8333873
bio14sd 1384369 0 0.7874771 2.2079022 4.3447815 36.986231 3.404173 3.7164424
bio15sd 1384369 0 0.5163978 0.7071068 1.7593330 10.929618 1.283538 1.2838996
bio16sd 1384369 0 6.6037769 17.3523720 29.9374956 297.255136 23.389558 25.4133442
bio17sd 1384369 0 2.5631032 6.9268396 13.9031189 113.488278 10.832693 11.8698016
bio18sd 1384369 0 4.3574734 12.4533991 21.0158670 345.418948 18.467468 22.3670495
bio19sd 1384369 0 3.5121519 9.8499438 18.2777876 296.915363 15.135450 17.0292687

3 Soil

Soil variables (5 cm depth) derive from the ISRIC dataset, downloaded at 250-m resolution

CECSOL Cation Exchange capacity of soil
CLYPPT Clay mass fraction in %
CRFVOL Coarse fragments volumetric in %
ORCDRC Soil Organic Carbon Content in g/kg
PHIHOX Soil pH x 10 in H20
SLTPPT Silt mass fraction in %
SNDPPT Sand mass fraction in %
BLDFIE Bulk Density (fine earth) in kg/m3
P.ret.cat Phosphorous Retention - Categorical value, see ISRIC 2011-06
Download ISRIC soil data

url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"

download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")

Intersect header.shp with each raster of the ISRIC collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

for(i in 1:8){
  ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
                                pattern = "^[^(Gene)]", full.names = T)[i]
  isric.i <- raster(ff)
  #define output paths
  filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
           pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
  output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
  output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
isric.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"), 
                          full.names=T)

isric.out <- do.call(cbind, lapply(isric.files,  
                                   function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                       column_to_rownames("X1")}))
colnames(isric.out) <- ISRIC.layer.names
#same for sd values
isric.sd.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"), 
                          full.names=T)
isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
                                      function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric mean statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIE 19205 93.667 646.600 796.000 1046.500 1718.500 846.194 280.873
CECSOL 19205 1.000 17.600 21.750 26.500 157.333 22.657 7.182
CLYPPT 19205 0.000 13.600 18.852 23.450 67.000 18.616 7.325
CRFVOL 19205 0.000 6.800 10.937 16.000 55.000 11.707 6.089
ORCDRC 19205 0.000 29.616 50.833 89.000 688.500 65.566 47.791
PHIHOX 19205 38.000 54.167 59.500 64.424 94.200 59.971 7.846
SLTPPT 19205 2.250 28.000 36.289 40.500 80.667 34.163 10.037
SNDPPT 19205 2.400 36.667 43.667 56.500 95.000 47.207 14.710
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIEsd 115683 0 19.047 30.928 55.756 271.328 41.588 31.638
CECSOLsd 115683 0 0.837 1.390 2.246 50.912 1.744 1.369
CLYPPTsd 115683 0 0.753 1.140 1.826 13.546 1.442 1.057
CRFVOLsd 115683 0 0.584 1.033 1.673 15.556 1.272 0.894
ORCDRCsd 115683 0 3.270 6.229 12.095 91.914 9.499 9.402
PHIHOXsd 115683 0 0.707 1.069 1.982 10.533 1.613 1.424
SLTPPTsd 115683 0 0.837 1.329 2.160 20.569 1.772 1.434
SNDPPTsd 115683 0 1.033 1.708 3.033 25.153 2.449 2.206

4 Elevation variability

Reimport output (calculated in 04_buildHeader).
Extract elevation for each plot: mean and 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package elevatr, which uses the Terrain Tiles on Amazon Web Services. Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described here.

elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv")

5 Create output and export

soilclim <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  left_join(elevation.out %>% 
              dplyr::select(PlotObservationID, Elevation_median, Elevation_q2.5, Elevation_q97.5, Elevation_DEM.res=DEM.res) %>% 
              filter(!is.na(Elevation_median)), 
            by="PlotObservationID") %>% 
  left_join(header.shp@data %>% 
              dplyr::select(PlotObservationID) %>% 
              bind_cols(chelsa.out) %>%
              bind_cols(chelsa.sd.out) %>% 
              bind_cols(isric.out) %>% 
              bind_cols(isric.sd.out) %>% 
              distinct(), 
            by="PlotObservationID")
dim(soilclim)
## [1] 1977654      59
nrow(soilclim)==nrow(header)
## [1] TRUE
Show environmenal info for 20 randomly selected plots
PlotObservationID Elevation_median Elevation_q2.5 Elevation_q97.5 Elevation_DEM.res bio01 bio02 bio03 bio04 bio05 bio06 bio07 bio08 bio09 bio10 bio11 bio12 bio13 bio14 bio15 bio16 bio17 bio18 bio19 bio01sd bio02sd bio03sd bio04sd bio05sd bio06sd bio07sd bio08sd bio09sd bio10sd bio11sd bio12sd bio13sd bio14sd bio15sd bio16sd bio17sd bio18sd bio19sd BLDFIE CECSOL CLYPPT CRFVOL ORCDRC PHIHOX SLTPPT SNDPPT BLDFIEsd CECSOLsd CLYPPTsd CRFVOLsd ORCDRCsd PHIHOXsd SLTPPTsd SNDPPTsd
775462 NA NA NA NA 76.061 70.000 279.338 6405.877 211.936 -39.302 251.324 76.821 29.147 165.180 -8.232 761.555 79.473 51.718 12.841 223.834 164.140 203.669 167.738 8.707 0.000 1.197 50.369 9.311 8.024 1.373 70.265 29.501 9.292 8.043 139.293 14.023 9.941 1.229 40.576 31.363 37.281 31.893 764.495 22.529 20.401 15.363 50.290 57.032 45.685 33.895 100.426 2.943 3.189 2.421 18.903 6.166 2.840 5.189
354536 27 27 27 153 85.000 29.000 145.000 5953.000 189.000 -8.000 197.000 171.000 19.000 171.000 7.000 600.000 61.000 33.000 19.000 179.000 111.000 179.000 114.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 628.167 19.500 13.333 6.000 60.000 62.167 25.000 61.500 38.102 1.225 0.516 0.000 7.772 1.329 0.894 1.517
555488 31 16 112 306 197.826 96.475 304.657 7813.017 364.384 47.666 316.670 92.885 305.664 305.664 92.885 699.048 134.759 8.477 67.278 364.521 25.978 25.978 364.521 2.394 1.661 1.826 66.666 1.303 3.637 3.607 3.148 1.614 1.614 3.148 34.153 9.616 0.721 3.290 25.725 2.268 2.268 25.725 1496.033 26.706 32.400 8.240 14.802 74.690 36.271 31.356 45.333 3.290 4.194 3.779 3.809 1.953 3.286 4.262
1325451 NA NA NA NA 92.000 72.000 266.000 7151.000 238.000 -33.000 270.000 185.000 -2.000 189.000 -3.000 580.000 84.000 27.000 38.000 240.000 84.000 230.000 98.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 662.167 16.000 4.333 7.500 61.667 45.000 11.667 84.000 17.971 1.265 0.516 0.837 2.251 0.000 0.516 0.632
1094315 1 0 3 153 99.000 36.333 203.667 4964.833 194.000 15.000 178.833 89.000 70.000 171.000 33.000 817.167 85.000 40.000 22.000 255.000 133.000 237.000 166.000 0.000 0.516 1.033 4.535 0.000 0.000 0.408 0.000 0.000 0.000 0.000 0.753 0.000 0.000 0.000 0.000 0.000 0.000 0.000 623.000 18.099 17.769 6.505 54.538 66.319 30.516 51.692 52.282 1.521 3.581 0.947 14.954 1.943 2.794 5.955
1818650 75 75 75 153 113.000 53.000 349.000 3217.000 191.000 40.000 151.000 102.000 67.000 159.000 67.000 4766.000 495.000 308.000 16.000 1378.000 925.000 1119.000 925.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 637.333 45.667 22.000 29.333 201.000 51.333 31.833 46.333 15.345 3.011 0.632 2.582 2.828 1.033 0.408 1.033
1137041 10 10 10 153 102.000 66.000 295.000 5538.000 224.000 2.000 222.000 175.000 83.000 180.000 29.000 792.000 77.000 47.000 13.000 226.000 156.000 219.000 177.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 503.600 18.400 21.600 6.600 26.400 59.800 34.000 44.800 24.583 1.673 0.548 0.548 2.191 1.643 1.871 1.304
724067 297 297 297 153 116.000 74.000 314.000 5619.000 248.000 13.000 236.000 58.000 191.000 197.000 44.000 924.000 98.000 59.000 13.000 284.000 189.000 201.000 268.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 771.667 13.167 18.833 13.000 37.000 55.500 34.000 47.667 24.736 0.753 0.753 0.894 3.688 0.548 0.894 1.751
572233 NA NA NA NA 157.000 49.000 229.000 5737.000 272.000 58.000 214.000 138.000 234.000 243.000 85.000 854.000 126.000 22.000 43.000 378.000 90.000 90.000 200.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 900.500 30.250 28.000 17.750 82.750 66.250 30.000 42.000 62.126 1.708 0.816 0.957 8.884 0.957 1.414 1.826
362303 NA NA NA NA 89.000 30.000 154.000 5876.000 193.000 -2.000 195.000 173.000 25.000 173.000 12.000 605.000 63.000 35.000 18.000 180.000 115.000 180.000 119.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 698.800 24.000 15.800 7.200 85.600 61.000 26.400 57.800 48.674 0.707 1.304 0.837 14.117 1.225 1.817 1.304
719985 1256 1256 1256 153 81.000 81.000 304.000 6287.000 228.000 -39.000 267.000 56.000 166.000 173.000 1.000 1066.000 132.000 42.000 28.000 386.000 154.000 180.000 263.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 822.500 26.750 22.250 19.000 69.000 63.500 39.750 37.750 41.203 1.258 1.258 0.000 7.257 1.291 1.258 1.258
862025 25 21 28 153 114.000 20.000 160.000 3716.500 180.000 53.000 127.000 86.000 157.000 167.500 65.000 825.500 106.000 45.000 30.000 317.000 137.000 150.000 216.000 0.000 0.000 0.000 2.121 0.000 0.000 0.000 0.000 0.000 0.707 0.000 3.536 0.000 0.000 0.000 1.414 0.000 1.414 0.000 987.107 14.286 16.286 9.179 26.643 62.250 45.857 37.929 19.623 0.659 0.810 0.612 2.129 0.887 1.268 0.940
1740082 5 5 5 153 175.000 51.000 197.000 7229.000 304.000 44.000 260.000 223.000 80.000 278.000 76.000 1693.000 259.000 51.000 48.000 692.000 161.000 531.000 181.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1234.500 17.250 24.750 8.750 24.250 62.000 40.000 35.250 23.530 0.957 0.957 1.258 2.500 0.000 0.816 0.500
654128 1647 1647 1647 153 48.000 81.000 301.000 6359.000 194.000 -73.000 267.000 22.000 133.000 139.000 -34.000 1695.000 164.000 116.000 9.000 486.000 376.000 376.000 414.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 656.600 29.200 23.600 18.000 119.600 56.800 37.800 39.400 37.773 1.095 1.342 1.581 6.189 0.837 1.483 1.342
868857 9 0 18 153 88.223 40.298 189.617 6172.713 202.798 -9.734 212.681 173.691 8.202 177.021 8.202 572.723 61.500 31.255 18.851 182.564 106.181 182.223 106.181 0.642 0.602 2.090 8.010 0.632 0.764 0.832 4.040 0.665 0.688 0.665 15.769 1.366 0.950 0.567 4.721 3.366 4.842 3.366 791.720 19.798 12.358 6.030 46.807 62.361 25.376 62.257 146.533 6.869 2.741 0.837 24.924 4.658 3.763 5.022
1802910 394 337 474 153 115.000 68.250 215.250 8835.250 267.250 -50.000 317.750 236.750 -9.750 236.750 -9.750 1057.000 206.500 21.250 67.750 592.500 83.750 592.500 83.750 1.633 0.500 0.500 11.529 1.708 1.633 0.500 2.062 1.708 2.062 1.708 65.788 12.819 1.708 1.258 32.357 6.702 32.357 6.702 1094.568 21.392 20.878 13.568 46.770 58.703 38.986 40.162 34.149 1.214 1.110 1.453 7.955 1.412 1.188 1.098
1114815 NA NA NA NA 100.192 56.606 274.708 5287.815 211.674 5.658 206.015 91.548 77.870 175.394 30.130 846.202 83.808 50.623 14.990 245.301 165.988 241.195 176.736 1.032 1.537 4.154 29.742 1.216 1.711 2.435 58.745 0.954 0.872 1.340 29.520 3.259 1.719 0.581 9.227 5.993 9.437 6.172 432.643 18.256 8.049 6.094 79.848 52.406 16.506 75.437 78.968 5.232 6.527 1.180 30.872 6.364 7.098 13.293
889902 34 13 56 153 88.883 55.638 243.234 6166.649 213.883 -15.011 228.989 169.989 8.277 176.043 8.277 598.032 65.213 32.968 17.819 191.181 113.223 184.681 113.223 0.853 0.483 1.248 8.778 1.190 0.769 0.711 0.933 0.754 0.926 0.754 23.103 2.824 1.274 0.622 8.229 4.365 7.897 4.365 745.109 16.538 12.276 7.213 36.672 62.734 28.645 59.055 90.264 2.050 1.191 0.838 15.253 2.437 3.029 3.581
9179 610 380 1124 153 75.625 79.000 280.365 7139.833 221.323 -61.094 282.438 168.052 -17.188 171.438 -21.823 1123.635 144.083 64.323 25.396 414.938 207.385 381.312 236.208 12.332 0.000 1.674 57.818 12.911 11.492 1.478 13.033 11.538 12.859 11.436 192.499 25.372 10.954 2.391 72.774 35.921 67.981 42.456 655.192 32.423 25.703 17.628 87.663 57.431 45.248 29.035 56.723 4.712 1.805 2.127 30.703 2.256 2.476 2.899
187079 383 383 383 153 76.000 54.000 314.000 4125.000 173.000 1.000 171.000 37.000 80.000 137.000 25.000 1235.000 141.000 75.000 22.000 409.000 227.000 287.000 321.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 431.375 24.875 21.000 18.000 130.000 54.000 34.750 44.250 26.704 0.835 1.069 2.000 14.861 0.926 1.488 1.282
save(soilclim, file = "../_output/SoilClim_sPlot3.RData")

ANNEX 1 - Code to Extract predictors

# define ancillary functions
robust.mean <- function(x){mean(x, na.rm=T)}
robust.mode <- function(x){if(any(x!=0)) {
  a <- x[which(x!=0)] #exclude zero (i.e. NAs)
  return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
    return(NA)}
robust.sd <- function(x){sd(x, na.rm=T)}

#main function
PredExtr <- function(x.shp, myfunction=NA, output=NA, 
                     toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
  print("Load Packages")
  require(foreach)
  require(parallel)
  require(doParallel)
  require(raster)
  require(rgeos)
  require(rgdal)
  require(geosphere) 
  require(spatialEco)
  require(sf)
 
  print(paste("Loading", typp, "data :", toextract))
  print(paste("output will be:", output))
  if(typp=="raster"){ mypredictor <- raster(toextract)} else
  mypredictor <- readOGR(toextract)
  header.shp <- readOGR(x.shp)
  crs(mypredictor) <- crs(header.shp) #should be verified beforehand!

  ## Divide in chunks if requested
  if(chunkn>1 & !is.na(chunk.i)){
    print(paste("divide in chunks and run on chunk n.", chunk.i))
    indices <- 1:length(header.shp)
    chunks <- split(indices, sort(indices%%chunkn))
    header.shp <- header.shp[chunks[[chunk.i]],]
    } 

  print("myfunction defined as")
  myfunction <- get(myfunction)
  print(myfunction)
  
  print("go parallel")
  cl <- makeForkCluster(ncores, outfile="" )
  registerDoParallel(cl)

  
  if(typp=="raster"){
    print("start main foreach loop")
    out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { 
    tmp <- raster::extract(mypredictor, header.shp[i,], 
                         buffer=min(10000,  max(250, header.shp@data$lc_ncrt[i])), fun=myfunction) 
  }
  if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  } else {
    print("Match sequentially")
    out <- sp::over(x=header.shp, y=mypredictor) 
    i.toassign <- which(is.na(out[,1]))
    toassign <- header.shp[i.toassign,]
    print(paste(length(toassign), "plots not matched directly - seek for NN"))
  if(length(toassign)>0){
    # Crack if multipolygon
    if(any( (st_geometry_type(mypredictor %>% st_as_sf)) == "MULTIPOLYGON")) {
      ## explode multipolygon
      mypredictor.expl <- explode(mypredictor) %>% 
        as_Spatial()
      } else {
        mypredictor.expl <- mypredictor}
    print("start main foreach loop")
    nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
      print(i)
      ## create a subset of geoentities based on a 5° buffer radius around each target plot.
      tmp.buff <- gBuffer(toassign[i,], width=5) 
      tryCatch(
        tmp.mypredictor <- spatialEco::spatial.select(
          x = tmp.buff,
          y = mypredictor.expl,
          distance = 0.1,
          predicate = "intersect"
        ),
        error = function(e) {
          print(paste("Nothing close enough for plot", toassign@data$PlotObservationID[i]))
        }
      )
      # find nearest neighbour  
      nearest.tmp <- tryCatch(tmp.mypredictor@data[geosphere::dist2Line(toassign[i,], tmp.mypredictor)[,"ID"],],
                              error = function(e){
                                ee <- myptedictor@data[1,]
                                ee[1,] <- rep(NA, ncol(mypredictor))
                                }
                              )
      return(nearest.tmp)
    }

    out[i.toassign,] <- nearest
   }
    if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  }
  stopCluster(cl)
  }