Timestamp: Wed Dec 2 00:46:49 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)

## Spatial packages
library(rgdal)
library(sp)
library(sf)
library(rgeos)
library(raster)
library(rworldmap)

source("A98_PredictorsExtract.R")

#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")

#Ancillary functions
get.summary <- function(x){x %>% 
    summarize_all(.funs=list(num.NAs=~sum(is.na(.)), 
                             min=~min(., na.rm=T),
                             q025=~quantile(., 0.25, na.rm=T), 
                             q50=~quantile(., 0.50, na.rm=T), 
                             q75=~quantile(., .75, na.rm=T), 
                             max=~max(., na.rm=T), 
                             mean=~mean(., na.rm=T), 
                             sd=~sd(., na.rm=T))) %>% 
    gather(variable, value) %>% 
    separate(variable, sep="_", into=c("variable", "stat")) %>% 
    mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
    spread(key=stat, value=value)
}

1 Import header data

load("../_output/header_sPlot3.0.RData")

Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fields Location uncertainty to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.

header.shp <- header %>%
  filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
                                        dplyr::select(Longitude, Latitude),
                               proj4string = CRS("+init=epsg:4326"), 
                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
                                               `GIVD ID`=header.shp$`GIVD ID`))
## OGR data source with driver: ESRI Shapefile 
## Source: "/data/sPlot/users/Francesco/sPlot3/_derived", layer: "header.shp"
## with 1976235 features
## It has 3 fields

2 Climate

Bioclimatic variables (bio01-bio19) derive from CHELSA

Codes:

Bio1 = Annual Mean Temperature
Bio2 = Mean Diurnal Range
Bio3 = Isothermality
Bio4 = Temperature Seasonality
Bio5 = Max Temperature of Warmest Month
Bio6 = Min Temperature of Coldest Month
Bio7 = Temperature Annual Range
Bio8 = Mean Temperature of Wettest Quarter
Bio9 = Mean Temperature of Driest Quarter
Bio10 = Mean Temperature of Warmest Quarter
Bio11 = Mean Temperature of Coldest Quarter
Bio12 = Annual Precipitation
Bio13 = Precipitation of Wettest Month
Bio14 = Precipitation of Driest Month
Bio15 = Precipitation Seasonality
Bio16 = Precipitation of Wettest Quarter
Bio17 = Precipitation of Driest Quarter
Bio18 = Precipitation of Warmest Quarter
Bio19 = Precipitation of Coldest Quarter

Download raster files:

library(downloader)
url.chelsa <- list()
for(i in 1:19){
  ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
  url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
  download(url.chelsa[[i]],
           paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""), 
           mode = "wb")
}

Intersect header.shp with each raster of the CHELSA collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

#make sure to setup the right function (robust.mean\robust.sd) and the correct file type (raster) in submit-A98.sh 
for pred in ls /data/splot/_data/Predictors/*.tif
do
qsub submit-A98.sh $pred
done 
header.shp.path <- "../_derived/header.shp.shp"

for(i in 1:19){
  ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
  #define output paths
  output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
  output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10", 
              stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
chelsa.out <- do.call(cbind, lapply(chelsa.files, 
                                    function(x) {read_csv(x,col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                        column_to_rownames("X1")}))
colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
#same for sd values
chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa mean statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01 4040 -270.578 82.000 96.000 110.000 307.00 97.023 43.401
bio02 4040 6.000 54.000 71.000 80.000 144.00 66.982 21.456
bio03 4040 67.000 233.000 275.000 300.000 832.00 270.146 70.353
bio04 4040 88.000 5397.000 6164.000 7339.000 21703.00 6373.841 1793.695
bio05 4040 -77.642 201.407 224.000 252.000 452.00 229.713 43.397
bio06 4040 -431.000 -50.449 -12.000 12.575 252.00 -19.443 61.260
bio07 4040 26.000 206.000 248.709 285.000 675.00 249.163 63.721
bio08 4040 -208.000 86.000 144.013 177.000 358.00 132.390 62.851
bio09 4040 -377.000 -1.000 57.000 98.333 373.00 63.304 87.406
bio10 4040 -111.628 167.000 178.000 201.264 375.00 186.105 38.709
bio11 4040 -400.000 -14.000 11.000 39.763 279.00 12.339 58.380
bio12 4040 0.000 604.000 761.000 907.000 6554.00 849.872 472.599
bio13 4040 0.000 74.000 85.000 113.000 993.75 104.780 64.680
bio14 4040 0.000 29.848 41.000 51.000 411.00 43.612 27.856
bio15 4040 5.000 17.000 24.000 35.000 331.00 28.480 16.999
bio16 4040 0.000 216.620 247.000 325.000 2832.50 302.450 182.994
bio17 4040 0.000 94.000 131.000 163.000 1251.00 139.050 86.495
bio18 4040 0.000 176.000 216.000 253.000 2285.00 236.268 146.904
bio19 4040 0.000 112.000 163.000 210.511 2645.00 183.224 120.097
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01sd 1384369 0 0.7800182 2.0310466 5.9857381 51.162192 4.846720 6.7012074
bio02sd 1384369 0 0.0000000 0.1793201 0.5003541 4.392056 0.397326 0.5871731
bio03sd 1384369 0 0.4435312 0.7527727 1.7079089 23.952856 1.391100 1.5943478
bio04sd 1384369 0 7.7781746 15.0614901 31.6960500 242.616057 25.970859 27.2246007
bio05sd 1384369 0 0.9082952 2.2158230 5.6568542 53.383878 5.095573 6.9402301
bio06sd 1384369 0 0.7996481 1.8866058 5.7739680 52.067653 4.665683 6.3305406
bio07sd 1384369 0 0.5001703 0.6595651 1.3733545 8.507203 1.085018 1.0246997
bio08sd 1384369 0 1.0725245 5.6493818 16.9136010 148.421139 12.643420 16.6528157
bio09sd 1384369 0 0.9536438 2.7201027 8.7700028 144.249783 7.687753 12.0662592
bio10sd 1384369 0 0.7754730 2.1454694 5.6568542 53.720906 5.023930 6.9683276
bio11sd 1384369 0 0.7806987 1.8978051 5.8134709 52.062384 4.608413 6.3346350
bio12sd 1384369 0 17.4683532 42.9959747 71.1728646 713.159827 62.144824 64.5121259
bio13sd 1384369 0 2.2862699 5.9902205 10.5262733 99.739245 8.186887 8.8333873
bio14sd 1384369 0 0.7874771 2.2079022 4.3447815 36.986231 3.404173 3.7164424
bio15sd 1384369 0 0.5163978 0.7071068 1.7593330 10.929618 1.283538 1.2838996
bio16sd 1384369 0 6.6037769 17.3523720 29.9374956 297.255136 23.389558 25.4133442
bio17sd 1384369 0 2.5631032 6.9268396 13.9031189 113.488278 10.832693 11.8698016
bio18sd 1384369 0 4.3574734 12.4533991 21.0158670 345.418948 18.467468 22.3670495
bio19sd 1384369 0 3.5121519 9.8499438 18.2777876 296.915363 15.135450 17.0292687

3 Soil

Soil variables (5 cm depth) derive from the ISRIC dataset, downloaded at 250-m resolution

CECSOL Cation Exchange capacity of soil
CLYPPT Clay mass fraction in %
CRFVOL Coarse fragments volumetric in %
ORCDRC Soil Organic Carbon Content in g/kg
PHIHOX Soil pH x 10 in H20
SLTPPT Silt mass fraction in %
SNDPPT Sand mass fraction in %
BLDFIE Bulk Density (fine earth) in kg/m3
P.ret.cat Phosphorous Retention - Categorical value, see ISRIC 2011-06
Download ISRIC soil data

url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"

download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")

Intersect header.shp with each raster of the ISRIC collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

for(i in 1:8){
  ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
                                pattern = "^[^(Gene)]", full.names = T)[i]
  isric.i <- raster(ff)
  #define output paths
  filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
           pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
  output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
  output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
isric.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"), 
                          full.names=T)

isric.out <- do.call(cbind, lapply(isric.files,  
                                   function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                       column_to_rownames("X1")}))
colnames(isric.out) <- ISRIC.layer.names
#same for sd values
isric.sd.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"), 
                          full.names=T)
isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
                                      function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric mean statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIE 19205 93.667 646.600 796.000 1046.500 1718.500 846.194 280.873
CECSOL 19205 1.000 17.600 21.750 26.500 157.333 22.657 7.182
CLYPPT 19205 0.000 13.600 18.852 23.450 67.000 18.616 7.325
CRFVOL 19205 0.000 6.800 10.937 16.000 55.000 11.707 6.089
ORCDRC 19205 0.000 29.616 50.833 89.000 688.500 65.566 47.791
PHIHOX 19205 38.000 54.167 59.500 64.424 94.200 59.971 7.846
SLTPPT 19205 2.250 28.000 36.289 40.500 80.667 34.163 10.037
SNDPPT 19205 2.400 36.667 43.667 56.500 95.000 47.207 14.710
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIEsd 115683 0 19.047 30.928 55.756 271.328 41.588 31.638
CECSOLsd 115683 0 0.837 1.390 2.246 50.912 1.744 1.369
CLYPPTsd 115683 0 0.753 1.140 1.826 13.546 1.442 1.057
CRFVOLsd 115683 0 0.584 1.033 1.673 15.556 1.272 0.894
ORCDRCsd 115683 0 3.270 6.229 12.095 91.914 9.499 9.402
PHIHOXsd 115683 0 0.707 1.069 1.982 10.533 1.613 1.424
SLTPPTsd 115683 0 0.837 1.329 2.160 20.569 1.772 1.434
SNDPPTsd 115683 0 1.033 1.708 3.033 25.153 2.449 2.206

4 Elevation variability

Reimport output (calculated in 04_buildHeader).
Extract elevation for each plot: mean and 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package elevatr, which uses the Terrain Tiles on Amazon Web Services. Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described here.

elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv")

5 Create output and export

soilclim <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  left_join(elevation.out %>% 
              dplyr::select(PlotObservationID, Elevation_median, Elevation_q2.5, Elevation_q97.5, Elevation_DEM.res=DEM.res) %>% 
              filter(!is.na(Elevation_median)), 
            by="PlotObservationID") %>% 
  left_join(header.shp@data %>% 
              dplyr::select(PlotObservationID) %>% 
              bind_cols(chelsa.out) %>%
              bind_cols(chelsa.sd.out) %>% 
              bind_cols(isric.out) %>% 
              bind_cols(isric.sd.out) %>% 
              distinct(), 
            by="PlotObservationID")
dim(soilclim)
## [1] 1977637      59
nrow(soilclim)==nrow(header)
## [1] TRUE
Show environmenal info for 20 randomly selected plots
PlotObservationID Elevation_median Elevation_q2.5 Elevation_q97.5 Elevation_DEM.res bio01 bio02 bio03 bio04 bio05 bio06 bio07 bio08 bio09 bio10 bio11 bio12 bio13 bio14 bio15 bio16 bio17 bio18 bio19 bio01sd bio02sd bio03sd bio04sd bio05sd bio06sd bio07sd bio08sd bio09sd bio10sd bio11sd bio12sd bio13sd bio14sd bio15sd bio16sd bio17sd bio18sd bio19sd BLDFIE CECSOL CLYPPT CRFVOL ORCDRC PHIHOX SLTPPT SNDPPT BLDFIEsd CECSOLsd CLYPPTsd CRFVOLsd ORCDRCsd PHIHOXsd SLTPPTsd SNDPPTsd
975437 1936 1907 1954 153 19.000 81.000 296.000 6588.000 163.000 -109.000 272.000 37.000 -64.000 112.000 -68.000 1232.000 181.000 41.000 42.000 465.000 132.000 406.000 149.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 788.200 29.000 9.800 16.800 124.400 53.200 33.000 57.000 15.353 2.345 0.837 1.095 4.669 1.095 0.707 1.000
608072 1921 1921 1921 153 36.000 43.000 228.000 5079.000 141.000 -48.000 189.000 -6.000 102.000 113.000 -28.000 1102.000 149.000 20.000 47.000 443.000 76.000 90.000 345.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 825.600 23.200 15.200 24.400 99.200 57.200 34.400 50.400 18.366 0.837 0.837 2.302 6.834 0.837 0.548 1.140
1098143 0 -2 9 153 104.669 42.609 233.346 4878.632 202.105 19.564 182.541 93.000 79.008 174.985 39.383 819.113 87.150 42.692 21.947 259.180 139.865 240.744 170.098 0.472 1.199 4.322 20.210 0.940 0.856 1.612 0.577 0.194 0.122 0.488 6.095 0.830 0.464 0.256 2.292 1.386 1.933 1.348 769.653 18.646 20.892 7.186 51.822 68.317 29.332 49.791 120.389 2.468 3.055 2.207 27.441 3.160 4.696 7.085
1709089 NA NA NA NA 33.000 92.000 293.000 7559.000 195.000 -119.000 314.000 20.000 106.000 136.000 -71.000 479.000 81.000 6.000 63.000 241.000 20.000 24.000 152.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1043.000 27.000 16.000 30.000 82.000 63.000 37.000 47.000 NA NA NA NA NA NA NA NA
891822 27 15 44 153 88.096 43.287 200.606 6174.649 204.830 -11.096 215.904 169.266 7.840 176.426 7.840 603.713 65.096 32.213 19.862 193.628 110.277 189.628 110.277 0.442 0.650 2.001 8.448 0.900 0.330 0.868 0.532 0.423 0.539 0.423 10.963 1.127 0.602 0.347 3.328 2.157 3.580 2.157 789.075 16.134 12.150 5.962 33.124 63.913 28.013 59.806 79.775 2.081 1.491 0.812 9.217 2.916 2.700 3.631
917685 49 49 49 153 93.000 68.000 266.000 6733.000 233.000 -22.000 255.000 158.000 29.000 186.000 5.000 525.000 65.000 31.000 23.000 182.000 97.000 164.000 98.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 862.400 14.000 8.200 8.200 29.600 58.800 20.600 71.000 25.822 1.225 0.837 0.837 6.387 1.789 1.517 1.414
723466 275 275 275 153 116.000 73.000 317.000 5520.000 246.000 15.000 231.000 59.000 190.000 196.000 45.000 944.000 104.000 61.000 14.000 298.000 194.000 213.000 276.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1015.400 14.000 19.400 12.000 27.000 62.200 44.200 36.800 11.349 0.000 0.894 0.707 2.449 1.483 1.304 1.304
232336 616 616 616 153 73.000 75.000 278.000 6939.000 215.000 -54.000 269.000 163.000 -18.000 167.000 -20.000 714.000 91.000 36.000 32.000 269.000 111.000 259.000 123.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 773.600 21.400 15.400 15.600 41.600 51.800 44.400 40.200 24.572 0.548 1.673 0.548 5.225 1.095 1.140 0.837
1474207 NA NA NA NA 56.624 75.508 261.866 7516.794 206.720 -81.642 288.360 136.465 -41.996 156.808 -45.807 886.752 115.188 50.275 26.331 320.340 155.629 275.125 167.186 9.167 0.500 1.505 51.066 9.608 8.436 1.367 15.249 8.553 9.636 8.422 102.049 14.275 7.126 2.231 37.213 19.916 32.491 23.182 637.471 27.315 14.827 18.843 99.487 48.897 41.868 43.266 104.590 4.540 2.263 2.616 27.671 3.395 2.198 3.603
1942503 1244 1244 1244 153 169.000 123.000 361.000 7838.000 346.000 4.000 342.000 273.000 243.000 274.000 64.000 433.000 75.000 10.000 48.000 205.000 32.000 121.000 113.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1493.500 16.750 23.500 21.000 10.750 69.750 23.000 53.500 10.599 0.500 1.732 1.414 0.500 0.500 0.000 1.732
1623555 80 80 80 153 101.000 64.000 333.000 4424.000 209.000 17.000 192.000 59.000 79.000 166.000 45.000 694.000 74.000 47.000 14.000 215.000 149.000 170.000 160.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 672.833 22.667 25.667 13.833 47.000 64.500 36.333 37.833 29.363 1.506 1.211 0.408 4.336 1.049 0.516 1.472
1230829 400 400 400 153 -8.000 60.000 181.000 9391.000 174.000 -160.000 334.000 119.000 -97.000 127.000 -130.000 650.000 97.000 33.000 37.000 271.000 101.000 258.000 112.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 445.900 28.100 10.500 9.500 132.600 47.600 37.800 52.100 27.875 1.792 0.527 0.527 4.789 0.843 1.317 1.101
432728 63 63 64 153 82.000 43.000 210.000 5723.000 193.000 -12.000 205.000 66.000 51.000 164.000 7.000 741.000 82.000 42.000 21.000 243.000 135.000 195.000 153.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 698.333 14.167 10.833 8.000 46.500 62.667 27.667 61.167 22.651 0.408 0.408 0.000 2.168 1.033 1.966 2.317
279885 1091 1091 1091 153 47.000 75.000 280.000 6828.000 189.000 -79.000 268.000 136.000 30.000 140.000 -44.000 1235.000 133.000 75.000 18.000 388.000 247.000 373.000 346.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 559.333 30.667 12.167 18.000 203.333 47.167 37.167 50.833 42.075 1.033 0.753 0.894 24.006 1.722 0.753 1.329
360454 5 5 5 153 84.000 38.000 186.000 5835.000 194.000 -10.000 204.000 108.000 52.000 168.000 8.000 620.000 64.000 35.000 19.000 190.000 115.000 179.000 120.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 640.429 16.714 11.429 6.429 78.714 56.571 20.857 67.857 11.487 1.113 1.134 0.535 8.220 0.787 0.690 1.345
1693286 124 123 125 153 266.000 104.000 447.000 4165.000 365.000 133.000 233.000 300.000 211.000 312.000 196.000 514.000 145.000 2.000 113.000 397.000 8.000 200.000 13.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1407.750 26.500 21.750 10.500 8.250 73.250 11.250 67.000 18.892 1.000 0.500 1.000 0.500 0.500 0.957 1.155
154784 194 191 197 153 96.000 49.000 309.000 3861.000 184.000 24.000 160.000 62.000 97.000 153.000 47.000 1172.000 133.000 73.000 22.000 397.000 220.000 259.000 302.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 560.500 22.000 15.833 16.333 104.000 51.333 39.000 45.000 16.159 1.095 1.472 0.816 12.554 0.816 0.632 1.789
890457 6 1 18 153 89.223 44.351 204.787 6152.883 206.457 -10.181 216.702 170.149 10.702 177.309 9.106 613.532 70.777 32.894 21.415 208.372 112.500 196.628 112.596 0.444 0.667 2.130 6.596 0.542 0.463 0.890 0.387 4.983 0.487 0.401 13.263 1.779 0.710 0.495 5.137 2.483 4.667 2.420 692.145 18.328 12.584 5.938 56.042 61.367 24.860 62.521 133.302 4.656 2.997 0.791 29.938 4.081 3.296 4.756
156245 144 137 152 153 99.000 59.000 323.000 4314.000 202.000 18.000 184.000 59.000 157.000 162.000 45.000 861.000 96.000 51.000 20.000 283.000 162.000 181.000 212.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 678.000 25.167 25.167 14.500 47.000 60.500 45.500 29.500 24.323 0.753 2.137 0.548 1.897 1.049 1.049 1.378
1783831 387 387 387 153 137.000 40.000 155.000 7401.000 270.000 15.000 256.000 223.000 76.000 246.000 37.000 1969.000 265.000 132.000 26.000 738.000 409.000 529.000 431.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 945.200 24.800 22.800 15.800 84.000 50.000 38.400 38.800 28.261 0.837 0.837 0.837 3.808 1.000 1.140 1.304
save(soilclim, file = "../_output/SoilClim_sPlot3.RData")

ANNEX 1 - Code to Extract predictors

# define ancillary functions
robust.mean <- function(x){mean(x, na.rm=T)}
robust.mode <- function(x){if(any(x!=0)) {
  a <- x[which(x!=0)] #exclude zero (i.e. NAs)
  return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
    return(NA)}
robust.sd <- function(x){sd(x, na.rm=T)}

#main function
PredExtr <- function(x.shp, myfunction=NA, output=NA, 
                     toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
  print("Load Packages")
  require(foreach)
  require(parallel)
  require(doParallel)
  require(raster)
  require(rgeos)
  require(rgdal)
  require(geosphere) 
  require(spatialEco)
  require(sf)
 
  print(paste("Loading", typp, "data :", toextract))
  print(paste("output will be:", output))
  if(typp=="raster"){ mypredictor <- raster(toextract)} else
  mypredictor <- readOGR(toextract)
  header.shp <- readOGR(x.shp)
  crs(mypredictor) <- crs(header.shp) #should be verified beforehand!

  ## Divide in chunks if requested
  if(chunkn>1 & !is.na(chunk.i)){
    print(paste("divide in chunks and run on chunk n.", chunk.i))
    indices <- 1:length(header.shp)
    chunks <- split(indices, sort(indices%%chunkn))
    header.shp <- header.shp[chunks[[chunk.i]],]
    } 

  print("myfunction defined as")
  myfunction <- get(myfunction)
  print(myfunction)
  
  print("go parallel")
  cl <- makeForkCluster(ncores, outfile="" )
  registerDoParallel(cl)

  
  if(typp=="raster"){
    print("start main foreach loop")
    out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { 
    tmp <- raster::extract(mypredictor, header.shp[i,], 
                         buffer=min(10000,  max(250, header.shp@data$lc_ncrt[i])), fun=myfunction) 
  }
  if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  } else {
    print("Match sequentially")
    out <- sp::over(x=header.shp, y=mypredictor) 
    i.toassign <- which(is.na(out[,1]))
    toassign <- header.shp[i.toassign,]
    print(paste(length(toassign), "plots not matched directly - seek for NN"))
  if(length(toassign)>0){
    # Crack if multipolygon
    if(any( (st_geometry_type(mypredictor %>% st_as_sf)) == "MULTIPOLYGON")) {
      ## explode multipolygon
      mypredictor.expl <- explode(mypredictor) %>% 
        as_Spatial()
      } else {
        mypredictor.expl <- mypredictor}
    print("start main foreach loop")
    nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
      print(i)
      ## create a subset of geoentities based on a 5° buffer radius around each target plot.
      tmp.buff <- gBuffer(toassign[i,], width=5) 
      tryCatch(
        tmp.mypredictor <- spatialEco::spatial.select(
          x = tmp.buff,
          y = mypredictor.expl,
          distance = 0.1,
          predicate = "intersect"
        ),
        error = function(e) {
          print(paste("Nothing close enough for plot", toassign@data$PlotObservationID[i]))
        }
      )
      # find nearest neighbour  
      nearest.tmp <- tryCatch(tmp.mypredictor@data[geosphere::dist2Line(toassign[i,], tmp.mypredictor)[,"ID"],],
                              error = function(e){
                                ee <- myptedictor@data[1,]
                                ee[1,] <- rep(NA, ncol(mypredictor))
                                }
                              )
      return(nearest.tmp)
    }

    out[i.toassign,] <- nearest
   }
    if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  }
  stopCluster(cl)
  }