Timestamp: Fri Nov 27 16:35:00 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)

## Spatial packages
library(rgdal)
library(sp)
library(sf)
library(rgeos)
library(raster)
library(rworldmap)

source("A98_PredictorsExtract.R")

#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")

#Ancillary functions
get.summary <- function(x){x %>% 
    summarize_all(.funs=list(num.NAs=~sum(is.na(.)), 
                             min=~min(., na.rm=T),
                             q025=~quantile(., 0.25, na.rm=T), 
                             q50=~quantile(., 0.50, na.rm=T), 
                             q75=~quantile(., .75, na.rm=T), 
                             max=~max(., na.rm=T), 
                             mean=~mean(., na.rm=T), 
                             sd=~sd(., na.rm=T))) %>% 
    gather(variable, value) %>% 
    separate(variable, sep="_", into=c("variable", "stat")) %>% 
    mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
    spread(key=stat, value=value)
}

1 Import header data

load("../_output/header_sPlot3.0.RData")

Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fields Location uncertainty to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.

header.shp <- header %>%
  filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
                                        dplyr::select(Longitude, Latitude),
                               proj4string = CRS("+init=epsg:4326"), 
                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
                                               `GIVD ID`=header.shp$`GIVD ID`))
## OGR data source with driver: ESRI Shapefile 
## Source: "/data/sPlot/users/Francesco/sPlot3/_derived", layer: "header.shp"
## with 1976235 features
## It has 3 fields

2 Climate

Bioclimatic variables (bio01-bio19) derive from CHELSA

Codes:

Bio1 = Annual Mean Temperature
Bio2 = Mean Diurnal Range
Bio3 = Isothermality
Bio4 = Temperature Seasonality
Bio5 = Max Temperature of Warmest Month
Bio6 = Min Temperature of Coldest Month
Bio7 = Temperature Annual Range
Bio8 = Mean Temperature of Wettest Quarter
Bio9 = Mean Temperature of Driest Quarter
Bio10 = Mean Temperature of Warmest Quarter
Bio11 = Mean Temperature of Coldest Quarter
Bio12 = Annual Precipitation
Bio13 = Precipitation of Wettest Month
Bio14 = Precipitation of Driest Month
Bio15 = Precipitation Seasonality
Bio16 = Precipitation of Wettest Quarter
Bio17 = Precipitation of Driest Quarter
Bio18 = Precipitation of Warmest Quarter
Bio19 = Precipitation of Coldest Quarter

Download raster files:

library(downloader)
url.chelsa <- list()
for(i in 1:19){
  ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
  url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
  download(url.chelsa[[i]],
           paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""), 
           mode = "wb")
}

Intersect header.shp with each raster of the CHELSA collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

#make sure to setup the right function (robust.mean\robust.sd) and the correct file type (raster) in submit-A98.sh 
for pred in ls /data/splot/_data/Predictors/*.tif
do
qsub submit-A98.sh $pred
done 
header.shp.path <- "../_derived/header.shp.shp"

for(i in 1:19){
  ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
  #define output paths
  output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
  output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10", 
              stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
chelsa.out <- do.call(cbind, lapply(chelsa.files, 
                                    function(x) {read_csv(x,col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                        column_to_rownames("X1")}))
colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
#same for sd values
chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa mean statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01 4040 -270.578 82.000 96.000 110.000 307.00 97.023 43.401
bio02 4040 6.000 54.000 71.000 80.000 144.00 66.982 21.456
bio03 4040 67.000 233.000 275.000 300.000 832.00 270.146 70.353
bio04 4040 88.000 5397.000 6164.000 7339.000 21703.00 6373.841 1793.695
bio05 4040 -77.642 201.407 224.000 252.000 452.00 229.713 43.397
bio06 4040 -431.000 -50.449 -12.000 12.575 252.00 -19.443 61.260
bio07 4040 26.000 206.000 248.709 285.000 675.00 249.163 63.721
bio08 4040 -208.000 86.000 144.013 177.000 358.00 132.390 62.851
bio09 4040 -377.000 -1.000 57.000 98.333 373.00 63.304 87.406
bio10 4040 -111.628 167.000 178.000 201.264 375.00 186.105 38.709
bio11 4040 -400.000 -14.000 11.000 39.763 279.00 12.339 58.380
bio12 4040 0.000 604.000 761.000 907.000 6554.00 849.872 472.599
bio13 4040 0.000 74.000 85.000 113.000 993.75 104.780 64.680
bio14 4040 0.000 29.848 41.000 51.000 411.00 43.612 27.856
bio15 4040 5.000 17.000 24.000 35.000 331.00 28.480 16.999
bio16 4040 0.000 216.620 247.000 325.000 2832.50 302.450 182.994
bio17 4040 0.000 94.000 131.000 163.000 1251.00 139.050 86.495
bio18 4040 0.000 176.000 216.000 253.000 2285.00 236.268 146.904
bio19 4040 0.000 112.000 163.000 210.511 2645.00 183.224 120.097
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01sd 1384369 0 0.7800182 2.0310466 5.9857381 51.162192 4.846720 6.7012074
bio02sd 1384369 0 0.0000000 0.1793201 0.5003541 4.392056 0.397326 0.5871731
bio03sd 1384369 0 0.4435312 0.7527727 1.7079089 23.952856 1.391100 1.5943478
bio04sd 1384369 0 7.7781746 15.0614901 31.6960500 242.616057 25.970859 27.2246007
bio05sd 1384369 0 0.9082952 2.2158230 5.6568542 53.383878 5.095573 6.9402301
bio06sd 1384369 0 0.7996481 1.8866058 5.7739680 52.067653 4.665683 6.3305406
bio07sd 1384369 0 0.5001703 0.6595651 1.3733545 8.507203 1.085018 1.0246997
bio08sd 1384369 0 1.0725245 5.6493818 16.9136010 148.421139 12.643420 16.6528157
bio09sd 1384369 0 0.9536438 2.7201027 8.7700028 144.249783 7.687753 12.0662592
bio10sd 1384369 0 0.7754730 2.1454694 5.6568542 53.720906 5.023930 6.9683276
bio11sd 1384369 0 0.7806987 1.8978051 5.8134709 52.062384 4.608413 6.3346350
bio12sd 1384369 0 17.4683532 42.9959747 71.1728646 713.159827 62.144824 64.5121259
bio13sd 1384369 0 2.2862699 5.9902205 10.5262733 99.739245 8.186887 8.8333873
bio14sd 1384369 0 0.7874771 2.2079022 4.3447815 36.986231 3.404173 3.7164424
bio15sd 1384369 0 0.5163978 0.7071068 1.7593330 10.929618 1.283538 1.2838996
bio16sd 1384369 0 6.6037769 17.3523720 29.9374956 297.255136 23.389558 25.4133442
bio17sd 1384369 0 2.5631032 6.9268396 13.9031189 113.488278 10.832693 11.8698016
bio18sd 1384369 0 4.3574734 12.4533991 21.0158670 345.418948 18.467468 22.3670495
bio19sd 1384369 0 3.5121519 9.8499438 18.2777876 296.915363 15.135450 17.0292687

3 Soil

Soil variables (5 cm depth) derive from the ISRIC dataset, downloaded at 250-m resolution

CECSOL Cation Exchange capacity of soil
CLYPPT Clay mass fraction in %
CRFVOL Coarse fragments volumetric in %
ORCDRC Soil Organic Carbon Content in g/kg
PHIHOX Soil pH x 10 in H20
SLTPPT Silt mass fraction in %
SNDPPT Sand mass fraction in %
BLDFIE Bulk Density (fine earth) in kg/m3
P.ret.cat Phosphorous Retention - Categorical value, see ISRIC 2011-06
Download ISRIC soil data

url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"

download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")

Intersect header.shp with each raster of the ISRIC collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

for(i in 1:8){
  ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
                                pattern = "^[^(Gene)]", full.names = T)[i]
  isric.i <- raster(ff)
  #define output paths
  filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
           pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
  output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
  output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
isric.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"), 
                          full.names=T)

isric.out <- do.call(cbind, lapply(isric.files,  
                                   function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                       column_to_rownames("X1")}))
colnames(isric.out) <- ISRIC.layer.names
#same for sd values
isric.sd.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"), 
                          full.names=T)
isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
                                      function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric mean statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIE 19205 93.667 646.600 796.000 1046.500 1718.500 846.194 280.873
CECSOL 19205 1.000 17.600 21.750 26.500 157.333 22.657 7.182
CLYPPT 19205 0.000 13.600 18.852 23.450 67.000 18.616 7.325
CRFVOL 19205 0.000 6.800 10.937 16.000 55.000 11.707 6.089
ORCDRC 19205 0.000 29.616 50.833 89.000 688.500 65.566 47.791
PHIHOX 19205 38.000 54.167 59.500 64.424 94.200 59.971 7.846
SLTPPT 19205 2.250 28.000 36.289 40.500 80.667 34.163 10.037
SNDPPT 19205 2.400 36.667 43.667 56.500 95.000 47.207 14.710
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIEsd 115683 0 19.047 30.928 55.756 271.328 41.588 31.638
CECSOLsd 115683 0 0.837 1.390 2.246 50.912 1.744 1.369
CLYPPTsd 115683 0 0.753 1.140 1.826 13.546 1.442 1.057
CRFVOLsd 115683 0 0.584 1.033 1.673 15.556 1.272 0.894
ORCDRCsd 115683 0 3.270 6.229 12.095 91.914 9.499 9.402
PHIHOXsd 115683 0 0.707 1.069 1.982 10.533 1.613 1.424
SLTPPTsd 115683 0 0.837 1.329 2.160 20.569 1.772 1.434
SNDPPTsd 115683 0 1.033 1.708 3.033 25.153 2.449 2.206

4 Elevation variability

Reimport output (calculated in 04_buildHeader)

elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv")

5 Create output and export

soilclim <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  left_join(elevation.out %>% 
              dplyr::select(PlotObservationID, Elevation_median, Elevation_q2.5, Elevation_q97.5, Elevation_DEM.res=DEM.res), 
            by="PlotObservationID") %>% 
  left_join(header.shp@data %>% 
              dplyr::select(PlotObservationID) %>% 
              bind_cols(chelsa.out) %>%
              bind_cols(chelsa.sd.out) %>% 
              bind_cols(isric.out) %>% 
              bind_cols(isric.sd.out) %>% 
              distinct(), 
            by="PlotObservationID")
Show environmenal info for 20 randomly selected plots
PlotObservationID Elevation_median Elevation_q2.5 Elevation_q97.5 Elevation_DEM.res bio01 bio02 bio03 bio04 bio05 bio06 bio07 bio08 bio09 bio10 bio11 bio12 bio13 bio14 bio15 bio16 bio17 bio18 bio19 bio01sd bio02sd bio03sd bio04sd bio05sd bio06sd bio07sd bio08sd bio09sd bio10sd bio11sd bio12sd bio13sd bio14sd bio15sd bio16sd bio17sd bio18sd bio19sd BLDFIE CECSOL CLYPPT CRFVOL ORCDRC PHIHOX SLTPPT SNDPPT BLDFIEsd CECSOLsd CLYPPTsd CRFVOLsd ORCDRCsd PHIHOXsd SLTPPTsd SNDPPTsd
539861 357 357 357 153 97.000 79.000 260.000 8018.000 251.000 -52.000 304.000 199.000 -9.000 201.000 -13.000 552.000 79.000 23.000 42.000 235.000 72.000 188.000 87.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1315.200 26.000 29.200 4.600 25.000 68.200 40.400 30.800 13.236 0.000 0.837 0.894 2.236 0.837 0.548 0.837
1037246 3 2 4 153 100.833 24.000 148.667 4751.000 186.000 24.167 161.667 95.000 69.667 170.000 37.667 802.833 86.500 34.500 27.000 259.500 117.000 244.167 163.167 0.408 0.000 0.816 3.847 0.000 0.408 0.516 0.000 0.516 0.000 0.516 5.492 0.548 0.548 0.000 1.643 1.095 1.472 1.472 893.679 18.661 16.339 8.661 68.375 65.054 24.411 59.161 63.863 4.231 3.282 2.999 10.302 2.475 3.813 6.862
65686 NA NA NA 153 139.000 75.000 262.000 7385.000 290.000 4.000 286.000 69.000 244.000 244.000 44.000 555.000 85.000 15.000 45.000 250.000 51.000 51.000 171.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1226.167 24.167 25.167 13.000 23.500 70.500 32.500 42.167 18.606 2.483 0.408 1.095 0.837 0.548 0.548 0.753
1110023 -2 -3 -1 153 103.000 52.000 264.667 5104.833 208.500 11.833 197.000 87.833 79.000 176.000 35.000 835.500 84.500 46.500 19.000 251.500 152.000 243.500 174.833 0.000 0.000 0.516 3.430 0.548 0.408 0.000 0.408 0.000 0.000 0.000 3.391 0.548 0.548 0.000 1.643 1.095 1.225 0.408 278.495 34.066 22.440 6.341 220.352 61.659 32.659 44.956 73.285 2.375 3.609 1.024 33.479 1.727 4.206 7.368
175118 NA NA NA 153 98.000 34.000 255.000 3522.000 169.000 37.000 132.000 90.000 99.000 149.000 54.000 1268.000 150.000 72.000 25.000 424.000 217.000 302.000 301.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 678.000 23.167 12.833 20.833 90.167 55.667 35.333 52.000 32.305 3.189 0.983 1.722 19.104 1.211 1.751 2.366
1173784 7 7 7 153 101.000 57.000 277.000 5297.000 213.000 6.000 207.000 51.000 79.000 176.000 31.000 838.000 83.000 51.000 14.000 244.000 167.000 234.000 177.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 398.333 15.333 7.000 5.333 78.833 53.500 17.000 76.167 72.445 1.506 0.894 0.816 8.565 1.049 0.632 1.472
33430 522 405 720 153 82.460 79.120 278.060 7272.860 231.500 -54.260 285.780 176.700 -12.020 180.080 -17.040 768.660 95.620 41.800 28.520 284.360 129.060 263.220 132.780 5.556 0.328 0.818 30.836 5.870 5.013 0.887 5.790 5.231 5.834 5.063 87.367 12.295 4.682 1.515 36.014 14.157 31.964 14.227 716.668 24.103 21.804 15.846 58.920 54.131 42.895 35.297 45.364 3.651 1.873 2.128 16.665 2.106 1.579 2.667
898441 43 24 59 153 87.851 52.457 231.713 6198.936 211.000 -15.223 226.245 169.340 6.926 175.787 6.926 611.181 71.394 32.479 22.564 211.936 110.766 197.245 110.766 0.829 0.501 1.535 5.189 0.762 0.941 0.683 0.874 0.833 0.853 0.833 22.535 2.952 1.225 0.560 8.772 4.208 8.064 4.208 753.491 14.721 10.296 6.814 40.009 60.748 25.523 64.157 122.269 2.073 1.516 0.977 17.041 4.494 3.978 4.850
539784 384 384 384 153 94.000 78.000 258.000 8016.000 248.000 -54.000 302.000 197.000 -12.000 198.000 -16.000 518.000 70.000 22.000 37.000 208.000 70.000 175.000 87.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1241.750 26.000 28.500 7.750 31.250 68.500 40.500 30.500 9.777 0.816 1.000 0.500 0.957 1.000 0.577 1.291
1009615 19 18 19 153 72.000 35.000 149.000 6800.000 194.000 -38.000 233.000 142.000 -5.000 171.000 -18.000 654.000 80.000 31.000 33.000 236.000 95.000 213.000 110.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 616.429 26.429 12.571 11.714 130.714 49.143 23.143 64.286 33.055 1.397 0.976 0.488 12.189 1.345 0.690 1.113
1772224 186 186 186 153 176.000 45.000 192.000 6483.000 288.000 55.000 233.000 218.000 118.000 267.000 88.000 2571.000 434.000 94.000 47.000 1139.000 304.000 794.000 320.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1090.500 23.250 25.250 15.500 70.000 50.500 38.750 36.000 11.030 1.708 0.957 1.291 8.602 0.577 0.957 0.816
1675374 NA NA NA NA -7.007 66.025 169.682 11811.672 196.601 -192.559 389.131 140.512 -27.801 154.517 -161.792 360.553 74.404 11.345 72.413 219.219 35.030 190.616 49.672 1.379 0.289 0.469 57.843 2.029 1.379 1.763 1.592 1.098 1.850 1.446 6.688 1.682 0.476 2.046 5.380 1.334 5.579 1.619 511.360 38.882 9.643 4.371 267.495 51.579 51.252 39.084 51.921 6.390 1.221 1.597 18.486 1.382 2.130 2.039
1975517 1314 1314 1314 153 129.000 114.000 280.000 9828.000 352.000 -54.000 406.000 152.000 18.000 272.000 -4.000 165.000 23.000 8.000 31.000 64.000 25.000 42.000 28.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1441.500 20.250 21.500 21.750 7.000 83.500 40.750 37.750 3.109 1.500 0.577 2.062 0.000 0.577 0.500 0.957
1431064 NA NA NA NA 120.420 86.000 281.757 7755.832 278.655 -27.627 306.296 196.594 18.320 224.002 13.294 655.075 90.008 39.170 26.594 249.809 119.178 181.041 131.144 3.193 0.000 0.917 30.781 3.493 2.805 0.782 3.549 2.797 3.455 2.765 56.049 9.526 3.282 2.840 25.091 9.870 18.950 11.049 1298.680 27.241 26.378 3.207 21.487 68.139 41.659 31.960 34.128 1.500 1.756 1.098 2.910 1.619 1.398 1.932
592215 NA NA NA NA 117.482 74.000 310.095 5769.304 251.930 12.575 239.346 144.013 67.824 200.703 43.223 744.488 80.236 50.779 12.760 217.087 156.079 192.671 183.147 2.031 0.000 0.383 15.061 2.216 1.887 0.660 16.914 2.720 2.145 1.898 62.959 6.811 4.345 0.581 18.326 13.903 16.499 16.216 988.585 16.251 20.636 12.025 23.639 62.966 38.217 41.121 92.959 2.246 3.460 1.835 5.752 4.465 3.487 5.647
1090086 5 5 5 153 97.000 57.000 275.000 5384.000 210.000 1.000 209.000 166.000 74.000 173.000 26.000 816.000 83.000 48.000 17.000 236.000 157.000 233.000 167.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 349.667 18.833 8.167 6.500 130.833 55.500 14.833 76.833 6.743 1.602 0.753 0.548 11.125 0.837 1.472 2.229
920013 -1 -1 0 153 92.308 53.154 251.846 5642.615 207.077 -4.000 211.154 152.000 67.000 173.000 18.000 809.077 88.000 41.000 24.769 261.846 134.385 255.692 144.000 0.480 0.376 0.987 4.331 0.277 0.000 0.555 0.000 0.000 0.000 0.000 0.641 0.000 0.000 0.439 0.376 0.506 0.751 0.000 677.078 19.449 23.049 7.356 58.585 65.800 37.893 39.166 71.387 2.306 1.952 0.877 24.499 1.538 2.532 3.429
856654 NA NA NA NA 96.000 62.000 276.000 5781.000 219.000 -5.000 224.000 177.000 75.000 177.000 20.000 696.000 72.000 42.000 15.000 214.000 141.000 214.000 144.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 611.167 13.667 7.333 4.500 38.500 57.500 22.500 69.833 60.806 1.033 0.816 0.837 4.680 1.049 0.837 0.983
1708947 NA NA NA NA 100.000 95.000 297.000 7672.000 258.000 -60.000 318.000 89.000 174.000 200.000 -10.000 547.000 98.000 4.000 67.000 266.000 12.000 20.000 161.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1047.000 30.000 24.000 17.000 49.000 69.000 38.000 38.000 NA NA NA NA NA NA NA NA
376861 37 37 37 153 86.000 49.000 237.000 5664.000 200.000 -9.000 209.000 110.000 58.000 167.000 12.000 845.000 90.000 47.000 21.000 263.000 150.000 249.000 175.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 474.833 17.167 6.667 8.333 78.333 51.333 19.167 74.000 17.233 1.602 1.633 1.366 13.604 1.862 1.169 2.280
save(soilclim, file = "../_output/SoilClim_sPlot3.RData")

ANNEX 1 - Code to Extract predictors

# define ancillary functions
robust.mean <- function(x){mean(x, na.rm=T)}
robust.mode <- function(x){if(any(x!=0)) {
  a <- x[which(x!=0)] #exclude zero (i.e. NAs)
  return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
    return(NA)}
robust.sd <- function(x){sd(x, na.rm=T)}

#main function
PredExtr <- function(x.shp, myfunction=NA, output=NA, 
                     toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
  print("Load Packages")
  require(foreach)
  require(parallel)
  require(doParallel)
  require(raster)
  require(rgeos)
  require(rgdal)
  require(geosphere) 
 
  print(paste("Loading", typp, "data :", toextract))
  print(paste("output will be:", output))
  if(typp=="raster"){ mypredictor <- raster(toextract)} else
  mypredictor <- readOGR(toextract)
  colnames(mypredictor@data)
  header.shp <-readOGR(x.shp)
  crs(mypredictor) <- crs(header.shp) #should be verified beforehand!
  
  

  ## Divide in chunks if requested
  if(chunkn>1 & !is.na(chunk.i)){
    print(paste("divide in chunks and run on chunk n.", chunk.i))
    indices <- 1:length(header.shp)
    chunks <- split(indices, sort(indices%%chunkn))
    header.shp <- header.shp[chunks[[chunk.i]],]
    } 

  print("myfunction defined as")
  myfunction <- get(myfunction)
  print(myfunction)
  
  print("go parallel")
  cl <- makeForkCluster(ncores, outfile="" )
  registerDoParallel(cl)

  print("start main foreach loop")
  if(typp=="raster"){
  out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { 
    tmp <- raster::extract(mypredictor, header.shp[i,], 
                         buffer=min(10000,  max(250, header.shp@data$lc_ncrt[i])), fun=myfunction) 
  }
  if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  } else {
    out <- sp::over(x=header.shp, y=mypredictor) 
    toassign <- header.shp[which(is.na(out[,1])),]
    print(paste(length(toassign), "plots not matched directly - seek for NN"))
  if(length(toassign)>0){
    
    nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
      print(i)
      nearest.tmp <- tryCatch(geosphere::dist2Line(header.shp[i,], mypredictor),
                              error = function(e){data.frame(distance=NA, lon=NA, lat=NA,ID=NA)}
                              )
      #nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor)
      return(nearest.tmp)
    }
    

    out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],]
   }
    if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  }
  stopCluster(cl)
  }