Timestamp: Sun Mar 15 13:56:24 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)

## Spatial packages
library(rgdal)
library(sp)
library(sf)
library(rgeos)
library(raster)
library(rworldmap)
#library(elevatr)
#library(rnaturalearth)
#library(dggridR)

source("A98_PredictorsExtract.R")

#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")

#Ancillary variables
get.summary <- function(x){x %>% 
    summarize_all(.funs=list(num.NAs=~sum(is.na(.)), 
                             min=~min(., na.rm=T),
                             q025=~quantile(., 0.25, na.rm=T), 
                             q50=~quantile(., 0.50, na.rm=T), 
                             q75=~quantile(., .75, na.rm=T), 
                             max=~max(., na.rm=T), 
                             mean=~mean(., na.rm=T), 
                             sd=~sd(., na.rm=T))) %>% 
    gather(variable, value) %>% 
    separate(variable, sep="_", into=c("variable", "stat")) %>% 
    mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
    spread(key=stat, value=value)
}

1 Import header data

load("../_output/header_sPlot3.0.RData")

Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels Location uncertainty to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.

header.shp <- header %>%
  filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
                                        dplyr::select(Longitude, Latitude),
                               proj4string = CRS("+init=epsg:4326"), 
                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
                                               `GIVD ID`=header.shp$`GIVD ID`))
## OGR data source with driver: ESRI Shapefile 
## Source: "/data/sPlot/users/Francesco/sPlot3/_derived", layer: "header.shp"
## with 1976235 features
## It has 3 fields

2 Climate

Bioclimatic variables (bio01-bio19) derive from CHELSA

Codes:

Bio1 = Annual Mean Temperature
Bio2 = Mean Diurnal Range
Bio3 = Isothermality
Bio4 = Temperature Seasonality
Bio5 = Max Temperature of Warmest Month
Bio6 = Min Temperature of Coldest Month
Bio7 = Temperature Annual Range
Bio8 = Mean Temperature of Wettest Quarter
Bio9 = Mean Temperature of Driest Quarter
Bio10 = Mean Temperature of Warmest Quarter
Bio11 = Mean Temperature of Coldest Quarter
Bio12 = Annual Precipitation
Bio13 = Precipitation of Wettest Month
Bio14 = Precipitation of Driest Month
Bio15 = Precipitation Seasonality
Bio16 = Precipitation of Wettest Quarter
Bio17 = Precipitation of Driest Quarter
Bio18 = Precipitation of Warmest Quarter
Bio19 = Precipitation of Coldest Quarter

Download raster files:

library(downloader)
url.chelsa <- list()
for(i in 1:19){
  ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
  url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
  download(url.chelsa[[i]],
           paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""), 
           mode = "wb")
}

Intersect header.shp with each raster of the CHELSA collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

header.shp.path <- "../_derived/header.shp.shp"

for(i in 1:19){
  ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
  #define output paths
  output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
  output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10", 
              stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
chelsa.out <- do.call(cbind, lapply(chelsa.files, 
                                    function(x) {read_csv(x,col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                        column_to_rownames("X1")}))
colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
#same for sd values
chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa mean statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01 4040 -270.578 82.000 96.000 110.000 307.00 97.023 43.401
bio02 4040 6.000 54.000 71.000 80.000 144.00 66.982 21.456
bio03 4040 67.000 233.000 275.000 300.000 832.00 270.146 70.353
bio04 4040 88.000 5397.000 6164.000 7339.000 21703.00 6373.841 1793.695
bio05 4040 -77.642 201.407 224.000 252.000 452.00 229.713 43.397
bio06 4040 -431.000 -50.449 -12.000 12.575 252.00 -19.443 61.260
bio07 4040 26.000 206.000 248.709 285.000 675.00 249.163 63.721
bio08 4040 -208.000 86.000 144.013 177.000 358.00 132.390 62.851
bio09 4040 -377.000 -1.000 57.000 98.333 373.00 63.304 87.406
bio10 4040 -111.628 167.000 178.000 201.264 375.00 186.105 38.709
bio11 4040 -400.000 -14.000 11.000 39.763 279.00 12.339 58.380
bio12 4040 0.000 604.000 761.000 907.000 6554.00 849.872 472.599
bio13 4040 0.000 74.000 85.000 113.000 993.75 104.780 64.680
bio14 4040 0.000 29.848 41.000 51.000 411.00 43.612 27.856
bio15 4040 5.000 17.000 24.000 35.000 331.00 28.480 16.999
bio16 4040 0.000 216.620 247.000 325.000 2832.50 302.450 182.994
bio17 4040 0.000 94.000 131.000 163.000 1251.00 139.050 86.495
bio18 4040 0.000 176.000 216.000 253.000 2285.00 236.268 146.904
bio19 4040 0.000 112.000 163.000 210.511 2645.00 183.224 120.097
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01sd 1384369 0 0.7800182 2.0310466 5.9857381 51.162192 4.846720 6.7012074
bio02sd 1384369 0 0.0000000 0.1793201 0.5003541 4.392056 0.397326 0.5871731
bio03sd 1384369 0 0.4435312 0.7527727 1.7079089 23.952856 1.391100 1.5943478
bio04sd 1384369 0 7.7781746 15.0614901 31.6960500 242.616057 25.970859 27.2246007
bio05sd 1384369 0 0.9082952 2.2158230 5.6568542 53.383878 5.095573 6.9402301
bio06sd 1384369 0 0.7996481 1.8866058 5.7739680 52.067653 4.665683 6.3305406
bio07sd 1384369 0 0.5001703 0.6595651 1.3733545 8.507203 1.085018 1.0246997
bio08sd 1384369 0 1.0725245 5.6493818 16.9136010 148.421139 12.643420 16.6528157
bio09sd 1384369 0 0.9536438 2.7201027 8.7700028 144.249783 7.687753 12.0662592
bio10sd 1384369 0 0.7754730 2.1454694 5.6568542 53.720906 5.023930 6.9683276
bio11sd 1384369 0 0.7806987 1.8978051 5.8134709 52.062384 4.608413 6.3346350
bio12sd 1384369 0 17.4683532 42.9959747 71.1728646 713.159827 62.144824 64.5121259
bio13sd 1384369 0 2.2862699 5.9902205 10.5262733 99.739245 8.186887 8.8333873
bio14sd 1384369 0 0.7874771 2.2079022 4.3447815 36.986231 3.404173 3.7164424
bio15sd 1384369 0 0.5163978 0.7071068 1.7593330 10.929618 1.283538 1.2838996
bio16sd 1384369 0 6.6037769 17.3523720 29.9374956 297.255136 23.389558 25.4133442
bio17sd 1384369 0 2.5631032 6.9268396 13.9031189 113.488278 10.832693 11.8698016
bio18sd 1384369 0 4.3574734 12.4533991 21.0158670 345.418948 18.467468 22.3670495
bio19sd 1384369 0 3.5121519 9.8499438 18.2777876 296.915363 15.135450 17.0292687

3 Soil

Soil variables (5 cm depth) derive from the ISRIC dataset, downloaded at 250-m resolution

CECSOL Cation Exchange capacity of soil
CLYPPT Clay mass fraction in %
CRFVOL Coarse fragments volumetric in %
ORCDRC Soil Organic Carbon Content in g/kg
PHIHOX Soil pH x 10 in H20
SLTPPT Silt mass fraction in %
SNDPPT Sand mass fraction in %
BLDFIE Bulk Density (fine earth) in kg/m3
P.ret.cat Phosphorous Retention - Categorical value, see ISRIC 2011-06
Download ISRIC soil data

url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"

download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")

Intersect header.shp with each raster of the ISRIC collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

for(i in 1:8){
  ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
                                pattern = "^[^(Gene)]", full.names = T)[i]
  isric.i <- raster(ff)
  #define output paths
  filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
           pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
  output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
  output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
  # Run PredExtr function - sink to file
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.mean, 
                         ncores = 4, typp = "raster", output.ff1)
  PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.sd, 
                         ncores = 4, typp = "raster", output.ff2)
}

Reimport and assemble output.

ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
isric.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"), 
                          full.names=T)

isric.out <- do.call(cbind, lapply(isric.files,  
                                   function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                       column_to_rownames("X1")}))
colnames(isric.out) <- ISRIC.layer.names
#same for sd values
isric.sd.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"), 
                          full.names=T)
isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
                                      function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric mean statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIE 19205 93.667 646.600 796.000 1046.500 1718.500 846.194 280.873
CECSOL 19205 1.000 17.600 21.750 26.500 157.333 22.657 7.182
CLYPPT 19205 0.000 13.600 18.852 23.450 67.000 18.616 7.325
CRFVOL 19205 0.000 6.800 10.937 16.000 55.000 11.707 6.089
ORCDRC 19205 0.000 29.616 50.833 89.000 688.500 65.566 47.791
PHIHOX 19205 38.000 54.167 59.500 64.424 94.200 59.971 7.846
SLTPPT 19205 2.250 28.000 36.289 40.500 80.667 34.163 10.037
SNDPPT 19205 2.400 36.667 43.667 56.500 95.000 47.207 14.710
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIEsd 115683 0 19.047 30.928 55.756 271.328 41.588 31.638
CECSOLsd 115683 0 0.837 1.390 2.246 50.912 1.744 1.369
CLYPPTsd 115683 0 0.753 1.140 1.826 13.546 1.442 1.057
CRFVOLsd 115683 0 0.584 1.033 1.673 15.556 1.272 0.894
ORCDRCsd 115683 0 3.270 6.229 12.095 91.914 9.499 9.402
PHIHOXsd 115683 0 0.707 1.069 1.982 10.533 1.613 1.424
SLTPPTsd 115683 0 0.837 1.329 2.160 20.569 1.772 1.434
SNDPPTsd 115683 0 1.033 1.708 3.033 25.153 2.449 2.206

4 Elevation variability

Reimport output (calculated in 04_buildHeader)

elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv")

5 Create output and export

soilclim <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  left_join(elevation.out %>% 
              dplyr::select(PlotObservationID, Elevation_median, Elevation_q2.5, Elevation_q97.5, Elevation_DEM.res=DEM.res), 
            by="PlotObservationID") %>% 
  left_join(header.shp@data %>% 
              dplyr::select(PlotObservationID) %>% 
              bind_cols(chelsa.out) %>%
              bind_cols(chelsa.sd.out) %>% 
              bind_cols(isric.out) %>% 
              bind_cols(isric.sd.out) %>% 
              distinct(), 
            by="PlotObservationID")
Show environmenal info for 20 randomly selected plots
PlotObservationID Elevation_median Elevation_q2.5 Elevation_q97.5 Elevation_DEM.res bio01 bio02 bio03 bio04 bio05 bio06 bio07 bio08 bio09 bio10 bio11 bio12 bio13 bio14 bio15 bio16 bio17 bio18 bio19 bio01sd bio02sd bio03sd bio04sd bio05sd bio06sd bio07sd bio08sd bio09sd bio10sd bio11sd bio12sd bio13sd bio14sd bio15sd bio16sd bio17sd bio18sd bio19sd BLDFIE CECSOL CLYPPT CRFVOL ORCDRC PHIHOX SLTPPT SNDPPT BLDFIEsd CECSOLsd CLYPPTsd CRFVOLsd ORCDRCsd PHIHOXsd SLTPPTsd SNDPPTsd
1448180 443 443 443 153 77.000 76.000 259.000 7742.000 230.000 -64.000 295.000 155.000 -24.000 180.000 -28.000 786.000 117.000 37.000 38.000 319.000 113.000 274.000 122.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 838.600 20.400 20.200 18.600 51.400 55.200 44.800 35.000 39.551 2.302 2.168 1.673 7.701 1.789 1.643 0.707
1813227 1018 1018 1018 153 81.000 123.000 246.000 13167.000 327.000 -175.000 502.000 255.000 -83.000 255.000 -107.000 51.000 17.000 1.000 110.000 40.000 3.000 40.000 3.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1283.200 13.800 15.800 18.200 11.600 80.200 32.200 52.000 12.617 0.447 0.447 0.837 0.894 0.837 0.447 0.707
1811959 4693 4693 4693 153 -60.000 98.000 285.000 8394.000 106.000 -236.000 343.000 61.000 -94.000 61.000 -171.000 272.000 32.000 8.000 35.000 95.000 31.000 95.000 74.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 775.333 18.000 17.333 19.333 56.000 76.667 28.333 54.333 58.158 1.732 0.577 1.528 1.000 0.577 0.577 0.577
681870 404 404 404 153 146.000 36.000 196.000 5111.000 247.000 65.000 182.000 135.000 213.000 224.000 82.000 848.000 132.000 11.000 50.000 356.000 50.000 61.000 278.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1016.400 22.400 26.200 25.000 67.200 60.400 37.200 37.000 24.460 1.140 0.447 0.707 2.683 0.894 1.095 1.000
1217163 36 36 36 153 104.000 67.000 301.000 5508.000 227.000 3.000 224.000 176.000 84.000 181.000 31.000 769.000 74.000 46.000 12.000 218.000 152.000 216.000 177.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 680.167 10.500 3.667 5.833 36.167 54.167 11.667 84.333 30.721 0.548 0.516 0.983 2.317 0.408 1.366 1.366
1207494 5 5 5 153 102.000 26.000 162.000 4721.000 188.000 25.000 163.000 96.000 103.000 171.000 39.000 799.000 95.000 41.000 29.000 283.000 129.000 217.000 156.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 810.500 14.833 10.333 6.333 59.833 63.167 19.333 70.000 19.316 1.169 1.033 0.816 9.663 1.169 1.033 1.549
17654 NA NA NA NA 44.759 81.711 290.773 6949.753 188.587 -92.217 280.884 135.602 -42.457 138.669 -50.037 1213.514 157.584 64.157 27.547 453.212 212.823 439.343 262.221 26.501 0.454 3.085 120.566 27.926 24.592 3.392 27.969 26.920 27.761 24.640 268.008 37.804 14.943 4.118 107.329 49.422 98.975 59.759 736.949 29.525 16.586 20.539 113.139 54.031 40.079 43.298 74.869 4.263 2.685 3.785 40.576 3.419 3.129 5.061
868630 104 90 124 153 85.681 60.809 252.809 6448.851 217.734 -22.926 240.617 165.500 1.670 176.011 1.670 577.340 69.436 31.085 22.564 195.777 104.553 179.628 104.553 0.986 0.396 0.931 6.712 0.997 1.039 0.551 10.052 0.999 0.910 0.999 21.076 2.335 1.259 0.520 6.865 4.118 6.820 4.118 804.041 13.811 10.232 7.432 30.830 59.988 25.495 64.235 121.003 1.878 1.651 1.090 11.688 4.651 3.223 4.183
1656755 3 3 3 153 142.000 78.000 271.000 7192.000 291.000 4.000 286.000 111.000 51.000 242.000 45.000 988.000 104.000 55.000 20.000 310.000 167.000 262.000 187.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 1255.333 25.000 26.333 14.333 40.333 69.333 46.667 26.667 23.288 6.083 1.528 0.577 18.148 0.577 1.528 0.577
1465465 738 738 738 153 58.000 74.000 261.000 7384.000 205.000 -77.000 282.000 132.000 -39.000 157.000 -42.000 913.000 107.000 48.000 24.000 311.000 148.000 279.000 172.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 705.500 23.333 16.167 18.167 87.667 52.500 45.833 38.333 39.793 1.033 1.602 0.983 15.227 2.345 1.169 2.160
1010524 16 15 17 153 66.000 59.000 204.000 7885.000 220.000 -71.000 290.000 173.000 -18.000 173.000 -41.000 617.000 75.000 26.000 32.000 221.000 86.000 221.000 87.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 674.500 25.167 13.667 8.833 121.167 54.667 34.833 51.667 48.714 3.488 1.211 0.753 12.040 0.516 1.169 1.366
843671 486 486 486 153 80.000 70.000 294.000 5827.000 209.000 -28.000 237.000 22.000 100.000 162.000 3.000 845.000 91.000 57.000 13.000 260.000 181.000 200.000 220.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 621.333 19.500 22.833 15.500 59.167 56.167 49.167 28.000 18.272 0.548 0.408 1.378 2.483 0.753 0.753 0.632
1760748 18 18 18 153 129.000 49.000 172.000 8134.000 279.000 -5.000 285.000 63.000 86.000 247.000 19.000 2033.000 242.000 116.000 25.000 709.000 353.000 574.000 471.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 943.600 35.000 26.000 19.200 125.400 54.800 37.600 36.200 24.296 7.842 1.414 1.483 10.262 1.095 0.548 1.924
861002 NA NA NA 153 121.000 22.000 185.000 3336.000 183.000 65.000 118.000 95.000 161.000 170.000 78.000 963.000 122.000 46.000 32.000 358.000 141.000 152.000 294.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 803.667 20.667 19.083 12.583 77.500 62.500 37.583 43.167 77.671 2.871 1.929 2.429 18.677 1.784 1.084 1.850
1702251 15 0 50 153 184.000 88.000 493.000 3003.000 275.000 97.000 178.000 153.000 224.000 224.000 143.000 134.000 22.000 2.000 60.000 61.000 8.000 8.000 60.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
1166479 -3 -3 -3 153 103.000 57.000 279.000 5179.000 213.000 10.000 204.000 86.000 81.000 177.000 34.000 822.000 80.000 47.000 17.000 239.000 154.000 237.000 174.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 360.167 37.667 46.333 5.833 153.000 60.000 34.833 18.667 4.021 2.066 1.633 1.169 5.138 0.632 0.753 2.338
821480 43 43 43 153 98.000 65.000 291.000 5645.000 222.000 -3.000 224.000 172.000 79.000 178.000 24.000 781.000 78.000 49.000 14.000 226.000 157.000 222.000 167.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 510.667 21.500 5.833 6.833 142.167 53.167 18.333 75.667 26.021 2.588 0.983 0.753 35.278 0.408 2.582 2.338
630661 NA NA NA 153 84.000 75.000 316.000 5650.000 218.000 -20.000 239.000 26.000 160.000 166.000 11.000 1332.000 142.000 90.000 12.000 407.000 282.000 288.000 386.000 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 684.800 19.600 17.600 14.000 79.400 53.600 32.600 49.600 16.784 0.894 0.548 0.000 5.857 0.548 0.894 1.342
863637 76 62 92 153 88.447 63.372 259.596 6491.521 222.723 -21.468 244.330 159.426 3.947 179.191 3.947 578.574 66.436 33.330 19.766 188.202 111.064 174.660 111.064 0.541 0.486 0.661 6.528 0.612 0.562 0.473 12.279 0.575 0.554 0.575 14.924 1.829 0.988 0.557 4.979 3.209 4.746 3.209 605.617 14.941 4.851 7.223 59.797 48.123 12.667 82.289 47.638 2.656 1.941 1.219 15.330 4.559 3.502 5.066
754884 NA NA NA NA 117.482 74.000 310.095 5769.304 251.930 12.575 239.346 144.013 67.824 200.703 43.223 744.488 80.236 50.779 12.760 217.087 156.079 192.671 183.147 2.031 0.000 0.383 15.061 2.216 1.887 0.660 16.914 2.720 2.145 1.898 62.959 6.811 4.345 0.581 18.326 13.903 16.499 16.216 988.585 16.251 20.636 12.025 23.639 62.966 38.217 41.121 92.959 2.246 3.460 1.835 5.752 4.465 3.487 5.647
save(soilclim, file = "../_output/SoilClim_sPlot3.RData")

ANNEX 1 - Code to Extract predictors

# define ancillary functions
robust.mean <- function(x){mean(x, na.rm=T)}
robust.mode <- function(x){if(any(x!=0)) {
  a <- x[which(x!=0)] #exclude zero (i.e. NAs)
  return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
    return(NA)}
robust.sd <- function(x){sd(x, na.rm=T)}

#main function
PredExtr <- function(x.shp, myfunction=NA, output=NA, 
                     toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
  print("Load Packages")
  require(foreach)
  require(parallel)
  require(doParallel)
  require(raster)
  require(rgeos)
  require(rgdal) 
 
  print(paste("Loading", typp, "data :", toextract))
  print(paste("output will be:", output))
  if(typp=="raster"){ mypredictor <- raster(toextract)} else
  mypredictor <- readOGR(toextract)
  colnames(mypredictor@data)
  header.shp <-readOGR(x.shp)
  crs(mypredictor) <- crs(header.shp) #should be verified beforehand!
  
  

  ## Divide in chunks if requested
  if(chunkn>1 & !is.na(chunk.i)){
    print(paste("divide in chunks and run on chunk n.", chunk.i))
    indices <- 1:length(header.shp)
    chunks <- split(indices, sort(indices%%chunkn))
    header.shp <- header.shp[chunks[[chunk.i]],]
    } 

  print("myfunction defined as")
  print(myfunction)
  
  print("go parallel")
  cl <- makeForkCluster(ncores, outfile="" )
  registerDoParallel(cl)

  print("start main foreach loop")
  if(typp=="raster"){
  out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { 
    tmp <- raster::extract(mypredictor, header.shp[i,], 
                         buffer=min(10000,  max(250, header.shp@data$lc_ncrt[i])), fun=myfunction) 
  }
  if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  } else {
    out <- sp::over(x=header.shp, y=mypredictor) 
    toassign <- header.shp[which(is.na(out[,1])),]
    print(paste(length(toassign), "plots not matched directely - seek for NN"))
  if(length(toassign)>0){
    
    nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
      print(i)
      nearest.tmp <- tryCatch(geosphere::dist2Line(header.shp[i,], mypredictor),
                              error = function(e){data.frame(distance=NA, lon=NA, lat=NA,ID=NA)}
                              )
      #nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor)
      return(nearest.tmp)
    }
    

    out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],]
   }
    if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  }
  stopCluster(cl)
  }