Timestamp: Fri Mar 13 10:20:02 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)

## Spatial packages
library(rgdal)
library(sp)
library(sf)
library(rgeos)
library(raster)
library(rworldmap)
#library(elevatr)
#library(rnaturalearth)
#library(dggridR)

source("A98_PredictorsExtract.R")

#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")

#Ancillary variables
get.summary <- function(x){x %>% 
    summarize_all(.funs=list(num.NAs=~sum(is.na(.)), 
                             min=~min(., na.rm=T),
                             q025=~quantile(., 0.25, na.rm=T), 
                             q50=~quantile(., 0.50, na.rm=T), 
                             q75=~quantile(., .75, na.rm=T), 
                             max=~max(., na.rm=T), 
                             mean=~mean(., na.rm=T), 
                             sd=~sd(., na.rm=T))) %>% 
    gather(variable, value) %>% 
    separate(variable, sep="_", into=c("variable", "stat")) %>% 
    mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
    spread(key=stat, value=value)
}

1 Import header data

load("../_output/header_sPlot3.0.RData")

Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels Location uncertainty to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.

header.shp <- header %>%
  filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
                                        dplyr::select(Longitude, Latitude),
                               proj4string = CRS("+init=epsg:4326"), 
                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
                                               `GIVD ID`=header.shp$`GIVD ID`))
## OGR data source with driver: ESRI Shapefile 
## Source: "/data/sPlot/users/Francesco/sPlot3/_derived", layer: "header.shp"
## with 1976235 features
## It has 3 fields

2 Climate

Bioclimatic variables (bio01-bio19) derive from CHELSA

Codes:

Bio1 = Annual Mean Temperature
Bio2 = Mean Diurnal Range
Bio3 = Isothermality
Bio4 = Temperature Seasonality
Bio5 = Max Temperature of Warmest Month
Bio6 = Min Temperature of Coldest Month
Bio7 = Temperature Annual Range
Bio8 = Mean Temperature of Wettest Quarter
Bio9 = Mean Temperature of Driest Quarter
Bio10 = Mean Temperature of Warmest Quarter
Bio11 = Mean Temperature of Coldest Quarter
Bio12 = Annual Precipitation
Bio13 = Precipitation of Wettest Month
Bio14 = Precipitation of Driest Month
Bio15 = Precipitation Seasonality
Bio16 = Precipitation of Wettest Quarter
Bio17 = Precipitation of Driest Quarter
Bio18 = Precipitation of Warmest Quarter
Bio19 = Precipitation of Coldest Quarter

Download raster files:

library(downloader)
url.chelsa <- list()
for(i in 1:19){
  ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
  url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
  download(url.chelsa[[i]],
           paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""), 
           mode = "wb")
}

Intersect header.shp with each raster of the CHELSA collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

for(i in 1:19){
  ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
  chelsa.i <- raster(ff)
  #define output paths
  output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
  output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
              stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
  # Run PredExtr function
  chelsa.out <- PredExtr(x.shp = header.shp, toextract = chelsa.i, myfunction = "robust.mean", 
                         ncores = 4, typp = "raster", output.ff1)
  chelsa.sd <- PredExtr(x.shp = header.shp, toextract = chelsa.i, myfunction = "sd", 
                         ncores = 4, typp = "raster", output).ff2
}

Reimport and assemble output.

chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
chelsa.out <- do.call(cbind, lapply(chelsa.files, 
                                    function(x) {read_csv(x,col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                        column_to_rownames("X1")}))
colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
#same for sd values
chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa mean statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01 12945 -271 82 96 110 307 96.827 43.865
bio02 12945 6 54 71 80 144 67.108 21.412
bio03 12945 67 234 275 300 832 270.392 70.342
bio04 12945 88 5401 6175 7346 21703 6378.968 1792.980
bio05 12945 -78 201 224 254 452 229.677 43.781
bio06 12945 -431 -51 -12 13 257 -19.769 61.624
bio07 12945 26 206 249 285 675 249.423 63.646
bio08 12945 -208 80 137 177 358 131.073 64.117
bio09 12945 -377 -2 57 99 373 63.223 87.927
bio10 12945 -112 167 178 202 375 185.968 39.192
bio11 12945 -400 -15 11 39 279 12.082 58.751
bio12 12945 0 604 759 911 6570 851.256 477.047
bio13 12945 0 74 85 113 997 104.895 65.190
bio14 12945 0 29 41 51 411 43.682 28.100
bio15 12945 5 17 24 35 331 28.453 16.996
bio16 12945 0 210 248 327 2845 302.701 184.383
bio17 12945 0 93 131 163 1251 139.352 87.411
bio18 12945 0 174 215 253 2284 236.761 148.729
bio19 12945 0 113 163 209 2645 183.524 120.873
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for chelsa s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
bio01sd 12945 -271 82 96 110 307 96.82700 43.86540
bio02sd 12945 6 54 71 80 144 67.10784 21.41174
bio03sd 12945 67 234 275 300 832 270.39188 70.34210
bio04sd 12945 88 5401 6175 7346 21703 6378.96772 1792.97986
bio05sd 12945 -78 201 224 254 452 229.67739 43.78057
bio06sd 12945 -431 -51 -12 13 257 -19.76948 61.62412
bio07sd 12945 26 206 249 285 675 249.42291 63.64596
bio08sd 12945 -208 80 137 177 358 131.07321 64.11688
bio09sd 12945 -377 -2 57 99 373 63.22263 87.92727
bio10sd 12945 -112 167 178 202 375 185.96824 39.19229
bio11sd 12945 -400 -15 11 39 279 12.08249 58.75138
bio12sd 12945 0 604 759 911 6570 851.25643 477.04650
bio13sd 12945 0 74 85 113 997 104.89539 65.19007
bio14sd 12945 0 29 41 51 411 43.68230 28.09960
bio15sd 12945 5 17 24 35 331 28.45348 16.99622
bio16sd 12945 0 210 248 327 2845 302.70133 184.38257
bio17sd 12945 0 93 131 163 1251 139.35193 87.41100
bio18sd 12945 0 174 215 253 2284 236.76145 148.72899
bio19sd 12945 0 113 163 209 2645 183.52381 120.87264

3 Soil

Soil variables (5 cm depth) derive from the ISRIC dataset, downloaded at 250-m resolution

CECSOL Cation Exchange capacity of soil
CLYPPT Clay mass fraction in %
CRFVOL Coarse fragments volumetric in %
ORCDRC Soil Organic Carbon Content in g/kg
PHIHOX Soil pH x 10 in H20
SLTPPT Silt mass fraction in %
SNDPPT Sand mass fraction in %
BLDFIE Bulk Density (fine earth) in kg/m3
P.ret.cat Phosphorous Retention - Categorical value, see ISRIC 2011-06
Download ISRIC soil data

url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"

download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")

Intersect header.shp with each raster of the ISRIC collection.
Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

for(i in 1:8){
  ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
                                pattern = "^[^(Gene)]", full.names = T)[i]
  isric.i <- raster(ff)
  #define output paths
  filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
           pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
  output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
  output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
  # Run PredExtr function
  chelsa.out <- PredExtr(x.shp = header.shp, toextract = isric.i, myfunction = "robust.mean", 
                         ncores = 4, typp = "raster", output.ff1)
  chelsa.sd <- PredExtr(x.shp = header.shp, toextract = isric.i, myfunction = "sd", 
                         ncores = 4, typp = "raster", output).ff2
}

Reimport and assemble output.

ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
isric.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"), 
                          full.names=T)

isric.out <- do.call(cbind, lapply(isric.files,  
                                   function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                             V1 = col_double())) %>% 
                                       column_to_rownames("X1")}))
colnames(isric.out) <- ISRIC.layer.names
#same for sd values
isric.sd.files <- list.files(path="../_derived/output_pred/", 
                          pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"), 
                          full.names=T)
isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
                                      function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")

Show summaries

## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric mean statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIE 33320 93.667 637.333 799.250 1050.333 1718.500 849.930 285.608
CECSOL 33320 1.000 17.833 21.750 26.500 157.333 22.701 7.163
CLYPPT 33320 0.000 13.500 18.800 24.000 67.000 18.689 7.472
CRFVOL 33320 0.000 6.750 10.667 16.167 55.000 11.715 6.191
ORCDRC 33320 0.000 28.333 49.800 89.833 478.778 65.481 49.436
PHIHOX 33320 38.000 54.000 59.571 64.833 94.200 60.026 7.996
SLTPPT 33320 2.250 28.000 36.167 40.600 80.667 34.103 10.199
SNDPPT 33320 2.400 36.600 44.200 56.500 94.500 47.190 14.915
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for isric s.d. statistics
variable num.NAs min q025 q50 q75 max mean sd
BLDFIEsd 33320 93.667 637.333 799.250 1050.333 1718.500 849.930 285.608
CECSOLsd 33320 1.000 17.833 21.750 26.500 157.333 22.701 7.163
CLYPPTsd 33320 0.000 13.500 18.800 24.000 67.000 18.689 7.472
CRFVOLsd 33320 0.000 6.750 10.667 16.167 55.000 11.715 6.191
ORCDRCsd 33320 0.000 28.333 49.800 89.833 478.778 65.481 49.436
PHIHOXsd 33320 38.000 54.000 59.571 64.833 94.200 60.026 7.996
SLTPPTsd 33320 2.250 28.000 36.167 40.600 80.667 34.103 10.199
SNDPPTsd 33320 2.400 36.600 44.200 56.500 94.500 47.190 14.915

4 Create output and export

soilclim <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  left_join(header.shp@data %>% 
              dplyr::select(PlotObservationID) %>% 
              bind_cols(chelsa.out) %>%
              bind_cols(chelsa.sd.out) %>% 
              bind_cols(isric.out) %>% 
              bind_cols(isric.sd.out) %>% 
              distinct(), 
            by="PlotObservationID")
Show environmenal info for 20 randomly selected plots
PlotObservationID bio01 bio02 bio03 bio04 bio05 bio06 bio07 bio08 bio09 bio10 bio11 bio12 bio13 bio14 bio15 bio16 bio17 bio18 bio19 bio01sd bio02sd bio03sd bio04sd bio05sd bio06sd bio07sd bio08sd bio09sd bio10sd bio11sd bio12sd bio13sd bio14sd bio15sd bio16sd bio17sd bio18sd bio19sd BLDFIE CECSOL CLYPPT CRFVOL ORCDRC PHIHOX SLTPPT SNDPPT BLDFIEsd CECSOLsd CLYPPTsd CRFVOLsd ORCDRCsd PHIHOXsd SLTPPTsd SNDPPTsd
1554139 93 95 338 6268 255 -25 280 95 188 188 14 506 57 21 26 169 64 64 136 93 95 338 6268 255 -25 280 95 188 188 14 506 57 21 26 169 64 64 136 1055.000 25.200 20.000 15.600 62.000 63.800 35.000 44.600 1055.000 25.200 20.000 15.600 62.000 63.800 35.000 44.600
1316302 81 60 245 6669 214 -32 247 168 -7 174 -7 623 77 29 27 215 101 213 101 81 60 245 6669 214 -32 247 168 -7 174 -7 623 77 29 27 215 101 213 101 592.167 20.000 8.000 9.167 78.500 47.667 25.667 66.333 592.167 20.000 8.000 9.167 78.500 47.667 25.667 66.333
550012 121 95 325 6663 290 -3 293 90 222 222 37 632 82 17 37 229 53 53 193 121 95 325 6663 290 -3 293 90 222 222 37 632 82 17 37 229 53 53 193 1236.200 15.400 17.800 24.400 26.600 67.000 35.200 46.800 1236.200 15.400 17.800 24.400 26.600 67.000 35.200 46.800
52967 13 84 307 6568 155 -120 275 104 -71 104 -75 1267 180 56 40 526 174 526 194 13 84 307 6568 155 -120 275 104 -71 104 -75 1267 180 56 40 526 174 526 194 794.667 25.500 12.333 24.167 116.667 54.667 36.833 50.833 794.667 25.500 12.333 24.167 116.667 54.667 36.833 50.833
1375604 111 84 266 8138 272 -42 314 191 4 219 -1 716 86 45 22 251 141 159 159 111 84 266 8138 272 -42 314 191 4 219 -1 716 86 45 22 251 141 159 159 1100.667 25.000 27.000 14.167 38.667 68.333 40.833 32.167 1100.667 25.000 27.000 14.167 38.667 68.333 40.833 32.167
1224899 -14 60 227 7099 132 -131 263 85 -38 86 -104 570 81 23 41 239 75 228 105 -14 60 227 7099 132 -131 263 85 -38 86 -104 570 81 23 41 239 75 228 105 705.571 30.000 10.857 17.286 113.429 51.857 34.429 54.143 705.571 30.000 10.857 17.286 113.429 51.857 34.429 54.143
1223378 99 56 273 5241 210 6 204 50 77 174 30 890 87 54 14 257 175 251 190 99 56 273 5241 210 6 204 50 77 174 30 890 87 54 14 257 175 251 190 501.000 21.333 3.000 6.333 116.833 47.667 10.333 86.667 501.000 21.333 3.000 6.333 116.833 47.667 10.333 86.667
1664194 45 77 286 6602 188 -81 269 22 132 139 -42 1196 156 53 27 450 186 209 290 45 77 286 6602 188 -81 269 22 132 139 -42 1196 156 53 27 450 186 209 290 749.600 27.600 22.200 19.000 136.600 58.000 40.600 37.400 749.600 27.600 22.200 19.000 136.600 58.000 40.600 37.400
1691206 226 93 432 4159 322 107 215 272 160 274 160 760 119 26 56 355 80 350 80 226 93 432 4159 322 107 215 272 160 274 160 760 119 26 56 355 80 350 80 1328.750 12.500 20.750 5.500 15.250 64.500 13.500 66.000 1328.750 12.500 20.750 5.500 15.250 64.500 13.500 66.000
1342523 116 85 257 8706 284 -45 329 203 1 231 -2 463 67 24 34 187 72 148 78 116 85 257 8706 284 -45 329 203 1 231 -2 463 67 24 34 187 72 148 78 1328.600 28.600 28.400 3.600 22.400 73.000 48.200 23.600 1328.600 28.600 28.400 3.600 22.400 73.000 48.200 23.600
83669 87 86 277 7868 245 -64 309 161 55 193 -20 869 121 53 32 344 160 275 164 87 86 277 7868 245 -64 309 161 55 193 -20 869 121 53 32 344 160 275 164 861.000 32.400 23.600 9.200 75.600 58.200 43.600 32.600 861.000 32.400 23.600 9.200 75.600 58.200 43.600 32.600
1965558 102 113 291 9313 317 -72 390 121 -2 239 -22 188 20 11 18 60 35 48 38 102 113 291 9313 317 -72 390 121 -2 239 -22 188 20 11 18 60 35 48 38 1428.400 21.400 16.200 13.200 6.800 83.200 30.200 53.600 1428.400 21.400 16.200 13.200 6.800 83.200 30.200 53.600
173816 94 44 287 3799 176 25 152 85 96 150 46 1569 178 89 22 526 279 363 388 94 44 287 3799 176 25 152 85 96 150 46 1569 178 89 22 526 279 363 388 523.000 18.833 18.000 18.667 88.000 55.333 38.167 44.000 523.000 18.833 18.000 18.667 88.000 55.333 38.167 44.000
790076 77 69 278 6333 212 -36 248 137 -6 166 -6 782 87 49 16 246 155 223 155 77 69 278 6333 212 -36 248 137 -6 166 -6 782 87 49 16 246 155 223 155 688.500 23.833 24.333 16.333 73.333 53.833 51.000 24.667 688.500 23.833 24.333 16.333 73.333 53.833 51.000 24.667
1791618 126 56 189 8357 280 -18 298 20 140 247 13 2034 234 108 23 671 325 500 564 126 56 189 8357 280 -18 298 20 140 247 13 2034 234 108 23 671 325 500 564 901.000 26.800 23.600 17.200 102.400 52.000 36.200 40.200 901.000 26.800 23.600 17.200 102.400 52.000 36.200 40.200
1801587 132 75 238 8331 289 -27 316 228 23 250 19 1152 202 25 57 564 85 450 108 132 75 238 8331 289 -27 316 228 23 250 19 1152 202 25 57 564 85 450 108 837.500 24.750 23.500 22.000 67.500 56.500 37.000 39.500 837.500 24.750 23.500 22.000 67.500 56.500 37.000 39.500
1500711 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
733973 112 75 293 6374 252 -5 257 173 30 201 29 779 83 46 20 245 142 244 165 112 75 293 6374 252 -5 257 173 30 201 29 779 83 46 20 245 142 244 165 1053.000 18.600 21.200 14.200 40.200 62.800 30.400 48.600 1053.000 18.600 21.200 14.200 40.200 62.800 30.400 48.600
532537 115 82 275 7385 270 -26 296 216 19 216 14 1215 132 67 21 378 203 378 224 115 82 275 7385 270 -26 296 216 19 216 14 1215 132 67 21 378 203 378 224 923.400 22.600 22.600 6.400 34.200 60.200 45.800 31.400 923.400 22.600 22.600 6.400 34.200 60.200 45.800 31.400
1224635 57 28 159 5300 154 -24 179 77 64 136 -8 1375 160 71 24 472 224 310 347 57 28 159 5300 154 -24 179 77 64 136 -8 1375 160 71 24 472 224 310 347 600.750 24.625 8.250 17.375 127.875 53.500 35.500 56.250 600.750 24.625 8.250 17.375 127.875 53.500 35.500 56.250
save(soilclim, file = "../_output/SoilClim_sPlot3.RData")

ANNEX 1 - Code to Extract predictors

PredExtr <- function(x.shp, myfunction=NA, output,  
                     toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
  print("Load Packages")
  require(foreach)
  require(parallel)
  require(doParallel)
  require(raster)
  require(rgeos)
  require(rgdal) 
 
  print(paste("Loading", typp, "data :", toextract))
  print(paste("output will be:", output))
  if(typp=="raster"){ mypredictor <- raster(toextract)} else
    mypredictor <- readOGR(toextract)
  header.shp <-readOGR(x.shp)
  crs(mypredictor) <- crs(header.shp) #should be verified beforehand!

  ## Divide in chunks if requested
  if(chunkn>1 & !is.na(chunk.i)){
    print(paste("divide in chunks and run on chunk n.", chunk.i))
    indices <- 1:length(header.shp)
    chunks <- split(indices, sort(indices%%chunkn))
    header.shp <- header.shp[chunks[[chunk.i]],]
    } 

  # define ancillary functions
  robust.mean <- function(x){mean(x, rm.na=T)}
  robust.mode <- function(x){if(any(x!=0)) {
     a <- x[which(x!=0)] #exclude zero (i.e. NAs)
    return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
        return(NA)                          }

  print("go parallel")
  cl <- makeForkCluster(ncores, outfile="" )
  registerDoParallel(cl)

  print("start main foreach loop")
  if(typp=="raster"){
  out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { 
    tmp <- raster::extract(mypredictor, header.shp[i,], 
                         buffer=min(10000,  max(250, header.shp@data$loc.uncert[i])), fun=myfunction) 
    }
  write.csv(out, file = output)
  } else {
    out <- sp::over(x=header.shp, y=mypredictor) 
    toassign <- header.shp[which(is.na(out[,1])),]
    print(paste(length(toassign), "plots not matched directely - seek for NN"))
  if(length(toassign)>0){
    
    nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
      print(i)
      nearest.tmp <- tryCatch(geosphere::dist2Line(header.shp[i,], mypredictor),
                              error = function(e){data.frame(distance=NA, lon=NA, lat=NA,ID=NA)}
                              )
      #nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor)
      return(nearest.tmp)
    }
    

    out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],]
   }
    write.csv(out, file=output)
  }
  stopCluster(cl)
  }