Timestamp: Fri Mar 13 13:56:17 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents the construction of the header file for sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.

knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)

## Spatial packages
library(rgdal)
library(sp)
library(rgeos)
library(raster)
library(rworldmap)
library(elevatr)
library(sf)
library(rnaturalearth)
library(dggridR)

#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
# escape all double quotation marks. Run in Linux terminal
#sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv

#more general alternative in case some " are already escaped
##first removing \s before all "s, and then adding \ before all ":
#sed 's/\([^\\]\)"/\1\\\"/g; s/"/\\"/g'

1 Import data

Import header data

header0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_header_test.csv", locale = locale(encoding = 'UTF-8'),
                            delim="\t", col_types=cols(
  PlotObservationID = col_double(),
  PlotID = col_double(),
  `TV2 relevé number` = col_double(),
  Country = col_factor(),
  `Cover abundance scale` = col_factor(),
  `Date of recording` = col_date(format="%d-%m-%Y"),
  `Relevé area (m²)` = col_double(),
  `Altitude (m)` = col_double(),
  `Aspect (°)` = col_double(),
  `Slope (°)` = col_double(),
  `Cover total (%)` = col_double(),
  `Cover tree layer (%)` = col_double(),
  `Cover shrub layer (%)` = col_double(),
  `Cover herb layer (%)` = col_double(),
  `Cover moss layer (%)` = col_double(),
  `Cover lichen layer (%)` = col_double(),
  `Cover algae layer (%)` = col_double(),
  `Cover litter layer (%)` = col_double(),
  `Cover open water (%)` = col_double(),
  `Cover bare rock (%)` = col_double(),
  `Height (highest) trees (m)` = col_double(),
  `Height lowest trees (m)` = col_double(),
  `Height (highest) shrubs (m)` = col_double(),
  `Height lowest shrubs (m)` = col_double(),
  `Aver. height (high) herbs (cm)` = col_double(),
  `Aver. height lowest herbs (cm)` = col_double(),
  `Maximum height herbs (cm)` = col_double(),
  `Maximum height cryptogams (mm)` = col_double(),
  `Mosses identified (y/n)` = col_factor(),
  `Lichens identified (y/n)` = col_factor(),
  COMMUNITY = col_character(),
  SUBSTRATE = col_character(),
  Locality = col_character(),
  ORIG_NUM = col_double(),
  ALLIAN_REV = col_character(),
  REV_AUTHOR = col_character(),
  Forest = col_logical(),
  Grassland = col_logical(),
  Wetland = col_logical(),
  `Sparse vegetation` = col_logical(),
  Shrubland = col_logical(),
  `Plants recorded` = col_factor(),
  `Herbs identified (y/n)` = col_factor(),
  Naturalness = col_factor(),
  EUNIS = col_factor(),
  Longitude = col_double(),
  Latitude = col_double(),
  `Location uncertainty (m)` = col_double(),
  Dataset = col_factor(),
  GUID = col_character()
)) %>% 
  rename(Sparse.vegetation=`Sparse vegetation`, 
         ESY=EUNIS) %>% 
  dplyr::select(-COMMUNITY, -ALLIAN_REV, -REV_AUTHOR, -SUBSTRATE) %>%   #too sparse information to be useful
  dplyr::select(-PlotID) #identical to PlotObservationID

The following column names occurred in the header of sPlot v2.1 and are currently missing from the header of v3.0
1. Syntaxon
2. Cover cryptogams (%)
3. Cover bare soil (%)
4. is.forest
5. is.non.forest
6. EVA
7. Biome
8. BiomeID
9. CONTINENT
10. POINT_X
11. POINT_Y
~~ Columns #1, #2, #3, #10, #11 will be dropped. The others will be derived below.

1.1 Solve spatial problems

There are 2020 plots in the Nile dataset without spatial coordinates. Assign manually with wide (90km) location uncertainty.

header <- header0 %>% 
  mutate(Latitude=replace(Latitude, 
                          list=(is.na(Latitude) & Dataset=="Egypt Nile delta"), 
                          values=30.917351)) %>% 
  mutate(Longitude=replace(Longitude, 
                          list=(is.na(Longitude) & Dataset=="Egypt Nile delta"), 
                          values=31.138534)) %>% 
  mutate(`Location uncertainty (m)`=replace(`Location uncertainty (m)`, 
                          list=(is.na(`Location uncertainty (m)`) & Dataset=="Egypt Nile delta"), 
                          values=-90000))

There are two plots in the Romania Grassland Databse and ~4442 plots in the Japan database, whose lat\long are inverted. Correct.

toswap <- c(which(header$Dataset=="Japan" & header$Latitude>90), 
            which(header$Dataset=="Romania Grassland Database" & header$Longitude>40))
header[toswap, c("Latitude", "Longitude")] <- header[toswap, c("Longitude", "Latitude")]

There are 237563 plots without location uncertainty. As a first approximation, we assign the median of the respective dataset, as a negative value to indicate this is an estimation, rather than a measure.

header <- header %>% 
  left_join(header %>% 
              group_by(Dataset) %>% 
              summarize(loc.uncer.median=median(`Location uncertainty (m)`, na.rm=T)), 
            by="Dataset") %>% 
  mutate(`Location uncertainty (m)`=ifelse( is.na(`Location uncertainty (m)` & !is.na(Latitude)), 
                                            -abs(loc.uncer.median), 
                                            `Location uncertainty (m)`)) %>% 
  dplyr::select(-loc.uncer.median)

There are still 91960 plots with no estimation of location uncertainty.
Assign plot size to plots in the Patagonia dataset (input of Ana Cingolani)

header <- header %>% 
  mutate(`Relevé area (m²)`=ifelse( (Dataset=="Patagonia" & is.na(`Relevé area (m²)`)), 
                                    -900, `Relevé area (m²)`))

2 Formations

Fill out the columns Forest:Sparse.vegetation with NAs, where necessary. Create columns is.forest and is.non.forest using script developed for sPlot 2.1
~~ I am not assigning plots to Faber-Langedon formation at this stage, as this is only possible for European plots having an ESY classification.

eunis.key <- read.xlsx("../_input/EUNIS_WFT.xlsx", sheetIndex = "Sheet1", endRow = 246) %>% 
  dplyr::select(EUNIS_code, NATURALNESS:SPARSE_VEG) %>% 
  mutate(EUNIS_code=as.character(EUNIS_code)) %>% 
  rename(ESY=EUNIS_code, 
         Naturalness=NATURALNESS, 
         Forest=FOREST,
         Shrubland=SCRUBLAND,
         Grassland=GRASSLAND,
         Wetland=WETLAND,
         Sparse.vegetation=SPARSE_VEG)#,

header <- header %>% # header.backup %>% 
  mutate(ESY=as.character(ESY)) %>% 
  #mutate(ESY=ifelse(ESY=="?", NA, ESY)) %>% 
  # Systematically assign some databases to forest
  mutate(Forest=ifelse(Dataset %in% 
                         c("Turkey Oak_Forest Database", "Turkey Forest Database", "Chile_forest", "Ethiopia"), 
                       T, Forest)) %>% 
  #fill up with F those rows where at least one column on formation is assigned
  rowwise() %>% 
  mutate(Any=any(Forest, Shrubland, Grassland, Wetland, Sparse.vegetation)) %>% 
  mutate(Forest=ifelse( (is.na(Forest) & Any), F, Forest))  %>%
  mutate(Shrubland=ifelse( (is.na(Shrubland) & Any), F, Shrubland))  %>% 
  mutate(Grassland=ifelse( (is.na(Grassland) & Any), F, Grassland))  %>% 
  mutate(Wetland=ifelse( (is.na(Wetland) & Any), F, Wetland))  %>% 
  mutate(Sparse.vegetation=ifelse( (is.na(Sparse.vegetation) & Any), F, Sparse.vegetation))  %>%
  ungroup() %>% 
  dplyr::select(-Any) %>% 
  mutate_at(vars(Forest:Shrubland), .funs=list(~.*1)) %>% 
  mutate(Naturalness=as.numeric(as.character(Naturalness))) %>% 
  ##join and coalesce with eunis.key
  left_join(eunis.key %>% 
              distinct(), by = "ESY") %>% 
    mutate(
        Forest = dplyr:::coalesce(Forest.x, Forest.y), 
        Shrubland = coalesce(Shrubland.x, Shrubland.y),
        Grassland = coalesce(Grassland.x, Grassland.y),
        Wetland = coalesce(Wetland.x, Wetland.y),
        Sparse.vegetation = coalesce(Sparse.vegetation.x, Sparse.vegetation.y),
        Naturalness = coalesce(Naturalness.x, Naturalness.y)
    ) %>% 
  dplyr::select(-ends_with(".x"), -ends_with(".y"))

3 Fix header and attach GIVD codes

Reduce number of factor levels for the column Plants recorded

header <- header %>% 
  mutate(`Plants recorded`=fct_recode(`Plants recorded`, 
                                      "All vascular plants"="complete vegetation",
                                      "All vascular plants"="Complete vegetation",
                                      "All vascular plants"="all vascular plants", 
                                      "All vascular plants"="complete", 
                                      "All vascular plants"="Complete vegetation (including non-terricolous tax",
                                      "All vascular plants"="Vascular plants",
                                      "All woody plants"="Woody plants",
                                      "All woody plants"="All woody species",
                                      "Woody plants >= 10 cm dbh"= "trees>=10cm dbh",
                                      "All trees & dominant understory"="All trees & dominant shrubs",
                                      "Woody plants >= 1 cm dbh" = "Plants >= 1 cm dbh"
                                      )) %>% 
  mutate(`Plants recorded`=factor(`Plants recorded`, exclude = "#N/A"))

Align consortium labels to those in sPlot’s consortium archive

databases <- read_csv("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv")

header <- header %>% 
  mutate(Dataset=fct_recode(Dataset,
                            "BIOTA_South_Africa" = "BIOTA_South_Africa_3", 
                            "Kriti"="Cyprus_Bergmeier", 
                            "European Boreal Forest Database"="European Boreal Forest Database 1", 
                            "European Boreal Forest Database"="European Boreal Forest Database 2", 
                            "European Coastal Vegetation Database"= "European Coastal Vegetation Database-A", 
                            "Germany_vegetweb"="Germany_vegetweb2", 
                            "Germany_vegetweb"="Germany_vegetweb3",
                            "Ladakh"="Ladakh_2", 
                            "Netherlands"="Netherlands Military sites",
                            "NSW Australia" = "NSW Austalia",
                            )) %>% 
  left_join(databases %>% 
              dplyr::select(`GIVD ID`, label) %>% 
              rename(Dataset=label),
            by="Dataset")

4 Assign plots to spatial descriptors

Create spatial point dataframe for sPlot data to intersect with spatial layers

header.shp <- header %>%
  filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
                                        dplyr::select(Longitude, Latitude),
                               proj4string = CRS("+init=epsg:4326"), 
                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
                                               `GIVD ID`=header.shp$`GIVD ID`))
writeOGR(header.shp, dsn="../_derived/", layer="header.shp", driver="ESRI Shapefile", overwrite_layer = T)
## Warning in writeOGR(header.shp, dsn = "../_derived/", layer = "header.shp", :
## Field names abbreviated for ESRI Shapefile driver

4.1 Assign to Continents

Download and manipulate map of continents

sPDF <- rworldmap::getMap(resolution="coarse")
continent <- sPDF[,"continent"]
crs(continent) <- CRS("+init=epsg:4326")
continent@data[243,"continent"] <- "South America" ## Manually correct missing data
# create clipped version of continent to avoid going beyond 180 lON
coords <- data.frame(x=c(-180,180,180,-180),
                     y=c(-90,-90,90,90))
bboxc = Polygon(coords)
bboxc = SpatialPolygons(list(Polygons(list(bboxc), ID = "a")), proj4string=crs(continent))
continent_clipped <- gIntersection(continent[-137,], bboxc, byid=T) # polygon 137 gives problems... workaround

## same but high resolution (slower, but works better for plots along coastlines)
sPDF <- rworldmap::getMap(resolution="high")
continent.high <- sPDF[,"continent"]
crs(continent.high) <- CRS("+init=epsg:4326")
continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean")

Assign plots to continent

continent.out <- sp::over(x=header.shp, y=continent)
#overlay unassigned points to the high resolution layer of continent
toassign <- header.shp[which(is.na(continent.out$continent)),] #154782 remain to assign
crs(toassign) <- crs(continent)
continent.out2 <- sp::over(x=toassign, y=continent.high)
#merge first and second overlay 
continent.out$continent[is.na(continent.out$continent)] <- continent.out2$continent

#correct unassigned points to closest continent
toassign <- header.shp[which(is.na(continent.out$continent)),] #47610 remain to assign
crs(toassign) <- crs(continent)

#go parallel
ncores=8
library(parallel)
library(doParallel)
cl <- makeForkCluster(ncores, outfile="" )
registerDoParallel(cl)
  
nearestContinent <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
  nearestContinent.tmp <- geosphere::dist2Line(toassign[i,], continent_clipped)
}
continent.out$continent[which(is.na(continent.out$continent))] <- as.character(continent[-137,]@data[nearestContinent[,"ID"],])
save(continent.out, file = "../_derived/continent.out")
stopCluster(cl)

Reload, manipulate continent and attach to header

load("../_derived/continent.out")
header <- header %>% 
  left_join(header %>% 
              filter(!is.na(Longitude) | !is.na(Latitude)) %>% 
              dplyr::select(PlotObservationID) %>% 
              bind_cols(continent.out), 
            by="PlotObservationID") %>% 
  mutate(CONTINENT=factor(continent, 
                            levels=c("Africa", "Antarctica", "Australia", "Eurasia", "North America", "South America"),
                            labels=c("AF", "AN", "AU", "EU", "NA", "SA"))) %>% 
  dplyr::select(-continent)

Summarize

## Warning: Factor `CONTINENT` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Number of plots per continent
CONTINENT num.plot
AF 26803
AN 19
AU 66857
EU 1776535
NA 90177
SA 15844
NA 2451

4.2 Assign to sBiomes

Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

sBiomes <- readOGR("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp")
crs(sBiomes) <- crs(header.shp)

cl <- makeForkCluster(5, outfile="")
registerDoParallel(cl)

clusterEvalQ(cl, {
  library(rgdal)
  library(raster)
  library(sp)
  library(elevatr)
  library(dplyr)
  })

sBiomes.out <- foreach(i=1:99, .combine=rbind) %dopar% {
  source("A98_PredictorsExtract.R")
  PredExtr(header.shp, myfunction=NA, output="../_derived/sBiomes/",  
                     toextract=sBiomes, typp="shp", ncores=1, chunkn=99, chunk.i=i)
}
stopCluster(cl)

Reimport output and join to header

sBiome.files <- list.files("../_derived/sBiomes", pattern="sBiomes-[0-9]+.csv", full.names=T)
sBiomes.out <- do.call(rbind, lapply(sBiome.files, function(x) {read_csv(x)}))
header <- header %>% 
  left_join(sBiomes.out %>% 
              rename(PlotObservationID=X1) %>% 
              dplyr::select(PlotObservationID, Name, BiomeID) %>% 
              dplyr::rename(sBiome=Name, sBiomeID=BiomeID), 
            by="PlotObservationID")

There are 11849 unassigned plots.

Summarize:
Number of plots per Biome
sBiome num.plot
Alpine 37851
Boreal zone 32526
Dry midlatitudes 69887
Dry tropics and subtropics 38808
Polar and subpolar zone 7952
Subtropics with winter rain 191623
Subtrop. with year-round rain 73244
Temperate midlatitudes 1488121
Tropics with summer rain 15781
Tropics with year-round rain 11044
NA 11849

4.3 Extract WWF Ecoregions

Extract ecoregion name and ID from Ecoregions of the World. Olson et al. 2001 (BioScience).
Computation was performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

ecoreg <- readOGR("../sPlot/_Ancillary/official", layer="wwf_terr_ecos")
ecoreg@data <- ecoreg@data %>% 
  dplyr::select(OBJECTID, ECO_NAME, REALM, BIOME, ECO_NUM, ECO_ID, eco_code)

cl <- makeForkCluster(5, outfile="")
registerDoParallel(cl)

clusterEvalQ(cl, {
  library(rgdal)
  library(raster)
  library(sp)
  library(elevatr)
  library(dplyr)
  })

ecoreg.out <- foreach(i=1:99, .combine=rbind) %dopar% {
  source("A98_PredictorsExtract.R")
  PredExtr(header.shp, myfunction=NA, output="../_derived/wwf_ecoregions/",  
                     toextract=ecoreg, typp="shp", ncores=1, chunkn=99, chunk.i=i)
}
stopCluster(cl)

Reimport output and join to header

ecoreg.files <- list.files("../_derived/wwf_ecoregions/", pattern="wwf_terr_ecos-[0-9]+.csv", full.names=T)
ecoreg.out <- do.call(rbind, lapply(ecoreg.files, function(x) {read_csv(x, 
  col_types=cols(
    .default = col_double(),
    ECO_NAME = col_character(),
    REALM = col_character(),
    G200_REGIO = col_character(),
    eco_code = col_character()))}))
header <- header %>% 
  left_join(ecoreg.out %>% 
              rename(PlotObservationID=X1) %>% 
              dplyr::select(PlotObservationID, ECO_NAME, ECO_ID) %>% 
              dplyr::rename(Ecoregion=ECO_NAME, EcoregionID=ECO_ID), 
            by="PlotObservationID")

There are 0 unassigned plots.

Summarize:
Number of plots in the 30 best represented Ecoregions
Ecoregion num.plot
Atlantic mixed forests 349664
Western European broadleaf forests 254820
Baltic mixed forests 186551
Central European mixed forests 126733
Pannonian mixed forests 96371
Alps conifer and mixed forests 74662
Carpathian montane forests 68357
Celtic broadleaf forests 62756
Sarmatic mixed forests 36093
Northeastern Spain and Southern France Mediterranean forests 33345
Taiheiyo evergreen forests 31241
English Lowlands beech forests 30724
Balkan mixed forests 30047
Italian sclerophyllous and semi-deciduous forests 27913
Cantabrian mixed forests 26043
Pyrenees conifer and mixed forests 24529
Tyrrhenian-Adriatic Sclerophyllous and mixed forests 19885
Iberian sclerophyllous and semi-deciduous forests 18182
East European forest steppe 16745
Great Basin shrub steppe 15584
Dinaric Mountains mixed forests 14846
Pontic steppe 14294
Nihonkai montane deciduous forests 14022
Caspian lowland desert 14006
Rodope montane mixed forests 14004
Eastern Australian temperate forests 13988
Scandinavian and Russian taiga 13391
NA 12196
Southeast Australia temperate savanna 11382
North Atlantic moist mixed forests 10567

4.4 Extract elevation

Extract elevation for each plot. Loops over tiles of 1 x 1°, projects to mercator, and extract elevation for plot coordinates, as well as 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package elevatr, which uses the Terrain Tiles on Amazon Web Services. Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described here.
Split data into tiles of 1 x 1 degrees, and create sp::SpatialPointsDataFrame files. Only for plots having a location uncertainty < 50 km.

header.tiles <- header %>%
  dplyr::select(PlotObservationID, Dataset, Longitude, Latitude, `Location uncertainty (m)`) %>%
  mutate(`Location uncertainty (m)`=abs(`Location uncertainty (m)`)) %>% 
  filter(`Location uncertainty (m)`<= 50000) %>%
  mutate_at(.vars=vars(Longitude, Latitude), 
            .funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>%
  filter(!is.na(Longitude_tile) & !is.na(Latitude_tile) ) %>%
  mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile)))

There are 1581734 plots out of 1978686 plots with Location uncertainty <= 50km (or absent). The total number of tiles is 30123.
Performed in EVE HPC cluster using function A97_ElevationExtract.R. Divided in 99 chunks.

cl <- makeForkCluster(5, outfile="")
registerDoParallel(cl)

clusterEvalQ(cl, {
  library(rgdal)
  library(raster)
  library(sp)
  library(elevatr)
  library(dplyr)})

# Divided in 99 chunks
elevation.out <- foreach(i=1:99, .combine=rbind) %dopar% {
  source("A97_ElevationExtract.R")
  ElevationExtract(header.shp, output, ncores=1, chunk.i=1)}
stopCluster(cl)

For those tiles that failed, extract elevation of remaining plots one by one

#create list of tiles for which dem could not be extracted
myfiles <- list.files("../_derived/elevatr/")
failed <- list.files("../_derived/elevatr/", pattern = "[A-Za-z]*_[0-9]+failed\\.RData$")
failed <- as.numeric(unlist(regmatches(failed, gregexpr("[[:digit:]]+", failed))))

#create SpatialPointsDataFrame
sp.tile0 <- SpatialPointsDataFrame(coords=header.tiles %>% 
                                    filter(tilenam %in% levels(header.tiles$tilenam)[failed]) %>%
                                    dplyr::select(Longitude, Latitude),
                                  data=header.tiles %>% 
                                    filter(tilenam %in% levels(header.tiles$tilenam)[failed]) %>%
                                    dplyr::select(-Longitude, -Latitude),
                                  proj4string = CRS("+init=epsg:4326"))
sp.tile0 <- spTransform(sp.tile0, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
                                               +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
                                               +no_defs ")) #project to mercator
output.tile <- data.frame(NULL)

cl <- makeForkCluster(5, outfile="")
registerDoParallel(cl)

clusterEvalQ(cl, {
  library(rgdal)
  library(raster)
  library(sp)
  library(elevatr)
  library(dplyr)})

#Loop over all plots
elevation.failed <- foreach(i=1:nrow(sp.tile0), .packages=c('raster'), .combine=rbind) %dopar% { 
  sp.tile <- sp.tile0[i,]
  tryCatch(raster.tile <- get_elev_raster(sp.tile, z=10, 
                                          expand=max(sp.tile$`Location uncertainty (m)`)),
        error = function(e){
          print(paste("could not retrieve DEM for", sp.tile$PlotObservationID))}
          )
   if(!exists("raster.tile")) {
    output.tile <- data.frame(PlotObservationID==sp.tile$PlotObservationID, 
                              elevation=NA, 
                              Elevation_q2.5=NA, 
                              Elevation_median=NA,
                              Elevation_q97.5=NA,
                              DEM.res=NA)
    return(output.tile)
  } else {
  # clip dem tile with continent shape
  raster.tile <- mask(raster.tile, continent.high.merc)
  
  #extract and summarize elevation data
  elev.tile <- raster::extract(raster.tile, sp.tile, small=T)
  elev.tile.buffer <- raster::extract(raster.tile, sp.tile, 
                                      buffer=sp.tile$`Location uncertainty (m)`, small=T)
  elev.q95 <- t(round(mapply( quantile, 
                            x=elev.tile.buffer,
                            probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), na.rm=T)))
  output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID, 
                            elevation=round(elev.tile), 
                            elev.q95, 
                            DEM.res=res(raster.tile)[1]) %>%
  rename(Elevation_q2.5=X2.5., Elevation_median=X50., Elevation_q97.5=X97.5.)
  return(output.tile)
  }
}
stopCluster(cl)
save(elevation.failed, file = "../_derived/elevatr/elevation_missing.RData")

Compose tiles into a single output, and export

myfiles <- list.files("../_derived/elevatr/", pattern = "elevation_tile_[0-9]+\\.RData$", full.names = T)

#create empty data.frame
elevation.out <- matrix(NA, nrow=nrow(header.tiles), ncol=6)
elevation.out <- as.data.frame(elevation.out)
colnames(elevation.out) <- c("PlotObservationID", "elevation", "Elevation_q2.5", "Elevation_median", "Elevation_q97.5","DEM.res")
elevation.out$PlotObservationID <- header.tiles$PlotObservationID

tmp <- NULL
for(i in 1:length(myfiles)){
  load(myfiles[i])
  #attach results to empty data.frame
  tmp <- bind_rows(tmp, output.tile)
  if(i %in% seq(5000, length(myfiles), by=5000)){
    mymatch <- base::match(x=tmp$PlotObservationID, table=elevation.out$PlotObservationID)
    mymatch <- mymatch[!is.na(mymatch)]
    elevation.out[mymatch,] <- tmp
    tmp <- NULL
    print(paste("Attached first", i, "files"))
  }
  if(i %in% seq(1,length(myfiles), by=250)){print(i)}
}

load(file = "../_derived/elevatr/elevation_missing.RData")

mymatch <- base::match(x=elevation.failed$PlotObservationID, table=elevation.out$PlotObservationID)
mymatch <- mymatch[!is.na(mymatch)]
elevation.out[mymatch,] <- elevation.failed

write_csv(elevation.out, path ="../_derived/elevatr/elevation.out.csv")

Reimport output, attach to header and check

elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv")
Example of elevation output (10 randomly selected plots shown)
PlotObservationID elevation Elevation_q2.5 Elevation_median Elevation_q97.5 DEM.res
937780 140 140 140 140 153
767019 252 252 252 252 153
1440522 821 821 821 821 153
619891 315 315 315 315 153
718496 1047 1047 1047 1047 153
832178 620 620 620 620 153
228243 390 390 390 390 153
1784946 201 201 201 201 153
1712313 2283 2283 2283 2283 153
1226300 NA NA NA NA 153
summary(elevation.out %>% 
          dplyr::select(-PlotObservationID, -elevation))
##  Elevation_q2.5    Elevation_median Elevation_q97.5     DEM.res     
##  Min.   :-1018.0   Min.   :-474.0   Min.   :-474     Min.   : 76.4  
##  1st Qu.:   17.0   1st Qu.:  21.0   1st Qu.:  26     1st Qu.:153.0  
##  Median :  163.0   Median : 178.0   Median : 194     Median :153.0  
##  Mean   :  441.6   Mean   : 469.4   Mean   : 506     Mean   :153.1  
##  3rd Qu.:  593.0   3rd Qu.: 644.0   3rd Qu.: 698     3rd Qu.:153.0  
##  Max.   : 6155.0   Max.   :6155.0   Max.   :6155     Max.   :306.0  
##  NA's   :104818    NA's   :104818   NA's   :104818   NA's   :3082

There are 104818 plots without elevation info, corresponding to 5.3% of total.
There are 51389 plots with elevation below sea level.
Join elevation data (only median)

header <- header %>% 
  left_join(elevation.out %>% 
              dplyr::select(PlotObservationID, Elevation_median) %>% 
              rename(elevation_dem=Elevation_median) %>% 
              distinct(PlotObservationID, .keep_all=T), 
            by="PlotObservationID")
Summary and check
Dataset with highest number of plots below sea level (10 randomly selected plots shown)
GIVD ID Dataset num.plot
EU-GB-001 Britain_nvcd 67
AS-CN-007 China_Northern_Mountains_2 15
AU-AU-003 NSW Australia 2
EU-00-004e Spain_sivim_sclerophyllous 4
EU-00-016 Ammophiletea Database 118
EU-RU-003 Russia_lysenko 159
EU-IT-010 Italy_HabItAlp 5
EU-HR-001 Croatia_mix 95
NA-US-014 Aava 21
EU-00-027 European Boreal Forest Database 3

Create Scatterplot between measured elevation in the field, and elevation derived from DEM

#join measured and derived elevation
mydata <- header %>% 
  dplyr::select(PlotObservationID, `Altitude (m)`, elevation_dem) %>%
  filter(!is.na(`Altitude (m)`) & !is.na(elevation_dem)) %>%
  rename(elevation_measured=`Altitude (m)`)

ggplot(data=mydata) + 
  geom_point(aes(x=elevation_measured, y=elevation_dem), alpha=1/10, cex=0.8) + 
  theme_bw() + 
  geom_abline(slope=0, intercept=0, col=2, lty=2) + 
  geom_abline(slope=1, intercept=1, col="Dark green")

5 Map of plots

countries <- ne_countries(returnclass = "sf") %>% 
  st_transform(crs = "+proj=eck4") %>% 
  st_geometry()
graticules <- ne_download(type = "graticules_15", category = "physical",
                          returnclass = "sf") %>% 
  st_transform(crs = "+proj=eck4") %>% 
  st_geometry()
## OGR data source with driver: ESRI Shapefile 
## Source: "/data/sPlot/users/Francesco/_tmp/Rtmpi0p64q", layer: "ne_110m_graticules_15"
## with 35 features
## It has 5 fields
## Integer64 fields read as strings:  degrees scalerank
bb <- ne_download(type = "wgs84_bounding_box", category = "physical",
                  returnclass = "sf") %>% 
  st_transform(crs = "+proj=eck4") %>% 
  st_geometry()
## OGR data source with driver: ESRI Shapefile 
## Source: "/data/sPlot/users/Francesco/_tmp/Rtmpi0p64q", layer: "ne_110m_wgs84_bounding_box"
## with 1 features
## It has 2 fields
## basic graph of the world in Eckert projection
w3a <- ggplot() +
  geom_sf(data = bb, col = "grey20", fill = "white") +
  geom_sf(data = graticules, col = "grey20", lwd = 0.1) +
  geom_sf(data = countries, fill = "grey90", col = NA, lwd = 0.3) +
  coord_sf(crs = "+proj=eck4") +
  theme_minimal() +
  theme(axis.text = element_blank(), 
        legend.title=element_text(size=12), 
        legend.text=element_text(size=12),
        legend.background = element_rect(size=0.1, linetype="solid", colour = 1), 
        legend.key.height = unit(1.1, "cm"), 
        legend.key.width = unit(1.1, "cm")) +
  scale_fill_viridis()

Graph of plot density (hexagons)

header2 <- header %>% 
  filter(!is.na(Longitude) | !is.na(Latitude)) %>% 
  dplyr::select(PlotObservationID, Latitude, Longitude, `GIVD ID`) %>% 
  filter(!(abs(Longitude) >171 & abs(Latitude>70)))
dggs <- dgconstruct(spacing=300, metric=T, resround='down')
## Resolution: 6, Area (km^2): 69967.8493448681, Spacing (km): 261.246386348549, CLS (km): 298.479323187169
#Get the corresponding grid cells for each earthquake epicenter (lat-long pair)
header2$cell <- dgGEO_to_SEQNUM(dggs, header2$Longitude, header2$Latitude)$seqnum

#Calculate number of plots for each cell
header.out   <- header2 %>% 
  group_by(cell) %>% 
  summarise(value.out=log(n(), 10))

#Get the grid cell boundaries for cells 
grid   <- dgcellstogrid(dggs, header.out$cell, frame=F) %>%
  st_as_sf() %>% 
  mutate(cell = header.out$cell) %>% 
  mutate(value.out=header.out$value.out) %>% 
  st_transform("+proj=eck4") %>% 
  st_wrap_dateline(options = c("WRAPDATELINE=YES"))

## plotting
legpos <- c(0.160, .24)
(w3 <- w3a + 
       geom_sf(data=grid, aes(fill=value.out),lwd=0, alpha=0.9)    +
       geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + 
       scale_fill_viridis(
         name="# plots", breaks=0:5, labels = c("1", "10", "100",
                                                "1,000", "10,000", "100,000"), option="viridis" ) + 
    #labs(fill="# plots") + 
    theme(legend.position = legpos +c(-0.06, 0.25))
)

ggsave(filename="../_pics/PlotDensityLog10_vir.png", width = 15, height = 7, units="in", dpi=300, plot=w3)

Graph of plot location by Dataset

header.sf <- header.shp %>% 
  st_as_sf() %>% 
  st_transform(crs = "+proj=eck4")

(w4 <- w3a + 
         geom_sf(data=header.sf %>% 
                   mutate(GIVD.ID=fct_shuffle(GIVD.ID)), aes(col=factor(GIVD.ID)), pch=16, size=0.8, alpha=0.6) +
         geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + 
         theme(legend.position = "none"))

ggsave(filename="../_pics/PlotDistrib_Dark2_shuffle1984.png", width = 15, height = 7, units="in", dpi=300, plot=w4) ## takes ~40' to render

6 Fix output and export

#check 
nrow(header)==nrow(header0)
## [1] TRUE
header <- header %>% 
  dplyr::select(
    #Metadata
    PlotObservationID, Dataset, "GIVD ID", "TV2 relevé number", "ORIG_NUM", "GUID", 
    #Geographic info
    Longitude:"Location uncertainty (m)", Country, CONTINENT, sBiome, sBiomeID, Locality,
    #Methodological info
    "Relevé area (m²)", "Cover abundance scale", "Date of recording", "Plants recorded", 
    "Herbs identified (y/n)","Mosses identified (y/n)","Lichens identified (y/n)",
    #Topographical
    elevation_dem, "Altitude (m)", "Aspect (°)", "Slope (°)", 
    #Vegetation type
    Forest:Naturalness, ESY, 
    #Vegetation structure
                "Cover total (%)":"Maximum height cryptogams (mm)")
save(header, file = "../_output/header_sPlot3.0.RData")
Example of header (first 20 rows)
PlotObservationID Dataset GIVD ID TV2 relevé number ORIG_NUM GUID Longitude Latitude Location uncertainty (m) Country CONTINENT sBiome sBiomeID Locality Relevé area (m²) Cover abundance scale Date of recording Plants recorded Herbs identified (y/n) Mosses identified (y/n) Lichens identified (y/n) elevation_dem Altitude (m) Aspect (°) Slope (°) Forest Shrubland Grassland Wetland Sparse.vegetation Naturalness ESY Cover total (%) Cover tree layer (%) Cover shrub layer (%) Cover herb layer (%) Cover moss layer (%) Cover lichen layer (%) Cover algae layer (%) Cover litter layer (%) Cover open water (%) Cover bare rock (%) Height (highest) trees (m) Height lowest trees (m) Height (highest) shrubs (m) Height lowest shrubs (m) Aver. height (high) herbs (cm) Aver. height lowest herbs (cm) Maximum height herbs (cm) Maximum height cryptogams (mm)
1 Albanian Vegetation Database EU-AL-001 1 NA 2089C3E0-6A5C-4D57-B83B-C651D7BADEE0 19.39064 41.86004 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 3 Braun/Blanquet (new) 2012-06-26 NA NA N N 2 2 NA NA 0 0 0 1 0 1 C51a NA NA NA 90 NA NA NA NA NA NA NA NA NA NA 150 NA NA NA
2 Albanian Vegetation Database EU-AL-001 2 NA DD8ECD2E-BE9A-4F6F-8B1A-E5428EFAE462 19.39028 41.85990 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 60 Braun/Blanquet (new) 2012-06-26 NA NA N N 2 2 NA NA NA NA NA NA NA 1 ? NA NA NA 90 NA NA NA NA NA NA NA NA NA NA 160 NA NA NA
3 Albanian Vegetation Database EU-AL-001 3 NA 8F0D02F1-5DF1-40B0-AEBF-520B49FA54DC 19.37799 41.84715 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 30 Braun/Blanquet (new) 2012-06-26 NA NA N N NA NA 160 4 0 0 1 0 0 1 E NA NA NA 45 NA NA NA NA NA NA NA NA NA NA 30 NA NA NA
4 Albanian Vegetation Database EU-AL-001 4 NA EC1B5E84-2B62-4FDA-B2A9-9D8E0B7A62B0 19.37800 41.84716 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 10 Braun/Blanquet (new) 2012-06-26 NA NA N N NA NA NA NA NA NA NA NA NA 1 ? NA NA NA 50 NA NA NA NA NA NA NA NA NA NA 20 NA NA NA
5 Albanian Vegetation Database EU-AL-001 5 NA 73CB0497-09D0-4DD8-A9BA-102C2A03931D 19.38444 41.86112 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 80 Braun/Blanquet (new) 2012-06-26 NA NA N N 0 0 NA NA 0 0 1 0 0 1 E NA 85 5 80 NA NA NA NA NA NA 10 NA 20 NA 150 NA NA NA
6 Albanian Vegetation Database EU-AL-001 6 NA BF2B97B9-ADDB-4E34-8B9B-08CB40392484 19.43621 41.92741 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 30 Braun/Blanquet (new) 2012-06-27 NA NA N N 0 2 NA NA 0 0 0 1 0 1 C12b NA NA NA 80 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
7 Albanian Vegetation Database EU-AL-001 7 NA DD3CACE3-AE62-4547-8718-C36EF8905E07 19.43625 41.92736 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 2 Braun/Blanquet (new) 2012-06-27 NA NA N N 0 2 NA NA 0 0 1 0 0 1 E34b NA NA NA 98 NA NA NA NA NA NA NA NA NA NA 2 NA NA NA
8 Albanian Vegetation Database EU-AL-001 8 NA B291F31F-3B13-4E4D-A7D2-72CE3BA396E9 19.43642 41.92744 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 30 Braun/Blanquet (new) 2012-06-27 NA NA N N 0 2 NA NA 0 0 0 1 0 1 C51a NA NA NA 100 NA NA NA NA NA NA NA NA NA NA 200 NA NA NA
9 Albanian Vegetation Database EU-AL-001 9 NA E831B7FA-E2DC-40A9-889F-AEA43717DDF1 19.43650 41.92738 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 5 Braun/Blanquet (new) 2012-06-27 NA NA N N 0 2 NA NA 0 0 0 0 1 1 C35e NA NA NA 100 NA NA NA NA NA NA NA NA NA NA 20 NA NA NA
10 Albanian Vegetation Database EU-AL-001 10 NA 6B533118-D131-4094-964E-28F49257F3C4 19.43369 41.92656 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 15 Braun/Blanquet (new) 2012-06-27 NA NA N N 0 1 NA NA 0 0 1 0 0 2 IE51 NA NA NA 100 NA NA NA NA NA NA NA NA NA NA 170 NA NA NA
11 Albanian Vegetation Database EU-AL-001 11 NA 7B52E1CC-F89C-4790-AC4E-5781F912B4EE 19.43363 41.92663 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape NA Braun/Blanquet (new) 2012-06-27 NA NA N N 0 2 NA NA 0 0 0 1 0 1 C12b NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
12 Albanian Vegetation Database EU-AL-001 12 NA 02DB1195-8269-4998-8A60-F7727EF90835 19.43156 41.92525 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 5 Braun/Blanquet (new) 2012-06-27 NA NA N N 1 2 NA NA 0 0 0 0 1 1 C35e NA NA NA 100 NA NA NA NA NA NA NA NA NA NA 20 NA NA NA
13 Albanian Vegetation Database EU-AL-001 13 NA 0909C93F-7D4A-4EE3-B8BD-9AFB7A3E9448 19.43181 41.92531 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 40 Braun/Blanquet (new) 2012-06-27 NA NA N N 1 1 NA NA 0 0 0 1 0 1 C51a NA NA NA 100 NA NA NA NA NA NA NA NA NA NA 200 NA NA NA
14 Albanian Vegetation Database EU-AL-001 14 NA B9E2E198-A5FD-4500-9840-AE151EFAD6CE 19.43496 41.92724 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 20 Braun/Blanquet (new) 2012-06-27 NA NA N N 0 0 NA NA 0 0 1 0 0 2 IE51 NA NA NA 100 NA NA NA NA NA NA NA NA NA NA 180 NA NA NA
15 Albanian Vegetation Database EU-AL-001 15 NA E07F1B54-5A6D-44F3-B503-00E7F9840C69 19.44038 41.91412 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 5 Braun/Blanquet (new) 2012-06-27 NA NA N N 10 8 40 3 0 0 1 0 0 1 IE51 NA NA NA 90 NA NA NA NA NA NA NA NA NA NA 20 NA NA NA
16 Albanian Vegetation Database EU-AL-001 16 NA 5CDF9483-6D2A-4A47-ADCD-59D5F405BE2F 19.44207 41.91515 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 50 Braun/Blanquet (new) 2012-06-27 NA NA N N 15 9 20 10 0 1 0 0 0 1 F31e NA NA 100 5 NA NA NA NA NA NA NA NA 25 NA NA NA NA NA
17 Albanian Vegetation Database EU-AL-001 17 NA CE9629F3-6FD8-4778-8DAE-DA9C95B65EEE 19.44046 41.91133 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 50 Braun/Blanquet (new) 2012-06-27 NA NA N N 22 16 NA NA 1 0 0 0 0 1 G17a NA 90 15 10 NA NA NA NA NA 50 5 NA 20 NA NA NA NA NA
18 Albanian Vegetation Database EU-AL-001 18 NA EB1441EC-7B95-4806-819C-56BA8C50A7EC 19.44037 41.91150 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 80 Braun/Blanquet (new) 2012-06-27 NA NA N N 14 24 NA NA 1 0 0 0 0 1 G17a NA 100 1 NA NA NA NA NA NA 5 6 NA 20 NA NA NA NA NA
19 Albanian Vegetation Database EU-AL-001 19 NA 19AB31F4-9039-4E18-A60F-BFC191EEC8E5 19.43523 41.90469 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 50 Braun/Blanquet (new) 2012-06-27 NA NA N N 26 21 180 15 1 0 0 0 0 1 G17a NA 100 90 1 NA NA NA NA NA NA 8 NA 20 NA NA NA NA NA
20 Albanian Vegetation Database EU-AL-001 20 NA EBADE079-7015-4486-B1F7-72BDB235FE50 19.44371 41.89758 15 Albania EU Subtropics with winter rain 4 Buna River Protected Landscape 80 Braun/Blanquet (new) 2012-06-27 NA NA N N 25 13 180 15 0 1 0 0 0 1 Fa NA NA 90 NA NA NA NA NA NA NA NA NA 17 NA NA NA NA NA

Supplementary Material

ANNEX 1 - Ancillary function - PredExtr

# define ancillary functions
robust.mean <- function(x){mean(x, na.rm=T)}
robust.mode <- function(x){if(any(x!=0)) {
  a <- x[which(x!=0)] #exclude zero (i.e. NAs)
  return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
    return(NA)}
robust.sd <- function(x){sd(x, na.rm=T)}

#main function
PredExtr <- function(x.shp, myfunction=NA, output=NA, 
                     toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
  print("Load Packages")
  require(foreach)
  require(parallel)
  require(doParallel)
  require(raster)
  require(rgeos)
  require(rgdal) 
 
  print(paste("Loading", typp, "data :", toextract))
  print(paste("output will be:", output))
  if(typp=="raster"){ mypredictor <- raster(toextract)} else
  mypredictor <- readOGR(toextract)
  colnames(mypredictor@data)
  header.shp <-readOGR(x.shp)
  crs(mypredictor) <- crs(header.shp) #should be verified beforehand!
  
  

  ## Divide in chunks if requested
  if(chunkn>1 & !is.na(chunk.i)){
    print(paste("divide in chunks and run on chunk n.", chunk.i))
    indices <- 1:length(header.shp)
    chunks <- split(indices, sort(indices%%chunkn))
    header.shp <- header.shp[chunks[[chunk.i]],]
    } 

  print("myfunction defined as")
  print(myfunction)
  
  print("go parallel")
  cl <- makeForkCluster(ncores, outfile="" )
  registerDoParallel(cl)

  print("start main foreach loop")
  if(typp=="raster"){
  out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { 
    tmp <- raster::extract(mypredictor, header.shp[i,], 
                         buffer=min(10000,  max(250, header.shp@data$lc_ncrt[i])), fun=myfunction) 
  }
  if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  } else {
    out <- sp::over(x=header.shp, y=mypredictor) 
    toassign <- header.shp[which(is.na(out[,1])),]
    print(paste(length(toassign), "plots not matched directely - seek for NN"))
  if(length(toassign)>0){
    
    nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
      print(i)
      nearest.tmp <- tryCatch(geosphere::dist2Line(header.shp[i,], mypredictor),
                              error = function(e){data.frame(distance=NA, lon=NA, lat=NA,ID=NA)}
                              )
      #nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor)
      return(nearest.tmp)
    }
    

    out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],]
   }
    if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
  }
  stopCluster(cl)
  }

ANNEX 2 - Ancillary function - ElevationExtract

ElevationExtract <- function(header, output, ncores, chunk.i){
  print("load packages")
  require(tidyverse)
  
  require(rgdal)
  require(sp)
  require(rgeos)
  require(raster)
  require(rworldmap)
  require(elevatr)
  
  require(parallel)
  require(doParallel)
  
  print("Import header and divide in tiles")
  header.shp <- readOGR(header)
  header.tiles <- header.shp@data %>% 
    bind_cols(as.data.frame(header.shp@coords)) %>% 
    rename(PlotObservationID=PltObID, Longitude=coords.x1, Latitude=coords.x2) %>% 
    mutate(lc_ncrt=abs(lc_ncrt)) %>% 
    filter(lc_ncrt <= 50000) %>%
    mutate_at(.vars=vars(Longitude, Latitude), 
              .funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>%
    mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile)))
    
  
  print("Get continent")
  sPDF <- rworldmap::getMap(resolution="high")
  continent.high <- sPDF[,"continent"]
  crs(continent.high) <- CRS("+init=epsg:4326")
  continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean")
  continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m
+nadgrids=@null +no_defs"))

  print("go parallel")
  cl <- makeForkCluster(ncores, outfile="")
  registerDoParallel(ncores)
  
  clusterEvalQ(cl, {
    library(rgdal)
    library(raster)
    library(sp)
    library(elevatr)
    library(dplyr)
  })

  print("create list of tiles still to do")
  myfiles <- list.files(path = output, pattern = "[A-Za-z]*_[0-9]+\\.RData$")
  done <- NULL
  done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles))))
  todo <- 1:nlevels(header.tiles$tilenam)
  if(length(done)>0) {todo <- todo[-which(todo %in% done)]}
  todo <- sample(todo, replace=F)
  print(paste(length(todo), "tiles to do"))
  
  print("divide in chunks")
  #divide in chunks
  todo.chunks <- split(todo, sort(todo%%10))
  
  print(paste("start main foreach loop on chunk n=", chunk.i))
  print(paste(length(todo.chunks[[chunk.i]]), "to do"))
  foreach(i = todo.chunks[[chunk.i]]) %dopar% {  
    print(paste("doing", i))
    #create sp and project data
    if(nrow(header.tiles %>% 
            filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next()
    sp.tile <- SpatialPointsDataFrame(coords=header.tiles %>% 
                                        filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
                                        dplyr::select(Longitude, Latitude),
                                      data=header.tiles %>% 
                                        filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
                                        dplyr::select(-Longitude, -Latitude),
                                      proj4string = CRS("+init=epsg:4326"))
    sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
                                                 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
                                                 +no_defs ")) #project to mercator
    
    #retrieve dem raster
    tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$lc_ncrt), clip="bbox"),
             error = function(e){e}
    )
    if(!exists("raster.tile")) {
      raster.tile <- NA
      print(paste("tile", i, "doesn't work!, skip to next"))
      save(raster.tile, file = paste(output, "elevation_tile_", i, "failed.RData", sep=""))
      rm(raster.tile)
      } else {
    # clip dem tile with continent shape
    raster.tile <- mask(raster.tile, continent.high.merc)
    
    #extract and summarize elevation data
    elev.tile <- raster::extract(raster.tile, sp.tile, small=T)
    elev.tile.buffer <- raster::extract(raster.tile, sp.tile, 
                                        buffer=sp.tile@data$lc_ncrt, 
                                        small=T)
    tmp <- round(mapply( quantile, 
                         x=elev.tile.buffer,
                         #center=elev.tile,
                         probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), 
                         #lc_ncrt=sp.tile$lc_ncrt, 
                         na.rm=T))
    elev.q95 <- setNames(data.frame(matrix(tmp, ncol = 3, nrow = length(elev.tile.buffer))), 
                         c("Elevation_q2.5", "Elevation_median", "Elevation_q97.5"))
    output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID, 
                              elevation=round(elev.tile), 
                              elev.q95, 
                              DEM.res=res(raster.tile)[1]) 
    
    #save output
    save(output.tile, file = paste(output, "elevation_tile_", i, ".RData", sep=""))
    print(paste(i, "done"))
  }}
  stopCluster(cl)
  }

ANNEX 3 - SessionInfo()

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8          LC_NUMERIC=C                 
##  [3] LC_TIME=en_US.UTF-8           LC_COLLATE=en_US.UTF-8       
##  [5] LC_MONETARY=en_US.UTF-8       LC_MESSAGES=en_US.UTF-8      
##  [7] LC_PAPER=en_US.UTF-8          LC_NAME=en_US.UTF-8          
##  [9] LC_ADDRESS=en_US.UTF-8        LC_TELEPHONE=en_US.UTF-8     
## [11] LC_MEASUREMENT=en_US.UTF-8    LC_IDENTIFICATION=en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] dggridR_2.0.3       rnaturalearth_0.1.0 sf_0.7-4           
##  [4] elevatr_0.2.0       rworldmap_1.3-6     raster_3.0-7       
##  [7] rgeos_0.5-2         rgdal_1.4-3         sp_1.4-1           
## [10] kableExtra_1.1.0    knitr_1.28          xlsx_0.6.3         
## [13] viridis_0.5.1       viridisLite_0.3.0   forcats_0.5.0      
## [16] stringr_1.4.0       dplyr_0.8.5         purrr_0.3.3        
## [19] readr_1.3.1         tidyr_1.0.2         tibble_2.1.3       
## [22] ggplot2_3.3.0       tidyverse_1.3.0    
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.1         maps_3.3.0         jsonlite_1.6.1     dotCall64_1.0-0   
##  [5] modelr_0.1.6       assertthat_0.2.1   highr_0.8          xlsxjars_0.6.1    
##  [9] cellranger_1.1.0   yaml_2.2.1         pillar_1.4.2       backports_1.1.5   
## [13] lattice_0.20-40    glue_1.3.1         digest_0.6.23      rvest_0.3.5       
## [17] colorspace_1.4-1   htmltools_0.4.0    pkgconfig_2.0.3    broom_0.5.5       
## [21] haven_2.2.0        scales_1.1.0       webshot_0.5.2      farver_2.0.3      
## [25] generics_0.0.2     withr_2.1.2        cli_2.0.2          magrittr_1.5      
## [29] crayon_1.3.4       readxl_1.3.1       maptools_0.9-9     evaluate_0.14     
## [33] fs_1.3.2           fansi_0.4.1        nlme_3.1-145       class_7.3-15      
## [37] xml2_1.2.2         foreign_0.8-76     tools_3.6.3        hms_0.5.3         
## [41] lifecycle_0.2.0    munsell_0.5.0      reprex_0.3.0       e1071_1.7-3       
## [45] compiler_3.6.3     rlang_0.4.4        units_0.6-5        classInt_0.4-2    
## [49] grid_3.6.3         rstudioapi_0.11    spam_2.5-1         rmarkdown_2.1     
## [53] gtable_0.3.0       codetools_0.2-16   DBI_1.1.0          R6_2.4.1          
## [57] gridExtra_2.3      lubridate_1.7.4    rworldxtra_1.01    KernSmooth_2.23-16
## [61] rJava_0.9-11       stringi_1.4.6      Rcpp_1.0.3         fields_10.3       
## [65] vctrs_0.2.3        dbplyr_1.4.2       tidyselect_1.0.0   xfun_0.12