Timestamp: Fri Mar 13 13:56:17 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0
This report documents the construction of the header file for sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(viridis)
library(readr)
library(xlsx)
library(knitr)
library(kableExtra)
## Spatial packages
library(rgdal)
library(sp)
library(rgeos)
library(raster)
library(rworldmap)
library(elevatr)
library(sf)
library(rnaturalearth)
library(dggridR)
#save temporary files
write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
# escape all double quotation marks. Run in Linux terminal
#sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv
#more general alternative in case some " are already escaped
##first removing \s before all "s, and then adding \ before all ":
#sed 's/\([^\\]\)"/\1\\\"/g; s/"/\\"/g'
Import header data
header0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_header_test.csv", locale = locale(encoding = 'UTF-8'),
delim="\t", col_types=cols(
PlotObservationID = col_double(),
PlotID = col_double(),
`TV2 relevé number` = col_double(),
Country = col_factor(),
`Cover abundance scale` = col_factor(),
`Date of recording` = col_date(format="%d-%m-%Y"),
`Relevé area (m²)` = col_double(),
`Altitude (m)` = col_double(),
`Aspect (°)` = col_double(),
`Slope (°)` = col_double(),
`Cover total (%)` = col_double(),
`Cover tree layer (%)` = col_double(),
`Cover shrub layer (%)` = col_double(),
`Cover herb layer (%)` = col_double(),
`Cover moss layer (%)` = col_double(),
`Cover lichen layer (%)` = col_double(),
`Cover algae layer (%)` = col_double(),
`Cover litter layer (%)` = col_double(),
`Cover open water (%)` = col_double(),
`Cover bare rock (%)` = col_double(),
`Height (highest) trees (m)` = col_double(),
`Height lowest trees (m)` = col_double(),
`Height (highest) shrubs (m)` = col_double(),
`Height lowest shrubs (m)` = col_double(),
`Aver. height (high) herbs (cm)` = col_double(),
`Aver. height lowest herbs (cm)` = col_double(),
`Maximum height herbs (cm)` = col_double(),
`Maximum height cryptogams (mm)` = col_double(),
`Mosses identified (y/n)` = col_factor(),
`Lichens identified (y/n)` = col_factor(),
COMMUNITY = col_character(),
SUBSTRATE = col_character(),
Locality = col_character(),
ORIG_NUM = col_double(),
ALLIAN_REV = col_character(),
REV_AUTHOR = col_character(),
Forest = col_logical(),
Grassland = col_logical(),
Wetland = col_logical(),
`Sparse vegetation` = col_logical(),
Shrubland = col_logical(),
`Plants recorded` = col_factor(),
`Herbs identified (y/n)` = col_factor(),
Naturalness = col_factor(),
EUNIS = col_factor(),
Longitude = col_double(),
Latitude = col_double(),
`Location uncertainty (m)` = col_double(),
Dataset = col_factor(),
GUID = col_character()
)) %>%
rename(Sparse.vegetation=`Sparse vegetation`,
ESY=EUNIS) %>%
dplyr::select(-COMMUNITY, -ALLIAN_REV, -REV_AUTHOR, -SUBSTRATE) %>% #too sparse information to be useful
dplyr::select(-PlotID) #identical to PlotObservationID
The following column names occurred in the header of sPlot v2.1 and are currently missing from the header of v3.0
1. Syntaxon
2. Cover cryptogams (%)
3. Cover bare soil (%)
4. is.forest
5. is.non.forest
6. EVA
7. Biome
8. BiomeID
9. CONTINENT
10. POINT_X
11. POINT_Y
~~ Columns #1, #2, #3, #10, #11 will be dropped. The others will be derived below.
There are 2020 plots in the Nile dataset without spatial coordinates. Assign manually with wide (90km) location uncertainty.
header <- header0 %>%
mutate(Latitude=replace(Latitude,
list=(is.na(Latitude) & Dataset=="Egypt Nile delta"),
values=30.917351)) %>%
mutate(Longitude=replace(Longitude,
list=(is.na(Longitude) & Dataset=="Egypt Nile delta"),
values=31.138534)) %>%
mutate(`Location uncertainty (m)`=replace(`Location uncertainty (m)`,
list=(is.na(`Location uncertainty (m)`) & Dataset=="Egypt Nile delta"),
values=-90000))
There are two plots in the Romania Grassland Databse
and ~4442 plots in the Japan
database, whose lat\long are inverted. Correct.
toswap <- c(which(header$Dataset=="Japan" & header$Latitude>90),
which(header$Dataset=="Romania Grassland Database" & header$Longitude>40))
header[toswap, c("Latitude", "Longitude")] <- header[toswap, c("Longitude", "Latitude")]
There are 237563 plots without location uncertainty. As a first approximation, we assign the median of the respective dataset, as a negative value to indicate this is an estimation, rather than a measure.
header <- header %>%
left_join(header %>%
group_by(Dataset) %>%
summarize(loc.uncer.median=median(`Location uncertainty (m)`, na.rm=T)),
by="Dataset") %>%
mutate(`Location uncertainty (m)`=ifelse( is.na(`Location uncertainty (m)` & !is.na(Latitude)),
-abs(loc.uncer.median),
`Location uncertainty (m)`)) %>%
dplyr::select(-loc.uncer.median)
There are still 91960 plots with no estimation of location uncertainty.
Assign plot size to plots in the Patagonia dataset (input of Ana Cingolani)
header <- header %>%
mutate(`Relevé area (m²)`=ifelse( (Dataset=="Patagonia" & is.na(`Relevé area (m²)`)),
-900, `Relevé area (m²)`))
Fill out the columns Forest:Sparse.vegetation
with NAs, where necessary. Create columns is.forest
and is.non.forest
using script developed for sPlot 2.1
~~ I am not assigning plots to Faber-Langedon formation at this stage, as this is only possible for European plots having an ESY classification.
eunis.key <- read.xlsx("../_input/EUNIS_WFT.xlsx", sheetIndex = "Sheet1", endRow = 246) %>%
dplyr::select(EUNIS_code, NATURALNESS:SPARSE_VEG) %>%
mutate(EUNIS_code=as.character(EUNIS_code)) %>%
rename(ESY=EUNIS_code,
Naturalness=NATURALNESS,
Forest=FOREST,
Shrubland=SCRUBLAND,
Grassland=GRASSLAND,
Wetland=WETLAND,
Sparse.vegetation=SPARSE_VEG)#,
header <- header %>% # header.backup %>%
mutate(ESY=as.character(ESY)) %>%
#mutate(ESY=ifelse(ESY=="?", NA, ESY)) %>%
# Systematically assign some databases to forest
mutate(Forest=ifelse(Dataset %in%
c("Turkey Oak_Forest Database", "Turkey Forest Database", "Chile_forest", "Ethiopia"),
T, Forest)) %>%
#fill up with F those rows where at least one column on formation is assigned
rowwise() %>%
mutate(Any=any(Forest, Shrubland, Grassland, Wetland, Sparse.vegetation)) %>%
mutate(Forest=ifelse( (is.na(Forest) & Any), F, Forest)) %>%
mutate(Shrubland=ifelse( (is.na(Shrubland) & Any), F, Shrubland)) %>%
mutate(Grassland=ifelse( (is.na(Grassland) & Any), F, Grassland)) %>%
mutate(Wetland=ifelse( (is.na(Wetland) & Any), F, Wetland)) %>%
mutate(Sparse.vegetation=ifelse( (is.na(Sparse.vegetation) & Any), F, Sparse.vegetation)) %>%
ungroup() %>%
dplyr::select(-Any) %>%
mutate_at(vars(Forest:Shrubland), .funs=list(~.*1)) %>%
mutate(Naturalness=as.numeric(as.character(Naturalness))) %>%
##join and coalesce with eunis.key
left_join(eunis.key %>%
distinct(), by = "ESY") %>%
mutate(
Forest = dplyr:::coalesce(Forest.x, Forest.y),
Shrubland = coalesce(Shrubland.x, Shrubland.y),
Grassland = coalesce(Grassland.x, Grassland.y),
Wetland = coalesce(Wetland.x, Wetland.y),
Sparse.vegetation = coalesce(Sparse.vegetation.x, Sparse.vegetation.y),
Naturalness = coalesce(Naturalness.x, Naturalness.y)
) %>%
dplyr::select(-ends_with(".x"), -ends_with(".y"))
Reduce number of factor levels for the column Plants recorded
header <- header %>%
mutate(`Plants recorded`=fct_recode(`Plants recorded`,
"All vascular plants"="complete vegetation",
"All vascular plants"="Complete vegetation",
"All vascular plants"="all vascular plants",
"All vascular plants"="complete",
"All vascular plants"="Complete vegetation (including non-terricolous tax",
"All vascular plants"="Vascular plants",
"All woody plants"="Woody plants",
"All woody plants"="All woody species",
"Woody plants >= 10 cm dbh"= "trees>=10cm dbh",
"All trees & dominant understory"="All trees & dominant shrubs",
"Woody plants >= 1 cm dbh" = "Plants >= 1 cm dbh"
)) %>%
mutate(`Plants recorded`=factor(`Plants recorded`, exclude = "#N/A"))
Align consortium labels to those in sPlot’s consortium archive
databases <- read_csv("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv")
header <- header %>%
mutate(Dataset=fct_recode(Dataset,
"BIOTA_South_Africa" = "BIOTA_South_Africa_3",
"Kriti"="Cyprus_Bergmeier",
"European Boreal Forest Database"="European Boreal Forest Database 1",
"European Boreal Forest Database"="European Boreal Forest Database 2",
"European Coastal Vegetation Database"= "European Coastal Vegetation Database-A",
"Germany_vegetweb"="Germany_vegetweb2",
"Germany_vegetweb"="Germany_vegetweb3",
"Ladakh"="Ladakh_2",
"Netherlands"="Netherlands Military sites",
"NSW Australia" = "NSW Austalia",
)) %>%
left_join(databases %>%
dplyr::select(`GIVD ID`, label) %>%
rename(Dataset=label),
by="Dataset")
Create spatial point dataframe for sPlot data to intersect with spatial layers
header.shp <- header %>%
filter(!is.na(Longitude) | !is.na(Latitude))
header.shp <- SpatialPointsDataFrame(coords= header.shp %>%
dplyr::select(Longitude, Latitude),
proj4string = CRS("+init=epsg:4326"),
data=data.frame(PlotObservationID= header.shp$PlotObservationID,
loc.uncert=header.shp$`Location uncertainty (m)`,
`GIVD ID`=header.shp$`GIVD ID`))
writeOGR(header.shp, dsn="../_derived/", layer="header.shp", driver="ESRI Shapefile", overwrite_layer = T)
## Warning in writeOGR(header.shp, dsn = "../_derived/", layer = "header.shp", :
## Field names abbreviated for ESRI Shapefile driver
Download and manipulate map of continents
sPDF <- rworldmap::getMap(resolution="coarse")
continent <- sPDF[,"continent"]
crs(continent) <- CRS("+init=epsg:4326")
continent@data[243,"continent"] <- "South America" ## Manually correct missing data
# create clipped version of continent to avoid going beyond 180 lON
coords <- data.frame(x=c(-180,180,180,-180),
y=c(-90,-90,90,90))
bboxc = Polygon(coords)
bboxc = SpatialPolygons(list(Polygons(list(bboxc), ID = "a")), proj4string=crs(continent))
continent_clipped <- gIntersection(continent[-137,], bboxc, byid=T) # polygon 137 gives problems... workaround
## same but high resolution (slower, but works better for plots along coastlines)
sPDF <- rworldmap::getMap(resolution="high")
continent.high <- sPDF[,"continent"]
crs(continent.high) <- CRS("+init=epsg:4326")
continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean")
Assign plots to continent
continent.out <- sp::over(x=header.shp, y=continent)
#overlay unassigned points to the high resolution layer of continent
toassign <- header.shp[which(is.na(continent.out$continent)),] #154782 remain to assign
crs(toassign) <- crs(continent)
continent.out2 <- sp::over(x=toassign, y=continent.high)
#merge first and second overlay
continent.out$continent[is.na(continent.out$continent)] <- continent.out2$continent
#correct unassigned points to closest continent
toassign <- header.shp[which(is.na(continent.out$continent)),] #47610 remain to assign
crs(toassign) <- crs(continent)
#go parallel
ncores=8
library(parallel)
library(doParallel)
cl <- makeForkCluster(ncores, outfile="" )
registerDoParallel(cl)
nearestContinent <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% {
nearestContinent.tmp <- geosphere::dist2Line(toassign[i,], continent_clipped)
}
continent.out$continent[which(is.na(continent.out$continent))] <- as.character(continent[-137,]@data[nearestContinent[,"ID"],])
save(continent.out, file = "../_derived/continent.out")
stopCluster(cl)
Reload, manipulate continent and attach to header
load("../_derived/continent.out")
header <- header %>%
left_join(header %>%
filter(!is.na(Longitude) | !is.na(Latitude)) %>%
dplyr::select(PlotObservationID) %>%
bind_cols(continent.out),
by="PlotObservationID") %>%
mutate(CONTINENT=factor(continent,
levels=c("Africa", "Antarctica", "Australia", "Eurasia", "North America", "South America"),
labels=c("AF", "AN", "AU", "EU", "NA", "SA"))) %>%
dplyr::select(-continent)
Summarize
## Warning: Factor `CONTINENT` contains implicit NA, consider using
## `forcats::fct_explicit_na`
CONTINENT | num.plot |
---|---|
AF | 26803 |
AN | 19 |
AU | 66857 |
EU | 1776535 |
NA | 90177 |
SA | 15844 |
NA | 2451 |
Performed in EVE HPC cluster using function A98_PredictorsExtract.R
. Divided in 99 chunks.
sBiomes <- readOGR("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp")
crs(sBiomes) <- crs(header.shp)
cl <- makeForkCluster(5, outfile="")
registerDoParallel(cl)
clusterEvalQ(cl, {
library(rgdal)
library(raster)
library(sp)
library(elevatr)
library(dplyr)
})
sBiomes.out <- foreach(i=1:99, .combine=rbind) %dopar% {
source("A98_PredictorsExtract.R")
PredExtr(header.shp, myfunction=NA, output="../_derived/sBiomes/",
toextract=sBiomes, typp="shp", ncores=1, chunkn=99, chunk.i=i)
}
stopCluster(cl)
Reimport output and join to header
sBiome.files <- list.files("../_derived/sBiomes", pattern="sBiomes-[0-9]+.csv", full.names=T)
sBiomes.out <- do.call(rbind, lapply(sBiome.files, function(x) {read_csv(x)}))
header <- header %>%
left_join(sBiomes.out %>%
rename(PlotObservationID=X1) %>%
dplyr::select(PlotObservationID, Name, BiomeID) %>%
dplyr::rename(sBiome=Name, sBiomeID=BiomeID),
by="PlotObservationID")
There are 11849 unassigned plots.
Summarize:sBiome | num.plot |
---|---|
Alpine | 37851 |
Boreal zone | 32526 |
Dry midlatitudes | 69887 |
Dry tropics and subtropics | 38808 |
Polar and subpolar zone | 7952 |
Subtropics with winter rain | 191623 |
Subtrop. with year-round rain | 73244 |
Temperate midlatitudes | 1488121 |
Tropics with summer rain | 15781 |
Tropics with year-round rain | 11044 |
NA | 11849 |
Extract ecoregion name and ID from Ecoregions of the World. Olson et al. 2001 (BioScience).
Computation was performed in EVE HPC cluster using function A98_PredictorsExtract.R
. Divided in 99 chunks.
ecoreg <- readOGR("../sPlot/_Ancillary/official", layer="wwf_terr_ecos")
ecoreg@data <- ecoreg@data %>%
dplyr::select(OBJECTID, ECO_NAME, REALM, BIOME, ECO_NUM, ECO_ID, eco_code)
cl <- makeForkCluster(5, outfile="")
registerDoParallel(cl)
clusterEvalQ(cl, {
library(rgdal)
library(raster)
library(sp)
library(elevatr)
library(dplyr)
})
ecoreg.out <- foreach(i=1:99, .combine=rbind) %dopar% {
source("A98_PredictorsExtract.R")
PredExtr(header.shp, myfunction=NA, output="../_derived/wwf_ecoregions/",
toextract=ecoreg, typp="shp", ncores=1, chunkn=99, chunk.i=i)
}
stopCluster(cl)
Reimport output and join to header
ecoreg.files <- list.files("../_derived/wwf_ecoregions/", pattern="wwf_terr_ecos-[0-9]+.csv", full.names=T)
ecoreg.out <- do.call(rbind, lapply(ecoreg.files, function(x) {read_csv(x,
col_types=cols(
.default = col_double(),
ECO_NAME = col_character(),
REALM = col_character(),
G200_REGIO = col_character(),
eco_code = col_character()))}))
header <- header %>%
left_join(ecoreg.out %>%
rename(PlotObservationID=X1) %>%
dplyr::select(PlotObservationID, ECO_NAME, ECO_ID) %>%
dplyr::rename(Ecoregion=ECO_NAME, EcoregionID=ECO_ID),
by="PlotObservationID")
There are 0 unassigned plots.
Summarize:Ecoregion | num.plot |
---|---|
Atlantic mixed forests | 349664 |
Western European broadleaf forests | 254820 |
Baltic mixed forests | 186551 |
Central European mixed forests | 126733 |
Pannonian mixed forests | 96371 |
Alps conifer and mixed forests | 74662 |
Carpathian montane forests | 68357 |
Celtic broadleaf forests | 62756 |
Sarmatic mixed forests | 36093 |
Northeastern Spain and Southern France Mediterranean forests | 33345 |
Taiheiyo evergreen forests | 31241 |
English Lowlands beech forests | 30724 |
Balkan mixed forests | 30047 |
Italian sclerophyllous and semi-deciduous forests | 27913 |
Cantabrian mixed forests | 26043 |
Pyrenees conifer and mixed forests | 24529 |
Tyrrhenian-Adriatic Sclerophyllous and mixed forests | 19885 |
Iberian sclerophyllous and semi-deciduous forests | 18182 |
East European forest steppe | 16745 |
Great Basin shrub steppe | 15584 |
Dinaric Mountains mixed forests | 14846 |
Pontic steppe | 14294 |
Nihonkai montane deciduous forests | 14022 |
Caspian lowland desert | 14006 |
Rodope montane mixed forests | 14004 |
Eastern Australian temperate forests | 13988 |
Scandinavian and Russian taiga | 13391 |
NA | 12196 |
Southeast Australia temperate savanna | 11382 |
North Atlantic moist mixed forests | 10567 |
Extract elevation for each plot. Loops over tiles of 1 x 1°, projects to mercator, and extract elevation for plot coordinates, as well as 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package elevatr, which uses the Terrain Tiles on Amazon Web Services. Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described here.
Split data into tiles of 1 x 1 degrees, and create sp::SpatialPointsDataFrame
files. Only for plots having a location uncertainty < 50 km.
header.tiles <- header %>%
dplyr::select(PlotObservationID, Dataset, Longitude, Latitude, `Location uncertainty (m)`) %>%
mutate(`Location uncertainty (m)`=abs(`Location uncertainty (m)`)) %>%
filter(`Location uncertainty (m)`<= 50000) %>%
mutate_at(.vars=vars(Longitude, Latitude),
.funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>%
filter(!is.na(Longitude_tile) & !is.na(Latitude_tile) ) %>%
mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile)))
There are 1581734 plots out of 1978686 plots with Location uncertainty <= 50km (or absent). The total number of tiles is 30123.
Performed in EVE HPC cluster using function A97_ElevationExtract.R
. Divided in 99 chunks.
cl <- makeForkCluster(5, outfile="")
registerDoParallel(cl)
clusterEvalQ(cl, {
library(rgdal)
library(raster)
library(sp)
library(elevatr)
library(dplyr)})
# Divided in 99 chunks
elevation.out <- foreach(i=1:99, .combine=rbind) %dopar% {
source("A97_ElevationExtract.R")
ElevationExtract(header.shp, output, ncores=1, chunk.i=1)}
stopCluster(cl)
For those tiles that failed, extract elevation of remaining plots one by one
#create list of tiles for which dem could not be extracted
myfiles <- list.files("../_derived/elevatr/")
failed <- list.files("../_derived/elevatr/", pattern = "[A-Za-z]*_[0-9]+failed\\.RData$")
failed <- as.numeric(unlist(regmatches(failed, gregexpr("[[:digit:]]+", failed))))
#create SpatialPointsDataFrame
sp.tile0 <- SpatialPointsDataFrame(coords=header.tiles %>%
filter(tilenam %in% levels(header.tiles$tilenam)[failed]) %>%
dplyr::select(Longitude, Latitude),
data=header.tiles %>%
filter(tilenam %in% levels(header.tiles$tilenam)[failed]) %>%
dplyr::select(-Longitude, -Latitude),
proj4string = CRS("+init=epsg:4326"))
sp.tile0 <- spTransform(sp.tile0, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
+lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
+no_defs ")) #project to mercator
output.tile <- data.frame(NULL)
cl <- makeForkCluster(5, outfile="")
registerDoParallel(cl)
clusterEvalQ(cl, {
library(rgdal)
library(raster)
library(sp)
library(elevatr)
library(dplyr)})
#Loop over all plots
elevation.failed <- foreach(i=1:nrow(sp.tile0), .packages=c('raster'), .combine=rbind) %dopar% {
sp.tile <- sp.tile0[i,]
tryCatch(raster.tile <- get_elev_raster(sp.tile, z=10,
expand=max(sp.tile$`Location uncertainty (m)`)),
error = function(e){
print(paste("could not retrieve DEM for", sp.tile$PlotObservationID))}
)
if(!exists("raster.tile")) {
output.tile <- data.frame(PlotObservationID==sp.tile$PlotObservationID,
elevation=NA,
Elevation_q2.5=NA,
Elevation_median=NA,
Elevation_q97.5=NA,
DEM.res=NA)
return(output.tile)
} else {
# clip dem tile with continent shape
raster.tile <- mask(raster.tile, continent.high.merc)
#extract and summarize elevation data
elev.tile <- raster::extract(raster.tile, sp.tile, small=T)
elev.tile.buffer <- raster::extract(raster.tile, sp.tile,
buffer=sp.tile$`Location uncertainty (m)`, small=T)
elev.q95 <- t(round(mapply( quantile,
x=elev.tile.buffer,
probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), na.rm=T)))
output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID,
elevation=round(elev.tile),
elev.q95,
DEM.res=res(raster.tile)[1]) %>%
rename(Elevation_q2.5=X2.5., Elevation_median=X50., Elevation_q97.5=X97.5.)
return(output.tile)
}
}
stopCluster(cl)
save(elevation.failed, file = "../_derived/elevatr/elevation_missing.RData")
Compose tiles into a single output, and export
myfiles <- list.files("../_derived/elevatr/", pattern = "elevation_tile_[0-9]+\\.RData$", full.names = T)
#create empty data.frame
elevation.out <- matrix(NA, nrow=nrow(header.tiles), ncol=6)
elevation.out <- as.data.frame(elevation.out)
colnames(elevation.out) <- c("PlotObservationID", "elevation", "Elevation_q2.5", "Elevation_median", "Elevation_q97.5","DEM.res")
elevation.out$PlotObservationID <- header.tiles$PlotObservationID
tmp <- NULL
for(i in 1:length(myfiles)){
load(myfiles[i])
#attach results to empty data.frame
tmp <- bind_rows(tmp, output.tile)
if(i %in% seq(5000, length(myfiles), by=5000)){
mymatch <- base::match(x=tmp$PlotObservationID, table=elevation.out$PlotObservationID)
mymatch <- mymatch[!is.na(mymatch)]
elevation.out[mymatch,] <- tmp
tmp <- NULL
print(paste("Attached first", i, "files"))
}
if(i %in% seq(1,length(myfiles), by=250)){print(i)}
}
load(file = "../_derived/elevatr/elevation_missing.RData")
mymatch <- base::match(x=elevation.failed$PlotObservationID, table=elevation.out$PlotObservationID)
mymatch <- mymatch[!is.na(mymatch)]
elevation.out[mymatch,] <- elevation.failed
write_csv(elevation.out, path ="../_derived/elevatr/elevation.out.csv")
Reimport output, attach to header and check
elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv")
PlotObservationID | elevation | Elevation_q2.5 | Elevation_median | Elevation_q97.5 | DEM.res |
---|---|---|---|---|---|
937780 | 140 | 140 | 140 | 140 | 153 |
767019 | 252 | 252 | 252 | 252 | 153 |
1440522 | 821 | 821 | 821 | 821 | 153 |
619891 | 315 | 315 | 315 | 315 | 153 |
718496 | 1047 | 1047 | 1047 | 1047 | 153 |
832178 | 620 | 620 | 620 | 620 | 153 |
228243 | 390 | 390 | 390 | 390 | 153 |
1784946 | 201 | 201 | 201 | 201 | 153 |
1712313 | 2283 | 2283 | 2283 | 2283 | 153 |
1226300 | NA | NA | NA | NA | 153 |
summary(elevation.out %>%
dplyr::select(-PlotObservationID, -elevation))
## Elevation_q2.5 Elevation_median Elevation_q97.5 DEM.res
## Min. :-1018.0 Min. :-474.0 Min. :-474 Min. : 76.4
## 1st Qu.: 17.0 1st Qu.: 21.0 1st Qu.: 26 1st Qu.:153.0
## Median : 163.0 Median : 178.0 Median : 194 Median :153.0
## Mean : 441.6 Mean : 469.4 Mean : 506 Mean :153.1
## 3rd Qu.: 593.0 3rd Qu.: 644.0 3rd Qu.: 698 3rd Qu.:153.0
## Max. : 6155.0 Max. :6155.0 Max. :6155 Max. :306.0
## NA's :104818 NA's :104818 NA's :104818 NA's :3082
There are 104818 plots without elevation info, corresponding to 5.3% of total.
There are 51389 plots with elevation below sea level.
Join elevation data (only median)
header <- header %>%
left_join(elevation.out %>%
dplyr::select(PlotObservationID, Elevation_median) %>%
rename(elevation_dem=Elevation_median) %>%
distinct(PlotObservationID, .keep_all=T),
by="PlotObservationID")
Summary and check
GIVD ID | Dataset | num.plot |
---|---|---|
EU-GB-001 | Britain_nvcd | 67 |
AS-CN-007 | China_Northern_Mountains_2 | 15 |
AU-AU-003 | NSW Australia | 2 |
EU-00-004e | Spain_sivim_sclerophyllous | 4 |
EU-00-016 | Ammophiletea Database | 118 |
EU-RU-003 | Russia_lysenko | 159 |
EU-IT-010 | Italy_HabItAlp | 5 |
EU-HR-001 | Croatia_mix | 95 |
NA-US-014 | Aava | 21 |
EU-00-027 | European Boreal Forest Database | 3 |
Create Scatterplot between measured elevation in the field, and elevation derived from DEM
#join measured and derived elevation
mydata <- header %>%
dplyr::select(PlotObservationID, `Altitude (m)`, elevation_dem) %>%
filter(!is.na(`Altitude (m)`) & !is.na(elevation_dem)) %>%
rename(elevation_measured=`Altitude (m)`)
ggplot(data=mydata) +
geom_point(aes(x=elevation_measured, y=elevation_dem), alpha=1/10, cex=0.8) +
theme_bw() +
geom_abline(slope=0, intercept=0, col=2, lty=2) +
geom_abline(slope=1, intercept=1, col="Dark green")
countries <- ne_countries(returnclass = "sf") %>%
st_transform(crs = "+proj=eck4") %>%
st_geometry()
graticules <- ne_download(type = "graticules_15", category = "physical",
returnclass = "sf") %>%
st_transform(crs = "+proj=eck4") %>%
st_geometry()
## OGR data source with driver: ESRI Shapefile
## Source: "/data/sPlot/users/Francesco/_tmp/Rtmpi0p64q", layer: "ne_110m_graticules_15"
## with 35 features
## It has 5 fields
## Integer64 fields read as strings: degrees scalerank
bb <- ne_download(type = "wgs84_bounding_box", category = "physical",
returnclass = "sf") %>%
st_transform(crs = "+proj=eck4") %>%
st_geometry()
## OGR data source with driver: ESRI Shapefile
## Source: "/data/sPlot/users/Francesco/_tmp/Rtmpi0p64q", layer: "ne_110m_wgs84_bounding_box"
## with 1 features
## It has 2 fields
## basic graph of the world in Eckert projection
w3a <- ggplot() +
geom_sf(data = bb, col = "grey20", fill = "white") +
geom_sf(data = graticules, col = "grey20", lwd = 0.1) +
geom_sf(data = countries, fill = "grey90", col = NA, lwd = 0.3) +
coord_sf(crs = "+proj=eck4") +
theme_minimal() +
theme(axis.text = element_blank(),
legend.title=element_text(size=12),
legend.text=element_text(size=12),
legend.background = element_rect(size=0.1, linetype="solid", colour = 1),
legend.key.height = unit(1.1, "cm"),
legend.key.width = unit(1.1, "cm")) +
scale_fill_viridis()
Graph of plot density (hexagons)
header2 <- header %>%
filter(!is.na(Longitude) | !is.na(Latitude)) %>%
dplyr::select(PlotObservationID, Latitude, Longitude, `GIVD ID`) %>%
filter(!(abs(Longitude) >171 & abs(Latitude>70)))
dggs <- dgconstruct(spacing=300, metric=T, resround='down')
## Resolution: 6, Area (km^2): 69967.8493448681, Spacing (km): 261.246386348549, CLS (km): 298.479323187169
#Get the corresponding grid cells for each earthquake epicenter (lat-long pair)
header2$cell <- dgGEO_to_SEQNUM(dggs, header2$Longitude, header2$Latitude)$seqnum
#Calculate number of plots for each cell
header.out <- header2 %>%
group_by(cell) %>%
summarise(value.out=log(n(), 10))
#Get the grid cell boundaries for cells
grid <- dgcellstogrid(dggs, header.out$cell, frame=F) %>%
st_as_sf() %>%
mutate(cell = header.out$cell) %>%
mutate(value.out=header.out$value.out) %>%
st_transform("+proj=eck4") %>%
st_wrap_dateline(options = c("WRAPDATELINE=YES"))
## plotting
legpos <- c(0.160, .24)
(w3 <- w3a +
geom_sf(data=grid, aes(fill=value.out),lwd=0, alpha=0.9) +
geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) +
scale_fill_viridis(
name="# plots", breaks=0:5, labels = c("1", "10", "100",
"1,000", "10,000", "100,000"), option="viridis" ) +
#labs(fill="# plots") +
theme(legend.position = legpos +c(-0.06, 0.25))
)
ggsave(filename="../_pics/PlotDensityLog10_vir.png", width = 15, height = 7, units="in", dpi=300, plot=w3)
Graph of plot location by Dataset
header.sf <- header.shp %>%
st_as_sf() %>%
st_transform(crs = "+proj=eck4")
(w4 <- w3a +
geom_sf(data=header.sf %>%
mutate(GIVD.ID=fct_shuffle(GIVD.ID)), aes(col=factor(GIVD.ID)), pch=16, size=0.8, alpha=0.6) +
geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) +
theme(legend.position = "none"))
ggsave(filename="../_pics/PlotDistrib_Dark2_shuffle1984.png", width = 15, height = 7, units="in", dpi=300, plot=w4) ## takes ~40' to render
#check
nrow(header)==nrow(header0)
## [1] TRUE
header <- header %>%
dplyr::select(
#Metadata
PlotObservationID, Dataset, "GIVD ID", "TV2 relevé number", "ORIG_NUM", "GUID",
#Geographic info
Longitude:"Location uncertainty (m)", Country, CONTINENT, sBiome, sBiomeID, Locality,
#Methodological info
"Relevé area (m²)", "Cover abundance scale", "Date of recording", "Plants recorded",
"Herbs identified (y/n)","Mosses identified (y/n)","Lichens identified (y/n)",
#Topographical
elevation_dem, "Altitude (m)", "Aspect (°)", "Slope (°)",
#Vegetation type
Forest:Naturalness, ESY,
#Vegetation structure
"Cover total (%)":"Maximum height cryptogams (mm)")
save(header, file = "../_output/header_sPlot3.0.RData")
PlotObservationID | Dataset | GIVD ID | TV2 relevé number | ORIG_NUM | GUID | Longitude | Latitude | Location uncertainty (m) | Country | CONTINENT | sBiome | sBiomeID | Locality | Relevé area (m²) | Cover abundance scale | Date of recording | Plants recorded | Herbs identified (y/n) | Mosses identified (y/n) | Lichens identified (y/n) | elevation_dem | Altitude (m) | Aspect (°) | Slope (°) | Forest | Shrubland | Grassland | Wetland | Sparse.vegetation | Naturalness | ESY | Cover total (%) | Cover tree layer (%) | Cover shrub layer (%) | Cover herb layer (%) | Cover moss layer (%) | Cover lichen layer (%) | Cover algae layer (%) | Cover litter layer (%) | Cover open water (%) | Cover bare rock (%) | Height (highest) trees (m) | Height lowest trees (m) | Height (highest) shrubs (m) | Height lowest shrubs (m) | Aver. height (high) herbs (cm) | Aver. height lowest herbs (cm) | Maximum height herbs (cm) | Maximum height cryptogams (mm) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | Albanian Vegetation Database | EU-AL-001 | 1 | NA | 2089C3E0-6A5C-4D57-B83B-C651D7BADEE0 | 19.39064 | 41.86004 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 3 | Braun/Blanquet (new) | 2012-06-26 | NA | NA | N | N | 2 | 2 | NA | NA | 0 | 0 | 0 | 1 | 0 | 1 | C51a | NA | NA | NA | 90 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 150 | NA | NA | NA |
2 | Albanian Vegetation Database | EU-AL-001 | 2 | NA | DD8ECD2E-BE9A-4F6F-8B1A-E5428EFAE462 | 19.39028 | 41.85990 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 60 | Braun/Blanquet (new) | 2012-06-26 | NA | NA | N | N | 2 | 2 | NA | NA | NA | NA | NA | NA | NA | 1 | ? | NA | NA | NA | 90 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 160 | NA | NA | NA |
3 | Albanian Vegetation Database | EU-AL-001 | 3 | NA | 8F0D02F1-5DF1-40B0-AEBF-520B49FA54DC | 19.37799 | 41.84715 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 30 | Braun/Blanquet (new) | 2012-06-26 | NA | NA | N | N | NA | NA | 160 | 4 | 0 | 0 | 1 | 0 | 0 | 1 | E | NA | NA | NA | 45 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 30 | NA | NA | NA |
4 | Albanian Vegetation Database | EU-AL-001 | 4 | NA | EC1B5E84-2B62-4FDA-B2A9-9D8E0B7A62B0 | 19.37800 | 41.84716 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 10 | Braun/Blanquet (new) | 2012-06-26 | NA | NA | N | N | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 | ? | NA | NA | NA | 50 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 20 | NA | NA | NA |
5 | Albanian Vegetation Database | EU-AL-001 | 5 | NA | 73CB0497-09D0-4DD8-A9BA-102C2A03931D | 19.38444 | 41.86112 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 80 | Braun/Blanquet (new) | 2012-06-26 | NA | NA | N | N | 0 | 0 | NA | NA | 0 | 0 | 1 | 0 | 0 | 1 | E | NA | 85 | 5 | 80 | NA | NA | NA | NA | NA | NA | 10 | NA | 20 | NA | 150 | NA | NA | NA |
6 | Albanian Vegetation Database | EU-AL-001 | 6 | NA | BF2B97B9-ADDB-4E34-8B9B-08CB40392484 | 19.43621 | 41.92741 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 30 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 0 | 2 | NA | NA | 0 | 0 | 0 | 1 | 0 | 1 | C12b | NA | NA | NA | 80 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
7 | Albanian Vegetation Database | EU-AL-001 | 7 | NA | DD3CACE3-AE62-4547-8718-C36EF8905E07 | 19.43625 | 41.92736 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 2 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 0 | 2 | NA | NA | 0 | 0 | 1 | 0 | 0 | 1 | E34b | NA | NA | NA | 98 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2 | NA | NA | NA |
8 | Albanian Vegetation Database | EU-AL-001 | 8 | NA | B291F31F-3B13-4E4D-A7D2-72CE3BA396E9 | 19.43642 | 41.92744 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 30 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 0 | 2 | NA | NA | 0 | 0 | 0 | 1 | 0 | 1 | C51a | NA | NA | NA | 100 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 200 | NA | NA | NA |
9 | Albanian Vegetation Database | EU-AL-001 | 9 | NA | E831B7FA-E2DC-40A9-889F-AEA43717DDF1 | 19.43650 | 41.92738 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 5 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 0 | 2 | NA | NA | 0 | 0 | 0 | 0 | 1 | 1 | C35e | NA | NA | NA | 100 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 20 | NA | NA | NA |
10 | Albanian Vegetation Database | EU-AL-001 | 10 | NA | 6B533118-D131-4094-964E-28F49257F3C4 | 19.43369 | 41.92656 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 15 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 0 | 1 | NA | NA | 0 | 0 | 1 | 0 | 0 | 2 | IE51 | NA | NA | NA | 100 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 170 | NA | NA | NA |
11 | Albanian Vegetation Database | EU-AL-001 | 11 | NA | 7B52E1CC-F89C-4790-AC4E-5781F912B4EE | 19.43363 | 41.92663 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | NA | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 0 | 2 | NA | NA | 0 | 0 | 0 | 1 | 0 | 1 | C12b | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
12 | Albanian Vegetation Database | EU-AL-001 | 12 | NA | 02DB1195-8269-4998-8A60-F7727EF90835 | 19.43156 | 41.92525 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 5 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 1 | 2 | NA | NA | 0 | 0 | 0 | 0 | 1 | 1 | C35e | NA | NA | NA | 100 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 20 | NA | NA | NA |
13 | Albanian Vegetation Database | EU-AL-001 | 13 | NA | 0909C93F-7D4A-4EE3-B8BD-9AFB7A3E9448 | 19.43181 | 41.92531 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 40 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 1 | 1 | NA | NA | 0 | 0 | 0 | 1 | 0 | 1 | C51a | NA | NA | NA | 100 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 200 | NA | NA | NA |
14 | Albanian Vegetation Database | EU-AL-001 | 14 | NA | B9E2E198-A5FD-4500-9840-AE151EFAD6CE | 19.43496 | 41.92724 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 20 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 0 | 0 | NA | NA | 0 | 0 | 1 | 0 | 0 | 2 | IE51 | NA | NA | NA | 100 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 180 | NA | NA | NA |
15 | Albanian Vegetation Database | EU-AL-001 | 15 | NA | E07F1B54-5A6D-44F3-B503-00E7F9840C69 | 19.44038 | 41.91412 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 5 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 10 | 8 | 40 | 3 | 0 | 0 | 1 | 0 | 0 | 1 | IE51 | NA | NA | NA | 90 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 20 | NA | NA | NA |
16 | Albanian Vegetation Database | EU-AL-001 | 16 | NA | 5CDF9483-6D2A-4A47-ADCD-59D5F405BE2F | 19.44207 | 41.91515 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 50 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 15 | 9 | 20 | 10 | 0 | 1 | 0 | 0 | 0 | 1 | F31e | NA | NA | 100 | 5 | NA | NA | NA | NA | NA | NA | NA | NA | 25 | NA | NA | NA | NA | NA |
17 | Albanian Vegetation Database | EU-AL-001 | 17 | NA | CE9629F3-6FD8-4778-8DAE-DA9C95B65EEE | 19.44046 | 41.91133 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 50 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 22 | 16 | NA | NA | 1 | 0 | 0 | 0 | 0 | 1 | G17a | NA | 90 | 15 | 10 | NA | NA | NA | NA | NA | 50 | 5 | NA | 20 | NA | NA | NA | NA | NA |
18 | Albanian Vegetation Database | EU-AL-001 | 18 | NA | EB1441EC-7B95-4806-819C-56BA8C50A7EC | 19.44037 | 41.91150 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 80 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 14 | 24 | NA | NA | 1 | 0 | 0 | 0 | 0 | 1 | G17a | NA | 100 | 1 | NA | NA | NA | NA | NA | NA | 5 | 6 | NA | 20 | NA | NA | NA | NA | NA |
19 | Albanian Vegetation Database | EU-AL-001 | 19 | NA | 19AB31F4-9039-4E18-A60F-BFC191EEC8E5 | 19.43523 | 41.90469 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 50 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 26 | 21 | 180 | 15 | 1 | 0 | 0 | 0 | 0 | 1 | G17a | NA | 100 | 90 | 1 | NA | NA | NA | NA | NA | NA | 8 | NA | 20 | NA | NA | NA | NA | NA |
20 | Albanian Vegetation Database | EU-AL-001 | 20 | NA | EBADE079-7015-4486-B1F7-72BDB235FE50 | 19.44371 | 41.89758 | 15 | Albania | EU | Subtropics with winter rain | 4 | Buna River Protected Landscape | 80 | Braun/Blanquet (new) | 2012-06-27 | NA | NA | N | N | 25 | 13 | 180 | 15 | 0 | 1 | 0 | 0 | 0 | 1 | Fa | NA | NA | 90 | NA | NA | NA | NA | NA | NA | NA | NA | NA | 17 | NA | NA | NA | NA | NA |
PredExtr
# define ancillary functions
robust.mean <- function(x){mean(x, na.rm=T)}
robust.mode <- function(x){if(any(x!=0)) {
a <- x[which(x!=0)] #exclude zero (i.e. NAs)
return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
return(NA)}
robust.sd <- function(x){sd(x, na.rm=T)}
#main function
PredExtr <- function(x.shp, myfunction=NA, output=NA,
toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
print("Load Packages")
require(foreach)
require(parallel)
require(doParallel)
require(raster)
require(rgeos)
require(rgdal)
print(paste("Loading", typp, "data :", toextract))
print(paste("output will be:", output))
if(typp=="raster"){ mypredictor <- raster(toextract)} else
mypredictor <- readOGR(toextract)
colnames(mypredictor@data)
header.shp <-readOGR(x.shp)
crs(mypredictor) <- crs(header.shp) #should be verified beforehand!
## Divide in chunks if requested
if(chunkn>1 & !is.na(chunk.i)){
print(paste("divide in chunks and run on chunk n.", chunk.i))
indices <- 1:length(header.shp)
chunks <- split(indices, sort(indices%%chunkn))
header.shp <- header.shp[chunks[[chunk.i]],]
}
print("myfunction defined as")
print(myfunction)
print("go parallel")
cl <- makeForkCluster(ncores, outfile="" )
registerDoParallel(cl)
print("start main foreach loop")
if(typp=="raster"){
out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% {
tmp <- raster::extract(mypredictor, header.shp[i,],
buffer=min(10000, max(250, header.shp@data$lc_ncrt[i])), fun=myfunction)
}
if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
} else {
out <- sp::over(x=header.shp, y=mypredictor)
toassign <- header.shp[which(is.na(out[,1])),]
print(paste(length(toassign), "plots not matched directely - seek for NN"))
if(length(toassign)>0){
nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% {
print(i)
nearest.tmp <- tryCatch(geosphere::dist2Line(header.shp[i,], mypredictor),
error = function(e){data.frame(distance=NA, lon=NA, lat=NA,ID=NA)}
)
#nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor)
return(nearest.tmp)
}
out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],]
}
if(!is.na(output)) {write.csv(out, file = output)} else{return(out)}
}
stopCluster(cl)
}
ElevationExtract
ElevationExtract <- function(header, output, ncores, chunk.i){
print("load packages")
require(tidyverse)
require(rgdal)
require(sp)
require(rgeos)
require(raster)
require(rworldmap)
require(elevatr)
require(parallel)
require(doParallel)
print("Import header and divide in tiles")
header.shp <- readOGR(header)
header.tiles <- header.shp@data %>%
bind_cols(as.data.frame(header.shp@coords)) %>%
rename(PlotObservationID=PltObID, Longitude=coords.x1, Latitude=coords.x2) %>%
mutate(lc_ncrt=abs(lc_ncrt)) %>%
filter(lc_ncrt <= 50000) %>%
mutate_at(.vars=vars(Longitude, Latitude),
.funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>%
mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile)))
print("Get continent")
sPDF <- rworldmap::getMap(resolution="high")
continent.high <- sPDF[,"continent"]
crs(continent.high) <- CRS("+init=epsg:4326")
continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean")
continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m
+nadgrids=@null +no_defs"))
print("go parallel")
cl <- makeForkCluster(ncores, outfile="")
registerDoParallel(ncores)
clusterEvalQ(cl, {
library(rgdal)
library(raster)
library(sp)
library(elevatr)
library(dplyr)
})
print("create list of tiles still to do")
myfiles <- list.files(path = output, pattern = "[A-Za-z]*_[0-9]+\\.RData$")
done <- NULL
done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles))))
todo <- 1:nlevels(header.tiles$tilenam)
if(length(done)>0) {todo <- todo[-which(todo %in% done)]}
todo <- sample(todo, replace=F)
print(paste(length(todo), "tiles to do"))
print("divide in chunks")
#divide in chunks
todo.chunks <- split(todo, sort(todo%%10))
print(paste("start main foreach loop on chunk n=", chunk.i))
print(paste(length(todo.chunks[[chunk.i]]), "to do"))
foreach(i = todo.chunks[[chunk.i]]) %dopar% {
print(paste("doing", i))
#create sp and project data
if(nrow(header.tiles %>%
filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next()
sp.tile <- SpatialPointsDataFrame(coords=header.tiles %>%
filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
dplyr::select(Longitude, Latitude),
data=header.tiles %>%
filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
dplyr::select(-Longitude, -Latitude),
proj4string = CRS("+init=epsg:4326"))
sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
+lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
+no_defs ")) #project to mercator
#retrieve dem raster
tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$lc_ncrt), clip="bbox"),
error = function(e){e}
)
if(!exists("raster.tile")) {
raster.tile <- NA
print(paste("tile", i, "doesn't work!, skip to next"))
save(raster.tile, file = paste(output, "elevation_tile_", i, "failed.RData", sep=""))
rm(raster.tile)
} else {
# clip dem tile with continent shape
raster.tile <- mask(raster.tile, continent.high.merc)
#extract and summarize elevation data
elev.tile <- raster::extract(raster.tile, sp.tile, small=T)
elev.tile.buffer <- raster::extract(raster.tile, sp.tile,
buffer=sp.tile@data$lc_ncrt,
small=T)
tmp <- round(mapply( quantile,
x=elev.tile.buffer,
#center=elev.tile,
probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)),
#lc_ncrt=sp.tile$lc_ncrt,
na.rm=T))
elev.q95 <- setNames(data.frame(matrix(tmp, ncol = 3, nrow = length(elev.tile.buffer))),
c("Elevation_q2.5", "Elevation_median", "Elevation_q97.5"))
output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID,
elevation=round(elev.tile),
elev.q95,
DEM.res=res(raster.tile)[1])
#save output
save(output.tile, file = paste(output, "elevation_tile_", i, ".RData", sep=""))
print(paste(i, "done"))
}}
stopCluster(cl)
}
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=en_US.UTF-8
## [9] LC_ADDRESS=en_US.UTF-8 LC_TELEPHONE=en_US.UTF-8
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dggridR_2.0.3 rnaturalearth_0.1.0 sf_0.7-4
## [4] elevatr_0.2.0 rworldmap_1.3-6 raster_3.0-7
## [7] rgeos_0.5-2 rgdal_1.4-3 sp_1.4-1
## [10] kableExtra_1.1.0 knitr_1.28 xlsx_0.6.3
## [13] viridis_0.5.1 viridisLite_0.3.0 forcats_0.5.0
## [16] stringr_1.4.0 dplyr_0.8.5 purrr_0.3.3
## [19] readr_1.3.1 tidyr_1.0.2 tibble_2.1.3
## [22] ggplot2_3.3.0 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.1 maps_3.3.0 jsonlite_1.6.1 dotCall64_1.0-0
## [5] modelr_0.1.6 assertthat_0.2.1 highr_0.8 xlsxjars_0.6.1
## [9] cellranger_1.1.0 yaml_2.2.1 pillar_1.4.2 backports_1.1.5
## [13] lattice_0.20-40 glue_1.3.1 digest_0.6.23 rvest_0.3.5
## [17] colorspace_1.4-1 htmltools_0.4.0 pkgconfig_2.0.3 broom_0.5.5
## [21] haven_2.2.0 scales_1.1.0 webshot_0.5.2 farver_2.0.3
## [25] generics_0.0.2 withr_2.1.2 cli_2.0.2 magrittr_1.5
## [29] crayon_1.3.4 readxl_1.3.1 maptools_0.9-9 evaluate_0.14
## [33] fs_1.3.2 fansi_0.4.1 nlme_3.1-145 class_7.3-15
## [37] xml2_1.2.2 foreign_0.8-76 tools_3.6.3 hms_0.5.3
## [41] lifecycle_0.2.0 munsell_0.5.0 reprex_0.3.0 e1071_1.7-3
## [45] compiler_3.6.3 rlang_0.4.4 units_0.6-5 classInt_0.4-2
## [49] grid_3.6.3 rstudioapi_0.11 spam_2.5-1 rmarkdown_2.1
## [53] gtable_0.3.0 codetools_0.2-16 DBI_1.1.0 R6_2.4.1
## [57] gridExtra_2.3 lubridate_1.7.4 rworldxtra_1.01 KernSmooth_2.23-16
## [61] rJava_0.9-11 stringi_1.4.6 Rcpp_1.0.3 fields_10.3
## [65] vctrs_0.2.3 dbplyr_1.4.2 tidyselect_1.0.0 xfun_0.12