diff --git a/code/04_buildHeader.Rmd b/code/04_buildHeader.Rmd index 1f4bb818c9658be16cf081a546ebad8dd66302e5..f0405a427238bfd7b4d50e58cafbef42225b258d 100644 --- a/code/04_buildHeader.Rmd +++ b/code/04_buildHeader.Rmd @@ -24,21 +24,22 @@ This report documents the construction of the header file for sPlot 3.0. It is b ```{r results="hide", message=F, warning=F} -write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron')) -write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron')) - knitr::opts_chunk$set(echo = TRUE) library(tidyverse) library(readr) library(xlsx) ## Spatial packages -library(sp) library(rgdal) +library(sp) library(rgeos) library(raster) library(rworldmap) +#save temporary files +write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron')) +write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron')) +rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp") ``` ```{bash, eval=F} @@ -247,10 +248,9 @@ header <- header %>% )) %>% mutate(`Plants recorded`=factor(`Plants recorded`, exclude = "#N/A")) ``` - +Align labels consortium ```{r} databases <- read_csv("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv") -## align labels to consortium header <- header %>% mutate(Dataset=fct_recode(Dataset, @@ -450,7 +450,7 @@ header.tiles <- header %>% dplyr::select(PlotObservationID, Dataset, Longitude, Latitude, `Location uncertainty (m)`) %>% filter(`Location uncertainty (m)`<= 50000) %>% mutate_at(.vars=vars(Longitude, Latitude), - .funs=list(tile=~cut(., breaks = seq(-180,180, by=1)))) %>% + .funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>% filter(!is.na(Longitude_tile) & !is.na(Latitude_tile) ) %>% mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile))) %>% mutate(`Location uncertainty (m)`=abs(`Location uncertainty (m)`)) @@ -461,7 +461,7 @@ There are `r nrow(header.tiles)` plots out of `r nrow(header)` plots with Locati Extract elevation for each plot. Loops over tiles of 1 x 1°, projects to mercator, and extract elevation for plot coordinates, as well as 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package [elevatr](https://cran.r-project.org/web/packages/elevatr/vignettes/introduction_to_elevatr.html#get_raster_elevation_data), which uses the [Terrain Tiles on Amazon Web Services](https://registry.opendata.aws/terrain-tiles/). Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described [here](https://github.com/tilezen/joerd/blob/master/docs/data-sources.md) -```{r } +```{r message==F} library(elevatr) continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m @@ -469,10 +469,11 @@ continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=m require(parallel) require(doParallel) -cl <- makeForkCluster(10, outfile="") +cl <- makeForkCluster(5, outfile="") registerDoParallel(cl) clusterEvalQ(cl, { + library(rgdal) library(raster) library(sp) library(elevatr) @@ -480,12 +481,15 @@ clusterEvalQ(cl, { }) #create list of tiles for which dem could not be extracted -myfiles <- list.files("../_derived/elevatr/") +myfiles <- list.files("../_derived/elevatr/", pattern = "[A-Za-z]*_[0-9]+\\.RData$") +done <- NULL done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles)))) todo <- 1:nlevels(header.tiles$tilenam) todo <- todo[-which(todo %in% done)] +todo <- sample(todo, replace=F) #foreach(i = 1:nlevels(header.sp$tilenam)) %do% { foreach(i = todo) %dopar% { +#for(i in todo){ #create sp and project data if(nrow(header.tiles %>% filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next() @@ -496,19 +500,25 @@ foreach(i = todo) %dopar% { filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% dplyr::select(-Longitude, -Latitude), proj4string = CRS("+init=epsg:4326")) - sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857")) #project to mercator + sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 + +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null + +no_defs ")) #project to mercator #retrieve dem raster - tryCatch(raster.tile <- get_elev_raster(sp.tile, z=10, expand=max(sp.tile$`Location uncertainty (m)`), clip="bbox"), - error = function(e){next(paste("tile", i, "doesn't work!, skip to next"))} + tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$`Location uncertainty (m)`), clip="bbox"), + error = function(e){e} ) + if(inherits(raster.tile, "error")) { + raster.tile <- NA + save(raster.tile, file = paste("../_derived/elevatr/Failed_tile_", i, ".RData", sep="")) + next(paste("tile", i, "doesn't work!, skip to next")) + } # clip dem tile with continent shape raster.tile <- mask(raster.tile, continent.high.merc) #extract and summarize elevation data elev.tile <- raster::extract(raster.tile, sp.tile, small=T) elev.tile.buffer <- raster::extract(raster.tile, sp.tile, - #buffer=sp.tile$`Location uncertainty (m)` buffer=sp.tile@data$`Location uncertainty (m)`, small=T) tmp <- round(mapply( quantile, @@ -540,44 +550,50 @@ For those tiles that failed, extract elevation of remaining plots one by one #create list of tiles for which dem could not be extracted myfiles <- list.files("../_derived/elevatr/") done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles)))) -todo <- 1:nlevels(header.sp$tilenam) +todo <- 1:nlevels(header.tiles$tilenam) todo <- todo[-which(todo %in% done)] #create SpatialPointsDataFrame -sp.tile0 <- SpatialPointsDataFrame(coords=header.sp %>% - filter(tilenam %in% levels(header.sp$tilenam)[todo]) %>% +sp.tile0 <- SpatialPointsDataFrame(coords=header.tiles %>% + filter(tilenam %in% levels(header.tiles$tilenam)[todo]) %>% dplyr::select(Longitude, Latitude), - data=header.sp %>% - filter(tilenam %in% levels(header.sp$tilenam)[todo]) %>% + data=header.tiles %>% + filter(tilenam %in% levels(header.tiles$tilenam)[todo]) %>% dplyr::select(-Longitude, -Latitude), proj4string = CRS("+init=epsg:4326")) -sp.tile0 <- spTransform(sp.tile0, CRSobj = CRS("+init=epsg:3857")) #project to mercator +sp.tile0 <- spTransform(sp.tile0, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 + +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null + +no_defs ")) #project to mercator output.tile <- data.frame(NULL) #Loop over all plots -for(i in 1:nrow(sp.tile0)){ +for(i in 1:10){#nrow(sp.tile0)){ sp.tile <- sp.tile0[i,] tryCatch(raster.tile <- get_elev_raster(sp.tile, z=10, expand=max(sp.tile$`Location uncertainty (m)`)), - error = function(e){bind_rows(output.tile, - data.frame(PlotObservationID=sp.tile$PlotObservationID, - elevation=NA, - Elevation_q2.5=NA, - Elevation_median=NA, - Elevation_q97.5=NA, - DEM.res=NA)) + error = function(e){#bind_rows(output.tile, + # data.frame(PlotObservationID=sp.tile$PlotObservationID, + # elevation=NA, + # Elevation_q2.5=NA, + # Elevation_median=NA, + # Elevation_q97.5=NA, + # DEM.res=NA)) print(paste("could not retrieve DEM for", sp.tile$PlotObservationID))} ) + # clip dem tile with continent shape + raster.tile <- mask(raster.tile, continent.high.merc) + #extract and summarize elevation data elev.tile <- raster::extract(raster.tile, sp.tile, small=T) elev.tile.buffer <- raster::extract(raster.tile, sp.tile, buffer=sp.tile$`Location uncertainty (m)`, small=T) #elev.q95 <- t(round(sapply(elev.tile.buffer,stats::quantile, probs=c(0.025, 0.5, 0.975), na.rm=T))) - elev.q95 <- t(round(mapply( RobustQuantile, + elev.q95 <- t(round(mapply( quantile, x=elev.tile.buffer, - center=elev.tile, + #center=elev.tile, probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), - loc.uncert=sp.tile$`Location uncertainty (m)`))) - output.tile <- bind_rows(output.tile, + #loc.uncert=sp.tile$`Location uncertainty (m)`, + na.rm=T))) + output.tile <- bind_rows(output.tile, data.frame(PlotObservationID=sp.tile$PlotObservationID, elevation=round(elev.tile), elev.q95, diff --git a/code/A97_ElevationExtract.R b/code/A97_ElevationExtract.R new file mode 100644 index 0000000000000000000000000000000000000000..863d8db149471120d0aa4fc53b0bd844de301fb3 --- /dev/null +++ b/code/A97_ElevationExtract.R @@ -0,0 +1,110 @@ + +ElevationExtract <- function(header, output, ncores){ + print("load packages") + require(tidyverse) + + require(rgdal) + require(sp) + require(rgeos) + require(raster) + require(rworldmap) + require(elevatr) + + require(parallel) + require(doParallel) + + print("Import header and divide in tiles") + header.shp <- readOGR(header) + header.tiles <- header.shp@data %>% + bind_cols(as.data.frame(header.shp@coords)) %>% + rename(PlotObservationID=PltObID, Longitude=coords.x1, Latitude=coords.x2) %>% + filter(lc_ncrt <= 50000) %>% + mutate_at(.vars=vars(Longitude, Latitude), + .funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>% + mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile))) %>% + mutate(lc_ncrt=abs(lc_ncrt)) + + print("Get continent") + sPDF <- rworldmap::getMap(resolution="high") + continent.high <- sPDF[,"continent"] + crs(continent.high) <- CRS("+init=epsg:4326") + continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean") + continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m ++nadgrids=@null +no_defs")) + + print("go parallel") + cl <- makeForkCluster(ncores, outfile="") + registerDoParallel(ncores) + + clusterEvalQ(cl, { + library(rgdal) + library(raster) + library(sp) + library(elevatr) + library(dplyr) + }) + + print("create list of tiles still to do") + myfiles <- list.files(path = output, pattern = "[A-Za-z]*_[0-9]+\\.RData$") + done <- NULL + done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles)))) + todo <- 1:nlevels(header.tiles$tilenam) + if(length(done)>0) {todo <- todo[-which(todo %in% done)]} + todo <- sample(todo, replace=F) + print(paste(length(todo), "tiles to do")) + + print("start main foreach loop") +#foreach(i = 1:nlevels(header.sp$tilenam)) %do% { + foreach(i =todo[1:6]) %dopar% { + #for(i in todo){ + #create sp and project data + if(nrow(header.tiles %>% + filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next() + sp.tile <- SpatialPointsDataFrame(coords=header.tiles %>% + filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% + dplyr::select(Longitude, Latitude), + data=header.tiles %>% + filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% + dplyr::select(-Longitude, -Latitude), + proj4string = CRS("+init=epsg:4326")) + sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 + +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null + +no_defs ")) #project to mercator + + #retrieve dem raster + tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$lc_ncrt), clip="bbox"), + error = function(e){e} + ) + if(inherits(raster.tile, "error")) { + raster.tile <- NA + save(raster.tile, file = paste(output, "elevation_tile_", i, "failed.RData", sep="")) + next(paste("tile", i, "doesn't work!, skip to next")) + } + # clip dem tile with continent shape + raster.tile <- mask(raster.tile, continent.high.merc) + + #extract and summarize elevation data + elev.tile <- raster::extract(raster.tile, sp.tile, small=T) + elev.tile.buffer <- raster::extract(raster.tile, sp.tile, + buffer=sp.tile@data$lc_ncrt, + small=T) + tmp <- round(mapply( quantile, + x=elev.tile.buffer, + #center=elev.tile, + probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), + #lc_ncrt=sp.tile$lc_ncrt, + na.rm=T)) + elev.q95 <- setNames(data.frame(matrix(tmp, ncol = 3, nrow = length(elev.tile.buffer))), + c("Elevation_q2.5", "Elevation_median", "Elevation_q97.5")) + output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID, + elevation=round(elev.tile), + elev.q95, + DEM.res=res(raster.tile)[1]) + + #save output + save(output.tile, file = paste(output, "_elevation_tile_", i, ".RData", sep="")) + print(i) + } + stopCluster(cl) + } + diff --git a/code/SessionA98.R b/code/SessionA98.R deleted file mode 100644 index 8894a3929c6228c936639a4bfda98aa5159fd3a3..0000000000000000000000000000000000000000 --- a/code/SessionA98.R +++ /dev/null @@ -1,14 +0,0 @@ -source("/data/sPlot/users/Francesco/Ancillary_Functions/A98_PredictorsExtract.R") - -x.shp <- "/data/sPlot/users/Francesco/sPlot3/_derived/header.shp.shp" -myfunction <- NA -output <- "/data/sPlot/users/Francesco/sPlot3/_derived/test.csv" -#toextract <- "/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp" -toextract <- "/data/sPlot/users/Francesco/Ancillary_Data/Ecoregions_WWF/wwf_terr_ecos.shp" -typp <- "shp" - -ncores <- 10 -chunkn <- 100000 -chunk.i <- 1 - -PredExtr(x.shp, myfunction=NA, output,toextract, typp, ncores, chunkn, chunk.i) diff --git a/code/cli_A97.r b/code/cli_A97.r new file mode 100644 index 0000000000000000000000000000000000000000..5dbe37bb30bd185035e55c3656d6ee3449d63908 --- /dev/null +++ b/code/cli_A97.r @@ -0,0 +1,59 @@ +library(optparse) + +# ------------------------------------------------------------------------------ +# defaults +# ------------------------------------------------------------------------------ + +default.ncores <- 32 +# ------------------------------------------------------------------------------ +# parsing arguments +# ------------------------------------------------------------------------------ + +options <- list ( + + make_option( + opt_str = c("-c", "--cores"), + dest = "ncores", + type = "integer", + default = default.ncores, + help = paste0("number of CPU cores to use, defaults to ", default.ncores), + metavar = "4" + ), + + make_option( + opt_str = c("-t", "--typp"), + dest = "typp", + type = "character", + default = default.typp, + help = "Which data type? raster or shp", + metavar = "4" + ) +) + +parser <- OptionParser( + usage = "Rscript %prog [options] data dt_beals header output", + option_list = options, + description = "\nan awesome R script", + epilogue = "use with caution, the awesomeness might slap you in the face!" +) + +cli <- parse_args(parser, positional_arguments = 2) + +# ------------------------------------------------------------------------------ +# assign a few shortcuts +# ------------------------------------------------------------------------------ + +header <- cli$args[1] +output <- cli$args[2] +ncores <- cli$options$ncores + + +# ------------------------------------------------------------------------------ +# actual program +# ------------------------------------------------------------------------------ + +source("A97_ElevationExtract.R") + +PredExtr(x.shp, myfunction, output, toextract, typp, ncores, chunkn, chunk.i) + + diff --git a/code/submit-A97.sh b/code/submit-A97.sh new file mode 100644 index 0000000000000000000000000000000000000000..e0edf5cdcdd582ecdb19cd3ac436b69bd241b25d --- /dev/null +++ b/code/submit-A97.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +#$ -S /bin/bash +#$ -N ElevationExtract + +#$ -o /work/$USER/$JOB_NAME-$JOB_ID-$TASK_ID.log +#$ -j y + +#$ -l h_rt=00:30:00:00 +#$ -l h_vmem=60G + +#$ -pe smp 20-32 + +#$ -cwd + + +module load R + +output=/data/splot/_data_splot3/output_pred/elevatr/ + +Rscript \ + cli_A97.r \ + --cores ${NSLOTS:-1} \ + /data/splot/_data_splot3/header.shp.shp \ + "$output" \ No newline at end of file