From 2769fe4a8aa8222862e3f746575d7c351841e933 Mon Sep 17 00:00:00 2001
From: Francesco Sabatini <francesco.sabatini@idiv.de>
Date: Fri, 21 Feb 2020 23:12:59 +0100
Subject: [PATCH] Create A97 to extract elevatr in Cluster

---
 code/04_buildHeader.Rmd     |  80 +++++++++++++++-----------
 code/A97_ElevationExtract.R | 110 ++++++++++++++++++++++++++++++++++++
 code/SessionA98.R           |  14 -----
 code/cli_A97.r              |  59 +++++++++++++++++++
 code/submit-A97.sh          |  25 ++++++++
 5 files changed, 242 insertions(+), 46 deletions(-)
 create mode 100644 code/A97_ElevationExtract.R
 delete mode 100644 code/SessionA98.R
 create mode 100644 code/cli_A97.r
 create mode 100644 code/submit-A97.sh

diff --git a/code/04_buildHeader.Rmd b/code/04_buildHeader.Rmd
index 1f4bb81..f0405a4 100644
--- a/code/04_buildHeader.Rmd
+++ b/code/04_buildHeader.Rmd
@@ -24,21 +24,22 @@ This report documents the construction of the header file for sPlot 3.0. It is b
 
 
 ```{r results="hide", message=F, warning=F}
-write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
-write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
-
 knitr::opts_chunk$set(echo = TRUE)
 library(tidyverse)
 library(readr)
 library(xlsx)
 
 ## Spatial packages
-library(sp)
 library(rgdal)
+library(sp)
 library(rgeos)
 library(raster)
 library(rworldmap)
 
+#save temporary files
+write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
+write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
+rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
 ```
 
 ```{bash, eval=F}
@@ -247,10 +248,9 @@ header <- header %>%
                                       )) %>% 
   mutate(`Plants recorded`=factor(`Plants recorded`, exclude = "#N/A"))
 ```
-
+Align labels consortium 
 ```{r}
 databases <- read_csv("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv")
-## align labels to consortium 
 
 header <- header %>% 
   mutate(Dataset=fct_recode(Dataset,
@@ -450,7 +450,7 @@ header.tiles <- header %>%
   dplyr::select(PlotObservationID, Dataset, Longitude, Latitude, `Location uncertainty (m)`) %>%
   filter(`Location uncertainty (m)`<= 50000) %>%
   mutate_at(.vars=vars(Longitude, Latitude), 
-            .funs=list(tile=~cut(., breaks = seq(-180,180, by=1)))) %>%
+            .funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>%
   filter(!is.na(Longitude_tile) & !is.na(Latitude_tile) ) %>%
   mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile))) %>%
   mutate(`Location uncertainty (m)`=abs(`Location uncertainty (m)`))
@@ -461,7 +461,7 @@ There are `r nrow(header.tiles)` plots out of `r nrow(header)` plots with Locati
 
 Extract elevation for each plot. Loops over tiles of 1 x 1°, projects to mercator, and extract elevation for plot coordinates, as well as 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package [elevatr](https://cran.r-project.org/web/packages/elevatr/vignettes/introduction_to_elevatr.html#get_raster_elevation_data), which uses the [Terrain Tiles on Amazon Web Services](https://registry.opendata.aws/terrain-tiles/). Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described [here](https://github.com/tilezen/joerd/blob/master/docs/data-sources.md)
 
-```{r }
+```{r message==F}
 library(elevatr)
 
 continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m
@@ -469,10 +469,11 @@ continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=m
 
 require(parallel)
 require(doParallel)
-cl <- makeForkCluster(10, outfile="")
+cl <- makeForkCluster(5, outfile="")
 registerDoParallel(cl)
 
 clusterEvalQ(cl, {
+  library(rgdal)
   library(raster)
   library(sp)
   library(elevatr)
@@ -480,12 +481,15 @@ clusterEvalQ(cl, {
   })
 
 #create list of tiles for which dem could not be extracted
-myfiles <- list.files("../_derived/elevatr/")
+myfiles <- list.files("../_derived/elevatr/", pattern = "[A-Za-z]*_[0-9]+\\.RData$")
+done <- NULL
 done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles))))
 todo <- 1:nlevels(header.tiles$tilenam)
 todo <- todo[-which(todo %in% done)]
+todo <- sample(todo, replace=F)
 #foreach(i = 1:nlevels(header.sp$tilenam)) %do% {
 foreach(i = todo) %dopar% {  
+#for(i in todo){
   #create sp and project data
   if(nrow(header.tiles %>% 
           filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next()
@@ -496,19 +500,25 @@ foreach(i = todo) %dopar% {
                                     filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
                                     dplyr::select(-Longitude, -Latitude),
                                   proj4string = CRS("+init=epsg:4326"))
-  sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857")) #project to mercator
+  sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
+                                               +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
+                                               +no_defs ")) #project to mercator
 
   #retrieve dem raster
-  tryCatch(raster.tile <- get_elev_raster(sp.tile, z=10, expand=max(sp.tile$`Location uncertainty (m)`), clip="bbox"),
-        error = function(e){next(paste("tile", i, "doesn't work!, skip to next"))}
+  tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$`Location uncertainty (m)`), clip="bbox"),
+        error = function(e){e}
   )
+  if(inherits(raster.tile, "error")) {
+    raster.tile <- NA
+    save(raster.tile, file = paste("../_derived/elevatr/Failed_tile_", i, ".RData", sep=""))
+    next(paste("tile", i, "doesn't work!, skip to next"))
+  }
   # clip dem tile with continent shape
   raster.tile <- mask(raster.tile, continent.high.merc)
   
   #extract and summarize elevation data
   elev.tile <- raster::extract(raster.tile, sp.tile, small=T)
   elev.tile.buffer <- raster::extract(raster.tile, sp.tile, 
-                                      #buffer=sp.tile$`Location uncertainty (m)`
                                       buffer=sp.tile@data$`Location uncertainty (m)`, 
                                       small=T)
   tmp <- round(mapply( quantile, 
@@ -540,44 +550,50 @@ For those tiles that failed, extract elevation of remaining plots one by one
 #create list of tiles for which dem could not be extracted
 myfiles <- list.files("../_derived/elevatr/")
 done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles))))
-todo <- 1:nlevels(header.sp$tilenam)
+todo <- 1:nlevels(header.tiles$tilenam)
 todo <- todo[-which(todo %in% done)]
 
 #create SpatialPointsDataFrame
-sp.tile0 <- SpatialPointsDataFrame(coords=header.sp %>% 
-                                    filter(tilenam %in% levels(header.sp$tilenam)[todo]) %>%
+sp.tile0 <- SpatialPointsDataFrame(coords=header.tiles %>% 
+                                    filter(tilenam %in% levels(header.tiles$tilenam)[todo]) %>%
                                     dplyr::select(Longitude, Latitude),
-                                  data=header.sp %>% 
-                                    filter(tilenam %in% levels(header.sp$tilenam)[todo]) %>%
+                                  data=header.tiles %>% 
+                                    filter(tilenam %in% levels(header.tiles$tilenam)[todo]) %>%
                                     dplyr::select(-Longitude, -Latitude),
                                   proj4string = CRS("+init=epsg:4326"))
-sp.tile0 <- spTransform(sp.tile0, CRSobj = CRS("+init=epsg:3857")) #project to mercator
+sp.tile0 <- spTransform(sp.tile0, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
+                                               +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
+                                               +no_defs ")) #project to mercator
 output.tile <- data.frame(NULL)
 #Loop over all plots
-for(i in 1:nrow(sp.tile0)){
+for(i in 1:10){#nrow(sp.tile0)){
   sp.tile <- sp.tile0[i,]
   tryCatch(raster.tile <- get_elev_raster(sp.tile, z=10, 
                                           expand=max(sp.tile$`Location uncertainty (m)`)),
-        error = function(e){bind_rows(output.tile, 
-                                      data.frame(PlotObservationID=sp.tile$PlotObservationID,
-                                                 elevation=NA,
-                                                 Elevation_q2.5=NA,
-                                                 Elevation_median=NA,
-                                                 Elevation_q97.5=NA,
-                                                 DEM.res=NA))
+        error = function(e){#bind_rows(output.tile, 
+                            #          data.frame(PlotObservationID=sp.tile$PlotObservationID,
+                            #                     elevation=NA,
+                            #                     Elevation_q2.5=NA,
+                            #                     Elevation_median=NA,
+                            #                     Elevation_q97.5=NA,
+                            #                     DEM.res=NA))
           print(paste("could not retrieve DEM for", sp.tile$PlotObservationID))}
           )
+  # clip dem tile with continent shape
+  raster.tile <- mask(raster.tile, continent.high.merc)
+  
   #extract and summarize elevation data
   elev.tile <- raster::extract(raster.tile, sp.tile, small=T)
   elev.tile.buffer <- raster::extract(raster.tile, sp.tile, 
                                       buffer=sp.tile$`Location uncertainty (m)`, small=T)
   #elev.q95 <- t(round(sapply(elev.tile.buffer,stats::quantile, probs=c(0.025, 0.5, 0.975), na.rm=T)))
-  elev.q95 <- t(round(mapply( RobustQuantile, 
+  elev.q95 <- t(round(mapply( quantile, 
                             x=elev.tile.buffer,
-                            center=elev.tile,
+                            #center=elev.tile,
                             probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), 
-                            loc.uncert=sp.tile$`Location uncertainty (m)`)))
-  output.tile <- bind_rows(output.tile, 
+                            #loc.uncert=sp.tile$`Location uncertainty (m)`, 
+                       na.rm=T)))
+   output.tile <- bind_rows(output.tile, 
                         data.frame(PlotObservationID=sp.tile$PlotObservationID, 
                             elevation=round(elev.tile), 
                             elev.q95, 
diff --git a/code/A97_ElevationExtract.R b/code/A97_ElevationExtract.R
new file mode 100644
index 0000000..863d8db
--- /dev/null
+++ b/code/A97_ElevationExtract.R
@@ -0,0 +1,110 @@
+
+ElevationExtract <- function(header, output, ncores){
+  print("load packages")
+  require(tidyverse)
+  
+  require(rgdal)
+  require(sp)
+  require(rgeos)
+  require(raster)
+  require(rworldmap)
+  require(elevatr)
+  
+  require(parallel)
+  require(doParallel)
+  
+  print("Import header and divide in tiles")
+  header.shp <- readOGR(header)
+  header.tiles <- header.shp@data %>% 
+    bind_cols(as.data.frame(header.shp@coords)) %>% 
+    rename(PlotObservationID=PltObID, Longitude=coords.x1, Latitude=coords.x2) %>% 
+    filter(lc_ncrt <= 50000) %>%
+    mutate_at(.vars=vars(Longitude, Latitude), 
+              .funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>%
+    mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile))) %>%
+    mutate(lc_ncrt=abs(lc_ncrt))
+  
+  print("Get continent")
+  sPDF <- rworldmap::getMap(resolution="high")
+  continent.high <- sPDF[,"continent"]
+  crs(continent.high) <- CRS("+init=epsg:4326")
+  continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean")
+  continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m
++nadgrids=@null +no_defs"))
+
+  print("go parallel")
+  cl <- makeForkCluster(ncores, outfile="")
+  registerDoParallel(ncores)
+  
+  clusterEvalQ(cl, {
+    library(rgdal)
+    library(raster)
+    library(sp)
+    library(elevatr)
+    library(dplyr)
+  })
+
+  print("create list of tiles still to do")
+  myfiles <- list.files(path = output, pattern = "[A-Za-z]*_[0-9]+\\.RData$")
+  done <- NULL
+  done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles))))
+  todo <- 1:nlevels(header.tiles$tilenam)
+  if(length(done)>0) {todo <- todo[-which(todo %in% done)]}
+  todo <- sample(todo, replace=F)
+  print(paste(length(todo), "tiles to do"))
+  
+  print("start main foreach loop")
+#foreach(i = 1:nlevels(header.sp$tilenam)) %do% {
+  foreach(i =todo[1:6]) %dopar% {  
+    #for(i in todo){
+    #create sp and project data
+    if(nrow(header.tiles %>% 
+            filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next()
+    sp.tile <- SpatialPointsDataFrame(coords=header.tiles %>% 
+                                        filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
+                                        dplyr::select(Longitude, Latitude),
+                                      data=header.tiles %>% 
+                                        filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
+                                        dplyr::select(-Longitude, -Latitude),
+                                      proj4string = CRS("+init=epsg:4326"))
+    sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
+                                                 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
+                                                 +no_defs ")) #project to mercator
+    
+    #retrieve dem raster
+    tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$lc_ncrt), clip="bbox"),
+             error = function(e){e}
+    )
+    if(inherits(raster.tile, "error")) {
+      raster.tile <- NA
+      save(raster.tile, file = paste(output, "elevation_tile_", i, "failed.RData", sep=""))
+      next(paste("tile", i, "doesn't work!, skip to next"))
+    }
+    # clip dem tile with continent shape
+    raster.tile <- mask(raster.tile, continent.high.merc)
+    
+    #extract and summarize elevation data
+    elev.tile <- raster::extract(raster.tile, sp.tile, small=T)
+    elev.tile.buffer <- raster::extract(raster.tile, sp.tile, 
+                                        buffer=sp.tile@data$lc_ncrt, 
+                                        small=T)
+    tmp <- round(mapply( quantile, 
+                         x=elev.tile.buffer,
+                         #center=elev.tile,
+                         probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), 
+                         #lc_ncrt=sp.tile$lc_ncrt, 
+                         na.rm=T))
+    elev.q95 <- setNames(data.frame(matrix(tmp, ncol = 3, nrow = length(elev.tile.buffer))), 
+                         c("Elevation_q2.5", "Elevation_median", "Elevation_q97.5"))
+    output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID, 
+                              elevation=round(elev.tile), 
+                              elev.q95, 
+                              DEM.res=res(raster.tile)[1]) 
+    
+    #save output
+    save(output.tile, file = paste(output, "_elevation_tile_", i, ".RData", sep=""))
+    print(i)
+  }
+  stopCluster(cl)
+  }
+  
diff --git a/code/SessionA98.R b/code/SessionA98.R
deleted file mode 100644
index 8894a39..0000000
--- a/code/SessionA98.R
+++ /dev/null
@@ -1,14 +0,0 @@
-source("/data/sPlot/users/Francesco/Ancillary_Functions/A98_PredictorsExtract.R")
-
-x.shp <- "/data/sPlot/users/Francesco/sPlot3/_derived/header.shp.shp"
-myfunction <- NA
-output <- "/data/sPlot/users/Francesco/sPlot3/_derived/test.csv"
-#toextract <- "/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp"
-toextract <- "/data/sPlot/users/Francesco/Ancillary_Data/Ecoregions_WWF/wwf_terr_ecos.shp"
-typp <- "shp"
-
-ncores <- 10
-chunkn <- 100000
-chunk.i <- 1
-
-PredExtr(x.shp, myfunction=NA, output,toextract, typp, ncores, chunkn, chunk.i)
diff --git a/code/cli_A97.r b/code/cli_A97.r
new file mode 100644
index 0000000..5dbe37b
--- /dev/null
+++ b/code/cli_A97.r
@@ -0,0 +1,59 @@
+library(optparse)
+
+# ------------------------------------------------------------------------------
+# defaults
+# ------------------------------------------------------------------------------
+
+default.ncores <- 32
+# ------------------------------------------------------------------------------
+# parsing arguments
+# ------------------------------------------------------------------------------
+
+options <- list (
+
+	make_option(
+		opt_str = c("-c", "--cores"),
+		dest    = "ncores",
+		type    = "integer",
+		default = default.ncores,
+		help    = paste0("number of CPU cores to use, defaults to ", default.ncores),
+		metavar = "4"
+	),
+	
+	make_option(
+	  opt_str = c("-t", "--typp"),
+	  dest    = "typp",
+	  type    = "character",
+	  default = default.typp,
+	  help    = "Which data type? raster or shp",
+	  metavar = "4"
+	)
+)
+
+parser <- OptionParser(
+       usage       = "Rscript %prog [options] data dt_beals header output",
+       option_list = options,
+       description = "\nan awesome R script",
+       epilogue    = "use with caution, the awesomeness might slap you in the face!"
+)
+
+cli <- parse_args(parser, positional_arguments = 2)
+
+# ------------------------------------------------------------------------------
+# assign a few shortcuts
+# ------------------------------------------------------------------------------
+
+header     	   <- cli$args[1]
+output  	     <- cli$args[2]
+ncores   	     <- cli$options$ncores
+
+
+# ------------------------------------------------------------------------------
+# actual program
+# ------------------------------------------------------------------------------
+
+source("A97_ElevationExtract.R")
+
+PredExtr(x.shp, myfunction, output, toextract, typp, ncores, chunkn, chunk.i)
+
+
diff --git a/code/submit-A97.sh b/code/submit-A97.sh
new file mode 100644
index 0000000..e0edf5c
--- /dev/null
+++ b/code/submit-A97.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+#$ -S /bin/bash
+#$ -N ElevationExtract
+
+#$ -o /work/$USER/$JOB_NAME-$JOB_ID-$TASK_ID.log
+#$ -j y
+
+#$ -l h_rt=00:30:00:00
+#$ -l h_vmem=60G
+
+#$ -pe smp 20-32
+
+#$ -cwd
+
+
+module load R
+
+output=/data/splot/_data_splot3/output_pred/elevatr/
+
+Rscript \
+    cli_A97.r \
+    --cores ${NSLOTS:-1} \
+    /data/splot/_data_splot3/header.shp.shp \
+    "$output"
\ No newline at end of file
-- 
GitLab