From 02ee32dcd376c8a3d949e4b9326bf5746c42afa6 Mon Sep 17 00:00:00 2001
From: Francesco Sabatini <francesco.sabatini@idiv.de>
Date: Fri, 14 Feb 2020 11:49:19 +0100
Subject: [PATCH] Adapted function to extract descriptors from Shp\raster in
 cluster

---
 code/A98_PredictorsExtract.R | 57 +++++++++++++++++++++++++
 code/SessionA98.R            | 12 ++++++
 code/cli_A98.r               | 82 ++++++++++++++++++++++++++++++++++++
 code/submit-A98.sh           | 35 +++++++++++++++
 4 files changed, 186 insertions(+)
 create mode 100644 code/A98_PredictorsExtract.R
 create mode 100644 code/SessionA98.R
 create mode 100644 code/cli_A98.r
 create mode 100644 code/submit-A98.sh

diff --git a/code/A98_PredictorsExtract.R b/code/A98_PredictorsExtract.R
new file mode 100644
index 0000000..fd9e1b3
--- /dev/null
+++ b/code/A98_PredictorsExtract.R
@@ -0,0 +1,57 @@
+
+PredExtr <- function(x.shp, myfunction=NA, output,  
+                     toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
+  print("Load Packages")
+  require(foreach)
+  require(parallel)
+  require(doParallel)
+  require(raster)
+  require(rgeos)
+  
+  print(paste("Loading", typp, "data :", toextract))
+  if(typp=="raster"){ mypredictor <- raster(toextract)} else
+    mypredictor <- readOGR(toextract)
+  header.shp <-readOGR(x.shp)
+  ## Divide in chunks if requested
+  if(chunkn>1 & !is.na(chunk.i)){
+    print(paste("divide in chunks and run on chunk n.", chunk.i))
+    indices <- 1:length(header.shp)
+    chunks <- split(indices, sort(indices%%chunkn))
+    header.shp <- header.shp[chunks[[chunk.i]],]
+    } 
+
+  # define ancillary functions
+  robust.mean <- function(x){mean(x, rm.na=T)}
+  robust.mode <- function(x){if(any(x!=0)) {
+ 	 a <- x[which(x!=0)] #exclude zero (i.e. NAs)
+  	return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
+	    return(NA)                          }
+
+  print("go parallel")
+  cl <- makeForkCluster(ncores, outfile="" )
+  registerDoParallel(cl)
+
+  print("start main foreach loop")
+  if(typp=="raster"){
+  out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { 
+    tmp <- raster::extract(mypredictor, header.shp[i,], 
+                         buffer=min(10000,  max(250, header.shp@data$loc.uncert[i])), fun=myfunction) 
+    }
+  write.csv(out, file = output)
+  } else {
+    out <- sp::over(x=header.shp, y=mypredictor) 
+    toassign <- header.shp[which(is.na(out[,1])),]
+    
+    nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
+      nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor)
+      return(nearest.tmp)
+      }
+    out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],]
+    write.csv(out, file=output)
+  }
+  stopCluster(cl)
+  }
+
+
+
+
diff --git a/code/SessionA98.R b/code/SessionA98.R
new file mode 100644
index 0000000..334e479
--- /dev/null
+++ b/code/SessionA98.R
@@ -0,0 +1,12 @@
+source("/data/sPlot/users/Francesco/Ancillary_Functions/A98_PredictorsExtract.R")
+
+x.shp <- "/data/sPlot/users/Francesco/sPlot3/_derived/header.shp.shp"
+myfunction <- NA
+output <- "/data/sPlot/users/Francesco/sPlot3/_derived/test.csv"
+toextract <- "/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp"
+typp <- "shp"
+ncores <- 10
+chunkn <- 100000
+chunk.i <- 1
+
+PredExtr(x.shp, myfunction=NA, output,toextract, typp, ncores, chunkn, chunk.i)
diff --git a/code/cli_A98.r b/code/cli_A98.r
new file mode 100644
index 0000000..5e0e49e
--- /dev/null
+++ b/code/cli_A98.r
@@ -0,0 +1,82 @@
+library(optparse)
+
+# ------------------------------------------------------------------------------
+# defaults
+# ------------------------------------------------------------------------------
+
+default.ncores <- 32
+default.typp <- "shp"
+# ------------------------------------------------------------------------------
+# parsing arguments
+# ------------------------------------------------------------------------------
+
+options <- list (
+
+	make_option(
+		opt_str = c("-c", "--cores"),
+		dest    = "ncores",
+		type    = "integer",
+		default = default.ncores,
+		help    = paste0("number of CPU cores to use, defaults to ", default.ncores),
+		metavar = "4"
+	),
+	
+	make_option(
+	  opt_str = c("-t", "--typp"),
+	  dest    = "typp",
+	  type    = "character",
+	  default = default.typp,
+	  help    = "Which data type? raster or shp",
+	  metavar = "4"
+	), 
+	make_option(
+	  opt_str = c("-c", "--chunkn"),
+	  dest    = "chunkn",
+	  type    = "integer",
+	  default = 1,
+	  help    = "How many chunks?",
+	  metavar = "4"
+	),
+	
+	make_option(
+	  opt_str = c("-i", "--chunk.i"),
+	  dest    = "chunk.i",
+	  type    = "integer",
+	  default = NA,
+	  help    = "Which chunk?",
+	  metavar = "4"
+	)
+)
+
+parser <- OptionParser(
+       usage       = "Rscript %prog [options] data dt_beals header output",
+       option_list = options,
+       description = "\nan awesome R script",
+       epilogue    = "use with caution, the awesomeness might slap you in the face!"
+)
+
+cli <- parse_args(parser, positional_arguments = 4)
+
+# ------------------------------------------------------------------------------
+# assign a few shortcuts
+# ------------------------------------------------------------------------------
+
+x.shp     	   <- cli$args[1]
+myfunction  	 <- cli$args[2]
+output  	     <- cli$args[3]
+toextract   	 <- cli$args[4]
+typp           <- cli$options$typp
+ncores   	     <- cli$options$ncores
+chunkn         <- cli$options$chunkn
+chunk.i        <- cli$options$chunk.i
+
+
+# ------------------------------------------------------------------------------
+# actual program
+# ------------------------------------------------------------------------------
+
+source("A98_PredictorsExtract.R")
+
+PredExtr(x.shp, myfunction, output, toextract, typp, ncores, chunkn, chunk.i)
+
+
diff --git a/code/submit-A98.sh b/code/submit-A98.sh
new file mode 100644
index 0000000..0a54ff1
--- /dev/null
+++ b/code/submit-A98.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+#$ -S /bin/bash
+#$ -N PredictorExtract
+
+#$ -o /work/$USER/$JOB_NAME-$JOB_ID.log
+#$ -j y
+
+#$ -l h_rt=00:30:00:00
+#$ -l h_vmem=3G
+
+#$ -pe smp 4-32
+
+#$ -cwd
+
+#$ -t 1-99
+
+
+module load R
+
+filename="$1"
+shift 
+
+output=/work/$USER/output_pred/$(basename $filename .shp).csv
+
+Rscript \
+    cli_A98.r \
+    --cores ${NSLOTS:-1} \
+    --typp \
+    --chunk.i $SGE_TASK_ID \
+    --chunkn 99 \
+    /data/splot/_data/header.shp.RData \
+    robust.mean \
+    "$output" \
+    "$filename"
\ No newline at end of file
-- 
GitLab