From 02ee32dcd376c8a3d949e4b9326bf5746c42afa6 Mon Sep 17 00:00:00 2001 From: Francesco Sabatini <francesco.sabatini@idiv.de> Date: Fri, 14 Feb 2020 11:49:19 +0100 Subject: [PATCH] Adapted function to extract descriptors from Shp\raster in cluster --- code/A98_PredictorsExtract.R | 57 +++++++++++++++++++++++++ code/SessionA98.R | 12 ++++++ code/cli_A98.r | 82 ++++++++++++++++++++++++++++++++++++ code/submit-A98.sh | 35 +++++++++++++++ 4 files changed, 186 insertions(+) create mode 100644 code/A98_PredictorsExtract.R create mode 100644 code/SessionA98.R create mode 100644 code/cli_A98.r create mode 100644 code/submit-A98.sh diff --git a/code/A98_PredictorsExtract.R b/code/A98_PredictorsExtract.R new file mode 100644 index 0000000..fd9e1b3 --- /dev/null +++ b/code/A98_PredictorsExtract.R @@ -0,0 +1,57 @@ + +PredExtr <- function(x.shp, myfunction=NA, output, + toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){ + print("Load Packages") + require(foreach) + require(parallel) + require(doParallel) + require(raster) + require(rgeos) + + print(paste("Loading", typp, "data :", toextract)) + if(typp=="raster"){ mypredictor <- raster(toextract)} else + mypredictor <- readOGR(toextract) + header.shp <-readOGR(x.shp) + ## Divide in chunks if requested + if(chunkn>1 & !is.na(chunk.i)){ + print(paste("divide in chunks and run on chunk n.", chunk.i)) + indices <- 1:length(header.shp) + chunks <- split(indices, sort(indices%%chunkn)) + header.shp <- header.shp[chunks[[chunk.i]],] + } + + # define ancillary functions + robust.mean <- function(x){mean(x, rm.na=T)} + robust.mode <- function(x){if(any(x!=0)) { + a <- x[which(x!=0)] #exclude zero (i.e. NAs) + return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else + return(NA) } + + print("go parallel") + cl <- makeForkCluster(ncores, outfile="" ) + registerDoParallel(cl) + + print("start main foreach loop") + if(typp=="raster"){ + out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { + tmp <- raster::extract(mypredictor, header.shp[i,], + buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=myfunction) + } + write.csv(out, file = output) + } else { + out <- sp::over(x=header.shp, y=mypredictor) + toassign <- header.shp[which(is.na(out[,1])),] + + nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { + nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor) + return(nearest.tmp) + } + out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],] + write.csv(out, file=output) + } + stopCluster(cl) + } + + + + diff --git a/code/SessionA98.R b/code/SessionA98.R new file mode 100644 index 0000000..334e479 --- /dev/null +++ b/code/SessionA98.R @@ -0,0 +1,12 @@ +source("/data/sPlot/users/Francesco/Ancillary_Functions/A98_PredictorsExtract.R") + +x.shp <- "/data/sPlot/users/Francesco/sPlot3/_derived/header.shp.shp" +myfunction <- NA +output <- "/data/sPlot/users/Francesco/sPlot3/_derived/test.csv" +toextract <- "/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp" +typp <- "shp" +ncores <- 10 +chunkn <- 100000 +chunk.i <- 1 + +PredExtr(x.shp, myfunction=NA, output,toextract, typp, ncores, chunkn, chunk.i) diff --git a/code/cli_A98.r b/code/cli_A98.r new file mode 100644 index 0000000..5e0e49e --- /dev/null +++ b/code/cli_A98.r @@ -0,0 +1,82 @@ +library(optparse) + +# ------------------------------------------------------------------------------ +# defaults +# ------------------------------------------------------------------------------ + +default.ncores <- 32 +default.typp <- "shp" +# ------------------------------------------------------------------------------ +# parsing arguments +# ------------------------------------------------------------------------------ + +options <- list ( + + make_option( + opt_str = c("-c", "--cores"), + dest = "ncores", + type = "integer", + default = default.ncores, + help = paste0("number of CPU cores to use, defaults to ", default.ncores), + metavar = "4" + ), + + make_option( + opt_str = c("-t", "--typp"), + dest = "typp", + type = "character", + default = default.typp, + help = "Which data type? raster or shp", + metavar = "4" + ), + make_option( + opt_str = c("-c", "--chunkn"), + dest = "chunkn", + type = "integer", + default = 1, + help = "How many chunks?", + metavar = "4" + ), + + make_option( + opt_str = c("-i", "--chunk.i"), + dest = "chunk.i", + type = "integer", + default = NA, + help = "Which chunk?", + metavar = "4" + ) +) + +parser <- OptionParser( + usage = "Rscript %prog [options] data dt_beals header output", + option_list = options, + description = "\nan awesome R script", + epilogue = "use with caution, the awesomeness might slap you in the face!" +) + +cli <- parse_args(parser, positional_arguments = 4) + +# ------------------------------------------------------------------------------ +# assign a few shortcuts +# ------------------------------------------------------------------------------ + +x.shp <- cli$args[1] +myfunction <- cli$args[2] +output <- cli$args[3] +toextract <- cli$args[4] +typp <- cli$options$typp +ncores <- cli$options$ncores +chunkn <- cli$options$chunkn +chunk.i <- cli$options$chunk.i + + +# ------------------------------------------------------------------------------ +# actual program +# ------------------------------------------------------------------------------ + +source("A98_PredictorsExtract.R") + +PredExtr(x.shp, myfunction, output, toextract, typp, ncores, chunkn, chunk.i) + + diff --git a/code/submit-A98.sh b/code/submit-A98.sh new file mode 100644 index 0000000..0a54ff1 --- /dev/null +++ b/code/submit-A98.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +#$ -S /bin/bash +#$ -N PredictorExtract + +#$ -o /work/$USER/$JOB_NAME-$JOB_ID.log +#$ -j y + +#$ -l h_rt=00:30:00:00 +#$ -l h_vmem=3G + +#$ -pe smp 4-32 + +#$ -cwd + +#$ -t 1-99 + + +module load R + +filename="$1" +shift + +output=/work/$USER/output_pred/$(basename $filename .shp).csv + +Rscript \ + cli_A98.r \ + --cores ${NSLOTS:-1} \ + --typp \ + --chunk.i $SGE_TASK_ID \ + --chunkn 99 \ + /data/splot/_data/header.shp.RData \ + robust.mean \ + "$output" \ + "$filename" \ No newline at end of file -- GitLab