Skip to content
Snippets Groups Projects
Commit 02ee32dc authored by Francesco Sabatini's avatar Francesco Sabatini
Browse files

Adapted function to extract descriptors from Shp\raster in cluster

parent 420d97c8
Branches
No related tags found
No related merge requests found
PredExtr <- function(x.shp, myfunction=NA, output,
toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
print("Load Packages")
require(foreach)
require(parallel)
require(doParallel)
require(raster)
require(rgeos)
print(paste("Loading", typp, "data :", toextract))
if(typp=="raster"){ mypredictor <- raster(toextract)} else
mypredictor <- readOGR(toextract)
header.shp <-readOGR(x.shp)
## Divide in chunks if requested
if(chunkn>1 & !is.na(chunk.i)){
print(paste("divide in chunks and run on chunk n.", chunk.i))
indices <- 1:length(header.shp)
chunks <- split(indices, sort(indices%%chunkn))
header.shp <- header.shp[chunks[[chunk.i]],]
}
# define ancillary functions
robust.mean <- function(x){mean(x, rm.na=T)}
robust.mode <- function(x){if(any(x!=0)) {
a <- x[which(x!=0)] #exclude zero (i.e. NAs)
return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
return(NA) }
print("go parallel")
cl <- makeForkCluster(ncores, outfile="" )
registerDoParallel(cl)
print("start main foreach loop")
if(typp=="raster"){
out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% {
tmp <- raster::extract(mypredictor, header.shp[i,],
buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=myfunction)
}
write.csv(out, file = output)
} else {
out <- sp::over(x=header.shp, y=mypredictor)
toassign <- header.shp[which(is.na(out[,1])),]
nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% {
nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor)
return(nearest.tmp)
}
out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],]
write.csv(out, file=output)
}
stopCluster(cl)
}
source("/data/sPlot/users/Francesco/Ancillary_Functions/A98_PredictorsExtract.R")
x.shp <- "/data/sPlot/users/Francesco/sPlot3/_derived/header.shp.shp"
myfunction <- NA
output <- "/data/sPlot/users/Francesco/sPlot3/_derived/test.csv"
toextract <- "/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp"
typp <- "shp"
ncores <- 10
chunkn <- 100000
chunk.i <- 1
PredExtr(x.shp, myfunction=NA, output,toextract, typp, ncores, chunkn, chunk.i)
library(optparse)
# ------------------------------------------------------------------------------
# defaults
# ------------------------------------------------------------------------------
default.ncores <- 32
default.typp <- "shp"
# ------------------------------------------------------------------------------
# parsing arguments
# ------------------------------------------------------------------------------
options <- list (
make_option(
opt_str = c("-c", "--cores"),
dest = "ncores",
type = "integer",
default = default.ncores,
help = paste0("number of CPU cores to use, defaults to ", default.ncores),
metavar = "4"
),
make_option(
opt_str = c("-t", "--typp"),
dest = "typp",
type = "character",
default = default.typp,
help = "Which data type? raster or shp",
metavar = "4"
),
make_option(
opt_str = c("-c", "--chunkn"),
dest = "chunkn",
type = "integer",
default = 1,
help = "How many chunks?",
metavar = "4"
),
make_option(
opt_str = c("-i", "--chunk.i"),
dest = "chunk.i",
type = "integer",
default = NA,
help = "Which chunk?",
metavar = "4"
)
)
parser <- OptionParser(
usage = "Rscript %prog [options] data dt_beals header output",
option_list = options,
description = "\nan awesome R script",
epilogue = "use with caution, the awesomeness might slap you in the face!"
)
cli <- parse_args(parser, positional_arguments = 4)
# ------------------------------------------------------------------------------
# assign a few shortcuts
# ------------------------------------------------------------------------------
x.shp <- cli$args[1]
myfunction <- cli$args[2]
output <- cli$args[3]
toextract <- cli$args[4]
typp <- cli$options$typp
ncores <- cli$options$ncores
chunkn <- cli$options$chunkn
chunk.i <- cli$options$chunk.i
# ------------------------------------------------------------------------------
# actual program
# ------------------------------------------------------------------------------
source("A98_PredictorsExtract.R")
PredExtr(x.shp, myfunction, output, toextract, typp, ncores, chunkn, chunk.i)
#!/bin/bash
#$ -S /bin/bash
#$ -N PredictorExtract
#$ -o /work/$USER/$JOB_NAME-$JOB_ID.log
#$ -j y
#$ -l h_rt=00:30:00:00
#$ -l h_vmem=3G
#$ -pe smp 4-32
#$ -cwd
#$ -t 1-99
module load R
filename="$1"
shift
output=/work/$USER/output_pred/$(basename $filename .shp).csv
Rscript \
cli_A98.r \
--cores ${NSLOTS:-1} \
--typp \
--chunk.i $SGE_TASK_ID \
--chunkn 99 \
/data/splot/_data/header.shp.RData \
robust.mean \
"$output" \
"$filename"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment