From 206fffdfe812b235e86e5f0cb1041cfd7dba048a Mon Sep 17 00:00:00 2001 From: Francesco Sabatini <francesco.sabatini@idiv.de> Date: Fri, 13 Mar 2020 11:50:31 +0100 Subject: [PATCH] Improved A98 to actually account for loc uncert --- code/05_ExtractEnvironment.Rmd | 21 +++++++++++---------- code/A98_PredictorsExtract.R | 34 +++++++++++++++++++++------------- code/submit-A98.sh | 17 +++++++---------- 3 files changed, 39 insertions(+), 33 deletions(-) diff --git a/code/05_ExtractEnvironment.Rmd b/code/05_ExtractEnvironment.Rmd index 473e97d..b834e31 100644 --- a/code/05_ExtractEnvironment.Rmd +++ b/code/05_ExtractEnvironment.Rmd @@ -125,20 +125,21 @@ for(i in 1:19){ Intersect `header.shp` with each raster of the CHELSA collection. Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks. ```{r, warning=F, message=F, eval=F} +header.shp.path <- "../_derived/header.shp.shp" + for(i in 1:19){ ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="") - chelsa.i <- raster(ff) #define output paths output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="") - output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", + output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10", stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="") - # Run PredExtr function - chelsa.out <- PredExtr(x.shp = header.shp, toextract = chelsa.i, myfunction = "robust.mean", + # Run PredExtr function - sink to file + PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.mean, ncores = 4, typp = "raster", output.ff1) - chelsa.sd <- PredExtr(x.shp = header.shp, toextract = chelsa.i, myfunction = "sd", - ncores = 4, typp = "raster", output).ff2 + PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.sd, + ncores = 4, typp = "raster", output.ff2) } ``` Reimport and assemble output. @@ -223,11 +224,11 @@ for(i in 1:8){ pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2] output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename) output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename) - # Run PredExtr function - chelsa.out <- PredExtr(x.shp = header.shp, toextract = isric.i, myfunction = "robust.mean", + # Run PredExtr function - sink to file + PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.mean, ncores = 4, typp = "raster", output.ff1) - chelsa.sd <- PredExtr(x.shp = header.shp, toextract = isric.i, myfunction = "sd", - ncores = 4, typp = "raster", output).ff2 + PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.sd, + ncores = 4, typp = "raster", output.ff2) } ``` diff --git a/code/A98_PredictorsExtract.R b/code/A98_PredictorsExtract.R index 5cb6f4b..b13dd91 100644 --- a/code/A98_PredictorsExtract.R +++ b/code/A98_PredictorsExtract.R @@ -1,5 +1,14 @@ -PredExtr <- function(x.shp, myfunction=NA, output, +# define ancillary functions +robust.mean <- function(x){mean(x, na.rm=T)} +robust.mode <- function(x){if(any(x!=0)) { + a <- x[which(x!=0)] #exclude zero (i.e. NAs) + return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else + return(NA)} +robust.sd <- function(x){sd(x, na.rm=T)} + +#main function +PredExtr <- function(x.shp, myfunction=NA, output=NA, toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){ print("Load Packages") require(foreach) @@ -12,9 +21,12 @@ PredExtr <- function(x.shp, myfunction=NA, output, print(paste("Loading", typp, "data :", toextract)) print(paste("output will be:", output)) if(typp=="raster"){ mypredictor <- raster(toextract)} else - mypredictor <- readOGR(toextract) + mypredictor <- readOGR(toextract) + colnames(mypredictor@data) header.shp <-readOGR(x.shp) crs(mypredictor) <- crs(header.shp) #should be verified beforehand! + + ## Divide in chunks if requested if(chunkn>1 & !is.na(chunk.i)){ @@ -24,13 +36,9 @@ PredExtr <- function(x.shp, myfunction=NA, output, header.shp <- header.shp[chunks[[chunk.i]],] } - # define ancillary functions - robust.mean <- function(x){mean(x, rm.na=T)} - robust.mode <- function(x){if(any(x!=0)) { - a <- x[which(x!=0)] #exclude zero (i.e. NAs) - return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else - return(NA) } - + print("myfunction defined as") + print(myfunction) + print("go parallel") cl <- makeForkCluster(ncores, outfile="" ) registerDoParallel(cl) @@ -39,9 +47,9 @@ PredExtr <- function(x.shp, myfunction=NA, output, if(typp=="raster"){ out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { tmp <- raster::extract(mypredictor, header.shp[i,], - buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=myfunction) - } - write.csv(out, file = output) + buffer=min(10000, max(250, header.shp@data$lc_ncrt[i])), fun=myfunction) + } + if(!is.na(output)) {write.csv(out, file = output)} else{return(out)} } else { out <- sp::over(x=header.shp, y=mypredictor) toassign <- header.shp[which(is.na(out[,1])),] @@ -60,7 +68,7 @@ PredExtr <- function(x.shp, myfunction=NA, output, out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],] } - write.csv(out, file=output) + if(!is.na(output)) {write.csv(out, file = output)} else{return(out)} } stopCluster(cl) } diff --git a/code/submit-A98.sh b/code/submit-A98.sh index 25e5c6d..182138b 100644 --- a/code/submit-A98.sh +++ b/code/submit-A98.sh @@ -3,32 +3,29 @@ #$ -S /bin/bash #$ -N PredictorExtract -#$ -o /work/$USER/$JOB_NAME-$JOB_ID-$TASK_ID.log +#$ -o /work/$USER/$JOB_NAME-$JOB_ID.log #$ -j y #$ -l h_rt=00:30:00:00 -#$ -l h_vmem=1G +#$ -l h_vmem=4G -#$ -pe smp 8-32 +#$ -pe smp 12-32 #$ -cwd -#$ -t 1-99 - - module load R filename="$1" shift -output=/data/splot/_data_splot3/output_pred/$(basename $filename .shp)-$SGE_TASK_ID.csv +output=/data/splot/_data_splot3/output_pred/$(basename $filename .shp).csv Rscript \ cli_A98.r \ --cores ${NSLOTS:-1} \ - --typp "shp" \ - --chunk.i $SGE_TASK_ID \ - --chunkn 99 \ + --typp "raster" \ + --chunk.i 1 \ + --chunkn 1 \ /data/splot/_data_splot3/header.shp.shp \ robust.mean \ "$output" \ -- GitLab