Select Git revision
abundance_prediction.R
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
abundance_prediction.R 11.64 KiB
#-----------------------------------------------#
# Model prediction: #
# the next part is for making model predictions #
# throughout study area #
# define predictors of 'grid' #
#-----------------------------------------------#
#----------------#
# Load Libraries #
#----------------#
suppressMessages(library(parallel))
suppressMessages(library(foreach))
suppressMessages(library(doParallel))
suppressMessages(library(reshape2))
suppressMessages(library(dplyr))
suppressMessages(library(rgdal))
suppressMessages(library(sp))
suppressMessages(library(rgeos))
suppressMessages(library(raster))
suppressPackageStartupMessages(library(optparse))
cl <- makeForkCluster(outfile = "")
registerDoParallel(cl)
#-----------------------------#
# command line option parsing #
#-----------------------------#
option_list <- list (
make_option(c("-i", "--input-directory"), dest = "input_directory",
type = "character", help = "directory with input files",
metavar = "/path/to/input-dir"),
make_option("--predictor-input-directory",
dest = "pred_input_directory", type = "character",
help = "directory with predictor input files",
metavar = "/path/to/pred_input-dir"),
make_option(c("-o", "--output-directory"), dest = "output_directory",
type = "character", help = "directory with output files",
metavar = "/path/to/output-dir"),
make_option("--exclude-year", dest = "exclude_year", type = "integer",
help = "year to exclude", metavar = "2015"),
make_option("--year-to-predict",
dest = "year_to_predict",
type = "integer",
help = "year of the survey years (1994:2015) to predict abundance to",
metavar = "2015"),
make_option(c("-q", "--quiet"), dest = "quiet_script", action = "store_true",
default = FALSE, help = "don't print all intermediate results")
)
options <- parse_args(OptionParser(option_list=option_list))
# sanity checks
if (is.null(options$input_directory)) {
stop("input directory not specified, check --help")
}
if (is.null(options$pred_input_directory)) {
stop("input directory not specified, check --help")
}
if (is.null(options$output_directory)) {
stop("output directory not specified, check --help")
}
exclude_year_possibilities <- c(1999:2015)
if (!is.na(options$exclude_year) && !(options$exclude_year %in% exclude_year_possibilities)) {
stop(paste("exclude year must be between", min(exclude_year_possibilities),
"and", max(exclude_year_possibilities)))
}
year_to_predict_possibilities <- c(1999:2015)
if (!(options$year_to_predict %in% year_to_predict_possibilities)) {
stop(paste("the year to predict to year must be between",
min(year_to_predict_possibilities),
"and", max(year_to_predict_possibilities)))
}
# is quiet?
is_quiet <- options$quiet_script
# input directory
indir <- options$input_directory
if(is_quiet){print(paste("indir", indir))}
# input directory
indir_predictors <- options$pred_input_directory
if(is_quiet){print(paste("indir_predictors", indir_predictors))}
# directory in which output is written
outdir <- options$output_directory
if(is_quiet){print(paste("outdir", outdir))}
year_to_predict <- as.numeric(options$year_to_predict )
if(is_quiet){print(paste("year to predict " , year_to_predict))}
exclude_year <- as.numeric(options$exclude_year)
if(is_quiet){print(paste("exclude year", exclude_year))}
if(is_quiet){print(paste(Sys.time(), "0. start run"))}
#------------------------#
# command line arguments #
#------------------------#
indir_fun <- "~/orangutan_density_distribution/src/functions/"
crs_aea <- "+proj=aea +lat_1=7 +lat_2=-32 +lat_0=-15 +lon_0=125 +x_0=0
+y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" # final projection
#-------------------------------#
# Load and prepare coefficients #
#-------------------------------#
# Load functions
source(file.path(indir_fun, "generic/path.to.current.R"))
# Load coefficients and weights
abundMod_results_path <- path.to.current(indir, "abundMod_results", "rds" )
if(is_quiet){print(paste("this is abundMod_results_path:", abundMod_results_path))}
abundMod_results <- readRDS(abundMod_results_path)
# exclude the first column, which contains models, exclude the coefficient of the
# autocorellation term and the weighted aic of the model
# we are excluding the autocorellation term, because the mean is zero, and thus
# it will not have an influence on the overall outcome
coeffs <- abundMod_results[ , grepl(x=colnames(abundMod_results),
pattern="coeff")]
# all predictors that are not included in a specific model have NA as their
# coefficient, which is replaced by 0, so that the estimate is also 0
coeffs[is.na(coeffs) == T] <- 0
# Load estimates for observation and grid
# these are the predictors that will be used in the prediction
# THEY MUST BE IN THE ORDER IN WHICH THEY APPEAR IN THE COEFFICIENTS
# CODE THIS
# here only separate after first _
predictor_names_coeffs <- gsub("coeff_","", names(coeffs))
#UNDERSTAND HERE WHAT IS HAPPENING
#interaction_terms_names <- predictor_names_coeffs[predictor_names_coeffs %in%
# paste0("year:", predictor_names_coeffs)]
#interaction_terms_names <- gsub("year:", "", interaction_terms_names)
#quadratic_terms_names <- predictor_names_coeffs[predictor_names_coeffs %in%
# paste0("I(", predictor_names_coeffs, "^2)")]
#quadratic_terms_names <- gsub("I\\(|\\^2\\)", "", quadratic_terms_names )
quadratic_terms_names <- c("rain_dry")
predictor_names_coeffs <- predictor_names_coeffs[predictor_names_coeffs != "(Intercept)"]
# don't include interaction or quadratic term
predictor_names <- predictor_names_coeffs[!grepl("I(*)", predictor_names_coeffs)]
#----------------------------#
# Load and prepare estimates #
#----------------------------#
if(is_quiet){print(paste("these are predictor names: ", predictor_names))}
predictors_path <- path.to.current(indir_predictors, paste0("predictors_abundance_",
year_to_predict),"rds")
if(is_quiet){print(paste("this is predictors path", predictors_path))}
predictors <- readRDS(predictors_path) %>%
dplyr::filter(predictor %in% predictor_names) %>%
dcast(id + z_year ~ predictor, value.var = "scaled_value")
predictors$year <- predictors$z_year
predictors$z_year <- NULL
if(is_quiet){str(predictors)}
if(is_quiet){print(paste("this is nrow predictors", nrow(predictors)))}
#--------------------------#
# PREDICTION FOR each year #
#--------------------------#
#predictor_estimates must comprise covariates and dumy codes in the sequence in which they appear
# in the coefficients
# HERE PAY ATTENTION FOR INTERACTIONS
# AND IF THERE IS MORE THAN ONE QUADRATIC TERM
intercept <- rep(1, nrow(predictors))
predictor_estimates <- cbind( intercept,
predictors[ , predictor_names],
predictors[ ,quadratic_terms_names] * predictors[ ,quadratic_terms_names])
names(predictor_estimates) <- c("intercept", predictor_names,
paste0("I(", quadratic_terms_names, "^2)"))
# Alternative here to loop through id, which is added to predictor_estimates
# more correct in terms of being sure that the right thing is done,
# but takes a bit longer (52s, to 35s for 100 rows)
## PLUS PAY ATTENTION, IF PREDICTIONS NOT SAME NROW--> VALUES GET RECYCLED
if(is_quiet){print(paste("1. start pred_per_cell", Sys.time()))}
pred_per_cell <- foreach(i = 1:nrow(predictor_estimates), .combine = c) %dopar% {
# pred_per_cell <- foreach(i = 1:100, .combine = c) %dopar% {
t_predictor_estimates <- t( predictor_estimates[i, ])
pred_estimates_wcoeffs <- data.frame(mapply(`*`, coeffs, t_predictor_estimates, SIMPLIFY = F))
pred_estimates_sum <- apply(pred_estimates_wcoeffs, 1, sum)
pred_estimates_weighted <- pred_estimates_sum * abundMod_results$w_aic
pred_estimates_calc <- sum(pred_estimates_weighted)
return(exp(pred_estimates_calc))
}
if(is_quiet){print(paste(Sys.time(), "2. finished dopar loop"))}
# is this correct -> ????
pred_per_cell <- as.data.frame(cbind(predictors$id, pred_per_cell))
names(pred_per_cell) <- c("id", "abundance_pred")
# exclude NAN
pred_per_cell <- pred_per_cell[!is.nan(pred_per_cell$abundance_pred), ]
if (!is.na(exclude_year)){paste("year_to_exclude:", exclude_year)}
print(paste(Sys.time(), "sum predicted for ", year_to_predict,
sum(pred_per_cell$abundance_pred)))
print(paste(Sys.time(),
"range predicted for ", year_to_predict,
range(pred_per_cell$abundance_pred)))
print(paste(Sys.time(),
"density predicted for ", year_to_predict,
mean(pred_per_cell$abundance_pred)))
print(paste(Sys.time(),
"nr of values over 10 ", year_to_predict,
sum(pred_per_cell$abundance_pred > 10)))
if (is.na(exclude_year)){
name_suffix <- ""} else {
name_suffix <- paste0(exclude_year, "_")
}
saveRDS(pred_per_cell,
file = file.path(outdir,
paste0("abundance_pred_per_cell_",
year_to_predict,"_",
name_suffix, "_",
Sys.Date(), ".rds")))
save.image(file.path(outdir, paste0("abundance_pred_image_", name_suffix,
year_to_predict, "_", Sys.Date(), ".RData")))
#-----------------------#
# convert output to map #
#-----------------------#
if(is_quiet){print(paste(Sys.time(), "3. Start making map"))}
geography_grid_path <- path.to.current(indir_predictors,
paste0("geography_", year_to_predict), "rds")
geography_grid <- readRDS(geography_grid_path)
geography_grid_for_join <- dplyr::select(geography_grid, id, x_start, y_start)
pred_per_cell_sp <- left_join(geography_grid_for_join,
pred_per_cell, by = "id") %>%
as.data.frame()
pred_per_cell_sp <- SpatialPointsDataFrame(coords =
cbind(pred_per_cell_sp$x_start,
pred_per_cell_sp$y_start),
data = pred_per_cell_sp,
proj4string = CRS(crs_aea), match.ID = T)
writeOGR(pred_per_cell_sp, dsn = outdir,
layer = paste0("prediction_shp_",
name_suffix,
year_to_predict, "_",
Sys.Date()), driver = "ESRI Shapefile")
# pay attention that the tif gets a new name because later we delete all
# with the same name as the shapefile
system_string <- paste0("gdal_rasterize -ot 'Float32' -a abndnc_ ",
"-tr 1000.0 1000.0 -l prediction_shp_",
name_suffix, year_to_predict, "_", Sys.Date(),
" ", outdir, "/",
"prediction_shp_", name_suffix,
year_to_predict, "_",
Sys.Date(), ".shp",
" ", outdir, "/",
"prediction_map_",name_suffix,
year_to_predict, "_", Sys.Date(), ".tif")
system(system_string)
#delete the shapefile again because clogs up my system
system_string_del <- paste0("rm -f ", outdir, "/prediction_shp_",
name_suffix, year_to_predict, "_",
Sys.Date(), ".*")
system(system_string_del)
save.image(file.path(outdir, paste0("abundance_pred_image_", name_suffix,
year_to_predict, "_", Sys.Date(), ".RData")))
if(is_quiet){print(paste(Sys.time(), "4. wrote results and done :-)"))}