Select Git revision
abundance_prediction.R
Maria Voigt authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
abundance_prediction.R 13.70 KiB
#-----------------------------------------------#
# Model prediction: #
# the next part is for making model predictions #
# throughout study area #
# define predictors of 'grid' #
#-----------------------------------------------#
#----------------#
# Load Libraries #
#----------------#
suppressMessages(library(parallel))
suppressMessages(library(foreach))
suppressMessages(library(doParallel))
suppressMessages(library(reshape2))
suppressMessages(library(dplyr))
suppressMessages(library(rgdal))
suppressMessages(library(sp))
suppressMessages(library(rgeos))
suppressMessages(library(raster))
suppressPackageStartupMessages(library(optparse))
cl <- makeForkCluster(outfile = "")
suppressMessages(registerDoParallel(cl))
#-----------------------------#
# command line option parsing #
#-----------------------------#
print(paste("Start abundance_prediction script", Sys.time()))
option_list <- list (
make_option(c("-i", "--input-directory"), dest = "input_directory",
type = "character", help = "directory with input files",
metavar = "/path/to/input-dir"),
make_option(c("-o", "--output-directory"), dest = "output_directory",
type = "character", help = "directory with output files",
metavar = "/path/to/output-dir"),
make_option("--exclude-year", dest = "exclude_year", type = "integer",
default = NA, help = "year to exclude", metavar = "2015"),
make_option("--year-to-predict",
dest = "year_to_predict",
type = "integer",
help = "year of the survey years (1994:2015) to predict abundance to",
metavar = "2015"),
make_option(c("-q", "--quiet"), dest = "verbose_script",
action = "store_false",
default = TRUE,
help = "don't print all intermediate results"))
# verbose option a bit counterintuitive
# because I make action store_false, when I say -q that
# means that verbose == F, which is quiet
options <- parse_args(OptionParser(option_list=option_list))
# sanity checks
if (is.null(options$input_directory)) {
stop("input directory not specified, check --help")
}
if (is.null(options$output_directory)) {
stop("output directory not specified, check --help")
}
exclude_year_possibilities <- c(1999:2015)
if (!is.na(options$exclude_year) && !(options$exclude_year %in% exclude_year_possibilities)) {
stop(paste("exclude year must be between", min(exclude_year_possibilities),
"and", max(exclude_year_possibilities)))
}
year_to_predict_possibilities <- c(1999:2015)
if (!(options$year_to_predict %in% year_to_predict_possibilities)) {
stop(paste("the year to predict to year must be between",
min(year_to_predict_possibilities),
"and", max(year_to_predict_possibilities)))
}
# is quiet?
is_verbose <- options$verbose_script
# input directory
indir <- options$input_directory
if(is_verbose){print(paste("indir", indir))}
# input directory
indir <- options$pred_input_directory
if(is_verbose){print(paste("indir", indir))}
# directory in which output is written
outdir <- options$output_directory
if(is_verbose){print(paste("outdir", outdir))}
year_to_predict <- as.numeric(options$year_to_predict )
print(paste("for year to predict" , year_to_predict))
exclude_year <- as.numeric(options$exclude_year)
if(is_verbose){print(paste("exclude year", exclude_year))}
if(is_verbose){print(paste(Sys.time(), "0. start run"))}
#------------------------#
# command line arguments #
#------------------------#
indir_fun <- "~/orangutan_density_distribution/src/functions/"
crs_aea <- "+proj=aea +lat_1=7 +lat_2=-32 +lat_0=-15 +lon_0=125 +x_0=0
+y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" # final projection
if (is.na(exclude_year)){
name_suffix <- ""} else {
name_suffix <- paste0(exclude_year, "_")
}
#----------------#
# Load functions #
#----------------#
# Load functions
source(file.path(indir_fun, "generic/path.to.current.R"))
source(file.path(indir_fun, "project_functions/scale.predictors.R"))
#----------------------#
# Prepare coefficients #
#----------------------#
# Load coefficients and weights
abundMod_results_path <- path.to.current(indir, "abundMod_results", "rds" )
if(is_verbose){print(paste("this is abundMod_results_path:", abundMod_results_path))}
abundMod_results <- readRDS(abundMod_results_path)
# exclude the first column, which contains models, exclude the coefficient of the
# autocorellation term and the weighted aic of the model
# we are excluding the autocorellation term, because the mean is zero, and thus
# it will not have an influence on the overall outcome
coeffs <- abundMod_results[ , grepl(x=colnames(abundMod_results),
pattern="coeff")]
# all predictors that are not included in a specific model have NA as their
# coefficient, which is replaced by 0, so that the estimate is also 0
coeffs[is.na(coeffs) == T] <- 0
# Load estimates for observation and grid
# these are the predictors that will be used in the prediction
# THEY MUST BE IN THE ORDER IN WHICH THEY APPEAR IN THE COEFFICIENTS
# CODE THIS
# here only separate after first _
predictor_names_coeffs <- gsub("coeff_","", names(coeffs))
#UNDERSTAND HERE WHAT IS HAPPENING
#interaction_terms_names <- predictor_names_coeffs[predictor_names_coeffs %in%
# paste0("year:", predictor_names_coeffs)]
#interaction_terms_names <- gsub("year:", "", interaction_terms_names)
#quadratic_terms_names <- predictor_names_coeffs[predictor_names_coeffs %in%
# paste0("I(", predictor_names_coeffs, "^2)")]
#quadratic_terms_names <- gsub("I\\(|\\^2\\)", "", quadratic_terms_names )
quadratic_terms_names <- c("rain_dry")
predictor_names_coeffs <- predictor_names_coeffs[predictor_names_coeffs != "(Intercept)"]
# don't include interaction or quadratic term
predictor_names <- predictor_names_coeffs[!grepl("I(*)", predictor_names_coeffs)]
#----------------------------#
# Load and prepare estimates #
#----------------------------#
geography_path <- path.to.current(indir, paste0("geography_",
year_to_predict),"rds")
if(is_verbose){print(paste("this is geography path", geography_path))}
geography <- readRDS(geography_path)
predictors_path <- path.to.current(indir, paste0("predictors_abundance_",
year_to_predict),"rds")
if(is_verbose){print(paste("this is predictors path", predictors_path))}
predictors <- readRDS(predictors_path)
# import here the already z-transformed or not
predictors_obs_path <- path.to.current(indir,
paste0("predictors_observation_scaled_",
name_suffix),
"rds")
if(is_verbose){print(paste("this is predictors-obs path", predictors_obs_path))}
predictors_obs <- readRDS(predictors_obs_path)
# Scale the grid-predictors using mean and sd of predictors_obs
# predictors for scaling
predictor_names_for_scaling <- c( "dem", "slope", "temp_mean", "rain_dry", "rain_var",
"ou_killings", "ou_killing_prediction", "human_pop_dens",
"perc_muslim", "peatswamp", "lowland_forest", "lower_montane_forest" ,
"road_dens", "distance_PA", "fire_dens", "deforestation_hansen",
"deforestation_gaveau", "plantation_distance", "pulp_distance", "palm_distance",
"dom_T_OC", "dom_T_PH")
predictor_names_add <- c("year", "x_center", "y_center")
predictors <- rename(predictors, unscaled_value = value)
# calculate x and y center
geography$unscaled_x_center <- rowMeans(cbind(geography$x_start, geography$x_end), na.rm = T)
geography$unscaled_y_center <- rowMeans(cbind(geography$y_start, geography$y_end), na.rm = T)
# function here
predictors_grid <- scale.predictors.grid(predictor_names_for_scaling,
predictor_names_add,
predictors,
predictors_obs,
geography)
saveRDS(predictors_grid, file.path(outdir, paste0("predictors_grid_scaled_", name_suffix,
year_to_predict, "_", Sys.Date(), ".rds")))
# predictors used in model
predictor_names <- c("year", "temp_mean", "rain_var", "rain_dry", "dom_T_OC",
"peatswamp", "lowland_forest",
"lower_montane_forest", "deforestation_hansen",
"human_pop_dens", "ou_killing_prediction",
"perc_muslim" )
#--------------------------#
# PREDICTION FOR each year #
#--------------------------#
#predictor_estimates must comprise covariates and dumy codes in the sequence in which they appear
# in the coefficients
# HERE PAY ATTENTION FOR INTERACTIONS
# AND IF THERE IS MORE THAN ONE QUADRATIC TERM
intercept <- rep(1, nrow(predictors_grid))
predictor_estimates <- cbind( intercept,
predictors_grid[ , predictor_names],
predictors_grid[ ,quadratic_terms_names] * predictors_grid[ ,quadratic_terms_names])
names(predictor_estimates) <- c("intercept", predictor_names,
paste0("I(", quadratic_terms_names, "^2)"))
# Alternative here to loop through id, which is added to predictor_estimates
# more correct in terms of being sure that the right thing is done,
# but takes a bit longer (52s, to 35s for 100 rows)
## PLUS PAY ATTENTION, IF PREDICTIONS NOT SAME NROW--> VALUES GET RECYCLED
if(is_verbose){print(paste("1. start pred_per_cell", Sys.time()))}
pred_per_cell <- foreach(i = 1:nrow(predictor_estimates), .combine = c) %dopar% {
# pred_per_cell <- foreach(i = 1:100, .combine = c) %dopar% {
t_predictor_estimates <- t( predictor_estimates[i, ])
pred_estimates_wcoeffs <- data.frame(mapply(`*`, coeffs, t_predictor_estimates, SIMPLIFY = F))
pred_estimates_sum <- apply(pred_estimates_wcoeffs, 1, sum)
pred_estimates_weighted <- pred_estimates_sum * abundMod_results$w_aic
pred_estimates_calc <- sum(pred_estimates_weighted)
return(exp(pred_estimates_calc))
}
if(is_verbose){print(paste(Sys.time(), "2. finished dopar loop"))}
# is this correct -> ????
pred_per_cell <- as.data.frame(cbind(predictors_grid$id, pred_per_cell))
names(pred_per_cell) <- c("id", "abundance_pred")
# exclude NAN
pred_per_cell <- pred_per_cell[!is.nan(pred_per_cell$abundance_pred), ]
if (!is.na(exclude_year)){paste("year_to_exclude:", exclude_year)}
print(paste(Sys.time(), "sum predicted for ", year_to_predict,
sum(pred_per_cell$abundance_pred)))
print(paste(Sys.time(),
"range predicted for ", year_to_predict,
range(pred_per_cell$abundance_pred)))
print(paste(Sys.time(),
"density predicted for ", year_to_predict,
mean(pred_per_cell$abundance_pred)))
print(paste(Sys.time(),
"nr of values over 10 ", year_to_predict,
sum(pred_per_cell$abundance_pred > 10)))
if (is.na(exclude_year)){
name_suffix <- ""} else {
name_suffix <- paste0(exclude_year, "_")
}
saveRDS(pred_per_cell,
file = file.path(outdir,
paste0("abundance_pred_per_cell_",
year_to_predict,"_",
name_suffix,
Sys.Date(), ".rds")))
#-----------------------#
# convert output to map #
#-----------------------#
if(is_verbose){print(paste(Sys.time(), "3. Start making map"))}
geography_grid_path <- path.to.current(indir_predictors,
paste0("geography_", year_to_predict), "rds")
geography_grid <- readRDS(geography_grid_path)
geography_grid_for_join <- dplyr::select(geography_grid, id, x_start, y_start)
pred_per_cell_sp <- left_join(geography_grid_for_join,
pred_per_cell, by = "id") %>%
as.data.frame()
pred_per_cell_sp <- SpatialPointsDataFrame(coords =
cbind(pred_per_cell_sp$x_start,
pred_per_cell_sp$y_start),
data = pred_per_cell_sp,
proj4string = CRS(crs_aea), match.ID = T)
writeOGR(pred_per_cell_sp, dsn = outdir,
layer = paste0("prediction_shp_",
name_suffix,
year_to_predict, "_",
Sys.Date()), driver = "ESRI Shapefile")
# pay attention that the tif gets a new name because later we delete all
# with the same name as the shapefile
system_string <- paste0("gdal_rasterize -ot 'Float32' -a abndnc_ ",
"-tr 1000.0 1000.0 -l prediction_shp_",
name_suffix, year_to_predict, "_", Sys.Date(),
" ", outdir, "/",
"prediction_shp_", name_suffix,
year_to_predict, "_",
Sys.Date(), ".shp",
" ", outdir, "/",
"prediction_map_",name_suffix,
year_to_predict, "_", Sys.Date(), ".tif")
system(system_string)
#delete the shapefile again because clogs up my system
system_string_del <- paste0("rm -f ", outdir, "/prediction_shp_",
name_suffix, year_to_predict, "_",
Sys.Date(), ".*")
system(system_string_del)
save.image(file.path(outdir, paste0("abundance_pred_image_", name_suffix,
year_to_predict, "_", Sys.Date(), ".RData")))
print(paste("Finish model_prediction script for year", year_to_predict,
Sys.time()))