Skip to content
Snippets Groups Projects
Select Git revision
  • 60a58262fc8179f340e82642471dcd7d934e4446
  • main default protected
2 results

01_01_range_map_preparation.R

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    abundance_prediction.R 6.38 KiB
    
    #-----------------------------------------------#
    # Model prediction:                             #
    # the next part is for making model predictions #
    # throughout study area                         #
    # define predictors of 'grid'                   #
    #-----------------------------------------------#
    #----------------#
    # Load Libraries #
    #----------------#
    
    library(parallel)
    library(foreach)
    library(doParallel)
    library(reshape2)
    library(dplyr)
    
    
    cl <- makeForkCluster(outfile = "")
    registerDoParallel(cl)
    
    #------------------------#
    # command line arguments #
    #------------------------#
    print(paste(Sys.time(), "0. start run"))
    args <- commandArgs(trailingOnly = TRUE)
    print(paste("args", args))
    indir <- args[1]
    print(paste("indir ", indir))
    outdir <- args[2]
    print(paste("outdir ", outdir))
    indir_predictors <- args[3]
    print(paste("indir predictors", indir_predictors))
    year_to_predict <- as.numeric(args[4])
    print(paste("year " , year_to_predict))
    
    indir_fun <- "~/orangutan_density_distribution/src/functions/"
    
    
    #-----------#
    # LOAD DATA #
    #-----------#
    # Load functions
    source(file.path(indir_fun, "generic/path.to.current.R"))
    print("function loaded")
    
    # Load coefficients and weights
    coeff_weights_path <- path.to.current(indir, "coeff_weights", "rds" )
    print(paste("this is coeff_weights_path:", coeff_weights_path))
    coeff_weights <- readRDS(coeff_weights_path)
    # exclude the first column, which contains models, exclude the coefficient of the
    # autocorellation term and the weighted aic of the model
    # we are excluding the autocorellation term, because the mean is zero, and thus
    # it will not have an influence on the overall outcome
    coeffs <- coeff_weights[ , 2:(length(coeff_weights) - 2)]
    # all predictors that are not included in a specific model have NA as their
    # coefficient, which is replaced by 0, so that the estimate is also 0
    coeffs[is.na(coeffs) == T] <- 0
    
    # Load estimates for observation and grid
    # these are the predictors that will be used in the prediction
    # THEY MUST BE IN THE ORDER IN WHICH THEY APPEAR IN THE COEFFICIENTS
    # CODE THIS
    # here only separate after first _
    predictor_names_coeffs <- gsub("coeff_","", names(coeffs))
    #UNDERSTAND HERE WHAT IS HAPPENING
    #interaction_terms_names <- predictor_names_coeffs[predictor_names_coeffs %in%
    #                           paste0("year:", predictor_names_coeffs)]
    #interaction_terms_names <- gsub("year:", "", interaction_terms_names)
    #quadratic_terms_names <- predictor_names_coeffs[predictor_names_coeffs %in%
    #                                                 paste0("I(", predictor_names_coeffs, "^2)")]
    #quadratic_terms_names <- gsub("I\\(|\\^2\\)", "", quadratic_terms_names )
    
    interaction_terms_names <- c("human_pop_dens")
    quadratic_terms_names <- c("rain_dry")
    predictor_names_coeffs <- predictor_names_coeffs[predictor_names_coeffs != "(Intercept)"]
    # don't include interaction or quadratic term
    predictor_names <- predictor_names_coeffs[!grepl("I(*)", predictor_names_coeffs)]
    predictor_names <- predictor_names[!grepl("year[:punct:]*", predictor_names)]
    # this is not a good fix, the problem is that with the second grepl also variable "year" gone
    predictor_names <- c("year", predictor_names)
                                            # predictors for year on grid
    
    print(paste("these are predictor names: ", predictor_names))
    predictors_path <- path.to.current(indir_predictors, paste0("predictors_abundance_",
                                                     year_to_predict),"rds")
    print(paste("this is predictors path", predictors_path))
    predictors <- readRDS(predictors_path) %>%
     dplyr::filter(predictor %in% predictor_names) %>%
        dcast(id + z_year ~ predictor,  value.var = "scaled_value")
    
    predictors$year <- predictors$z_year
    predictors$z_year <- NULL
    str(predictors)
    
    
    
    print(paste("this is nrow predictors", nrow(predictors)))
    #--------------------------#
    # PREDICTION FOR each year #
    #--------------------------#
    #predictor_estimates must comprise covariates and dumy codes in the sequence in which they appear
    # in the coefficients
    # HERE PAY ATTENTION FOR INTERACTIONS
    # AND IF THERE IS MORE THAN ONE QUADRATIC TERM
    intercept <- rep(1, nrow(predictors))
    predictor_estimates <- cbind( intercept,
                        predictors[ , predictor_names],
                        predictors[ ,quadratic_terms_names] * predictors[ ,quadratic_terms_names],
                        predictors[ , "year"] * predictors[ , interaction_terms_names])
    
    names(predictor_estimates) <- c("intercept", predictor_names,
                                    paste0("I(", quadratic_terms_names, "^2)"),
    				paste0("year:", interaction_terms_names))
    
    
    
    # Alternative here to loop through id, which is added to predictor_estimates
    # more correct in terms of being sure that the right thing is done,
    # but takes a bit longer (52s, to 35s for 100 rows)
    ## PLUS PAY ATTENTION, IF PREDICTIONS NOT SAME NROW--> VALUES GET RECYCLED
    print(paste("1. start pred_per_cell", Sys.time()))
    pred_per_cell <- foreach(i = 1:nrow(predictor_estimates), .combine = c)  %dopar% {
    # pred_per_cell <- foreach(i = 1:100, .combine = c)  %dopar% {
    t_predictor_estimates <- t( predictor_estimates[i, ])
                       pred_estimates_wcoeffs  <- data.frame(mapply(`*`, coeffs, t_predictor_estimates, SIMPLIFY = F))
                       pred_estimates_sum <- apply(pred_estimates_wcoeffs, 1, sum)
        pred_estimates_calc <- exp(pred_estimates_sum) * 1/(1 + exp(-(pred_estimates_sum)))
                     pred_estimates_weighted <- pred_estimates_calc * coeff_weights$w_aic
                     sum(pred_estimates_weighted)
                    }
    
    print(paste(Sys.time(), "2. finished dopar loop"))
    
    # is this correct -> ????
    pred_per_cell <- as.data.frame(cbind(predictors$id, pred_per_cell))
    names(pred_per_cell) <- c("id", "abundance_pred")
    
    # exclude NAN
    pred_per_cell <- pred_per_cell[!is.nan(pred_per_cell$abundance_pred), ]
    
    
    print(paste(Sys.time(), "sum predicted for ", year_to_predict,
                sum(pred_per_cell$abundance_pred)))
    print(paste(Sys.time(), "range predicted for ", year_to_predict,
                range(pred_per_cell$abundance_pred)))
    
    
    save.image(file.path(outdir, paste0("abundance_pred_image_", year_to_predict, "_",
                                        Sys.Date(), ".RData")))
    saveRDS(pred_per_cell,
              file = file.path(outdir,
                               paste0("abundance_pred_per_cell_",
                                      year_to_predict,"_",
                                      Sys.Date(), ".rds")))
    
    
    print(paste(Sys.time(), "3. wrote results and done :-)"))