diff --git a/src/model_fitting/abundance_model.R b/src/model_fitting/abundance_model.R index a71426e77af484d7bed52362abfec3e3378081c9..91278717e706d74c07de7b91db7e4e76e689e9b1 100644 --- a/src/model_fitting/abundance_model.R +++ b/src/model_fitting/abundance_model.R @@ -38,6 +38,9 @@ option_list <- list ( default = NA, help = "year to exclude", metavar = "2015"), make_option("--exclude-grid", dest = "exclude_grid", type = "integer", default = NA, help = "index of grid-cells to exclude", metavar = "1"), + make_option("--exclude-random", dest = "exclude_random", type = "integer", + default = NA, help = "randomly excluding the percentage given", + metavar = "20"), make_option("--include-aerial", dest = "include_aerial", action="store_true", default=FALSE, help="include aerial transects"), @@ -95,6 +98,9 @@ if(is_verbose){print(paste("exclude year", exclude_year))} exclude_grid <- options$exclude_grid if(is_verbose){print(paste("exclude grid", exclude_grid))} +exclude_random <- options$exclude_random +if(is_verbose){print(paste("exclude random", exclude_random))} + #---------# # Globals # #---------# @@ -285,6 +291,16 @@ if (!is.na(exclude_grid)){ predictors_obs <- predictors_obs[predictors_obs$grid_id != exclude_grid, ] } +if (!is.na(exclude_random)){ + # choose X percent random grid_cells from all cells + # save them in predictors_excluded random and exclude them from the others + ids_to_exclude <- sample(predictors_obs$id, + size = nrow(predictors_obs)/100 * exclude_random, + replace = FALSE) + predictors_excluded_random <- predictors_obs[predictors_obs$id %in% ids_to_exclude, ] + predictors_obs <- predictors_obs[!predictors_obs$id %in% ids_to_exclude, ] +} + # also we increase maxit for the two cases, # because then slightly less data if (is.na(exclude_grid) & is.na(exclude_year)){ @@ -384,7 +400,7 @@ if(is_verbose){print(paste("8. Start running models", Sys.time()))} results_res <- foreach(i = 1:nrow(all_model_terms), .combine = rbind) %dopar%{ # make results dataframe - if (is.na(exclude_year) & is.na(exclude_grid)){ + if (is.na(exclude_year) & is.na(exclude_grid) & is.na(exclude_random)){ result <- as.data.frame(matrix(NA, ncol = 3 * length(model_terms) + 5, nrow = 1)) @@ -450,6 +466,7 @@ results_res <- foreach(i = 1:nrow(all_model_terms), result[ , "R2"] <- summary(comparison_lm)$r.squared # if we are excluding years, this is the test of predicted data vs observed data # for this year (with which the model wasn't fitted) + # probably I could code this bit similarly for all if (!is.na(exclude_year)){ predictors_excluded_year_pred <- predictors_excluded_year predictors_excluded_year_pred$offset_term <- 0 @@ -472,7 +489,17 @@ results_res <- foreach(i = 1:nrow(all_model_terms), result[ , "R2_cross"] <- summary(cross_lm_grid)$r.squared } + if (!is.na(exclude_random)){ + predictors_excluded_grid_pred <- predictors_excluded_random + predictors_excluded_grid_pred$offset_term <- 0 + prediction_transect_excluded_grid <- predict.glm(res, + newdata = predictors_excluded_random, + type = "response") + cross_lm_random = lm(log(predictors_excluded_random$ou_dens+ 1) ~ + log(prediction_transect_excluded_random + 1)) + result[ , "R2_cross"] <- summary(cross_lm_random)$r.squared + } return(result) }