Skip to content
Snippets Groups Projects
Commit a40f1dc1 authored by Maria Voigt's avatar Maria Voigt
Browse files

adding random crossvalidation

parent ae2019bf
No related branches found
No related tags found
No related merge requests found
...@@ -38,6 +38,9 @@ option_list <- list ( ...@@ -38,6 +38,9 @@ option_list <- list (
default = NA, help = "year to exclude", metavar = "2015"), default = NA, help = "year to exclude", metavar = "2015"),
make_option("--exclude-grid", dest = "exclude_grid", type = "integer", make_option("--exclude-grid", dest = "exclude_grid", type = "integer",
default = NA, help = "index of grid-cells to exclude", metavar = "1"), default = NA, help = "index of grid-cells to exclude", metavar = "1"),
make_option("--exclude-random", dest = "exclude_random", type = "integer",
default = NA, help = "randomly excluding the percentage given",
metavar = "20"),
make_option("--include-aerial", make_option("--include-aerial",
dest = "include_aerial", action="store_true", dest = "include_aerial", action="store_true",
default=FALSE, help="include aerial transects"), default=FALSE, help="include aerial transects"),
...@@ -95,6 +98,9 @@ if(is_verbose){print(paste("exclude year", exclude_year))} ...@@ -95,6 +98,9 @@ if(is_verbose){print(paste("exclude year", exclude_year))}
exclude_grid <- options$exclude_grid exclude_grid <- options$exclude_grid
if(is_verbose){print(paste("exclude grid", exclude_grid))} if(is_verbose){print(paste("exclude grid", exclude_grid))}
exclude_random <- options$exclude_random
if(is_verbose){print(paste("exclude random", exclude_random))}
#---------# #---------#
# Globals # # Globals #
#---------# #---------#
...@@ -285,6 +291,16 @@ if (!is.na(exclude_grid)){ ...@@ -285,6 +291,16 @@ if (!is.na(exclude_grid)){
predictors_obs <- predictors_obs[predictors_obs$grid_id != exclude_grid, ] predictors_obs <- predictors_obs[predictors_obs$grid_id != exclude_grid, ]
} }
if (!is.na(exclude_random)){
# choose X percent random grid_cells from all cells
# save them in predictors_excluded random and exclude them from the others
ids_to_exclude <- sample(predictors_obs$id,
size = nrow(predictors_obs)/100 * exclude_random,
replace = FALSE)
predictors_excluded_random <- predictors_obs[predictors_obs$id %in% ids_to_exclude, ]
predictors_obs <- predictors_obs[!predictors_obs$id %in% ids_to_exclude, ]
}
# also we increase maxit for the two cases, # also we increase maxit for the two cases,
# because then slightly less data # because then slightly less data
if (is.na(exclude_grid) & is.na(exclude_year)){ if (is.na(exclude_grid) & is.na(exclude_year)){
...@@ -384,7 +400,7 @@ if(is_verbose){print(paste("8. Start running models", Sys.time()))} ...@@ -384,7 +400,7 @@ if(is_verbose){print(paste("8. Start running models", Sys.time()))}
results_res <- foreach(i = 1:nrow(all_model_terms), results_res <- foreach(i = 1:nrow(all_model_terms),
.combine = rbind) %dopar%{ .combine = rbind) %dopar%{
# make results dataframe # make results dataframe
if (is.na(exclude_year) & is.na(exclude_grid)){ if (is.na(exclude_year) & is.na(exclude_grid) & is.na(exclude_random)){
result <- as.data.frame(matrix(NA, ncol = 3 * result <- as.data.frame(matrix(NA, ncol = 3 *
length(model_terms) + 5, length(model_terms) + 5,
nrow = 1)) nrow = 1))
...@@ -450,6 +466,7 @@ results_res <- foreach(i = 1:nrow(all_model_terms), ...@@ -450,6 +466,7 @@ results_res <- foreach(i = 1:nrow(all_model_terms),
result[ , "R2"] <- summary(comparison_lm)$r.squared result[ , "R2"] <- summary(comparison_lm)$r.squared
# if we are excluding years, this is the test of predicted data vs observed data # if we are excluding years, this is the test of predicted data vs observed data
# for this year (with which the model wasn't fitted) # for this year (with which the model wasn't fitted)
# probably I could code this bit similarly for all
if (!is.na(exclude_year)){ if (!is.na(exclude_year)){
predictors_excluded_year_pred <- predictors_excluded_year predictors_excluded_year_pred <- predictors_excluded_year
predictors_excluded_year_pred$offset_term <- 0 predictors_excluded_year_pred$offset_term <- 0
...@@ -472,7 +489,17 @@ results_res <- foreach(i = 1:nrow(all_model_terms), ...@@ -472,7 +489,17 @@ results_res <- foreach(i = 1:nrow(all_model_terms),
result[ , "R2_cross"] <- summary(cross_lm_grid)$r.squared result[ , "R2_cross"] <- summary(cross_lm_grid)$r.squared
} }
if (!is.na(exclude_random)){
predictors_excluded_grid_pred <- predictors_excluded_random
predictors_excluded_grid_pred$offset_term <- 0
prediction_transect_excluded_grid <- predict.glm(res,
newdata = predictors_excluded_random,
type = "response")
cross_lm_random = lm(log(predictors_excluded_random$ou_dens+ 1) ~
log(prediction_transect_excluded_random + 1))
result[ , "R2_cross"] <- summary(cross_lm_random)$r.squared
}
return(result) return(result)
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment