From 1f944e41f56d34dab0df6296e97a4f072f9473f9 Mon Sep 17 00:00:00 2001
From: Maria Voigt <maria.voigt@idiv.de>
Date: Tue, 18 Apr 2017 13:35:35 +0200
Subject: [PATCH] fixing bug in cross-validation and including it back again

also for the non-exclude scenario
---
 src/model_fitting/abundance_model.R | 35 ++++++++++++++++-------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/model_fitting/abundance_model.R b/src/model_fitting/abundance_model.R
index 07ec685..1515d4e 100644
--- a/src/model_fitting/abundance_model.R
+++ b/src/model_fitting/abundance_model.R
@@ -367,26 +367,27 @@ if(is_verbose){print(paste("8. Start running models", Sys.time()))}
 
 
 
+save.image(file.path(outdir, "image_before_fail.RData"))
 
 results_res <- foreach(i = 1:nrow(all_model_terms),
                        .combine = rbind) %dopar%{
     # make results dataframe
     if (is.na(exclude_year)){
       result <- as.data.frame(matrix(NA, ncol = 3 *
-                                       length(model_terms) + 4, #5
+                                       length(model_terms) + 5,
                                      nrow = 1))
-                           names(result) <- c("model", paste("coeff", model_terms, sep = "_"),
+       names(result) <- c("model", paste("coeff", model_terms, sep = "_"),
                                               paste("P",model_terms,sep = "_"),
                                               paste("SE", model_terms, sep = "_"),
-                                              "theta", "SE.theta", "AIC" #, "R2"
+                                              "theta", "SE.theta", "AIC", "R2"
                            )} else {
       result <- as.data.frame(matrix(NA, ncol = 3 *
-                                       length(model_terms) +5, #+ 6,
+                                       length(model_terms) + 6,
                                                           nrow = 1))
       names(result) <- c("model", paste("coeff", model_terms, sep = "_"),
                                               paste("P",model_terms,sep = "_"),
                                               paste("SE", model_terms, sep = "_"),
-                                              "theta", "SE.theta", "AIC", #"R2",
+                                              "theta", "SE.theta", "AIC", "R2",
                                               "R2_cross")
                          }
     # make model
@@ -424,25 +425,27 @@ results_res <- foreach(i = 1:nrow(all_model_terms),
 
     # what do I need to do
     # I need to get only the prediction estimates columns that are true in
-    # all_model_terms[i, ]==1
-    # predictors_obs_pred <- predictors_obs
-    # predictors_obs_pred$offset_term <- 0
+    #all_model_terms[i, ]==1
+    predictors_obs_pred <- predictors_obs
+    predictors_obs_pred$offset_term <- 0
     # Comparison of observed data vs prediction
-    #  prediction_per_transect <-  predict.glm(res,
-    #                                        newdata = predictors_obs_pred,
-    #                                        type = "response")
+    prediction_per_transect <-  predict.glm(res,
+                                            newdata = predictors_obs_pred,
+                                            type = "response")
 
-    # comparison_lm = lm(log(predictors_obs$nr_ou_per_km2 + 1) ~
-    #                    log(prediction_per_transect + 1) )
+     comparison_lm = lm(log(predictors_obs$ou_dens + 1) ~
+                        log(prediction_per_transect + 1) )
 
-    #    result[ , "R2"] <- summary(comparison_lm)$r.squared
+        result[ , "R2"] <- summary(comparison_lm)$r.squared
     # if we are excluding years, this is the test of predicted data vs observed data
     # for this year (with which the model wasn't fitted)
-    if (!is.na(exclude_year)){
+  if (!is.na(exclude_year)){
+  predictors_excluded_year_pred <- predictors_excluded_year
+  predictors_excluded_year_pred$offset_term <- 0
   prediction_transect_excluded_year <-  predict.glm(res,
                                newdata = predictors_excluded_year,
 			       type = "response")
-  cross_lm = lm(log(predictors_excluded_year$nr_ou_per_km2 + 1) ~
+  cross_lm = lm(log(predictors_excluded_year$ou_dens+ 1) ~
                      log(prediction_transect_excluded_year + 1))
 
   result[ , "R2_cross"] <- summary(cross_lm)$r.squared
-- 
GitLab