From 8c6ec23cabcd4e368722c93d5cedee4b64cee5ea Mon Sep 17 00:00:00 2001
From: Maria Voigt <maria.voigt@idiv.de>
Date: Mon, 13 Feb 2017 16:05:43 +0100
Subject: [PATCH] transfer scaling into function and debugging

---
 src/model_fitting/abundance_model.R | 83 ++++++++++-------------------
 1 file changed, 29 insertions(+), 54 deletions(-)

diff --git a/src/model_fitting/abundance_model.R b/src/model_fitting/abundance_model.R
index f8cceec..edff02e 100644
--- a/src/model_fitting/abundance_model.R
+++ b/src/model_fitting/abundance_model.R
@@ -102,6 +102,7 @@ if(is_verbose){print(paste("indir_fun", indir_fun))}
 cl <- makeForkCluster(outfile = "")
 registerDoParallel(cl)
 
+source(file.path(indir_fun, "project_functions/scale.predictors.R"))
 source(file.path(indir_fun, "roger_functions/rogers_model_functions.R"))
 source(file.path(indir_fun, "generic/path.to.current.R"))
 source(file.path(indir_fun, "roger_functions/aic_c_fac.r"))
@@ -171,17 +172,15 @@ predictor_names_for_scaling <- c( "dem", "slope", "temp_mean", "rain_dry", "rain
                                   "road_dens", "distance_PA", "fire_dens", "deforestation_hansen",
                                   "deforestation_gaveau", "plantation_distance", "pulp_distance", "palm_distance",
                                   "dom_T_OC", "dom_T_PH")
-# predictors used in model
-predictor_names <- c("year", "temp_mean", "rain_var", "rain_dry", "dom_T_OC",
-                    "peatswamp", "lowland_forest",
-                    "lower_montane_forest", "deforestation_hansen",
-                    "human_pop_dens", "ou_killing_prediction",
-                    "perc_muslim" )
 
+# additional predictors that have to be scaled: year and x- and y-center
+predictor_names_add <- c("year", "x_center", "y_center")
+
+
+# prepare predictors data-frame
 predictors <- dplyr::select(predictors, id, predictor, unscaled_year = year,
                             unscaled_value = value)
 
-
 # need to get rid of occurrence data
 predictors <- predictors %>%
   inner_join(transects, by = "id")
@@ -192,54 +191,35 @@ if (include_aerial == F){
   predictors <- filter(predictors, group != "aerial")
 }
 
-# SCALE PREDICTORD
-for (predictor_name in predictor_names_for_scaling){
-  predictors[predictors$predictor == predictor_name,
-             "value" ] <-
-    as.numeric(as.vector(scale(predictors[predictors$predictor == predictor_name,
-                                          "unscaled_value"])))
-}
-
-
-predictors$year <- as.numeric(as.vector(scale(predictors[ , "unscaled_year"])))
-
-
-
 # delete all rows that have zero
 if(is_verbose){print("how many rows with na in scaled_value")
-  nrow(predictors[is.na(predictors$value),  ])}
+  nrow(predictors[is.na(predictors$unscaled_value),  ])}
 # deleting is.na values here
-predictors <- predictors[!is.na(predictors$value), ]
-
-geography <- dplyr::select(geography, -c(year))
-
-# Rename here1
-predictors_obs <- predictors %>%
-  dplyr::filter(predictor %in% predictor_names_for_scaling)%>%
-  dcast(id + year ~ predictor,  value.var = "value") %>%
-  inner_join(geography, by = "id")%>%
+predictors <- predictors[!is.na(predictors$unscaled_value), ]
+
+# scale predictors
+predictors_obs <- scale.predictors.observation(predictor_names_for_scaling,
+                                   predictor_names_add,
+                                   predictors,
+                                   geography)
+
+predictors_obs <- geography %>%
+  dplyr::select(-c(year,
+                   unscaled_x_center,
+                   unscaled_y_center)) %>%
   dplyr::select(-group) %>%
-  inner_join(transects, by = "id" )
-
-
-
-predictors_obs_unscaled <- predictors %>%
-  dplyr::filter(predictor %in% predictor_names_for_scaling)%>%
-  dcast(id + unscaled_year ~ predictor,  value.var = "unscaled_value")
+  inner_join(transects, by = "id") %>%
+  inner_join(predictors_obs, by = "id")
 
 
-names(predictors_obs_unscaled)[-c(1,2)] <- paste0("unscaled_",
-                                                  names(predictors_obs_unscaled)[-c(1,2)])
 
-predictors_obs <- left_join(predictors_obs, predictors_obs_unscaled, by = "id")
 
-
-# scale x and y center
-predictors_obs$x_center <- as.numeric(as.vector(scale(predictors_obs[ ,
-                                                                      "unscaled_x_center"])))
-
-predictors_obs$y_center <- as.numeric(as.vector(scale(predictors_obs[ ,
-                                                                      "unscaled_y_center"])))
+# predictors used in model
+predictor_names <- c("year", "temp_mean", "rain_var", "rain_dry", "dom_T_OC",
+                     "peatswamp", "lowland_forest",
+                     "lower_montane_forest", "deforestation_hansen",
+                     "human_pop_dens", "ou_killing_prediction",
+                     "perc_muslim" )
 
 
 #  ou density and offset term for ground and absence transects
@@ -276,10 +256,6 @@ predictors_obs <- aerial_predictors_obs %>%
 # bind the two together
 predictors_obs <- predictors_obs %>%
   arrange(id) %>%
-  dplyr::select(id, group, x_start:LU, length_km:nest_decay,
-                year, deforestation_gaveau:temp_mean, x_center, y_center,
-                unscaled_year:unscaled_temp_mean, unscaled_x_center,
-                unscaled_y_center, ou_dens, offset_term) %>%
   as.data.frame(.)
 
 
@@ -287,9 +263,8 @@ if(is_verbose){print("look at predictors_obs")
   str(predictors_obs)
   summary(predictors_obs)}
 
-# save this
-# save the relevant output for the prediction and the validation
-saveRDS(predictors_obs, file = file.path(outdir, paste0("predictors_obs_",
+# save the relevant output for the prediction and the validation (USED IN THESE SCRIPTS)
+saveRDS(predictors_obs, file = file.path(outdir, paste0("predictors_observation_scaled_",
                                                           name_suffix,
                                                           Sys.Date(), ".rds")))
 
-- 
GitLab