From b16a131d4c0fd3f49ccd3776fe61ddcf0be0039a Mon Sep 17 00:00:00 2001
From: Julian Sagebiel <julian.sagebiel@idiv.de>
Date: Thu, 24 Aug 2023 16:45:16 +0200
Subject: [PATCH] many changes in function to make valugaps data work

---
 Projects/SE_AGRI/parameters_SE Design-Agri.R |   2 +
 Projects/ValuGaps/code/GIS_data.R            |   2 +-
 Projects/ValuGaps/parameters_valugaps.R      |  11 +-
 functions.R                                  | 355 ++++++++++---------
 simulationcore_purrr.R                       |  11 +-
 5 files changed, 210 insertions(+), 171 deletions(-)

diff --git a/Projects/SE_AGRI/parameters_SE Design-Agri.R b/Projects/SE_AGRI/parameters_SE Design-Agri.R
index 1d51a9f..52e7994 100644
--- a/Projects/SE_AGRI/parameters_SE Design-Agri.R	
+++ b/Projects/SE_AGRI/parameters_SE Design-Agri.R	
@@ -14,6 +14,8 @@ bforeign = 0.3
 bdamage = 0.6
 bprice = 0.2
 
+
+
 manipulations = list(alt1.professional=     expr(alt1.initiator==1), 
                      alt2.professional=     expr(alt2.initiator==1),
                      alt1.expert      =     expr(alt1.initiator==2), 
diff --git a/Projects/ValuGaps/code/GIS_data.R b/Projects/ValuGaps/code/GIS_data.R
index c278b5f..ac73130 100644
--- a/Projects/ValuGaps/code/GIS_data.R
+++ b/Projects/ValuGaps/code/GIS_data.R
@@ -27,7 +27,7 @@ process_data <- function(data, name_prefix, cover_data) {
 #### Draw respondents ####
 # Load population density data for Germany
 
-density_raster <- rast("data/gis/BBSR_Landnutzung_Bevoelkerung/ewz250_2011_v1_multi.tif")
+density_raster <- rast("Projects/ValuGaps/data/gis/BBSR_Landnutzung_Bevoelkerung/ewz250_2011_v1_multi.tif")
 
 
 ##### Do plot of the pop density ####
diff --git a/Projects/ValuGaps/parameters_valugaps.R b/Projects/ValuGaps/parameters_valugaps.R
index b156f44..9df3ee3 100644
--- a/Projects/ValuGaps/parameters_valugaps.R
+++ b/Projects/ValuGaps/parameters_valugaps.R
@@ -96,7 +96,7 @@ protected_areas <- mask(protected_areas, vect(germany_geo_c))
 #### Define simulation parameters ####
 
 resps = 96  # number of respondents 
-nosim=2  # number of simulations to run (about 500 is minimum)
+nosim=3  # number of simulations to run (about 500 is minimum)
 #buffer_dist <- 15000
 
 buffer_dist_ls <- list(10000, 15000, 25000, 50000) # define radius for the respondents in meter
@@ -110,6 +110,7 @@ bc= 1.4
 bc2 = -0.0175
 bp=-0.03
 
+mani1 = expr(left_join(final_set, by="ID"))
 
 manipulations = list(
 #expr(across(c("HNV_on_corine", "Protected_on_corine"), ~replace_na(.,0)) ),
@@ -135,3 +136,11 @@ u<-list(
 # v1 =V.1~ basc + bb*alt1.b+ bc * alt1.c + bp * alt1.p , 
 # v2 =V.2~ basc + bb*alt2.b+ bc * alt2.c + bp * alt2.p ,
 # v3 =V.3~ 0)
+
+
+## this function will be called before the data is simulated
+sou_gis <- function() {
+  
+  source("Projects/ValuGaps/code/GIS_data.R")
+  
+}
diff --git a/functions.R b/functions.R
index 1ff2fc9..1feaa7e 100644
--- a/functions.R
+++ b/functions.R
@@ -1,165 +1,192 @@
-sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u ) {
- 
-  
-  require("tictoc")
-  require("readr")
-  require("psych")
-  require("dplyr")          
-  require("evd")           
-  require("tidyr")
-  require("kableExtra")
-  require("gridExtra")
-  require("stringr")
-  require("mixl")
-  require("furrr")
-  require("purrr")
-  require("ggplot2")
-  require("formula.tools")  
-   
-  
-  mnl_U <-paste(map_chr(u,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
-    str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" ,  "~" = "=", "\\." = "_" , " b" = " @b"  , "V."="U_", " alt"="$alt"))
-  
-  
-  
-  
-  
-  by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
-    # //! cur_data_all may get deprecated in favor of pick
-    # pick(everything()) %>%
-    cur_data_all() %>%
-      transmute(!!lhs(equation) := !!rhs(equation) )
-  } 
-  
-  
-
-
-  
-  simulate_choices <- function(data=database) {  #the part in dataset that needs to be repeated in each run
-
-    
-   # browser()    
-       
-    n=seq_along(1:length(utils))
-
-    print("DANGER")
-
-      data <-  data %>% 
-      group_by(ID) %>% 
-      mutate(
-        across(.cols=n,.fns = ~ rgumbel(setpp,loc=0, scale=1), .names = "{'e'}_{n}" ), 
-         across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>% 
-         mutate(CHOICE=max.col(.[,grep("U_",names(.))])
-      )   %>% 
-      as.data.frame()
-
-    
-
-    
-    return(data)
-    
-  } 
-  
-  estimate_sim <- function(run=1) {         #start loop
-    
-    cat(run)
-    
-    database <- simulate_choices() 
-    
-    
-    
-    
-    model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
-    
-    return(model)   
-    
-  }
-  
-  designs_all <- list() 
-  
-  design <- read_delim(designfile,delim = "\t",
-                       escape_double = FALSE,
-                       trim_ws = TRUE  , 
-                       col_select = c(-Design, -starts_with("...")) ,
-                       name_repair = "universal") %>% 
-    filter(!is.na(Choice.situation)) 
-  
-  
-  
-  nsets<-nrow(design)        
-  nblocks<-max(design$Block)
-  setpp <- nsets/nblocks      # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
-  #respondents <- replications*nblocks
-  replications <- respondents/nblocks
-  #browser()
-  
-  if(!exists("manipulations")) manipulations=list() 
-  
-  database<- design %>%
-    arrange(Block,Choice.situation) %>% 
-    slice(rep(row_number(), replications)) %>%    ## replicate design according to number of replications
-    mutate(RID = rep(1:respondents, each=setpp)) %>%  # create Respondent ID.
-    relocate(RID,`Choice.situation`) %>% 
-    mutate( !!! manipulations ,
-            map_dfc(utils,by_formula)   #our functions to create utility variables. They need to be entered as a formula list as an argument
-            
-    ) %>% 
-    rename(ID ="RID") %>%
-    rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% 
-    as.data.frame()
-  
-  
-  
-  
-  
-  database <- simulate_choices() 
-  
-  model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
-  
-  est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
-  
-  
-  availabilities <- mixl::generate_default_availabilities(
-    database, model_spec$num_utility_functions)
-  
-  plan(multisession, workers = 8)
-  
-  output<- 1:no_sim %>% map(estimate_sim)
-  
-  coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)]  %>%
-               tibble::rownames_to_column() %>%
-               pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% 
-    bind_rows(.id = "run")
-  
-  output[["summary"]] <-describe(coefs[,-1], fast = TRUE)
-  
-  output[["coefs"]] <-coefs
-  
-  pvals <- output[["coefs"]] %>% select(starts_with("rob_pval0"))
-  
-  output[["power"]] <- 100*table(apply(pvals,1,  function(x) all(x<0.05)))/nrow(pvals)
-  
-  
-  output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
-  
-  
-  print(kable(output[["summary"]],digits = 2, format = "rst"))
-  
-  
-  print(output[["power"]])
-  
-  
-  return(output)  
-  
-  
-}
-
-
-plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
-  plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
-    #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
-    geom_density(alpha=0.5) +
-    geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) +  ## this makes a vertical line of the mean
-    labs(x=feature, y = "Density")
-  plt + guides(fill=guide_legend(title=label_column))
+sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u ) {
+ 
+  
+  require("tictoc")
+  require("readr")
+  require("psych")
+  require("dplyr")          
+  require("evd")           
+  require("tidyr")
+  require("kableExtra")
+  require("gridExtra")
+  require("stringr")
+  require("mixl")
+  require("furrr")
+  require("purrr")
+  require("ggplot2")
+  require("formula.tools")  
+   
+  
+  mnl_U <-paste(map_chr(u,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
+    str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" ,  "~" = "=", "\\." = "_" , " b" = " @b"  , "V_"="U_", " alt"="$alt"))
+  
+ cat("mixl \n") 
+ print(mnl_U)
+ 
+ cat("\n Simulation \n")
+ 
+ print(u)
+ 
+  
+  
+  by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
+    # //! cur_data_all may get deprecated in favor of pick
+    # pick(everything()) %>%
+    cur_data_all() %>%
+      transmute(!!lhs(equation) := !!rhs(equation) )
+  } 
+  
+  
+
+
+  
+  simulate_choices <- function(data=datadet) {  #the part in dataset that needs to be repeated in each run
+cat("does sou_gis exist")
+   print( exists("sou_gis"))
+   # browser()    
+ 
+    if (exists("sou_gis") && is.function(sou_gis)) {
+      sou_gis()
+      
+      cat("source of gis has been done")
+    }    
+       
+   # source("Projects/ValuGaps/code/GIS_data.R")
+    
+    if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations
+       
+    n=seq_along(1:length(utils))    # number of utility functions
+
+    
+    
+    print("DANGER")
+
+    print(exists("final_set"))
+    
+    if(exists("final_set")) data = left_join(data,final_set, by = "ID")     
+    
+      data <-  data %>% 
+       # rename(ID="RID") %>% 
+      group_by(ID) %>% 
+
+         #rename(ID ="RID") %>%
+        #rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% 
+      mutate(!!! manipulations ,
+             map_dfc(utils,by_formula)) %>%    #our functions to create utility variables. They need to be entered as a formula list as an argument
+      rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% 
+               
+        mutate(across(.cols=n,.fns = ~ rgumbel(setpp,loc=0, scale=1), .names = "{'e'}_{n}" ), 
+         across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>% 
+         mutate(CHOICE=max.col(.[,grep("U_",names(.))])
+      )   %>% 
+      as.data.frame()
+
+    
+print("data has been made")
+
+cat(head(data$CHOICE))
+    
+    return(data)
+    
+  } 
+  
+  estimate_sim <- function(run=1) {         #start loop
+    
+    cat("This is Run number ", run)
+    
+    database <- simulate_choices(datadet) 
+    
+    
+   cat("This is the utility functions \n" , mnl_U) 
+    
+    model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
+    
+    return(model)   
+    
+  }
+  
+  designs_all <- list() 
+  
+  design <- read_delim(designfile,delim = "\t",
+                       escape_double = FALSE,
+                       trim_ws = TRUE  , 
+                       col_select = c(-Design, -starts_with("...")) ,
+                       name_repair = "universal") %>% 
+    filter(!is.na(Choice.situation)) 
+  
+  
+  
+  nsets<-nrow(design)        
+  nblocks<-max(design$Block)
+  setpp <- nsets/nblocks      # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
+  #respondents <- replications*nblocks
+  replications <- respondents/nblocks
+  #browser()
+  
+ 
+  
+  datadet<- design %>%
+    arrange(Block,Choice.situation) %>% 
+    slice(rep(row_number(), replications)) %>%    ## replicate design according to number of replications
+    mutate(ID = rep(1:respondents, each=setpp)) %>%  # create Respondent ID.
+    relocate(ID,`Choice.situation`) %>% 
+as.data.frame()
+  
+  
+  
+  
+  
+  database <- simulate_choices() 
+  
+  model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
+  
+  est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
+  
+  
+  availabilities <- mixl::generate_default_availabilities(
+    database, model_spec$num_utility_functions)
+  
+  plan(multisession, workers = 8)
+  
+  output<- 1:no_sim %>% map(estimate_sim)
+ 
+  
+  
+   
+  coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)]  %>%
+               tibble::rownames_to_column() %>%
+               pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% 
+    bind_rows(.id = "run")
+  
+  output[["summary"]] <-describe(coefs[,-1], fast = TRUE)
+  
+  output[["coefs"]] <-coefs
+  
+  pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0"))
+  
+  output[["power"]] <- 100*table(apply(pvals,1,  function(x) all(x<0.05)))/nrow(pvals)
+  
+  
+  output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
+  
+  
+  print(kable(output[["summary"]],digits = 2, format = "rst"))
+  
+  
+  print(output[["power"]])
+  
+  
+  return(output)  
+  
+  
+}
+
+
+plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
+  plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
+    #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
+    geom_density(alpha=0.5) +
+    geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) +  ## this makes a vertical line of the mean
+    labs(x=feature, y = "Density")
+  plt + guides(fill=guide_legend(title=label_column))
 } 
\ No newline at end of file
diff --git a/simulationcore_purrr.R b/simulationcore_purrr.R
index b95ce75..67f02e0 100644
--- a/simulationcore_purrr.R
+++ b/simulationcore_purrr.R
@@ -6,8 +6,9 @@ library("formula.tools")
 library(ggplot2)
 library(stringr)
 
-source("Projects/ValuGaps/parameters_valugaps.R")
-source(params$file)
+source("Projects/SE_AGRI/parameters_SE Design-Agri.R")
+#source("Projects/ValuGaps/parameters_valugaps.R",echo = TRUE)
+#source(params$file)
 
 
 designfile<-list.files(designpath,full.names = T)
@@ -39,7 +40,7 @@ powa <- map(all_designs, ~ .x$power)
 
 
 summaryall <- as.data.frame(map(all_designs, ~.x$summary)) %>% 
-  select(!ends_with("vars")) %>% 
+  dplyr::select(!ends_with("vars")) %>% 
   relocate(ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" )))
 
 coefall <- map(all_designs, ~ .x$coefs)
@@ -47,7 +48,7 @@ coefall <- map(all_designs, ~ .x$coefs)
 pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced
 
 s<-as.data.frame(coefall) %>% 
-  select(!matches("pval|run")) %>%
+  dplyr::select(!matches("pval|run")) %>%
   rename_with(~ sub("est_b", "", .x), everything()) %>%
 #  rename_with(~ sub("est_asc_", "asc", .x), everything()) %>%
   rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), everything() ) %>%   # rename attributes for reshape part 1
@@ -57,7 +58,7 @@ s<-as.data.frame(coefall) %>%
 
 p=list()
 
-for (att in names(select(s,-c("design","run")))) {
+for (att in names(dplyr::select(s,-c("design","run")))) {
   
   p[[att]] <- plot_multi_histogram(s,att,"design")
   
-- 
GitLab