functions.R

sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u[[1]] ) {
 
  
  require("tictoc")
  require("readr")
  require("psych")
  require("dplyr")          
  require("evd")           
  require("tidyr")
  require("kableExtra")
  require("gridExtra")
  require("stringr")
  require("mixl")
  require("furrr")
  require("purrr")
  require("ggplot2")
  require("formula.tools")  
  require("rlang")
   
  
  mnl_U <-paste(map_chr(utils,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
    str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" ,  "~" = "=", "\\." = "_" , " b" = " @b"  , "V_"="U_", " alt"="$alt"))
  
 cat("mixl \n") 
 print(mnl_U)
 
 cat("\n Simulation \n")
 
 print(u)
 
  
  by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
    # //! cur_data_all may get deprecated in favor of pick
    # pick(everything()) %>%
    cur_data_all() %>%
      transmute(!!lhs(equation) := !!rhs(equation) )
  } 
  
  
  simulate_choices <- function(data=datadet) {  #the part in dataset that needs to be repeated in each run
cat(" \n does sou_gis exist: ", exists("sou_gis"), "\n")

    if (exists("sou_gis") && is.function(sou_gis)) {
      sou_gis()
      
      cat("\n source of gis has been done \n")
    }    
       
   # source("Projects/ValuGaps/code/GIS_data.R")
    
    if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations
       
    n=seq_along(1:length(utils))    # number of utility functions

    
#browser()    


    cat("\n dataset final_set exists: ",exists("final_set"), "\n")
    
    if(exists("final_set")) data = left_join(data,final_set, by = "ID")   
    
    cat("\n decisiongroups exists: " ,exists("decisiongroups"))  
    
    if(exists("decisiongroups"))  {     ### create a new variable to classify decision groups.
      data = mutate(data,group = as.numeric(cut(row_number(),
                                 breaks = decisiongroups * n(), 
                                 labels = seq_along(decisiongroups[-length(decisiongroups)]),
                                 include.lowest = TRUE)))
      
      print(table(data$group))
    } else {
      
      data$group=1
    }
   

       data<- data %>% 
      group_by(ID) %>% 
      mutate(!!! manipulations) 
             

subsets<- split(data,data$group)     
   
subsets <-  map2(.x = seq_along(u),.y = subsets,
                 ~ mutate(.y,map_dfc(u[[.x]],by_formula)))
 
data <-bind_rows(subsets)           
 
   data<- data %>%       
     rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% 
        mutate(across(.cols=n,.fns = ~ rgumbel(setpp,loc=0, scale=1), .names = "{'e'}_{n}" ), 
         across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>% 
         mutate(CHOICE=max.col(.[,grep("U_",names(.))])
      )   %>% 
      as.data.frame()

       
print("\n data has been made \n")

cat("\n First few observations \n ") 
print(head(data))
 cat("\n \n ")   
    return(data)
    
  } 
  
  estimate_sim <- function(run=1) {         #start loop
    
    cat("This is Run number ", run)
    
    database <- simulate_choices(datadet) 
    
    
   cat("This is the utility functions \n" , mnl_U) 
    
    model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
    
    return(model)   
    
  }
  
  designs_all <- list() 
  
  design <- read_delim(designfile,delim = "\t",
                       escape_double = FALSE,
                       trim_ws = TRUE  , 
                       col_select = c(-Design, -starts_with("...")) ,
                       name_repair = "universal" , show_col_types = FALSE) %>% 
    filter(!is.na(Choice.situation))
  nsets<-nrow(design)        
  nblocks<-max(design$Block)
  setpp <- nsets/nblocks      # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
  #respondents <- replications*nblocks
  replications <- respondents/nblocks
  #browser()
  
 
  datadet<- design %>%
    arrange(Block,Choice.situation) %>% 
    slice(rep(row_number(), replications)) %>%    ## replicate design according to number of replications
    mutate(ID = rep(1:respondents, each=setpp)) %>%  # create Respondent ID.
    relocate(ID,`Choice.situation`) %>% 
as.data.frame()
  
  
  database <- simulate_choices() 
  
  model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
  
  est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
  
  
  availabilities <- mixl::generate_default_availabilities(
    database, model_spec$num_utility_functions)
  
  plan(multisession, workers = 8)
  
  output<- 1:no_sim %>% map(estimate_sim)
 
  
  coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)]  %>%
               tibble::rownames_to_column() %>%
               pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% 
    bind_rows(.id = "run")
  
  output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE)
  
  output[["coefs"]] <-coefs
  
  pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0"))
  
  output[["power"]] <- 100*table(apply(pvals,1,  function(x) all(x<0.05)))/nrow(pvals)
  
  
  output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
  
  
  print(kable(output[["summary"]],digits = 2, format = "rst"))
  
  
  print(output[["power"]])
  
  
  return(output)  
  
  
}


plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
  plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
    #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
    geom_density(alpha=0.5) +
    geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) +  ## this makes a vertical line of the mean
    labs(x=feature, y = "Density")
  plt + guides(fill=guide_legend(title=label_column))
}