##test sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u[[1]] ,destype) { require("tictoc") require("readr") require("psych") require("dplyr") require("evd") require("tidyr") require("kableExtra") require("gridExtra") require("stringr") require("mixl") require("furrr") require("purrr") require("ggplot2") require("formula.tools") require("rlang") readdesign <- function(design = designfile, designtype=destype) { design <- switch(designtype, "ngene" =readr::read_delim(design,delim = "\t", escape_double = FALSE, trim_ws = TRUE , col_select = c(-Design, -starts_with("...")) , name_repair = "universal" , show_col_types = FALSE) %>% filter(!is.na(Choice.situation)), "spdesign" =as.data.frame(readRDS(design)) %>% mutate(Choice.situation=1:n()) %>% rename_with(~ stringr::str_replace(.,pattern = "_","\\."), everything()), stop("Invalid value for design. Please provide either 'ngene' or 'spdesign'.") ) } mnl_U <-paste(map_chr(utils,as.character,keep.source.attr = TRUE),collapse = "",";") %>% str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"="$alt")) cat("mixl \n") print(mnl_U) cat("\n Simulation \n") print(u) by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function # //! cur_data_all may get deprecated in favor of pick # pick(everything()) %>% cur_data_all() %>% transmute(!!lhs(equation) := !!rhs(equation) ) } simulate_choices <- function(data=datadet) { #the part in dataset that needs to be repeated in each run cat(" \n does sou_gis exist: ", exists("sou_gis"), "\n") if (exists("sou_gis") && is.function(sou_gis)) { sou_gis() cat("\n source of gis has been done \n") } # source("Projects/ValuGaps/code/GIS_data.R") if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations n=seq_along(1:length(utils)) # number of utility functions #browser() cat("\n dataset final_set exists: ",exists("final_set"), "\n") if(exists("final_set")) data = left_join(data,final_set, by = "ID") cat("\n decisiongroups exists: " ,exists("decisiongroups")) if(exists("decisiongroups")) { ### create a new variable to classify decision groups. data = mutate(data,group = as.numeric(cut(row_number(), breaks = decisiongroups * n(), labels = seq_along(decisiongroups[-length(decisiongroups)]), include.lowest = TRUE))) print(table(data$group)) } else { data$group=1 } data<- data %>% group_by(ID) %>% mutate(!!! manipulations) subsets<- split(data,data$group) subsets <- map2(.x = seq_along(u),.y = subsets, ~ mutate(.y,map_dfc(u[[.x]],by_formula))) data <-bind_rows(subsets) data<- data %>% rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% mutate(across(.cols=n,.fns = ~ rgumbel(setpp,loc=0, scale=1), .names = "{'e'}_{n}" ), across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>% mutate(CHOICE=max.col(.[,grep("U_",names(.))]) ) %>% as.data.frame() print("\n data has been made \n") cat("\n First few observations \n ") print(head(data)) cat("\n \n ") return(data) } estimate_sim <- function(run=1) { #start loop cat("This is Run number ", run) database <- simulate_choices(datadet) cat("This is the utility functions \n" , mnl_U) model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,) return(model) } designs_all <- list() design<- readdesign() if (!exists("design$Block")) design$Block=1 nsets<-nrow(design) nblocks<-max(design$Block) setpp <- nsets/nblocks # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets #respondents <- replications*nblocks replications <- respondents/nblocks #browser() datadet<- design %>% arrange(Block,Choice.situation) %>% slice(rep(row_number(), replications)) %>% ## replicate design according to number of replications mutate(ID = rep(1:respondents, each=setpp)) %>% # create Respondent ID. relocate(ID,`Choice.situation`) %>% as.data.frame() database <- simulate_choices() model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F) est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names) availabilities <- mixl::generate_default_availabilities( database, model_spec$num_utility_functions) plan(multisession, workers = 8) output<- 1:no_sim %>% map(estimate_sim) coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>% tibble::rownames_to_column() %>% pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% bind_rows(.id = "run") output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE) output[["coefs"]] <-coefs pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0")) output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals) output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents) print(kable(output[["summary"]],digits = 2, format = "rst")) print(output[["power"]]) return(output) } plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) + #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") + geom_density(alpha=0.5) + geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) + ## this makes a vertical line of the mean labs(x=feature, y = "Density") plt + guides(fill=guide_legend(title=label_column)) }