#' Simulate and estimate choices #' #' @param designfile path to a file containing a design. #' @param no_sim Number of runs i.e. how often do you want the simulation to be repeated #' @param respondents Number of respondents. How many respondents do you want to simulate in each run. #' @param u A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal), this group still has to be in a list in the u list. As a convention name beta coefficients starting with a lower case "b" #' @inheritParams readdesign #' @inheritParams simulate_choices #' @param chunks The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs. #' @param utility_transform_type How the utility function you entered is transformed to the utility function required for mixl. You can use the classic way (simple) where parameters have to start with "b" and variables with "alt" or the more flexible (but potentially error prone) way (exact) where parameters and variables are matched exactly what how the are called in the dataset and in the bcoeff list. Default is "simple". In the long run, simple will be deleted, as exact should be downwards compatible. #' @return a list with all information on the run #' @export #' #' @examples \dontrun{ simchoice(designfile="somefile", no_sim=10, respondents=330, #' mnl_U,u=u[[1]] ,designtype="ngene")} #' sim_choice <- function(designfile, no_sim = 10, respondents = 330, u ,designtype = NULL, destype = NULL, bcoeff, decisiongroups=c(0,1), manipulations = list() , estimate, chunks=1, utility_transform_type = "simple") { if (utility_transform_type == "simple") { message("'simple' is deprecated and will be removed in the future. Use 'exact' instead.") } # Create a lookup table for bcoeff names bcoeff_lookup <- tibble::tibble( original = names(bcoeff), modified = stringr::str_replace_all(names(bcoeff), "_", "") ) # Replace all underscores in bcoeff names with an empty string names(bcoeff) <- bcoeff_lookup$modified #### Function to simulate and estimate #### estimate_sim <- function(run=1) { #start loop cat("This is Run number ", run) database <- simulate_choices(datadet, utility = u, setspp=setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations) model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database) return(model) } # Empty list where to store all designs later on designs_all <- list() #### Print some messages #### cat("Utility function used in simulation, ie the true utility: \n\n") print(u) #### Read in the design file and set core variables #### design<- readdesign(design = designfile, designtype = designtype, destype = destype) if (!("Block" %in% colnames(design))) design$Block=1 # If no Blocks exist, create a variable Blocks to indicate it is only one block nsets<-nrow(design) nblocks<-max(design$Block) setpp <- nsets/nblocks # Choice Sets per respondent replications <- respondents/nblocks ## if replications is non int, assign unevenly datadet<- design %>% dplyr::arrange(Block,Choice.situation) %>% dplyr::slice(rep(dplyr::row_number(), replications)) %>% ## replicate design according to number of replications dplyr::mutate(ID = rep(1:respondents, each=setpp)) %>% # create Respondent ID. dplyr::relocate(ID,`Choice.situation`) %>% as.data.frame() database <- simulate_choices(data=datadet, utility = u, setspp = setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations) ### start estimation if(estimate==TRUE) { #### Function that transforms user written utility for simulation into utility function for mixl. transform_util <- function() { mnl_U <-paste(purrr::map_chr(u[[1]],as.character,keep.source.attr = TRUE),collapse = "",";") %>% stringr::str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"=" $alt")) } transform_util2 <- function() { # Exclude columns that match "V_<any integer>" or "U_<any integer>" pattern relevant_database_vars <- setdiff( names(database), c(grep("^(V_|U_|e_)\\d+$", names(database), value = TRUE), "CHOICE") ) mnl_U <- paste( purrr::map_chr(u[[1]], as.character, keep.source.attr = TRUE), collapse = "", ";" ) %>% # Replace coefficients with exact matches stringr::str_replace_all(stats::setNames( paste0("@", names(bcoeff)), paste0("(?<![._a-zA-Z0-9])", names(bcoeff), "(?![._a-zA-Z0-9-])") )) %>% # General transformations stringr::str_replace_all(c( `priors\\["` = "", `"\\]` = "", `~` = "=", # `\\.` = "_", ## can be deleted when everything works `V.` = "U_" ## was originally V_ )) %>% # Replace only relevant database variables stringr::str_replace_all(stats::setNames( paste0("$", relevant_database_vars), paste0("(?<![._a-zA-Z0-9])", relevant_database_vars, "(?![._a-zA-Z0-9-])") )) %>% # Clean up duplicate symbols stringr::str_replace_all(c(`@@` = "@", "\\$\\$" = "$")) return(mnl_U) } # transform utility function to mixl format # transform utility function to mixl format mnl_U <- switch( utility_transform_type, "simple" = transform_util(), "exact" = transform_util2(), stop("Invalid utility_transform_type. Use 'simple' or 'exact'.") ) #### Print selected utility function cat("Transformed utility function (type:", utility_transform_type, "):\n") print(mnl_U) # specify model for mixl estimation model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F) est=stats::setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names) availabilities <- mixl::generate_default_availabilities( database, model_spec$num_utility_functions) if (chunks >1) { # Calculate the size of each chunk chunk_size <- ceiling(no_sim / chunks) # Initialize the starting point for the first chunk start_point <- 1 for (i in 1:chunks) { # Calculate the end point for the current chunk end_point <- start_point + chunk_size - 1 # Ensure we do not go beyond the total number of simulations if (end_point > no_sim) { end_point <- no_sim } # Run simulations for the current chunk output <- start_point:end_point %>% purrr::map(estimate_sim) saveRDS(output,paste0("tmp_",i,".RDS")) rm(output) gc() # Print or save the output as required print(paste("Results for chunk", i, "from", start_point, "to", end_point)) # Update the start point for the next chunk start_point <- end_point + 1 # Break the loop if the end point reaches or exceeds no_sim if (start_point > no_sim) break } output <- list() # Initialize the list to store all outputs # Assuming the files are named in sequence as 'tmp_1.RDS', 'tmp_2.RDS', ..., 'tmp_n.RDS' for (i in 1:chunks) { # Load each RDS file file_content <- readRDS(paste0("tmp_", i, ".RDS")) file.remove(paste0("tmp_", i, ".RDS")) # Append the contents of each file to the all_outputs list output <- c(output, file_content) } } else { output <- 1:no_sim %>% purrr::map(estimate_sim) } coefs<-purrr::map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>% tibble::rownames_to_column() %>% tidyr::pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% dplyr::bind_rows(.id = "run") output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE) output[["coefs"]] <-coefs pvals <- output[["coefs"]] %>% dplyr::select(dplyr::starts_with("rob_pval0")) output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals) output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents) print(kableExtra::kable(output[["summary"]],digits = 2, format = "rst")) print(output[["power"]]) return(output) } else { output<- 1:no_sim %>% purrr::map(~ simulate_choices(datadet, utility = u, setspp=setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations)) return(output) } }