Code owners
Assign users and groups as approvers for specific file changes. Learn more.
sim_choice.R 8.41 KiB
#' Simulate and estimate choices
#'
#' @param designfile path to a file containing a design.
#' @param no_sim Number of runs i.e. how often do you want the simulation to be repeated
#' @param respondents Number of respondents. How many respondents do you want to simulate in each run.
#' @param u A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal), this group still has to be in a list in the u list. As a convention name beta coefficients starting with a lower case "b"
#' @inheritParams readdesign
#' @inheritParams simulate_choices
#' @param chunks The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.
#' @param utility_transform_type How the utility function you entered is transformed to the utility function required for mixl. You can use the classic way (simple) where parameters have to start with "b" and variables with "alt" or the more flexible (but potentially error prone) way (exact) where parameters and variables are matched exactly what how the are called in the dataset and in the bcoeff list. Default is "simple". In the long run, simple will be deleted, as exact should be downwards compatible.
#' @return a list with all information on the run
#' @export
#'
#' @examples \dontrun{ simchoice(designfile="somefile", no_sim=10, respondents=330,
#' mnl_U,u=u[[1]] ,designtype="ngene")}
#'
sim_choice <- function(designfile, no_sim = 10, respondents = 330, u ,designtype = NULL, destype = NULL, bcoeff, decisiongroups=c(0,1), manipulations = list() , estimate, chunks=1, utility_transform_type = "simple") {
if (utility_transform_type == "simple") {
message("'simple' is deprecated and will be removed in the future. Use 'exact' instead.")
}
# Create a lookup table for bcoeff names
bcoeff_lookup <- tibble::tibble(
original = names(bcoeff),
modified = stringr::str_replace_all(names(bcoeff), "_", "")
)
# Replace all underscores in bcoeff names with an empty string
names(bcoeff) <- bcoeff_lookup$modified
#### Function to simulate and estimate ####
estimate_sim <- function(run=1) { #start loop
cat("This is Run number ", run)
database <- simulate_choices(datadet, utility = u, setspp=setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations)
model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database)
return(model)
}
# Empty list where to store all designs later on
designs_all <- list()
#### Print some messages ####
cat("Utility function used in simulation, ie the true utility: \n\n")
print(u)
#### Read in the design file and set core variables ####
design<- readdesign(design = designfile, designtype = designtype, destype = destype)
if (!("Block" %in% colnames(design))) design$Block=1 # If no Blocks exist, create a variable Blocks to indicate it is only one block
nsets<-nrow(design)
nblocks<-max(design$Block)
setpp <- nsets/nblocks # Choice Sets per respondent
replications <- respondents/nblocks
## if replications is non int, assign unevenly
datadet<- design %>%
dplyr::arrange(Block,Choice.situation) %>%
dplyr::slice(rep(dplyr::row_number(), replications)) %>% ## replicate design according to number of replications
dplyr::mutate(ID = rep(1:respondents, each=setpp)) %>% # create Respondent ID.
dplyr::relocate(ID,`Choice.situation`) %>%
as.data.frame()
database <- simulate_choices(data=datadet, utility = u, setspp = setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations)
### start estimation
if(estimate==TRUE) {
#### Function that transforms user written utility for simulation into utility function for mixl.
transform_util <- function() {
mnl_U <-paste(purrr::map_chr(u[[1]],as.character,keep.source.attr = TRUE),collapse = "",";") %>%
stringr::str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"=" $alt"))
}
transform_util2 <- function() {
# Exclude columns that match "V_<any integer>" or "U_<any integer>" pattern
relevant_database_vars <- setdiff(
names(database),
c(grep("^(V_|U_|e_)\\d+$", names(database), value = TRUE), "CHOICE")
)
mnl_U <- paste(
purrr::map_chr(u[[1]], as.character, keep.source.attr = TRUE),
collapse = "",
";"
) %>%
# Replace coefficients with exact matches
stringr::str_replace_all(stats::setNames(
paste0("@", names(bcoeff)),
paste0("(?<![._a-zA-Z0-9])", names(bcoeff), "(?![._a-zA-Z0-9-])")
)) %>%
# General transformations
stringr::str_replace_all(c(
`priors\\["` = "",
`"\\]` = "",
`~` = "=",
# `\\.` = "_", ## can be deleted when everything works
`V.` = "U_" ## was originally V_
)) %>%
# Replace only relevant database variables
stringr::str_replace_all(stats::setNames(
paste0("$", relevant_database_vars),
paste0("(?<![._a-zA-Z0-9])", relevant_database_vars, "(?![._a-zA-Z0-9-])")
)) %>%
# Clean up duplicate symbols
stringr::str_replace_all(c(`@@` = "@", "\\$\\$" = "$"))
return(mnl_U)
}
# transform utility function to mixl format
# transform utility function to mixl format
mnl_U <- switch(
utility_transform_type,
"simple" = transform_util(),
"exact" = transform_util2(),
stop("Invalid utility_transform_type. Use 'simple' or 'exact'.")
)
#### Print selected utility function
cat("Transformed utility function (type:", utility_transform_type, "):\n")
print(mnl_U)
# specify model for mixl estimation
model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
est=stats::setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
availabilities <- mixl::generate_default_availabilities(
database, model_spec$num_utility_functions)
if (chunks >1) {
# Calculate the size of each chunk
chunk_size <- ceiling(no_sim / chunks)
# Initialize the starting point for the first chunk
start_point <- 1
for (i in 1:chunks) {
# Calculate the end point for the current chunk
end_point <- start_point + chunk_size - 1
# Ensure we do not go beyond the total number of simulations
if (end_point > no_sim) {
end_point <- no_sim
}
# Run simulations for the current chunk
output <- start_point:end_point %>% purrr::map(estimate_sim)
saveRDS(output,paste0("tmp_",i,".RDS"))
rm(output)
gc()
# Print or save the output as required
print(paste("Results for chunk", i, "from", start_point, "to", end_point))
# Update the start point for the next chunk
start_point <- end_point + 1
# Break the loop if the end point reaches or exceeds no_sim
if (start_point > no_sim) break
}
output <- list() # Initialize the list to store all outputs
# Assuming the files are named in sequence as 'tmp_1.RDS', 'tmp_2.RDS', ..., 'tmp_n.RDS'
for (i in 1:chunks) {
# Load each RDS file
file_content <- readRDS(paste0("tmp_", i, ".RDS"))
file.remove(paste0("tmp_", i, ".RDS"))
# Append the contents of each file to the all_outputs list
output <- c(output, file_content)
}
} else {
output <- 1:no_sim %>% purrr::map(estimate_sim)
}
coefs<-purrr::map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>%
tibble::rownames_to_column() %>%
tidyr::pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>%
dplyr::bind_rows(.id = "run")
output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE)
output[["coefs"]] <-coefs
pvals <- output[["coefs"]] %>% dplyr::select(dplyr::starts_with("rob_pval0"))
output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals)
output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
print(kableExtra::kable(output[["summary"]],digits = 2, format = "rst"))
print(output[["power"]])
return(output)
} else {
output<- 1:no_sim %>% purrr::map(~ simulate_choices(datadet, utility = u, setspp=setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations))
return(output)
}
}