From 20bd966e02f30de633b4e2fa9f285665070e0d5b Mon Sep 17 00:00:00 2001 From: Julian Sagebiel <julian.sagebiel@idiv.de> Date: Sun, 24 Nov 2024 01:03:30 +0100 Subject: [PATCH] added chunks option in sim_choice to temporarily save results on disc --- R/sim_all.R | 4 +-- R/sim_choice.R | 60 ++++++++++++++++++++++++++++++++-- man/sim_all.Rd | 5 ++- man/sim_choice.Rd | 5 ++- tests/manual-tests/Rbookfull.R | 6 ++-- 5 files changed, 71 insertions(+), 9 deletions(-) diff --git a/R/sim_all.R b/R/sim_all.R index 495ea14..29a95c6 100644 --- a/R/sim_all.R +++ b/R/sim_all.R @@ -27,7 +27,7 @@ #' bheight2=0.25, #' bheight3=0.50) #' -sim_all <- function(nosim=2, resps, destype=NULL, designpath, u, bcoeff, decisiongroups = c(0,1), manipulations = list(), estimate = TRUE){ +sim_all <- function(nosim=2, resps, destype=NULL, designpath, u, bcoeff, decisiongroups = c(0,1), manipulations = list(), estimate = TRUE, chunks=1){ ################################################# ########## Input Validation Test ############### @@ -111,7 +111,7 @@ sim_all <- function(nosim=2, resps, destype=NULL, designpath, u, bcoeff, decisio tictoc::tic() all_designs<- purrr::map(designfile, sim_choice, - no_sim= nosim,respondents = resps, destype=destype, ut=u, bcoefficients = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations, estimate = estimate) %>% ## iterate simulation over all designs + no_sim= nosim,respondents = resps, destype=destype, ut=u, bcoefficients = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations, estimate = estimate, chunks =chunks) %>% ## iterate simulation over all designs stats::setNames(designname) diff --git a/R/sim_choice.R b/R/sim_choice.R index 848f62a..d97b9db 100644 --- a/R/sim_choice.R +++ b/R/sim_choice.R @@ -7,13 +7,14 @@ #' @inheritParams readdesign #' @param estimate If TRUE models will be estimated. If false only a dataset will be simulated. Default is true #' @inheritParams simulate_choices +#' @param chunks The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs. #' @return a list with all information on the run #' @export #' #' @examples \dontrun{ simchoice(designfile="somefile", no_sim=10, respondents=330, #' mnl_U,ut=u[[1]] ,destype="ngene")} #' -sim_choice <- function(designfile, no_sim=10, respondents=330,ut ,destype=destype, bcoefficients, decisiongroups=c(0,1), manipulations = list() , estimate) { +sim_choice <- function(designfile, no_sim=10, respondents=330,ut ,destype=destype, bcoefficients, decisiongroups=c(0,1), manipulations = list() , estimate, chunks=1) { @@ -109,9 +110,64 @@ designs_all <- list() availabilities <- mixl::generate_default_availabilities( database, model_spec$num_utility_functions) + if (chunks >1) { - output<- 1:no_sim %>% purrr::map(estimate_sim) + # Calculate the size of each chunk + chunk_size <- ceiling(no_sim / chunks) + # Initialize the starting point for the first chunk + start_point <- 1 + + for (i in 1:chunks) { + # Calculate the end point for the current chunk + end_point <- start_point + chunk_size - 1 + + # Ensure we do not go beyond the total number of simulations + if (end_point > no_sim) { + end_point <- no_sim + } + + # Run simulations for the current chunk + + + output <- start_point:end_point %>% purrr::map(estimate_sim) + + + + saveRDS(output,paste0("tmp_",i,".RDS")) + rm(output) + + gc() + + # Print or save the output as required + print(paste("Results for chunk", i, "from", start_point, "to", end_point)) + + # Update the start point for the next chunk + start_point <- end_point + 1 + + # Break the loop if the end point reaches or exceeds no_sim + if (start_point > no_sim) break + +} + + output <- list() # Initialize the list to store all outputs + + # Assuming the files are named in sequence as 'tmp_1.RDS', 'tmp_2.RDS', ..., 'tmp_n.RDS' + for (i in 1:chunks) { + # Load each RDS file + file_content <- readRDS(paste0("tmp_", i, ".RDS")) + file.remove(paste0("tmp_", i, ".RDS")) + + # Append the contents of each file to the all_outputs list + output <- c(output, file_content) + } + + + } else { + + output <- 1:no_sim %>% purrr::map(estimate_sim) + + } diff --git a/man/sim_all.Rd b/man/sim_all.Rd index 6cca772..9b8817c 100644 --- a/man/sim_all.Rd +++ b/man/sim_all.Rd @@ -14,7 +14,8 @@ sim_all( bcoeff, decisiongroups = c(0, 1), manipulations = list(), - estimate = TRUE + estimate = TRUE, + chunks = 1 ) } \arguments{ @@ -33,6 +34,8 @@ sim_all( \item{manipulations}{A variable to alter terms of the utility functions examples may be applying a factor or applying changes to terms selectively for different groups} \item{estimate}{If TRUE models will be estimated. If false only a dataset will be simulated. Default is true} + +\item{chunks}{The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.} } \value{ A list, with all information on the simulation. This list an be easily processed by the user and in the rmarkdown template. diff --git a/man/sim_choice.Rd b/man/sim_choice.Rd index 21c3183..3ede006 100644 --- a/man/sim_choice.Rd +++ b/man/sim_choice.Rd @@ -13,7 +13,8 @@ sim_choice( bcoefficients, decisiongroups = c(0, 1), manipulations = list(), - estimate + estimate, + chunks = 1 ) } \arguments{ @@ -32,6 +33,8 @@ sim_choice( \item{manipulations}{A variable to alter terms of the utility functions examples may be applying a factor or applying changes to terms selectively for different groups} \item{estimate}{If TRUE models will be estimated. If false only a dataset will be simulated. Default is true} + +\item{chunks}{The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.} } \value{ a list with all information on the run diff --git a/tests/manual-tests/Rbookfull.R b/tests/manual-tests/Rbookfull.R index 4a08908..f011268 100644 --- a/tests/manual-tests/Rbookfull.R +++ b/tests/manual-tests/Rbookfull.R @@ -1,7 +1,7 @@ rm(list=ls()) devtools::load_all() - +### currently used to test the chunks option set.seed(3393) @@ -11,7 +11,7 @@ designpath<- system.file("extdata","Rbook" ,package = "simulateDCE") notes <- "No Heuristics" resps =240 # number of respondents -nosim=2 # number of simulations to run (about 500 is minimum) +nosim=19 # number of simulations to run (about 500 is minimum) #betacoefficients should not include "-" bcoeff<-list( @@ -39,7 +39,7 @@ ul<- list(u1= list( rbook <- simulateDCE::sim_all(nosim = nosim, resps=resps, destype = destype, - designpath = designpath, u= ul, bcoeff = bcoeff) + designpath = designpath, u= ul, bcoeff = bcoeff, chunks = 4) -- GitLab