From 20bd966e02f30de633b4e2fa9f285665070e0d5b Mon Sep 17 00:00:00 2001
From: Julian Sagebiel <julian.sagebiel@idiv.de>
Date: Sun, 24 Nov 2024 01:03:30 +0100
Subject: [PATCH] added chunks option in sim_choice to temporarily save results
 on disc

---
 R/sim_all.R                    |  4 +--
 R/sim_choice.R                 | 60 ++++++++++++++++++++++++++++++++--
 man/sim_all.Rd                 |  5 ++-
 man/sim_choice.Rd              |  5 ++-
 tests/manual-tests/Rbookfull.R |  6 ++--
 5 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/R/sim_all.R b/R/sim_all.R
index 495ea14..29a95c6 100644
--- a/R/sim_all.R
+++ b/R/sim_all.R
@@ -27,7 +27,7 @@
 #'      bheight2=0.25,
 #'      bheight3=0.50)
 #'
-sim_all <- function(nosim=2, resps, destype=NULL, designpath, u, bcoeff, decisiongroups = c(0,1), manipulations = list(), estimate = TRUE){
+sim_all <- function(nosim=2, resps, destype=NULL, designpath, u, bcoeff, decisiongroups = c(0,1), manipulations = list(), estimate = TRUE, chunks=1){
 
   #################################################
   ########## Input Validation Test ###############
@@ -111,7 +111,7 @@ sim_all <- function(nosim=2, resps, destype=NULL, designpath, u, bcoeff, decisio
   tictoc::tic()
 
   all_designs<- purrr::map(designfile, sim_choice,
-                           no_sim= nosim,respondents = resps,  destype=destype, ut=u, bcoefficients = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations, estimate = estimate) %>%  ## iterate simulation over all designs
+                           no_sim= nosim,respondents = resps,  destype=destype, ut=u, bcoefficients = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations, estimate = estimate, chunks =chunks) %>%  ## iterate simulation over all designs
     stats::setNames(designname)
 
 
diff --git a/R/sim_choice.R b/R/sim_choice.R
index 848f62a..d97b9db 100644
--- a/R/sim_choice.R
+++ b/R/sim_choice.R
@@ -7,13 +7,14 @@
 #' @inheritParams readdesign
 #' @param estimate If TRUE models will be estimated. If false only a dataset will be simulated. Default is true
 #' @inheritParams simulate_choices
+#' @param chunks The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.
 #' @return a list with all information on the run
 #' @export
 #'
 #' @examples \dontrun{  simchoice(designfile="somefile", no_sim=10, respondents=330,
 #'  mnl_U,ut=u[[1]] ,destype="ngene")}
 #'
-sim_choice <- function(designfile, no_sim=10, respondents=330,ut ,destype=destype, bcoefficients, decisiongroups=c(0,1), manipulations = list() , estimate) {
+sim_choice <- function(designfile, no_sim=10, respondents=330,ut ,destype=destype, bcoefficients, decisiongroups=c(0,1), manipulations = list() , estimate, chunks=1) {
 
 
 
@@ -109,9 +110,64 @@ designs_all <- list()
   availabilities <- mixl::generate_default_availabilities(
     database, model_spec$num_utility_functions)
 
+  if (chunks >1) {
 
-  output<- 1:no_sim %>% purrr::map(estimate_sim)
+  # Calculate the size of each chunk
+  chunk_size <- ceiling(no_sim / chunks)
 
+  # Initialize the starting point for the first chunk
+  start_point <- 1
+
+  for (i in 1:chunks) {
+    # Calculate the end point for the current chunk
+    end_point <- start_point + chunk_size - 1
+
+    # Ensure we do not go beyond the total number of simulations
+    if (end_point > no_sim) {
+      end_point <- no_sim
+    }
+
+    # Run simulations for the current chunk
+
+
+    output <- start_point:end_point %>% purrr::map(estimate_sim)
+
+
+
+      saveRDS(output,paste0("tmp_",i,".RDS"))
+      rm(output)
+
+    gc()
+
+    # Print or save the output as required
+    print(paste("Results for chunk", i, "from", start_point, "to", end_point))
+
+    # Update the start point for the next chunk
+    start_point <- end_point + 1
+
+    # Break the loop if the end point reaches or exceeds no_sim
+    if (start_point > no_sim) break
+
+}
+
+  output <- list()  # Initialize the list to store all outputs
+
+  # Assuming the files are named in sequence as 'tmp_1.RDS', 'tmp_2.RDS', ..., 'tmp_n.RDS'
+  for (i in 1:chunks) {
+    # Load each RDS file
+    file_content <- readRDS(paste0("tmp_", i, ".RDS"))
+    file.remove(paste0("tmp_", i, ".RDS"))
+
+    # Append the contents of each file to the all_outputs list
+    output <- c(output, file_content)
+  }
+
+
+  } else {
+
+    output <- 1:no_sim %>% purrr::map(estimate_sim)
+
+  }
 
 
 
diff --git a/man/sim_all.Rd b/man/sim_all.Rd
index 6cca772..9b8817c 100644
--- a/man/sim_all.Rd
+++ b/man/sim_all.Rd
@@ -14,7 +14,8 @@ sim_all(
   bcoeff,
   decisiongroups = c(0, 1),
   manipulations = list(),
-  estimate = TRUE
+  estimate = TRUE,
+  chunks = 1
 )
 }
 \arguments{
@@ -33,6 +34,8 @@ sim_all(
 \item{manipulations}{A variable to alter terms of the utility functions examples may be applying a factor or applying changes to terms selectively for different groups}
 
 \item{estimate}{If TRUE models will be estimated. If false only a dataset will be simulated. Default is true}
+
+\item{chunks}{The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.}
 }
 \value{
 A list, with all information on the simulation. This list an be easily processed by the user and in the rmarkdown template.
diff --git a/man/sim_choice.Rd b/man/sim_choice.Rd
index 21c3183..3ede006 100644
--- a/man/sim_choice.Rd
+++ b/man/sim_choice.Rd
@@ -13,7 +13,8 @@ sim_choice(
   bcoefficients,
   decisiongroups = c(0, 1),
   manipulations = list(),
-  estimate
+  estimate,
+  chunks = 1
 )
 }
 \arguments{
@@ -32,6 +33,8 @@ sim_choice(
 \item{manipulations}{A variable to alter terms of the utility functions examples may be applying a factor or applying changes to terms selectively for different groups}
 
 \item{estimate}{If TRUE models will be estimated. If false only a dataset will be simulated. Default is true}
+
+\item{chunks}{The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.}
 }
 \value{
 a list with all information on the run
diff --git a/tests/manual-tests/Rbookfull.R b/tests/manual-tests/Rbookfull.R
index 4a08908..f011268 100644
--- a/tests/manual-tests/Rbookfull.R
+++ b/tests/manual-tests/Rbookfull.R
@@ -1,7 +1,7 @@
 
 rm(list=ls())
 devtools::load_all()
-
+### currently used to test the chunks option
 
 set.seed(3393)
 
@@ -11,7 +11,7 @@ designpath<- system.file("extdata","Rbook" ,package = "simulateDCE")
 notes <- "No Heuristics"
 
 resps =240  # number of respondents
-nosim=2 # number of simulations to run (about 500 is minimum)
+nosim=19 # number of simulations to run (about 500 is minimum)
 
 #betacoefficients should not include "-"
 bcoeff<-list(
@@ -39,7 +39,7 @@ ul<- list(u1= list(
 
 
 rbook <- simulateDCE::sim_all(nosim = nosim, resps=resps, destype = destype,
-                 designpath = designpath, u= ul, bcoeff = bcoeff)
+                 designpath = designpath, u= ul, bcoeff = bcoeff, chunks = 4)
 
 
 
-- 
GitLab