From 4b36596f6220e4adb8a9aa2a67b453a4842d8327 Mon Sep 17 00:00:00 2001
From: Julian Sagebiel <julian.sagebiel@idiv.de>
Date: Sun, 22 Dec 2024 01:05:12 +0100
Subject: [PATCH] improved reshaping for histograms to make it more robust

---
 R/sim_all.R    | 89 +++++++++++++++++++++++++++++++++++++++-----------
 man/sim_all.Rd |  5 ++-
 2 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/R/sim_all.R b/R/sim_all.R
index f9890e4..16d9e48 100644
--- a/R/sim_all.R
+++ b/R/sim_all.R
@@ -4,6 +4,7 @@
 #' @param resps Number of respondents you want to simulate
 #' @inheritParams readdesign
 #' @param designpath The path to the folder where the designs are stored. For example "c:/myfancydec/Designs"
+#' @param reshape_type Must be "auto", "stats" to use the reshape from the stats package or tidyr to use pivot longer. Default is auto and should not bother you. Only change it once you face an error at this position and you may be lucky that it works then.
 #' @inheritParams sim_choice
 #' @inheritParams simulate_choices
 #' @return A list, with all information on the simulation. This list an be easily processed by the user and in the rmarkdown template.
@@ -36,7 +37,8 @@ sim_all <- function(nosim = 2,
                     manipulations = list(),
                     estimate = TRUE,
                     chunks = 1,
-                    utility_transform_type = "simple") {
+                    utility_transform_type = "simple",
+                    reshape_type = "auto") {
   #################################################
   ########## Input Validation Test ###############
   #################################################
@@ -162,38 +164,87 @@ sim_all <- function(nosim = 2,
     powa <- purrr::map(all_designs, ~ .x$power)
 
 
-browser()
 
-    summaryall <- data.frame(truepar = as.double(c(bcoeff,rep(NA,length(bcoeff)))), as.data.frame(purrr::map(all_designs, ~ .x$summary)) )%>%
+
+
+    summaryall <- data.frame(truepar = as.double(c(bcoeff, rep(
+      NA, length(bcoeff)
+    ))), as.data.frame(purrr::map(all_designs, ~ .x$summary))) %>%
       dplyr::select(!dplyr::ends_with("vars")) %>%
       dplyr::relocate(truepar, dplyr::ends_with(c(
-        ".n", "mean", "sd", "min" , "max", "range" , "se"
+        ".n", "truepar", "mean", "sd", "min" , "max", "range" , "se"
       )))
 
     coefall <- purrr::map(all_designs, ~ .x$coefs)
 
     pat <- paste0("(", paste(designname, collapse = "|"), ").") # needed to identify pattern to be replaced
 
-    s <- as.data.frame(coefall) %>%
+
+    preprocessed <- as.data.frame(coefall) %>%
       dplyr::select(!dplyr::matches("pval|run")) %>%
-      dplyr::rename_with( ~ sub("est_b", "", .x), dplyr::everything()) %>%
-      dplyr::rename_with(~ paste0(., "_", stringr::str_extract(., pat)),
-                         dplyr::everything()) %>%   # rename attributes for reshape part 1
-      dplyr::rename_with(~ stringr::str_replace(., pattern = pat, replacement =
-                                                  ""),
-                         dplyr::everything())  %>%
-      stats::reshape(
-        varying = 1:ncol(.),
-        sep = "_"  ,
-        direction = "long" ,
-        timevar = "design",
-        idvar = "run"
-      )
+      dplyr::rename_with(~ sub("est_", "", .x), dplyr::everything()) %>%
+      dplyr::rename_with( ~ paste0(., "_", stringr::str_extract(., pat)),
+                          dplyr::everything()) %>%   # rename attributes for reshape part 1
+      dplyr::rename_with( ~ stringr::str_replace(., pattern = pat, replacement =
+                                                   ""),
+                          dplyr::everything())
+
+
+    s <- switch(
+      reshape_type,
+      "stats" = {
+        message("Using stats::reshape for reshaping...")
+        stats::reshape(
+          preprocessed,
+          varying = 1:ncol(preprocessed),
+          sep = "_",
+          direction = "long",
+          timevar = "design"
+        ) %>%
+          dplyr::select(-id)
+      },
+      "tidyr" = {
+        message("Using tidyr::pivot_longer for reshaping...")
+        tidyr::pivot_longer(
+          preprocessed,
+          cols = dplyr::everything(),
+          names_to = c(".value", "design"),
+          names_sep = "_",
+          values_drop_na = TRUE
+        )
+      },
+      "auto" = {
+        tryCatch(
+          {
+            message("Trying tidyr::pivot_longer for reshaping...")
+            tidyr::pivot_longer(
+              preprocessed,
+              cols = dplyr::everything(),
+              names_to = c(".value", "design"),
+              names_sep = "_",
+              values_drop_na = TRUE
+            )
+          },
+          error = function(e) {
+            message("tidyr::pivot_longer failed, falling back to stats::reshape...")
+            stats::reshape(
+              preprocessed,
+              varying = 1:ncol(preprocessed),
+              sep = "_",
+              direction = "long",
+              timevar = "design"
+            ) %>%
+              dplyr::select(-id)
+          }
+        )
+      },
+      stop("You need to specify either 'tidyr', 'stats', or 'auto' as the reshape_type")
+    )
 
 
     p = list()
 
-    for (att in names(dplyr::select(s, -c("design", "run")))) {
+    for (att in names(dplyr::select(s, -c("design")))) {
       p[[att]] <- plot_multi_histogram(s, att, "design")
 
       print(p[[att]])
diff --git a/man/sim_all.Rd b/man/sim_all.Rd
index 3a55827..7522c92 100644
--- a/man/sim_all.Rd
+++ b/man/sim_all.Rd
@@ -17,7 +17,8 @@ sim_all(
   manipulations = list(),
   estimate = TRUE,
   chunks = 1,
-  utility_transform_type = "simple"
+  utility_transform_type = "simple",
+  reshape_type = "tidyr"
 )
 }
 \arguments{
@@ -44,6 +45,8 @@ sim_all(
 \item{chunks}{The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.}
 
 \item{utility_transform_type}{How the utility function you entered is transformed to the utility function required for mixl. You can use the classic way (simple) where parameters have to start with "b" and variables with "alt" or the more flexible (but potentially error prone) way (exact) where parameters and variables are matched exactly what how the are called in the dataset and in the bcoeff list. Default is "simple". In the long run, simple will be deleted, as exact should be downwards compatible.}
+
+\item{reshape_type}{Must be either "stats" to use the reshape from the stats package or tidyr to use pivot longer. Default is tidyr. Only change it once you face an error at this position and you may be lucky that it works then.}
 }
 \value{
 A list, with all information on the simulation. This list an be easily processed by the user and in the rmarkdown template.
-- 
GitLab