From 45359a506c525ee8c169f5b984535fdd8b42f349 Mon Sep 17 00:00:00 2001
From: Julian Sagebiel <julian.sagebiel@idiv.de>
Date: Sun, 22 Dec 2024 00:09:50 +0100
Subject: [PATCH] added true coefficients to summaryall and changed argument
 names to be more consistent accross functions

---
 DESCRIPTION             |   3 +-
 R/globals.R             |   2 +-
 R/sim_all.R             | 142 +++++++++++++++++++++++++++-------------
 R/sim_choice.R          |  24 +++----
 R/simulate_choices.R    |  12 ++--
 man/sim_all.Rd          |   2 +-
 man/sim_choice.Rd       |  10 +--
 man/simulate_choices.Rd |  10 +--
 8 files changed, 127 insertions(+), 78 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 6f98150..f01c645 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -27,7 +27,8 @@ Imports:
     stringr,
     tibble,
     tictoc,
-    tidyr
+    tidyr,
+    rlang
 Suggests: 
     knitr,
     testthat (>= 3.0.0)
diff --git a/R/globals.R b/R/globals.R
index eaa2250..3f8afe3 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -1 +1 @@
-utils::globalVariables(c("designfile", "destype", "Choice.situation", "Design", "." ,"ID", ":=", "..density..", "sou_gis", "block", "Block"))
+utils::globalVariables(c("designfile", "destype", "Choice.situation", "Design", "." ,"ID", ":=", "..density..", "sou_gis", "block", "Block", "row_id", "final_set"))
diff --git a/R/sim_all.R b/R/sim_all.R
index 9f7decc..f9890e4 100644
--- a/R/sim_all.R
+++ b/R/sim_all.R
@@ -4,8 +4,6 @@
 #' @param resps Number of respondents you want to simulate
 #' @inheritParams readdesign
 #' @param designpath The path to the folder where the designs are stored. For example "c:/myfancydec/Designs"
-#' @param u A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal),  this  group still  has to be in a list in  the u list. As a convention name beta coefficients starting with a lower case "b"
-#' @param bcoeff List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions
 #' @inheritParams sim_choice
 #' @inheritParams simulate_choices
 #' @return A list, with all information on the simulation. This list an be easily processed by the user and in the rmarkdown template.
@@ -27,15 +25,27 @@
 #'      bheight2=0.25,
 #'      bheight3=0.50)
 #'
-sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath, u, bcoeff, decisiongroups = c(0,1), manipulations = list(), estimate = TRUE, chunks=1, utility_transform_type = "simple"){
-
+sim_all <- function(nosim = 2,
+                    resps,
+                    designtype = NULL,
+                    destype = NULL,
+                    designpath,
+                    u,
+                    bcoeff,
+                    decisiongroups = c(0, 1),
+                    manipulations = list(),
+                    estimate = TRUE,
+                    chunks = 1,
+                    utility_transform_type = "simple") {
   #################################################
   ########## Input Validation Test ###############
   #################################################
 
   ########### validate the utility function ########
-  if (missing(u) || !(is.list(u) && any(sapply(u, is.list)))){
-    stop(" 'u' must be provided and must be a list containing at least one list element (list of lists).")
+  if (missing(u) || !(is.list(u) && any(sapply(u, is.list)))) {
+    stop(
+      " 'u' must be provided and must be a list containing at least one list element (list of lists)."
+    )
   }
 
   ########## validate the bcoeff list ################
@@ -46,7 +56,9 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
 
 
   if (nosim < chunks) {
-    stop("You cannot have more chunks than runs. The number of chunks tells us how often we save the simulation results on disk. Maximum one per run.")
+    stop(
+      "You cannot have more chunks than runs. The number of chunks tells us how often we save the simulation results on disk. Maximum one per run."
+    )
   }
 
   # Check if bcoeff is a list
@@ -54,7 +66,7 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
     stop("Argument 'bcoeff' must be a list.")
   }
 
-  if (length(u) != length(decisiongroups) -1){
+  if (length(u) != length(decisiongroups) - 1) {
     stop("Number of decision groups must equal number of utility functions!")
   }
   if (!is.vector(decisiongroups)) {
@@ -78,7 +90,7 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
   }
 
   #### check that all the coefficients in utility function have a cooresponding value in bcoeff ####
-    # Extract coefficients from utility function starting with "b"
+  # Extract coefficients from utility function starting with "b"
   coeff_names_ul <- unique(unlist(lapply(u, function(u) {
     formula_strings <- unlist(u)
     coef_names <- unique(unlist(lapply(formula_strings, function(f) {
@@ -93,30 +105,52 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
   # Check if all utility function coefficients starting with "b" are covered in bcoeff list
   missing_coeffs <- coeff_names_ul[!(coeff_names_ul %in% names(bcoeff))]
   if (length(missing_coeffs) > 0) {
-    stop(paste("Missing coefficients in 'bcoeff':", paste(missing_coeffs, collapse = ", "), ". Perhaps there is a typo?"))
+    stop(paste(
+      "Missing coefficients in 'bcoeff':",
+      paste(missing_coeffs, collapse = ", "),
+      ". Perhaps there is a typo?"
+    ))
   }
   ########## validate resps #####################
-  if (missing(resps) ||  !(is.integer(resps) || (is.numeric(resps) && identical(trunc(resps), resps)))) {
-    stop(" 'resps' must be provided and must be an integer indicating  the number of respondents per run.")
+  if (missing(resps) ||
+      !(is.integer(resps) ||
+        (is.numeric(resps) && identical(trunc(resps), resps)))) {
+    stop(
+      " 'resps' must be provided and must be an integer indicating  the number of respondents per run."
+    )
   }
   ########## validate designpath ################
   if (!dir.exists(designpath)) {
-    stop(" The folder where your designs are stored does not exist. \n Check if designpath is correctly specified")
+    stop(
+      " The folder where your designs are stored does not exist. \n Check if designpath is correctly specified"
+    )
   }
 
   #################################################
   ########## End Validation Tests #################
   #################################################
 
-  designfile<-list.files(designpath,full.names = T)
-  designname <- stringr::str_remove_all(list.files(designpath,full.names = F),
-                               "(.ngd|_|.RDS)")  ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace
+  designfile <- list.files(designpath, full.names = T)
+  designname <- stringr::str_remove_all(list.files(designpath, full.names = F), "(.ngd|_|.RDS)")  ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace
 
 
   tictoc::tic()
 
-  all_designs<- purrr::map(designfile, sim_choice,
-                           no_sim= nosim,respondents = resps,  designtype=designtype, destype =destype, ut=u, bcoefficients = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations, estimate = estimate, chunks =chunks, utility_transform_type = utility_transform_type) %>%  ## iterate simulation over all designs
+  all_designs <- purrr::map(
+    designfile,
+    sim_choice,
+    no_sim = nosim,
+    respondents = resps,
+    designtype = designtype,
+    destype = destype,
+    u = u,
+    bcoeff = bcoeff,
+    decisiongroups = decisiongroups,
+    manipulations = manipulations,
+    estimate = estimate,
+    chunks = chunks,
+    utility_transform_type = utility_transform_type
+  ) %>%  ## iterate simulation over all designs
     stats::setNames(designname)
 
 
@@ -124,49 +158,63 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
 
   print(time)
 
-if (estimate==TRUE) {
-
-
-
-  powa <- purrr::map(all_designs, ~ .x$power)
+  if (estimate == TRUE) {
+    powa <- purrr::map(all_designs, ~ .x$power)
 
 
+browser()
 
+    summaryall <- data.frame(truepar = as.double(c(bcoeff,rep(NA,length(bcoeff)))), as.data.frame(purrr::map(all_designs, ~ .x$summary)) )%>%
+      dplyr::select(!dplyr::ends_with("vars")) %>%
+      dplyr::relocate(truepar, dplyr::ends_with(c(
+        ".n", "mean", "sd", "min" , "max", "range" , "se"
+      )))
 
-  summaryall <- as.data.frame(purrr::map(all_designs, ~.x$summary)) %>%
-    dplyr::select(!dplyr::ends_with("vars")) %>%
-    dplyr::relocate(dplyr::ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" )))
+    coefall <- purrr::map(all_designs, ~ .x$coefs)
 
-  coefall <- purrr::map(all_designs, ~ .x$coefs)
+    pat <- paste0("(", paste(designname, collapse = "|"), ").") # needed to identify pattern to be replaced
 
-  pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced
+    s <- as.data.frame(coefall) %>%
+      dplyr::select(!dplyr::matches("pval|run")) %>%
+      dplyr::rename_with( ~ sub("est_b", "", .x), dplyr::everything()) %>%
+      dplyr::rename_with(~ paste0(., "_", stringr::str_extract(., pat)),
+                         dplyr::everything()) %>%   # rename attributes for reshape part 1
+      dplyr::rename_with(~ stringr::str_replace(., pattern = pat, replacement =
+                                                  ""),
+                         dplyr::everything())  %>%
+      stats::reshape(
+        varying = 1:ncol(.),
+        sep = "_"  ,
+        direction = "long" ,
+        timevar = "design",
+        idvar = "run"
+      )
 
-  s<-as.data.frame(coefall) %>%
-    dplyr::select(!dplyr::matches("pval|run")) %>%
-    dplyr::rename_with(~ sub("est_b", "", .x), dplyr::everything()) %>%
-    dplyr::rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), dplyr::everything() ) %>%   # rename attributes for reshape part 1
-    dplyr::rename_with( ~ stringr::str_replace(.,pattern = pat,replacement=""), dplyr::everything() )  %>%
-    stats::reshape(varying =1:ncol(.), sep = "_"  , direction = "long" ,timevar = "design", idvar = "run" )
 
+    p = list()
 
-  p=list()
+    for (att in names(dplyr::select(s, -c("design", "run")))) {
+      p[[att]] <- plot_multi_histogram(s, att, "design")
 
-  for (att in names(dplyr::select(s,-c("design","run")))) {
+      print(p[[att]])
 
-    p[[att]] <- plot_multi_histogram(s,att,"design")
+    }
 
-    print(p[[att]])
+    all_designs[["summaryall"]] = summaryall
+    all_designs[["graphs"]] = p
+    all_designs[["powa"]] = powa
 
   }
-
-  all_designs[["summaryall"]] = summaryall
-  all_designs[["graphs"]]=p
-  all_designs[["powa"]]=powa
-
-}
-  all_designs[["time"]]=time
-  all_designs[["arguements"]] = list( "Beta values" = bcoeff, "Utility functions" = u , "Decision groups" =decisiongroups , "Manipulation of vars" = manipulations,
-                                      "Number Simulations" = nosim, "Respondents" = resps, "Designpath" = designpath)
+  all_designs[["time"]] = time
+  all_designs[["arguements"]] = list(
+    "Beta values" = bcoeff,
+    "Utility functions" = u ,
+    "Decision groups" = decisiongroups ,
+    "Manipulation of vars" = manipulations,
+    "Number Simulations" = nosim,
+    "Respondents" = resps,
+    "Designpath" = designpath
+  )
 
 
 
diff --git a/R/sim_choice.R b/R/sim_choice.R
index ed50e05..877b4b2 100644
--- a/R/sim_choice.R
+++ b/R/sim_choice.R
@@ -3,7 +3,7 @@
 #' @param designfile path to a file containing a design.
 #' @param no_sim Number of runs i.e. how often do you want the simulation to be repeated
 #' @param respondents Number of respondents. How many respondents do you want to simulate in each run.
-#' @param ut The first element of the utility function list
+#' @param u A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal),  this  group still  has to be in a list in  the u list. As a convention name beta coefficients starting with a lower case "b"
 #' @inheritParams readdesign
 #' @inheritParams simulate_choices
 #' @param chunks The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.
@@ -12,13 +12,13 @@
 #' @export
 #'
 #' @examples \dontrun{  simchoice(designfile="somefile", no_sim=10, respondents=330,
-#'  mnl_U,ut=u[[1]] ,designtype="ngene")}
+#'  mnl_U,u=u[[1]] ,designtype="ngene")}
 #'
-sim_choice <- function(designfile, no_sim = 10, respondents = 330,ut ,designtype = NULL, destype = NULL, bcoefficients, decisiongroups=c(0,1), manipulations = list() , estimate, chunks=1, utility_transform_type = "simple") {
+sim_choice <- function(designfile, no_sim = 10, respondents = 330, u ,designtype = NULL, destype = NULL, bcoeff, decisiongroups=c(0,1), manipulations = list() , estimate, chunks=1, utility_transform_type = "simple") {
 
 
   if (utility_transform_type == "simple") {
-    message("'simple' is deprecated and will be removed in the future. Use 'exact' instead.", call. = FALSE)
+    message("'simple' is deprecated and will be removed in the future. Use 'exact' instead.")
   }
 
 
@@ -32,7 +32,7 @@ sim_choice <- function(designfile, no_sim = 10, respondents = 330,ut ,designtype
 
     cat("This is Run number ", run)
 
-    database <- simulate_choices(datadet, utility = ut, setspp=setpp, bcoefficients = bcoefficients, decisiongroups = decisiongroups, manipulations = manipulations)
+    database <- simulate_choices(datadet, utility = u, setspp=setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations)
 
 
     model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database)
@@ -52,7 +52,7 @@ designs_all <- list()
 
  cat("Utility function used in simulation, ie the true utility: \n\n")
 
-     print(ut)
+     print(u)
 
 
 
@@ -90,7 +90,7 @@ designs_all <- list()
 
 
 
-  database <- simulate_choices(data=datadet, utility = ut, setspp = setpp, bcoefficients = bcoefficients, decisiongroups = decisiongroups, manipulations = manipulations)
+  database <- simulate_choices(data=datadet, utility = u, setspp = setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations)
 
 
 ### start estimation
@@ -102,7 +102,7 @@ designs_all <- list()
     ####  Function that transforms user written utility for simulation into utility function for mixl.
     transform_util <- function() {
 
-      mnl_U <-paste(purrr::map_chr(ut[[1]],as.character,keep.source.attr = TRUE),collapse = "",";") %>%
+      mnl_U <-paste(purrr::map_chr(u[[1]],as.character,keep.source.attr = TRUE),collapse = "",";") %>%
         stringr::str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" ,  "~" = "=", "\\." = "_" , " b" = " @b"  , "V_"="U_", " alt"=" $alt"))
 
     }
@@ -116,14 +116,14 @@ transform_util2 <- function() {
   )
 
   mnl_U <- paste(
-    purrr::map_chr(ut[[1]], as.character, keep.source.attr = TRUE),
+    purrr::map_chr(u[[1]], as.character, keep.source.attr = TRUE),
     collapse = "",
     ";"
   ) %>%
     # Replace coefficients with exact matches
     stringr::str_replace_all(stats::setNames(
-      paste0("@", names(bcoefficients)),
-      paste0("(?<![._a-zA-Z0-9])", names(bcoefficients), "(?![._a-zA-Z0-9-])")
+      paste0("@", names(bcoeff)),
+      paste0("(?<![._a-zA-Z0-9])", names(bcoeff), "(?![._a-zA-Z0-9-])")
     )) %>%
     # General transformations
     stringr::str_replace_all(c(
@@ -260,7 +260,7 @@ transform_util2 <- function() {
   return(output)
 } else {
 
-  output<- 1:no_sim %>% purrr::map(~ simulate_choices(datadet, utility = ut, setspp=setpp, bcoefficients = bcoefficients, decisiongroups = decisiongroups, manipulations = manipulations))
+  output<- 1:no_sim %>% purrr::map(~ simulate_choices(datadet, utility = u, setspp=setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations))
   return(output)
 
 }
diff --git a/R/simulate_choices.R b/R/simulate_choices.R
index 5cb5f32..49bb285 100644
--- a/R/simulate_choices.R
+++ b/R/simulate_choices.R
@@ -1,23 +1,23 @@
-#' Simulate choices based on a dataframe with a design
+#' Simulate choices based on a data.frame with a design and respondents
 #'
 #' @param data a dataframe that includes a design repeated for the number of observations
 #' @param utility a list with the utility functions, one utility function for each alternatives
 #' @param setspp an integer, the number of choice sets per person
-#' @param bcoefficients List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions
+#' @param bcoeff List of initial coefficients for the utility function. List content/length can vary based on application. I ideally begins (but does not have to) with b and need be the same as those entered in the utility functions
 #' @param decisiongroups A vector showing how decision groups are numerically distributed
 #' @param manipulations A variable to alter terms of the utility functions examples may be applying a factor or applying changes to terms selectively for different groups
 #' @param estimate If TRUE models will be estimated. If false only a dataset will be simulated. Default is true
-#' @return a dataframe that includes simulated choices and a design
+#' @return a data.frame that includes simulated choices and a design
 #' @export
 #'
 #' @examples \dontrun{simulate_choices(datadet, ut,setspp)}
-simulate_choices <- function(data, utility, setspp, bcoefficients, decisiongroups = c(0,1), manipulations = list(), estimate) {  #the part in dataset that needs to be repeated in each run
+simulate_choices <- function(data, utility, setspp, bcoeff, decisiongroups = c(0,1), manipulations = list(), estimate) {  #the part in dataset that needs to be repeated in each run
 
 
 
 ### unpack the bcoeff list so variables are accessible
-  for (key in names(bcoefficients)) {
-    assign(key, bcoefficients[[key]])
+  for (key in names(bcoeff)) {
+    assign(key, bcoeff[[key]])
   }
 
 
diff --git a/man/sim_all.Rd b/man/sim_all.Rd
index a7d5e46..3a55827 100644
--- a/man/sim_all.Rd
+++ b/man/sim_all.Rd
@@ -33,7 +33,7 @@ sim_all(
 
 \item{u}{A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal),  this  group still  has to be in a list in  the u list. As a convention name beta coefficients starting with a lower case "b"}
 
-\item{bcoeff}{List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions}
+\item{bcoeff}{List of initial coefficients for the utility function. List content/length can vary based on application. I ideally begins (but does not have to) with b and need be the same as those entered in the utility functions}
 
 \item{decisiongroups}{A vector showing how decision groups are numerically distributed}
 
diff --git a/man/sim_choice.Rd b/man/sim_choice.Rd
index e15264b..5b4cd79 100644
--- a/man/sim_choice.Rd
+++ b/man/sim_choice.Rd
@@ -8,10 +8,10 @@ sim_choice(
   designfile,
   no_sim = 10,
   respondents = 330,
-  ut,
+  u,
   designtype = NULL,
   destype = NULL,
-  bcoefficients,
+  bcoeff,
   decisiongroups = c(0, 1),
   manipulations = list(),
   estimate,
@@ -26,13 +26,13 @@ sim_choice(
 
 \item{respondents}{Number of respondents. How many respondents do you want to simulate in each run.}
 
-\item{ut}{The first element of the utility function list}
+\item{u}{A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal),  this  group still  has to be in a list in  the u list. As a convention name beta coefficients starting with a lower case "b"}
 
 \item{designtype}{Is it a design created with ngene, spdesign or idefix. use 'ngene', 'spdesign' or 'idefix. Ngene designs should be stored as the standard .ngd output. spdesign should be the spdesign object stored as an RDS file. Idefix objects should also be stored as an RDS file. If designtype is not specified, I try to guess what it is. This is especially helpful if you want to carry out a simulation for both spdesign designs and ngene designs at the same time.}
 
 \item{destype}{Deprecated. Use designtype instead.}
 
-\item{bcoefficients}{List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions}
+\item{bcoeff}{List of initial coefficients for the utility function. List content/length can vary based on application. I ideally begins (but does not have to) with b and need be the same as those entered in the utility functions}
 
 \item{decisiongroups}{A vector showing how decision groups are numerically distributed}
 
@@ -52,6 +52,6 @@ Simulate and estimate choices
 }
 \examples{
 \dontrun{  simchoice(designfile="somefile", no_sim=10, respondents=330,
- mnl_U,ut=u[[1]] ,designtype="ngene")}
+ mnl_U,u=u[[1]] ,designtype="ngene")}
 
 }
diff --git a/man/simulate_choices.Rd b/man/simulate_choices.Rd
index e5eacb3..efbac3e 100644
--- a/man/simulate_choices.Rd
+++ b/man/simulate_choices.Rd
@@ -2,13 +2,13 @@
 % Please edit documentation in R/simulate_choices.R
 \name{simulate_choices}
 \alias{simulate_choices}
-\title{Simulate choices based on a dataframe with a design}
+\title{Simulate choices based on a data.frame with a design and respondents}
 \usage{
 simulate_choices(
   data,
   utility,
   setspp,
-  bcoefficients,
+  bcoeff,
   decisiongroups = c(0, 1),
   manipulations = list(),
   estimate
@@ -21,7 +21,7 @@ simulate_choices(
 
 \item{setspp}{an integer, the number of choice sets per person}
 
-\item{bcoefficients}{List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions}
+\item{bcoeff}{List of initial coefficients for the utility function. List content/length can vary based on application. I ideally begins (but does not have to) with b and need be the same as those entered in the utility functions}
 
 \item{decisiongroups}{A vector showing how decision groups are numerically distributed}
 
@@ -30,10 +30,10 @@ simulate_choices(
 \item{estimate}{If TRUE models will be estimated. If false only a dataset will be simulated. Default is true}
 }
 \value{
-a dataframe that includes simulated choices and a design
+a data.frame that includes simulated choices and a design
 }
 \description{
-Simulate choices based on a dataframe with a design
+Simulate choices based on a data.frame with a design and respondents
 }
 \examples{
 \dontrun{simulate_choices(datadet, ut,setspp)}
-- 
GitLab