From f264a641346d1d2ce2eca1437d8c6452b1037285 Mon Sep 17 00:00:00 2001
From: dj44vuri <julian.sagebiel@idiv.de>
Date: Tue, 19 Dec 2023 10:50:18 +0100
Subject: [PATCH] first working version tested with RBook

---
 DESCRIPTION                    |   7 +++
 NAMESPACE                      |   1 +
 R/sim_all.R                    |  64 +++++++++++++++++++
 R/sim_choice.R                 | 109 +++++++++++++++++++++++++++++++++
 R/simulate_choices.R           |  87 ++++++++++++++++++++++++++
 R/utils.R                      |  55 +++++++++++++++++
 inst/extdata/Rbook/design1.RDS | Bin 0 -> 1279 bytes
 man/simulate_choices.Rd        |  24 ++++++++
 tests/manualtests.R            |  44 +++++++++++++
 9 files changed, 391 insertions(+)
 create mode 100644 R/sim_all.R
 create mode 100644 R/sim_choice.R
 create mode 100644 R/simulate_choices.R
 create mode 100644 R/utils.R
 create mode 100644 inst/extdata/Rbook/design1.RDS
 create mode 100644 man/simulate_choices.Rd
 create mode 100644 tests/manualtests.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 268a44c..4bfb583 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,8 +11,15 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.2
 Imports: 
     dplyr,
+    formula.tools,
+    mixl,
+    psych,
+    purrr,
     readr,
+    rmarkdown,
     stringr,
+    tibble,
+    tictoc,
     tidyr
 Suggests: 
     testthat (>= 3.0.0)
diff --git a/NAMESPACE b/NAMESPACE
index d7d3adc..2179643 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,3 +1,4 @@
 # Generated by roxygen2: do not edit by hand
 
 export(readdesign)
+export(simulate_choices)
diff --git a/R/sim_all.R b/R/sim_all.R
new file mode 100644
index 0000000..96c2f0e
--- /dev/null
+++ b/R/sim_all.R
@@ -0,0 +1,64 @@
+sim_all <- function(){
+
+  require("stringr")
+
+  designfile<-list.files(designpath,full.names = T)
+  designname <- str_remove_all(list.files(designpath,full.names = F),
+                               "(.ngd|_|.RDS)")  ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace
+
+  if (!exists("destype")) destype="ngene"
+
+  tictoc::tic()
+
+  all_designs<- purrr::map(designfile, sim_choice,
+                           no_sim= nosim,respondents = resps, mnl_U = mnl_U, destype=destype) %>%  ## iterate simulation over all designs
+    setNames(designname)
+
+
+  time <- tictoc::toc()
+
+  print(time)
+
+
+
+  powa <- map(all_designs, ~ .x$power)
+
+
+
+
+  summaryall <- as.data.frame(purrr::map(all_designs, ~.x$summary)) %>%
+    dplyr::select(!ends_with("vars")) %>%
+    relocate(ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" )))
+
+  coefall <- map(all_designs, ~ .x$coefs)
+
+  pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced
+
+  s<-as.data.frame(coefall) %>%
+    dplyr::select(!matches("pval|run")) %>%
+    rename_with(~ sub("est_b", "", .x), everything()) %>%
+    #  rename_with(~ sub("est_asc_", "asc", .x), everything()) %>%
+    rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), everything() ) %>%   # rename attributes for reshape part 1
+    rename_with( ~ stringr::str_replace(.,pattern = pat,replacement=""), everything() )  %>%
+    reshape(varying =1:ncol(.), sep = "_"  , direction = "long" ,timevar = "design", idvar = "run" )
+
+
+  p=list()
+
+  for (att in names(dplyr::select(s,-c("design","run")))) {
+
+    p[[att]] <- plot_multi_histogram(s,att,"design")
+
+    print(p[[att]])
+
+  }
+
+  all_designs[["summaryall"]] = summaryall
+  all_designs[["graphs"]]=p
+  all_designs[["powa"]]=powa
+  all_designs[["time"]]=time
+
+
+
+  return(all_designs)
+}
diff --git a/R/sim_choice.R b/R/sim_choice.R
new file mode 100644
index 0000000..0634ac7
--- /dev/null
+++ b/R/sim_choice.R
@@ -0,0 +1,109 @@
+sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u[[1]] ,destype) {
+
+
+  require("tictoc")
+  require("readr")
+  require("psych")
+  require("dplyr")
+  require("evd")
+  require("tidyr")
+  require("kableExtra")
+  require("gridExtra")
+  require("stringr")
+  require("mixl")
+  require("furrr")
+  require("purrr")
+  require("ggplot2")
+  require("formula.tools")
+  require("rlang")
+
+
+  estimate_sim <- function(run=1) {         #start loop
+
+    cat("This is Run number ", run)
+
+    database <- simulate_choices(datadet, utility = utils, setspp=setpp )
+
+
+    cat("This is the utility functions \n" , mnl_U)
+
+    model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
+
+    return(model)
+
+  }
+
+  mnl_U <-paste(map_chr(utils,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
+    str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" ,  "~" = "=", "\\." = "_" , " b" = " @b"  , "V_"="U_", " alt"="$alt"))
+
+  cat("mixl \n")
+  cat(mnl_U)
+
+  cat("\n Simulation \n")
+
+  print(u)
+
+
+  designs_all <- list()
+
+
+  design<- readdesign(design = designfile)
+
+  if (!exists("design$Block")) design$Block=1
+
+  nsets<-nrow(design)
+  nblocks<-max(design$Block)
+  setpp <- nsets/nblocks      # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
+
+  replications <- respondents/nblocks
+
+  datadet<- design %>%
+    arrange(Block,Choice.situation) %>%
+    slice(rep(row_number(), replications)) %>%    ## replicate design according to number of replications
+    mutate(ID = rep(1:respondents, each=setpp)) %>%  # create Respondent ID.
+    relocate(ID,`Choice.situation`) %>%
+    as.data.frame()
+
+  database <- simulate_choices(data=datadet, utility = utils, setspp = setpp)
+
+  model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
+
+  est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
+
+
+  availabilities <- mixl::generate_default_availabilities(
+    database, model_spec$num_utility_functions)
+
+
+  output<- 1:no_sim %>% map(estimate_sim)
+
+
+
+
+  coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)]  %>%
+               tibble::rownames_to_column() %>%
+               pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>%
+    bind_rows(.id = "run")
+
+  output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE)
+
+  output[["coefs"]] <-coefs
+
+  pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0"))
+
+  output[["power"]] <- 100*table(apply(pvals,1,  function(x) all(x<0.05)))/nrow(pvals)
+
+
+  output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
+
+
+  print(kable(output[["summary"]],digits = 2, format = "rst"))
+
+
+  print(output[["power"]])
+
+
+  return(output)
+
+
+}
diff --git a/R/simulate_choices.R b/R/simulate_choices.R
new file mode 100644
index 0000000..5372556
--- /dev/null
+++ b/R/simulate_choices.R
@@ -0,0 +1,87 @@
+#' Simulate choices based on a dataframe with a design
+#'
+#' @param data a dataframe that includes a design repeated for the number of observations
+#' @param utility a list with the utility functions, one utility function for each alternatives
+#' @param setspp an integeger, the number of choice sets per person
+#'
+#' @return a dataframe that includes simulated choices and a design
+#' @export
+#'
+#' @examples \dontrun{simulate_choices(datadet, utils,setspp)}
+simulate_choices <- function(data=datadet, utility =utils, setspp) {  #the part in dataset that needs to be repeated in each run
+
+
+
+  by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
+    # //! cur_data_all may get deprecated in favor of pick
+    # pick(everything()) %>%
+    cur_data_all() |>
+      dplyr::transmute(!!formula.tools::lhs(equation) := !!formula.tools::rhs(equation) )
+  }
+
+  cat(" \n does sou_gis exist: ", exists("sou_gis"), "\n")
+
+  if (exists("sou_gis") && is.function(sou_gis)) {
+    sou_gis()
+
+    cat("\n source of gis has been done \n")
+  }
+
+
+  if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations
+
+  n=seq_along(1:length(utility))    # number of utility functions
+
+
+  cat("\n dataset final_set exists: ",exists("final_set"), "\n")
+
+  if(exists("final_set")) data = left_join(data,final_set, by = "ID")
+
+  cat("\n decisiongroups exists: " ,exists("decisiongroups"))
+
+  if(exists("decisiongroups"))  {     ### create a new variable to classify decision groups.
+
+    data = dplyr::mutate(data,group = as.numeric(cut(row_number(),
+                                              breaks = decisiongroups * n(),
+                                              labels = seq_along(decisiongroups[-length(decisiongroups)]),
+                                              include.lowest = TRUE)))
+
+    print(table(data$group))
+  } else {
+
+    data$group=1
+  }
+
+
+  data<- data %>%
+    dplyr::group_by(ID) %>%
+    dplyr::mutate(!!! manipulations)
+
+
+
+  subsets<- split(data,data$group)
+
+  subsets <-  purrr::map2(.x = seq_along(u),.y = subsets,
+                   ~ mutate(.y,map_dfc(u[[.x]],by_formula)))
+
+  data <-bind_rows(subsets)
+
+  data<- data %>%
+    dplyr::rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), tidyr::everything()) %>%
+    mutate(across(.cols=n,.fns = ~ rgumbel(setspp,loc=0, scale=1), .names = "{'e'}_{n}" ),
+           across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>%
+    dplyr::mutate(CHOICE=max.col(.[,grep("U_",names(.))])
+    )   %>%
+    as.data.frame()
+
+
+
+
+  cat("\n data has been made \n")
+
+  cat("\n First few observations \n ")
+  print(head(data))
+  cat("\n \n ")
+  return(data)
+
+}
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..89801dd
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,55 @@
+plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
+  plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
+    #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
+    geom_density(alpha=0.5) +
+    geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) +  ## this makes a vertical line of the mean
+    labs(x=feature, y = "Density")
+  plt + guides(fill=guide_legend(title=label_column))
+}
+
+
+
+download_and_extract_zip <- function(url, dest_folder = ".", zip_name = NULL) {
+  # If zip_name is not provided, extract it from the URL
+  if (is.null(zip_name)) {
+    zip_name <- basename(url)
+  }
+  folder <- paste0(dest_folder,"/data")
+
+
+  # Check if the folder is empty
+  if (length(list.files(folder)) > 0) {
+    warning("Destination folder is not empty. Nothing copied.")
+    return(invisible(NULL))
+  }
+
+
+  # Download the zip file
+  download.file(url, zip_name, method = "auto", quiet = FALSE, mode = "w", cacheOK = TRUE)
+
+  # Extract the contents
+  unzip(zip_name, exdir = dest_folder)
+
+
+  # Return the path to the extracted folder
+  return(file.path(dest_folder, tools::file_path_sans_ext(zip_name)))
+}
+
+
+
+
+
+
+make_md <- function(f=file){
+
+
+
+
+  rmarkdown::render("simulation_output.rmd",
+                    output_file = paste0(
+                      stringr::str_remove_all(
+                        file,"parameters_|.R$"),".html"),
+                    params = list(file=file)
+  )
+
+}
diff --git a/inst/extdata/Rbook/design1.RDS b/inst/extdata/Rbook/design1.RDS
new file mode 100644
index 0000000000000000000000000000000000000000..a8a4542cb2073b93b4d59dbc55563047599de62e
GIT binary patch
literal 1279
zcmb2|=3oE==C{#j{cbx5#Lcl6nyM9K`Q(6NSCMYe$))B+uY|V*<!h)cRA5@+xg_ql
zLal_HV*iBX{>r8x;qz<W<lDZf+MB1p=1E_A*|XU<Ki)bf|0Ua9Gc@?CM$lHbnpINq
zcJ)>N4Y%#>o|;#)r?>y+PrtXvSJzBke`!kBJK@lxCl9KQdhg1Xi>}kWw>aOgZ1#K;
zr{A81`#$cgSh%kE=EI5RpY1k26y005tv2oS^Q`r&=3nBtx_;63+6?>qRW;umvt@<+
zE><zw<#X-zKlSeTrdqz*|Ds<%@6`)^t9}2s+Lz+hS#vIYUwiz-UjLZiF=apARfpf%
z^ey1?r1|fwPXGJ!yes^Q{*_Xmuk$wi6u)u(_wSUNIU0|v!t6HH%~_vq?d5I5cjVxo
zs3cjjns3KHYFx=*6CCkg;+3THJ;NtQt(Mrh?a%p|9JfaN`okOgE6*p%ZmW9z^Ygmj
ze7}3YX4*vCJS)6!e)9WCv%quaD)s4eBd*u&cYG)Fc1`}L(z*T5TX*$5zZSOF!i4XK
z;yj7-busKut`<#yuYWahjsJnGAEmoxf31IBu2n8kd#tM9Zo+koOQnfB1>a2YKQj4)
zp!jvgSoRh1w<XW7(XO1&Bb<6i{U&Rz!>1M%)_(qj=f1xe_@ehCP-eaV^T5d6^U~)q
z-q@+PZaMqsnD$&-+Yir63*(Nt%UolBaNg>%$KO^PH~WIx8|i)T8&j;GIm)b={d{A7
zj=N0X;XOO<)s@v&%~fyu&*>3!pF!o_ZoL;f^aa9GA8(OAtId7?sPdKXMWOd?7w&WK
zTDN)C`)vW0wXb!4?@d0eYjrBl{dSeitE!LI|Ke-Ehn334e#}u0smZvv=tU|2*SoQz
zeiv;*`$Oc^_jzv$KkEL&ZoAWa^Z2{w>vR9r?_Hm}_{HbwF8x>GTjVnDDf&ON<NSN)
zYwf<9uL_Uq&c0v%V_*L2UzTcD&a1nA{&nr~tmUh|7xmfx>|>YfZ9npU(flLt75P3D
zTirWmZ}q87=)Cr)MPmDIN3<XK-u5U^_1~pw{$Io^5~BAXv3ZgIaI>nN#ADZaF*njJ
zmfV+4n3FwkPR-7G_LI)H%@f??COmVUm#_VUabNFulTV)icCWenK$Nv+;`eDk!npIJ
zW|VLHuv5u?Kl5+CCvF><`y%d(sXX%7WAD8%u2=u7;Nxuu{eN#Y^;tx$H{suP=)N)2
z{1bI-=iC0I_;*NiS)5QeUe{FlM6y!u=k>a4@z0a3ueaAMZmR5@|I}*2a#uOYA7O8r
zj~%vgo)9}hVEO9<`yN=S{+sf5<Ng9!Ub}7Fg=@4E<tk&|*st~e?%3G(^_ybcgU;U9
z$A#+SCVXT3EgWal7xJk#sJmaoZeIA0KgWNRPHIodKjv<27iIGOO})csU#t8HdtD7>
zXC>ceOf)&UeeSIF%Zs)K@4RuWRCdx{M?=B6#%V4UvQ|+`ig|q}y$w0X{3&<xnu&Y;
z*w=6NecmHEQEut89p$`MJH*e1oMZdB?C-3VKYr|)mvH^NVeiYNO;IbK*gIW0aU&*P
zPcJQe$<?G@KkLmWtBR_&&bf1zarxewAB9!&x5G;wE}s9<-rwJUzk#B4>38>}9AB~5
z{Nk(M&ru6IueVO^^lszBMoOn=?|oWT_xWvA?X=|6zjU8(zCHWvoE-P++i%jg&b#A(
z^Ts{t<@~ElwVt0?k(3f*cxq+Rl;EQp%ew;OL>HHc_MSd^_=}m{&(Go?mwB8!X~e()
E0BV7vRsaA1

literal 0
HcmV?d00001

diff --git a/man/simulate_choices.Rd b/man/simulate_choices.Rd
new file mode 100644
index 0000000..80d6ecc
--- /dev/null
+++ b/man/simulate_choices.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simulate_choices.R
+\name{simulate_choices}
+\alias{simulate_choices}
+\title{Simulate choices based on a dataframe with a design}
+\usage{
+simulate_choices(data = datadet, utility = utils, setspp)
+}
+\arguments{
+\item{data}{a dataframe that includes a design repeated for the number of observations}
+
+\item{utility}{a list with the utility functions, one utility function for each alternatives}
+
+\item{setspp}{an integeger, the number of choice sets per person}
+}
+\value{
+a dataframe that includes simulated choices and a design
+}
+\description{
+Simulate choices based on a dataframe with a design
+}
+\examples{
+\dontrun{simulate_choices(datadet, utils,setspp)}
+}
diff --git a/tests/manualtests.R b/tests/manualtests.R
new file mode 100644
index 0000000..f4df9b6
--- /dev/null
+++ b/tests/manualtests.R
@@ -0,0 +1,44 @@
+
+rm(list=ls())
+devtools::load_all()
+
+
+
+
+designpath<- system.file("extdata","Rbook" ,package = "simulateDCE")
+
+#notes <- "This design consists of different heuristics. One group did not attend the methan attribute, another group only decided based on the payment"
+
+notes <- "No Heuristics"
+
+resps =240  # number of respondents
+nosim=2 # number of simulations to run (about 500 is minimum)
+
+#betacoefficients should not include "-"
+bsq=0.00
+bredkite=-0.05
+bdistance=0.50
+bcost=-0.05
+bfarm2=0.25
+bfarm3=0.50
+bheight2=0.25
+bheight3=0.50
+
+
+destype <- "spdesign"
+
+
+#place your utility functions here
+u<- list(u1= list(
+  v1 =V.1 ~  bsq * alt1.sq,
+  v2 =V.2 ~  bfarm2 * alt2.farm2 + bfarm3 * alt2.farm3 + bheight2 * alt2.height2 + bheight3 * alt2.height3 +  bredkite * alt2.redkite + bdistance * alt2.distance + bcost * alt2.cost,
+  v3 =V.3 ~  bfarm2 * alt3.farm2 + bfarm3 * alt3.farm3 + bheight2 * alt3.height2 + bheight3 * alt3.height3 +  bredkite * alt3.redkite + bdistance * alt3.distance + bcost * alt3.cost
+)
+)
+
+
+
+sim_all()
+
+
+
-- 
GitLab