From f264a641346d1d2ce2eca1437d8c6452b1037285 Mon Sep 17 00:00:00 2001 From: dj44vuri <julian.sagebiel@idiv.de> Date: Tue, 19 Dec 2023 10:50:18 +0100 Subject: [PATCH] first working version tested with RBook --- DESCRIPTION | 7 +++ NAMESPACE | 1 + R/sim_all.R | 64 +++++++++++++++++++ R/sim_choice.R | 109 +++++++++++++++++++++++++++++++++ R/simulate_choices.R | 87 ++++++++++++++++++++++++++ R/utils.R | 55 +++++++++++++++++ inst/extdata/Rbook/design1.RDS | Bin 0 -> 1279 bytes man/simulate_choices.Rd | 24 ++++++++ tests/manualtests.R | 44 +++++++++++++ 9 files changed, 391 insertions(+) create mode 100644 R/sim_all.R create mode 100644 R/sim_choice.R create mode 100644 R/simulate_choices.R create mode 100644 R/utils.R create mode 100644 inst/extdata/Rbook/design1.RDS create mode 100644 man/simulate_choices.Rd create mode 100644 tests/manualtests.R diff --git a/DESCRIPTION b/DESCRIPTION index 268a44c..4bfb583 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,8 +11,15 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.2 Imports: dplyr, + formula.tools, + mixl, + psych, + purrr, readr, + rmarkdown, stringr, + tibble, + tictoc, tidyr Suggests: testthat (>= 3.0.0) diff --git a/NAMESPACE b/NAMESPACE index d7d3adc..2179643 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,4 @@ # Generated by roxygen2: do not edit by hand export(readdesign) +export(simulate_choices) diff --git a/R/sim_all.R b/R/sim_all.R new file mode 100644 index 0000000..96c2f0e --- /dev/null +++ b/R/sim_all.R @@ -0,0 +1,64 @@ +sim_all <- function(){ + + require("stringr") + + designfile<-list.files(designpath,full.names = T) + designname <- str_remove_all(list.files(designpath,full.names = F), + "(.ngd|_|.RDS)") ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace + + if (!exists("destype")) destype="ngene" + + tictoc::tic() + + all_designs<- purrr::map(designfile, sim_choice, + no_sim= nosim,respondents = resps, mnl_U = mnl_U, destype=destype) %>% ## iterate simulation over all designs + setNames(designname) + + + time <- tictoc::toc() + + print(time) + + + + powa <- map(all_designs, ~ .x$power) + + + + + summaryall <- as.data.frame(purrr::map(all_designs, ~.x$summary)) %>% + dplyr::select(!ends_with("vars")) %>% + relocate(ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" ))) + + coefall <- map(all_designs, ~ .x$coefs) + + pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced + + s<-as.data.frame(coefall) %>% + dplyr::select(!matches("pval|run")) %>% + rename_with(~ sub("est_b", "", .x), everything()) %>% + # rename_with(~ sub("est_asc_", "asc", .x), everything()) %>% + rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), everything() ) %>% # rename attributes for reshape part 1 + rename_with( ~ stringr::str_replace(.,pattern = pat,replacement=""), everything() ) %>% + reshape(varying =1:ncol(.), sep = "_" , direction = "long" ,timevar = "design", idvar = "run" ) + + + p=list() + + for (att in names(dplyr::select(s,-c("design","run")))) { + + p[[att]] <- plot_multi_histogram(s,att,"design") + + print(p[[att]]) + + } + + all_designs[["summaryall"]] = summaryall + all_designs[["graphs"]]=p + all_designs[["powa"]]=powa + all_designs[["time"]]=time + + + + return(all_designs) +} diff --git a/R/sim_choice.R b/R/sim_choice.R new file mode 100644 index 0000000..0634ac7 --- /dev/null +++ b/R/sim_choice.R @@ -0,0 +1,109 @@ +sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u[[1]] ,destype) { + + + require("tictoc") + require("readr") + require("psych") + require("dplyr") + require("evd") + require("tidyr") + require("kableExtra") + require("gridExtra") + require("stringr") + require("mixl") + require("furrr") + require("purrr") + require("ggplot2") + require("formula.tools") + require("rlang") + + + estimate_sim <- function(run=1) { #start loop + + cat("This is Run number ", run) + + database <- simulate_choices(datadet, utility = utils, setspp=setpp ) + + + cat("This is the utility functions \n" , mnl_U) + + model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,) + + return(model) + + } + + mnl_U <-paste(map_chr(utils,as.character,keep.source.attr = TRUE),collapse = "",";") %>% + str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"="$alt")) + + cat("mixl \n") + cat(mnl_U) + + cat("\n Simulation \n") + + print(u) + + + designs_all <- list() + + + design<- readdesign(design = designfile) + + if (!exists("design$Block")) design$Block=1 + + nsets<-nrow(design) + nblocks<-max(design$Block) + setpp <- nsets/nblocks # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets + + replications <- respondents/nblocks + + datadet<- design %>% + arrange(Block,Choice.situation) %>% + slice(rep(row_number(), replications)) %>% ## replicate design according to number of replications + mutate(ID = rep(1:respondents, each=setpp)) %>% # create Respondent ID. + relocate(ID,`Choice.situation`) %>% + as.data.frame() + + database <- simulate_choices(data=datadet, utility = utils, setspp = setpp) + + model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F) + + est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names) + + + availabilities <- mixl::generate_default_availabilities( + database, model_spec$num_utility_functions) + + + output<- 1:no_sim %>% map(estimate_sim) + + + + + coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>% + tibble::rownames_to_column() %>% + pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% + bind_rows(.id = "run") + + output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE) + + output[["coefs"]] <-coefs + + pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0")) + + output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals) + + + output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents) + + + print(kable(output[["summary"]],digits = 2, format = "rst")) + + + print(output[["power"]]) + + + return(output) + + +} diff --git a/R/simulate_choices.R b/R/simulate_choices.R new file mode 100644 index 0000000..5372556 --- /dev/null +++ b/R/simulate_choices.R @@ -0,0 +1,87 @@ +#' Simulate choices based on a dataframe with a design +#' +#' @param data a dataframe that includes a design repeated for the number of observations +#' @param utility a list with the utility functions, one utility function for each alternatives +#' @param setspp an integeger, the number of choice sets per person +#' +#' @return a dataframe that includes simulated choices and a design +#' @export +#' +#' @examples \dontrun{simulate_choices(datadet, utils,setspp)} +simulate_choices <- function(data=datadet, utility =utils, setspp) { #the part in dataset that needs to be repeated in each run + + + + by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function + # //! cur_data_all may get deprecated in favor of pick + # pick(everything()) %>% + cur_data_all() |> + dplyr::transmute(!!formula.tools::lhs(equation) := !!formula.tools::rhs(equation) ) + } + + cat(" \n does sou_gis exist: ", exists("sou_gis"), "\n") + + if (exists("sou_gis") && is.function(sou_gis)) { + sou_gis() + + cat("\n source of gis has been done \n") + } + + + if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations + + n=seq_along(1:length(utility)) # number of utility functions + + + cat("\n dataset final_set exists: ",exists("final_set"), "\n") + + if(exists("final_set")) data = left_join(data,final_set, by = "ID") + + cat("\n decisiongroups exists: " ,exists("decisiongroups")) + + if(exists("decisiongroups")) { ### create a new variable to classify decision groups. + + data = dplyr::mutate(data,group = as.numeric(cut(row_number(), + breaks = decisiongroups * n(), + labels = seq_along(decisiongroups[-length(decisiongroups)]), + include.lowest = TRUE))) + + print(table(data$group)) + } else { + + data$group=1 + } + + + data<- data %>% + dplyr::group_by(ID) %>% + dplyr::mutate(!!! manipulations) + + + + subsets<- split(data,data$group) + + subsets <- purrr::map2(.x = seq_along(u),.y = subsets, + ~ mutate(.y,map_dfc(u[[.x]],by_formula))) + + data <-bind_rows(subsets) + + data<- data %>% + dplyr::rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), tidyr::everything()) %>% + mutate(across(.cols=n,.fns = ~ rgumbel(setspp,loc=0, scale=1), .names = "{'e'}_{n}" ), + across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>% + dplyr::mutate(CHOICE=max.col(.[,grep("U_",names(.))]) + ) %>% + as.data.frame() + + + + + cat("\n data has been made \n") + + cat("\n First few observations \n ") + print(head(data)) + cat("\n \n ") + return(data) + +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..89801dd --- /dev/null +++ b/R/utils.R @@ -0,0 +1,55 @@ +plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web + plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) + + #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") + + geom_density(alpha=0.5) + + geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) + ## this makes a vertical line of the mean + labs(x=feature, y = "Density") + plt + guides(fill=guide_legend(title=label_column)) +} + + + +download_and_extract_zip <- function(url, dest_folder = ".", zip_name = NULL) { + # If zip_name is not provided, extract it from the URL + if (is.null(zip_name)) { + zip_name <- basename(url) + } + folder <- paste0(dest_folder,"/data") + + + # Check if the folder is empty + if (length(list.files(folder)) > 0) { + warning("Destination folder is not empty. Nothing copied.") + return(invisible(NULL)) + } + + + # Download the zip file + download.file(url, zip_name, method = "auto", quiet = FALSE, mode = "w", cacheOK = TRUE) + + # Extract the contents + unzip(zip_name, exdir = dest_folder) + + + # Return the path to the extracted folder + return(file.path(dest_folder, tools::file_path_sans_ext(zip_name))) +} + + + + + + +make_md <- function(f=file){ + + + + + rmarkdown::render("simulation_output.rmd", + output_file = paste0( + stringr::str_remove_all( + file,"parameters_|.R$"),".html"), + params = list(file=file) + ) + +} diff --git a/inst/extdata/Rbook/design1.RDS b/inst/extdata/Rbook/design1.RDS new file mode 100644 index 0000000000000000000000000000000000000000..a8a4542cb2073b93b4d59dbc55563047599de62e GIT binary patch literal 1279 zcmb2|=3oE==C{#j{cbx5#Lcl6nyM9K`Q(6NSCMYe$))B+uY|V*<!h)cRA5@+xg_ql zLal_HV*iBX{>r8x;qz<W<lDZf+MB1p=1E_A*|XU<Ki)bf|0Ua9Gc@?CM$lHbnpINq zcJ)>N4Y%#>o|;#)r?>y+PrtXvSJzBke`!kBJK@lxCl9KQdhg1Xi>}kWw>aOgZ1#K; zr{A81`#$cgSh%kE=EI5RpY1k26y005tv2oS^Q`r&=3nBtx_;63+6?>qRW;umvt@<+ zE><zw<#X-zKlSeTrdqz*|Ds<%@6`)^t9}2s+Lz+hS#vIYUwiz-UjLZiF=apARfpf% z^ey1?r1|fwPXGJ!yes^Q{*_Xmuk$wi6u)u(_wSUNIU0|v!t6HH%~_vq?d5I5cjVxo zs3cjjns3KHYFx=*6CCkg;+3THJ;NtQt(Mrh?a%p|9JfaN`okOgE6*p%ZmW9z^Ygmj ze7}3YX4*vCJS)6!e)9WCv%quaD)s4eBd*u&cYG)Fc1`}L(z*T5TX*$5zZSOF!i4XK z;yj7-busKut`<#yuYWahjsJnGAEmoxf31IBu2n8kd#tM9Zo+koOQnfB1>a2YKQj4) zp!jvgSoRh1w<XW7(XO1&Bb<6i{U&Rz!>1M%)_(qj=f1xe_@ehCP-eaV^T5d6^U~)q z-q@+PZaMqsnD$&-+Yir63*(Nt%UolBaNg>%$KO^PH~WIx8|i)T8&j;GIm)b={d{A7 zj=N0X;XOO<)s@v&%~fyu&*>3!pF!o_ZoL;f^aa9GA8(OAtId7?sPdKXMWOd?7w&WK zTDN)C`)vW0wXb!4?@d0eYjrBl{dSeitE!LI|Ke-Ehn334e#}u0smZvv=tU|2*SoQz zeiv;*`$Oc^_jzv$KkEL&ZoAWa^Z2{w>vR9r?_Hm}_{HbwF8x>GTjVnDDf&ON<NSN) zYwf<9uL_Uq&c0v%V_*L2UzTcD&a1nA{&nr~tmUh|7xmfx>|>YfZ9npU(flLt75P3D zTirWmZ}q87=)Cr)MPmDIN3<XK-u5U^_1~pw{$Io^5~BAXv3ZgIaI>nN#ADZaF*njJ zmfV+4n3FwkPR-7G_LI)H%@f??COmVUm#_VUabNFulTV)icCWenK$Nv+;`eDk!npIJ zW|VLHuv5u?Kl5+CCvF><`y%d(sXX%7WAD8%u2=u7;Nxuu{eN#Y^;tx$H{suP=)N)2 z{1bI-=iC0I_;*NiS)5QeUe{FlM6y!u=k>a4@z0a3ueaAMZmR5@|I}*2a#uOYA7O8r zj~%vgo)9}hVEO9<`yN=S{+sf5<Ng9!Ub}7Fg=@4E<tk&|*st~e?%3G(^_ybcgU;U9 z$A#+SCVXT3EgWal7xJk#sJmaoZeIA0KgWNRPHIodKjv<27iIGOO})csU#t8HdtD7> zXC>ceOf)&UeeSIF%Zs)K@4RuWRCdx{M?=B6#%V4UvQ|+`ig|q}y$w0X{3&<xnu&Y; z*w=6NecmHEQEut89p$`MJH*e1oMZdB?C-3VKYr|)mvH^NVeiYNO;IbK*gIW0aU&*P zPcJQe$<?G@KkLmWtBR_&&bf1zarxewAB9!&x5G;wE}s9<-rwJUzk#B4>38>}9AB~5 z{Nk(M&ru6IueVO^^lszBMoOn=?|oWT_xWvA?X=|6zjU8(zCHWvoE-P++i%jg&b#A( z^Ts{t<@~ElwVt0?k(3f*cxq+Rl;EQp%ew;OL>HHc_MSd^_=}m{&(Go?mwB8!X~e() E0BV7vRsaA1 literal 0 HcmV?d00001 diff --git a/man/simulate_choices.Rd b/man/simulate_choices.Rd new file mode 100644 index 0000000..80d6ecc --- /dev/null +++ b/man/simulate_choices.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_choices.R +\name{simulate_choices} +\alias{simulate_choices} +\title{Simulate choices based on a dataframe with a design} +\usage{ +simulate_choices(data = datadet, utility = utils, setspp) +} +\arguments{ +\item{data}{a dataframe that includes a design repeated for the number of observations} + +\item{utility}{a list with the utility functions, one utility function for each alternatives} + +\item{setspp}{an integeger, the number of choice sets per person} +} +\value{ +a dataframe that includes simulated choices and a design +} +\description{ +Simulate choices based on a dataframe with a design +} +\examples{ +\dontrun{simulate_choices(datadet, utils,setspp)} +} diff --git a/tests/manualtests.R b/tests/manualtests.R new file mode 100644 index 0000000..f4df9b6 --- /dev/null +++ b/tests/manualtests.R @@ -0,0 +1,44 @@ + +rm(list=ls()) +devtools::load_all() + + + + +designpath<- system.file("extdata","Rbook" ,package = "simulateDCE") + +#notes <- "This design consists of different heuristics. One group did not attend the methan attribute, another group only decided based on the payment" + +notes <- "No Heuristics" + +resps =240 # number of respondents +nosim=2 # number of simulations to run (about 500 is minimum) + +#betacoefficients should not include "-" +bsq=0.00 +bredkite=-0.05 +bdistance=0.50 +bcost=-0.05 +bfarm2=0.25 +bfarm3=0.50 +bheight2=0.25 +bheight3=0.50 + + +destype <- "spdesign" + + +#place your utility functions here +u<- list(u1= list( + v1 =V.1 ~ bsq * alt1.sq, + v2 =V.2 ~ bfarm2 * alt2.farm2 + bfarm3 * alt2.farm3 + bheight2 * alt2.height2 + bheight3 * alt2.height3 + bredkite * alt2.redkite + bdistance * alt2.distance + bcost * alt2.cost, + v3 =V.3 ~ bfarm2 * alt3.farm2 + bfarm3 * alt3.farm3 + bheight2 * alt3.height2 + bheight3 * alt3.height3 + bredkite * alt3.redkite + bdistance * alt3.distance + bcost * alt3.cost +) +) + + + +sim_all() + + + -- GitLab