Skip to content
Snippets Groups Projects
Commit f264a641 authored by dj44vuri's avatar dj44vuri
Browse files

first working version tested with RBook

parent 4b1d858e
No related branches found
No related tags found
No related merge requests found
......@@ -11,8 +11,15 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.2
Imports:
dplyr,
formula.tools,
mixl,
psych,
purrr,
readr,
rmarkdown,
stringr,
tibble,
tictoc,
tidyr
Suggests:
testthat (>= 3.0.0)
......
# Generated by roxygen2: do not edit by hand
export(readdesign)
export(simulate_choices)
sim_all <- function(){
require("stringr")
designfile<-list.files(designpath,full.names = T)
designname <- str_remove_all(list.files(designpath,full.names = F),
"(.ngd|_|.RDS)") ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace
if (!exists("destype")) destype="ngene"
tictoc::tic()
all_designs<- purrr::map(designfile, sim_choice,
no_sim= nosim,respondents = resps, mnl_U = mnl_U, destype=destype) %>% ## iterate simulation over all designs
setNames(designname)
time <- tictoc::toc()
print(time)
powa <- map(all_designs, ~ .x$power)
summaryall <- as.data.frame(purrr::map(all_designs, ~.x$summary)) %>%
dplyr::select(!ends_with("vars")) %>%
relocate(ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" )))
coefall <- map(all_designs, ~ .x$coefs)
pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced
s<-as.data.frame(coefall) %>%
dplyr::select(!matches("pval|run")) %>%
rename_with(~ sub("est_b", "", .x), everything()) %>%
# rename_with(~ sub("est_asc_", "asc", .x), everything()) %>%
rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), everything() ) %>% # rename attributes for reshape part 1
rename_with( ~ stringr::str_replace(.,pattern = pat,replacement=""), everything() ) %>%
reshape(varying =1:ncol(.), sep = "_" , direction = "long" ,timevar = "design", idvar = "run" )
p=list()
for (att in names(dplyr::select(s,-c("design","run")))) {
p[[att]] <- plot_multi_histogram(s,att,"design")
print(p[[att]])
}
all_designs[["summaryall"]] = summaryall
all_designs[["graphs"]]=p
all_designs[["powa"]]=powa
all_designs[["time"]]=time
return(all_designs)
}
sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u[[1]] ,destype) {
require("tictoc")
require("readr")
require("psych")
require("dplyr")
require("evd")
require("tidyr")
require("kableExtra")
require("gridExtra")
require("stringr")
require("mixl")
require("furrr")
require("purrr")
require("ggplot2")
require("formula.tools")
require("rlang")
estimate_sim <- function(run=1) { #start loop
cat("This is Run number ", run)
database <- simulate_choices(datadet, utility = utils, setspp=setpp )
cat("This is the utility functions \n" , mnl_U)
model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
return(model)
}
mnl_U <-paste(map_chr(utils,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"="$alt"))
cat("mixl \n")
cat(mnl_U)
cat("\n Simulation \n")
print(u)
designs_all <- list()
design<- readdesign(design = designfile)
if (!exists("design$Block")) design$Block=1
nsets<-nrow(design)
nblocks<-max(design$Block)
setpp <- nsets/nblocks # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
replications <- respondents/nblocks
datadet<- design %>%
arrange(Block,Choice.situation) %>%
slice(rep(row_number(), replications)) %>% ## replicate design according to number of replications
mutate(ID = rep(1:respondents, each=setpp)) %>% # create Respondent ID.
relocate(ID,`Choice.situation`) %>%
as.data.frame()
database <- simulate_choices(data=datadet, utility = utils, setspp = setpp)
model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
availabilities <- mixl::generate_default_availabilities(
database, model_spec$num_utility_functions)
output<- 1:no_sim %>% map(estimate_sim)
coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>%
tibble::rownames_to_column() %>%
pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>%
bind_rows(.id = "run")
output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE)
output[["coefs"]] <-coefs
pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0"))
output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals)
output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
print(kable(output[["summary"]],digits = 2, format = "rst"))
print(output[["power"]])
return(output)
}
#' Simulate choices based on a dataframe with a design
#'
#' @param data a dataframe that includes a design repeated for the number of observations
#' @param utility a list with the utility functions, one utility function for each alternatives
#' @param setspp an integeger, the number of choice sets per person
#'
#' @return a dataframe that includes simulated choices and a design
#' @export
#'
#' @examples \dontrun{simulate_choices(datadet, utils,setspp)}
simulate_choices <- function(data=datadet, utility =utils, setspp) { #the part in dataset that needs to be repeated in each run
by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
# //! cur_data_all may get deprecated in favor of pick
# pick(everything()) %>%
cur_data_all() |>
dplyr::transmute(!!formula.tools::lhs(equation) := !!formula.tools::rhs(equation) )
}
cat(" \n does sou_gis exist: ", exists("sou_gis"), "\n")
if (exists("sou_gis") && is.function(sou_gis)) {
sou_gis()
cat("\n source of gis has been done \n")
}
if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations
n=seq_along(1:length(utility)) # number of utility functions
cat("\n dataset final_set exists: ",exists("final_set"), "\n")
if(exists("final_set")) data = left_join(data,final_set, by = "ID")
cat("\n decisiongroups exists: " ,exists("decisiongroups"))
if(exists("decisiongroups")) { ### create a new variable to classify decision groups.
data = dplyr::mutate(data,group = as.numeric(cut(row_number(),
breaks = decisiongroups * n(),
labels = seq_along(decisiongroups[-length(decisiongroups)]),
include.lowest = TRUE)))
print(table(data$group))
} else {
data$group=1
}
data<- data %>%
dplyr::group_by(ID) %>%
dplyr::mutate(!!! manipulations)
subsets<- split(data,data$group)
subsets <- purrr::map2(.x = seq_along(u),.y = subsets,
~ mutate(.y,map_dfc(u[[.x]],by_formula)))
data <-bind_rows(subsets)
data<- data %>%
dplyr::rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), tidyr::everything()) %>%
mutate(across(.cols=n,.fns = ~ rgumbel(setspp,loc=0, scale=1), .names = "{'e'}_{n}" ),
across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>%
dplyr::mutate(CHOICE=max.col(.[,grep("U_",names(.))])
) %>%
as.data.frame()
cat("\n data has been made \n")
cat("\n First few observations \n ")
print(head(data))
cat("\n \n ")
return(data)
}
plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
#geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
geom_density(alpha=0.5) +
geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) + ## this makes a vertical line of the mean
labs(x=feature, y = "Density")
plt + guides(fill=guide_legend(title=label_column))
}
download_and_extract_zip <- function(url, dest_folder = ".", zip_name = NULL) {
# If zip_name is not provided, extract it from the URL
if (is.null(zip_name)) {
zip_name <- basename(url)
}
folder <- paste0(dest_folder,"/data")
# Check if the folder is empty
if (length(list.files(folder)) > 0) {
warning("Destination folder is not empty. Nothing copied.")
return(invisible(NULL))
}
# Download the zip file
download.file(url, zip_name, method = "auto", quiet = FALSE, mode = "w", cacheOK = TRUE)
# Extract the contents
unzip(zip_name, exdir = dest_folder)
# Return the path to the extracted folder
return(file.path(dest_folder, tools::file_path_sans_ext(zip_name)))
}
make_md <- function(f=file){
rmarkdown::render("simulation_output.rmd",
output_file = paste0(
stringr::str_remove_all(
file,"parameters_|.R$"),".html"),
params = list(file=file)
)
}
File added
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulate_choices.R
\name{simulate_choices}
\alias{simulate_choices}
\title{Simulate choices based on a dataframe with a design}
\usage{
simulate_choices(data = datadet, utility = utils, setspp)
}
\arguments{
\item{data}{a dataframe that includes a design repeated for the number of observations}
\item{utility}{a list with the utility functions, one utility function for each alternatives}
\item{setspp}{an integeger, the number of choice sets per person}
}
\value{
a dataframe that includes simulated choices and a design
}
\description{
Simulate choices based on a dataframe with a design
}
\examples{
\dontrun{simulate_choices(datadet, utils,setspp)}
}
rm(list=ls())
devtools::load_all()
designpath<- system.file("extdata","Rbook" ,package = "simulateDCE")
#notes <- "This design consists of different heuristics. One group did not attend the methan attribute, another group only decided based on the payment"
notes <- "No Heuristics"
resps =240 # number of respondents
nosim=2 # number of simulations to run (about 500 is minimum)
#betacoefficients should not include "-"
bsq=0.00
bredkite=-0.05
bdistance=0.50
bcost=-0.05
bfarm2=0.25
bfarm3=0.50
bheight2=0.25
bheight3=0.50
destype <- "spdesign"
#place your utility functions here
u<- list(u1= list(
v1 =V.1 ~ bsq * alt1.sq,
v2 =V.2 ~ bfarm2 * alt2.farm2 + bfarm3 * alt2.farm3 + bheight2 * alt2.height2 + bheight3 * alt2.height3 + bredkite * alt2.redkite + bdistance * alt2.distance + bcost * alt2.cost,
v3 =V.3 ~ bfarm2 * alt3.farm2 + bfarm3 * alt3.farm3 + bheight2 * alt3.height2 + bheight3 * alt3.height3 + bredkite * alt3.redkite + bdistance * alt3.distance + bcost * alt3.cost
)
)
sim_all()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment