Skip to content
Snippets Groups Projects
Commit 45359a50 authored by dj44vuri's avatar dj44vuri
Browse files

added true coefficients to summaryall and changed argument names to be more...

added true coefficients to summaryall and changed argument names to be more consistent accross functions
parent 7b1501af
Branches
No related tags found
No related merge requests found
......@@ -27,7 +27,8 @@ Imports:
stringr,
tibble,
tictoc,
tidyr
tidyr,
rlang
Suggests:
knitr,
testthat (>= 3.0.0)
......
utils::globalVariables(c("designfile", "destype", "Choice.situation", "Design", "." ,"ID", ":=", "..density..", "sou_gis", "block", "Block"))
utils::globalVariables(c("designfile", "destype", "Choice.situation", "Design", "." ,"ID", ":=", "..density..", "sou_gis", "block", "Block", "row_id", "final_set"))
......@@ -4,8 +4,6 @@
#' @param resps Number of respondents you want to simulate
#' @inheritParams readdesign
#' @param designpath The path to the folder where the designs are stored. For example "c:/myfancydec/Designs"
#' @param u A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal), this group still has to be in a list in the u list. As a convention name beta coefficients starting with a lower case "b"
#' @param bcoeff List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions
#' @inheritParams sim_choice
#' @inheritParams simulate_choices
#' @return A list, with all information on the simulation. This list an be easily processed by the user and in the rmarkdown template.
......@@ -27,15 +25,27 @@
#' bheight2=0.25,
#' bheight3=0.50)
#'
sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath, u, bcoeff, decisiongroups = c(0,1), manipulations = list(), estimate = TRUE, chunks=1, utility_transform_type = "simple"){
sim_all <- function(nosim = 2,
resps,
designtype = NULL,
destype = NULL,
designpath,
u,
bcoeff,
decisiongroups = c(0, 1),
manipulations = list(),
estimate = TRUE,
chunks = 1,
utility_transform_type = "simple") {
#################################################
########## Input Validation Test ###############
#################################################
########### validate the utility function ########
if (missing(u) || !(is.list(u) && any(sapply(u, is.list)))){
stop(" 'u' must be provided and must be a list containing at least one list element (list of lists).")
if (missing(u) || !(is.list(u) && any(sapply(u, is.list)))) {
stop(
" 'u' must be provided and must be a list containing at least one list element (list of lists)."
)
}
########## validate the bcoeff list ################
......@@ -46,7 +56,9 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
if (nosim < chunks) {
stop("You cannot have more chunks than runs. The number of chunks tells us how often we save the simulation results on disk. Maximum one per run.")
stop(
"You cannot have more chunks than runs. The number of chunks tells us how often we save the simulation results on disk. Maximum one per run."
)
}
# Check if bcoeff is a list
......@@ -54,7 +66,7 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
stop("Argument 'bcoeff' must be a list.")
}
if (length(u) != length(decisiongroups) -1){
if (length(u) != length(decisiongroups) - 1) {
stop("Number of decision groups must equal number of utility functions!")
}
if (!is.vector(decisiongroups)) {
......@@ -78,7 +90,7 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
}
#### check that all the coefficients in utility function have a cooresponding value in bcoeff ####
# Extract coefficients from utility function starting with "b"
# Extract coefficients from utility function starting with "b"
coeff_names_ul <- unique(unlist(lapply(u, function(u) {
formula_strings <- unlist(u)
coef_names <- unique(unlist(lapply(formula_strings, function(f) {
......@@ -93,30 +105,52 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
# Check if all utility function coefficients starting with "b" are covered in bcoeff list
missing_coeffs <- coeff_names_ul[!(coeff_names_ul %in% names(bcoeff))]
if (length(missing_coeffs) > 0) {
stop(paste("Missing coefficients in 'bcoeff':", paste(missing_coeffs, collapse = ", "), ". Perhaps there is a typo?"))
stop(paste(
"Missing coefficients in 'bcoeff':",
paste(missing_coeffs, collapse = ", "),
". Perhaps there is a typo?"
))
}
########## validate resps #####################
if (missing(resps) || !(is.integer(resps) || (is.numeric(resps) && identical(trunc(resps), resps)))) {
stop(" 'resps' must be provided and must be an integer indicating the number of respondents per run.")
if (missing(resps) ||
!(is.integer(resps) ||
(is.numeric(resps) && identical(trunc(resps), resps)))) {
stop(
" 'resps' must be provided and must be an integer indicating the number of respondents per run."
)
}
########## validate designpath ################
if (!dir.exists(designpath)) {
stop(" The folder where your designs are stored does not exist. \n Check if designpath is correctly specified")
stop(
" The folder where your designs are stored does not exist. \n Check if designpath is correctly specified"
)
}
#################################################
########## End Validation Tests #################
#################################################
designfile<-list.files(designpath,full.names = T)
designname <- stringr::str_remove_all(list.files(designpath,full.names = F),
"(.ngd|_|.RDS)") ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace
designfile <- list.files(designpath, full.names = T)
designname <- stringr::str_remove_all(list.files(designpath, full.names = F), "(.ngd|_|.RDS)") ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace
tictoc::tic()
all_designs<- purrr::map(designfile, sim_choice,
no_sim= nosim,respondents = resps, designtype=designtype, destype =destype, ut=u, bcoefficients = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations, estimate = estimate, chunks =chunks, utility_transform_type = utility_transform_type) %>% ## iterate simulation over all designs
all_designs <- purrr::map(
designfile,
sim_choice,
no_sim = nosim,
respondents = resps,
designtype = designtype,
destype = destype,
u = u,
bcoeff = bcoeff,
decisiongroups = decisiongroups,
manipulations = manipulations,
estimate = estimate,
chunks = chunks,
utility_transform_type = utility_transform_type
) %>% ## iterate simulation over all designs
stats::setNames(designname)
......@@ -124,49 +158,63 @@ sim_all <- function(nosim=2, resps, designtype=NULL, destype = NULL, designpath,
print(time)
if (estimate==TRUE) {
powa <- purrr::map(all_designs, ~ .x$power)
if (estimate == TRUE) {
powa <- purrr::map(all_designs, ~ .x$power)
browser()
summaryall <- data.frame(truepar = as.double(c(bcoeff,rep(NA,length(bcoeff)))), as.data.frame(purrr::map(all_designs, ~ .x$summary)) )%>%
dplyr::select(!dplyr::ends_with("vars")) %>%
dplyr::relocate(truepar, dplyr::ends_with(c(
".n", "mean", "sd", "min" , "max", "range" , "se"
)))
summaryall <- as.data.frame(purrr::map(all_designs, ~.x$summary)) %>%
dplyr::select(!dplyr::ends_with("vars")) %>%
dplyr::relocate(dplyr::ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" )))
coefall <- purrr::map(all_designs, ~ .x$coefs)
coefall <- purrr::map(all_designs, ~ .x$coefs)
pat <- paste0("(", paste(designname, collapse = "|"), ").") # needed to identify pattern to be replaced
pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced
s <- as.data.frame(coefall) %>%
dplyr::select(!dplyr::matches("pval|run")) %>%
dplyr::rename_with( ~ sub("est_b", "", .x), dplyr::everything()) %>%
dplyr::rename_with(~ paste0(., "_", stringr::str_extract(., pat)),
dplyr::everything()) %>% # rename attributes for reshape part 1
dplyr::rename_with(~ stringr::str_replace(., pattern = pat, replacement =
""),
dplyr::everything()) %>%
stats::reshape(
varying = 1:ncol(.),
sep = "_" ,
direction = "long" ,
timevar = "design",
idvar = "run"
)
s<-as.data.frame(coefall) %>%
dplyr::select(!dplyr::matches("pval|run")) %>%
dplyr::rename_with(~ sub("est_b", "", .x), dplyr::everything()) %>%
dplyr::rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), dplyr::everything() ) %>% # rename attributes for reshape part 1
dplyr::rename_with( ~ stringr::str_replace(.,pattern = pat,replacement=""), dplyr::everything() ) %>%
stats::reshape(varying =1:ncol(.), sep = "_" , direction = "long" ,timevar = "design", idvar = "run" )
p = list()
p=list()
for (att in names(dplyr::select(s, -c("design", "run")))) {
p[[att]] <- plot_multi_histogram(s, att, "design")
for (att in names(dplyr::select(s,-c("design","run")))) {
print(p[[att]])
p[[att]] <- plot_multi_histogram(s,att,"design")
}
print(p[[att]])
all_designs[["summaryall"]] = summaryall
all_designs[["graphs"]] = p
all_designs[["powa"]] = powa
}
all_designs[["summaryall"]] = summaryall
all_designs[["graphs"]]=p
all_designs[["powa"]]=powa
}
all_designs[["time"]]=time
all_designs[["arguements"]] = list( "Beta values" = bcoeff, "Utility functions" = u , "Decision groups" =decisiongroups , "Manipulation of vars" = manipulations,
"Number Simulations" = nosim, "Respondents" = resps, "Designpath" = designpath)
all_designs[["time"]] = time
all_designs[["arguements"]] = list(
"Beta values" = bcoeff,
"Utility functions" = u ,
"Decision groups" = decisiongroups ,
"Manipulation of vars" = manipulations,
"Number Simulations" = nosim,
"Respondents" = resps,
"Designpath" = designpath
)
......
......@@ -3,7 +3,7 @@
#' @param designfile path to a file containing a design.
#' @param no_sim Number of runs i.e. how often do you want the simulation to be repeated
#' @param respondents Number of respondents. How many respondents do you want to simulate in each run.
#' @param ut The first element of the utility function list
#' @param u A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal), this group still has to be in a list in the u list. As a convention name beta coefficients starting with a lower case "b"
#' @inheritParams readdesign
#' @inheritParams simulate_choices
#' @param chunks The number of chunks determines how often results should be stored on disk as a safety measure to not loose simulations if models have already been estimated. For example, if no_sim is 100 and chunks = 2, the data will be saved on disk after 50 and after 100 runs.
......@@ -12,13 +12,13 @@
#' @export
#'
#' @examples \dontrun{ simchoice(designfile="somefile", no_sim=10, respondents=330,
#' mnl_U,ut=u[[1]] ,designtype="ngene")}
#' mnl_U,u=u[[1]] ,designtype="ngene")}
#'
sim_choice <- function(designfile, no_sim = 10, respondents = 330,ut ,designtype = NULL, destype = NULL, bcoefficients, decisiongroups=c(0,1), manipulations = list() , estimate, chunks=1, utility_transform_type = "simple") {
sim_choice <- function(designfile, no_sim = 10, respondents = 330, u ,designtype = NULL, destype = NULL, bcoeff, decisiongroups=c(0,1), manipulations = list() , estimate, chunks=1, utility_transform_type = "simple") {
if (utility_transform_type == "simple") {
message("'simple' is deprecated and will be removed in the future. Use 'exact' instead.", call. = FALSE)
message("'simple' is deprecated and will be removed in the future. Use 'exact' instead.")
}
......@@ -32,7 +32,7 @@ sim_choice <- function(designfile, no_sim = 10, respondents = 330,ut ,designtype
cat("This is Run number ", run)
database <- simulate_choices(datadet, utility = ut, setspp=setpp, bcoefficients = bcoefficients, decisiongroups = decisiongroups, manipulations = manipulations)
database <- simulate_choices(datadet, utility = u, setspp=setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations)
model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database)
......@@ -52,7 +52,7 @@ designs_all <- list()
cat("Utility function used in simulation, ie the true utility: \n\n")
print(ut)
print(u)
......@@ -90,7 +90,7 @@ designs_all <- list()
database <- simulate_choices(data=datadet, utility = ut, setspp = setpp, bcoefficients = bcoefficients, decisiongroups = decisiongroups, manipulations = manipulations)
database <- simulate_choices(data=datadet, utility = u, setspp = setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations)
### start estimation
......@@ -102,7 +102,7 @@ designs_all <- list()
#### Function that transforms user written utility for simulation into utility function for mixl.
transform_util <- function() {
mnl_U <-paste(purrr::map_chr(ut[[1]],as.character,keep.source.attr = TRUE),collapse = "",";") %>%
mnl_U <-paste(purrr::map_chr(u[[1]],as.character,keep.source.attr = TRUE),collapse = "",";") %>%
stringr::str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"=" $alt"))
}
......@@ -116,14 +116,14 @@ transform_util2 <- function() {
)
mnl_U <- paste(
purrr::map_chr(ut[[1]], as.character, keep.source.attr = TRUE),
purrr::map_chr(u[[1]], as.character, keep.source.attr = TRUE),
collapse = "",
";"
) %>%
# Replace coefficients with exact matches
stringr::str_replace_all(stats::setNames(
paste0("@", names(bcoefficients)),
paste0("(?<![._a-zA-Z0-9])", names(bcoefficients), "(?![._a-zA-Z0-9-])")
paste0("@", names(bcoeff)),
paste0("(?<![._a-zA-Z0-9])", names(bcoeff), "(?![._a-zA-Z0-9-])")
)) %>%
# General transformations
stringr::str_replace_all(c(
......@@ -260,7 +260,7 @@ transform_util2 <- function() {
return(output)
} else {
output<- 1:no_sim %>% purrr::map(~ simulate_choices(datadet, utility = ut, setspp=setpp, bcoefficients = bcoefficients, decisiongroups = decisiongroups, manipulations = manipulations))
output<- 1:no_sim %>% purrr::map(~ simulate_choices(datadet, utility = u, setspp=setpp, bcoeff = bcoeff, decisiongroups = decisiongroups, manipulations = manipulations))
return(output)
}
......
#' Simulate choices based on a dataframe with a design
#' Simulate choices based on a data.frame with a design and respondents
#'
#' @param data a dataframe that includes a design repeated for the number of observations
#' @param utility a list with the utility functions, one utility function for each alternatives
#' @param setspp an integer, the number of choice sets per person
#' @param bcoefficients List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions
#' @param bcoeff List of initial coefficients for the utility function. List content/length can vary based on application. I ideally begins (but does not have to) with b and need be the same as those entered in the utility functions
#' @param decisiongroups A vector showing how decision groups are numerically distributed
#' @param manipulations A variable to alter terms of the utility functions examples may be applying a factor or applying changes to terms selectively for different groups
#' @param estimate If TRUE models will be estimated. If false only a dataset will be simulated. Default is true
#' @return a dataframe that includes simulated choices and a design
#' @return a data.frame that includes simulated choices and a design
#' @export
#'
#' @examples \dontrun{simulate_choices(datadet, ut,setspp)}
simulate_choices <- function(data, utility, setspp, bcoefficients, decisiongroups = c(0,1), manipulations = list(), estimate) { #the part in dataset that needs to be repeated in each run
simulate_choices <- function(data, utility, setspp, bcoeff, decisiongroups = c(0,1), manipulations = list(), estimate) { #the part in dataset that needs to be repeated in each run
### unpack the bcoeff list so variables are accessible
for (key in names(bcoefficients)) {
assign(key, bcoefficients[[key]])
for (key in names(bcoeff)) {
assign(key, bcoeff[[key]])
}
......
......@@ -33,7 +33,7 @@ sim_all(
\item{u}{A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal), this group still has to be in a list in the u list. As a convention name beta coefficients starting with a lower case "b"}
\item{bcoeff}{List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions}
\item{bcoeff}{List of initial coefficients for the utility function. List content/length can vary based on application. I ideally begins (but does not have to) with b and need be the same as those entered in the utility functions}
\item{decisiongroups}{A vector showing how decision groups are numerically distributed}
......
......@@ -8,10 +8,10 @@ sim_choice(
designfile,
no_sim = 10,
respondents = 330,
ut,
u,
designtype = NULL,
destype = NULL,
bcoefficients,
bcoeff,
decisiongroups = c(0, 1),
manipulations = list(),
estimate,
......@@ -26,13 +26,13 @@ sim_choice(
\item{respondents}{Number of respondents. How many respondents do you want to simulate in each run.}
\item{ut}{The first element of the utility function list}
\item{u}{A list with utility functions. The list can incorporate as many decision rule groups as you want. However, each group must be in a list in this list. If you just use one group (the normal), this group still has to be in a list in the u list. As a convention name beta coefficients starting with a lower case "b"}
\item{designtype}{Is it a design created with ngene, spdesign or idefix. use 'ngene', 'spdesign' or 'idefix. Ngene designs should be stored as the standard .ngd output. spdesign should be the spdesign object stored as an RDS file. Idefix objects should also be stored as an RDS file. If designtype is not specified, I try to guess what it is. This is especially helpful if you want to carry out a simulation for both spdesign designs and ngene designs at the same time.}
\item{destype}{Deprecated. Use designtype instead.}
\item{bcoefficients}{List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions}
\item{bcoeff}{List of initial coefficients for the utility function. List content/length can vary based on application. I ideally begins (but does not have to) with b and need be the same as those entered in the utility functions}
\item{decisiongroups}{A vector showing how decision groups are numerically distributed}
......@@ -52,6 +52,6 @@ Simulate and estimate choices
}
\examples{
\dontrun{ simchoice(designfile="somefile", no_sim=10, respondents=330,
mnl_U,ut=u[[1]] ,designtype="ngene")}
mnl_U,u=u[[1]] ,designtype="ngene")}
}
......@@ -2,13 +2,13 @@
% Please edit documentation in R/simulate_choices.R
\name{simulate_choices}
\alias{simulate_choices}
\title{Simulate choices based on a dataframe with a design}
\title{Simulate choices based on a data.frame with a design and respondents}
\usage{
simulate_choices(
data,
utility,
setspp,
bcoefficients,
bcoeff,
decisiongroups = c(0, 1),
manipulations = list(),
estimate
......@@ -21,7 +21,7 @@ simulate_choices(
\item{setspp}{an integer, the number of choice sets per person}
\item{bcoefficients}{List of initial coefficients for the utility function. List content/length can vary based on application, but should all begin with b and be the same as those entered in the utility functions}
\item{bcoeff}{List of initial coefficients for the utility function. List content/length can vary based on application. I ideally begins (but does not have to) with b and need be the same as those entered in the utility functions}
\item{decisiongroups}{A vector showing how decision groups are numerically distributed}
......@@ -30,10 +30,10 @@ simulate_choices(
\item{estimate}{If TRUE models will be estimated. If false only a dataset will be simulated. Default is true}
}
\value{
a dataframe that includes simulated choices and a design
a data.frame that includes simulated choices and a design
}
\description{
Simulate choices based on a dataframe with a design
Simulate choices based on a data.frame with a design and respondents
}
\examples{
\dontrun{simulate_choices(datadet, ut,setspp)}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment