Select Git revision
PreProcessor.cpp
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
functions.R 9.46 KiB
make_md <- function(f=file){
rmarkdown::render("simulation_output.rmd",
output_file = paste0(
stringr::str_remove_all(
file,"parameters_|.R$"),".html"),
params = list(file=file)
)
}
sim_all <- function(){
require("stringr")
designfile<-list.files(designpath,full.names = T)
designname <- str_remove_all(list.files(designpath,full.names = F),
"(.ngd|_|.RDS)") ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace
if (!exists("destype")) destype="ngene"
tictoc::tic()
all_designs<- purrr::map(designfile, sim_choice,
no_sim= nosim,respondents = resps, mnl_U = mnl_U, destype=destype) %>% ## iterate simulation over all designs
setNames(designname)
time <- tictoc::toc()
print(time)
powa <- map(all_designs, ~ .x$power)
summaryall <- as.data.frame(purrr::map(all_designs, ~.x$summary)) %>%
dplyr::select(!ends_with("vars")) %>%
relocate(ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" )))
coefall <- map(all_designs, ~ .x$coefs)
pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced
s<-as.data.frame(coefall) %>%
dplyr::select(!matches("pval|run")) %>%
rename_with(~ sub("est_b", "", .x), everything()) %>%
# rename_with(~ sub("est_asc_", "asc", .x), everything()) %>%
rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), everything() ) %>% # rename attributes for reshape part 1
rename_with( ~ stringr::str_replace(.,pattern = pat,replacement=""), everything() ) %>%
reshape(varying =1:ncol(.), sep = "_" , direction = "long" ,timevar = "design", idvar = "run" )
p=list()
for (att in names(dplyr::select(s,-c("design","run")))) {
p[[att]] <- plot_multi_histogram(s,att,"design")
print(p[[att]])
}
all_designs[["summaryall"]] = summaryall
all_designs[["graphs"]]=p
all_designs[["powa"]]=powa
all_designs[["time"]]=time
return(all_designs)
}
readdesign <- function(design = designfile, designtype=destype) {
design <- switch(designtype,
"ngene" =readr::read_delim(design,delim = "\t",
escape_double = FALSE,
trim_ws = TRUE ,
col_select = c(-Design, -starts_with("...")) ,
name_repair = "universal" , show_col_types = FALSE) %>%
filter(!is.na(Choice.situation)),
"spdesign" =as.data.frame(readRDS(design)) %>%
mutate(Choice.situation=1:n()) %>%
rename_with(~ stringr::str_replace(.,pattern = "_","\\."), everything()),
stop("Invalid value for design. Please provide either 'ngene' or 'spdesign'.")
)
}
simulate_choices <- function(data=datadet, utility =utils, setspp) { #the part in dataset that needs to be repeated in each run
by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
# //! cur_data_all may get deprecated in favor of pick
# pick(everything()) %>%
cur_data_all() %>%
transmute(!!lhs(equation) := !!rhs(equation) )
}
cat(" \n does sou_gis exist: ", exists("sou_gis"), "\n")
if (exists("sou_gis") && is.function(sou_gis)) {
sou_gis()
cat("\n source of gis has been done \n")
}
# source("Projects/ValuGaps/code/GIS_data.R")
if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations
n=seq_along(1:length(utility)) # number of utility functions
cat("\n dataset final_set exists: ",exists("final_set"), "\n")
if(exists("final_set")) data = left_join(data,final_set, by = "ID")
cat("\n decisiongroups exists: " ,exists("decisiongroups"))
if(exists("decisiongroups")) { ### create a new variable to classify decision groups.
data = mutate(data,group = as.numeric(cut(row_number(),
breaks = decisiongroups * n(),
labels = seq_along(decisiongroups[-length(decisiongroups)]),
include.lowest = TRUE)))
print(table(data$group))
} else {
data$group=1
}
data<- data %>%
group_by(ID) %>%
mutate(!!! manipulations)
subsets<- split(data,data$group)
subsets <- map2(.x = seq_along(u),.y = subsets,
~ mutate(.y,map_dfc(u[[.x]],by_formula)))
data <-bind_rows(subsets)
data<- data %>%
rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>%
mutate(across(.cols=n,.fns = ~ rgumbel(setspp,loc=0, scale=1), .names = "{'e'}_{n}" ),
across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>%
mutate(CHOICE=max.col(.[,grep("U_",names(.))])
) %>%
as.data.frame()
cat("\n data has been made \n")
cat("\n First few observations \n ")
print(head(data))
cat("\n \n ")
return(data)
}
sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u[[1]] ,destype) {
require("tictoc")
require("readr")
require("psych")
require("dplyr")
require("evd")
require("tidyr")
require("kableExtra")
require("gridExtra")
require("stringr")
require("mixl")
require("furrr")
require("purrr")
require("ggplot2")
require("formula.tools")
require("rlang")
mnl_U <-paste(map_chr(utils,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"="$alt"))
cat("mixl \n")
cat(mnl_U)
cat("\n Simulation \n")
print(u)
estimate_sim <- function(run=1) { #start loop
cat("This is Run number ", run)
database <- simulate_choices(datadet, utility = utils, setspp=setpp )
cat("This is the utility functions \n" , mnl_U)
model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
return(model)
}
designs_all <- list()
design<- readdesign(design = designfile)
if (!exists("design$Block")) design$Block=1
nsets<-nrow(design)
nblocks<-max(design$Block)
setpp <- nsets/nblocks # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
replications <- respondents/nblocks
datadet<- design %>%
arrange(Block,Choice.situation) %>%
slice(rep(row_number(), replications)) %>% ## replicate design according to number of replications
mutate(ID = rep(1:respondents, each=setpp)) %>% # create Respondent ID.
relocate(ID,`Choice.situation`) %>%
as.data.frame()
database <- simulate_choices(data=datadet, utility = utils, setspp = setpp)
model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
availabilities <- mixl::generate_default_availabilities(
database, model_spec$num_utility_functions)
output<- 1:no_sim %>% map(estimate_sim)
coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>%
tibble::rownames_to_column() %>%
pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>%
bind_rows(.id = "run")
output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE)
output[["coefs"]] <-coefs
pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0"))
output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals)
output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
print(kable(output[["summary"]],digits = 2, format = "rst"))
print(output[["power"]])
return(output)
}
plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
#geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
geom_density(alpha=0.5) +
geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) + ## this makes a vertical line of the mean
labs(x=feature, y = "Density")
plt + guides(fill=guide_legend(title=label_column))
}
download_and_extract_zip <- function(url, dest_folder = ".", zip_name = NULL) {
# If zip_name is not provided, extract it from the URL
if (is.null(zip_name)) {
zip_name <- basename(url)
}
folder <- paste0(dest_folder,"/data")
# Check if the folder is empty
if (length(list.files(folder)) > 0) {
warning("Destination folder is not empty. Nothing copied.")
return(invisible(NULL))
}
# Download the zip file
download.file(url, zip_name, method = "auto", quiet = FALSE, mode = "w", cacheOK = TRUE)
# Extract the contents
unzip(zip_name, exdir = dest_folder)
# Return the path to the extracted folder
return(file.path(dest_folder, tools::file_path_sans_ext(zip_name)))
}