From b16a131d4c0fd3f49ccd3776fe61ddcf0be0039a Mon Sep 17 00:00:00 2001 From: Julian Sagebiel <julian.sagebiel@idiv.de> Date: Thu, 24 Aug 2023 16:45:16 +0200 Subject: [PATCH] many changes in function to make valugaps data work --- Projects/SE_AGRI/parameters_SE Design-Agri.R | 2 + Projects/ValuGaps/code/GIS_data.R | 2 +- Projects/ValuGaps/parameters_valugaps.R | 11 +- functions.R | 355 ++++++++++--------- simulationcore_purrr.R | 11 +- 5 files changed, 210 insertions(+), 171 deletions(-) diff --git a/Projects/SE_AGRI/parameters_SE Design-Agri.R b/Projects/SE_AGRI/parameters_SE Design-Agri.R index 1d51a9f..52e7994 100644 --- a/Projects/SE_AGRI/parameters_SE Design-Agri.R +++ b/Projects/SE_AGRI/parameters_SE Design-Agri.R @@ -14,6 +14,8 @@ bforeign = 0.3 bdamage = 0.6 bprice = 0.2 + + manipulations = list(alt1.professional= expr(alt1.initiator==1), alt2.professional= expr(alt2.initiator==1), alt1.expert = expr(alt1.initiator==2), diff --git a/Projects/ValuGaps/code/GIS_data.R b/Projects/ValuGaps/code/GIS_data.R index c278b5f..ac73130 100644 --- a/Projects/ValuGaps/code/GIS_data.R +++ b/Projects/ValuGaps/code/GIS_data.R @@ -27,7 +27,7 @@ process_data <- function(data, name_prefix, cover_data) { #### Draw respondents #### # Load population density data for Germany -density_raster <- rast("data/gis/BBSR_Landnutzung_Bevoelkerung/ewz250_2011_v1_multi.tif") +density_raster <- rast("Projects/ValuGaps/data/gis/BBSR_Landnutzung_Bevoelkerung/ewz250_2011_v1_multi.tif") ##### Do plot of the pop density #### diff --git a/Projects/ValuGaps/parameters_valugaps.R b/Projects/ValuGaps/parameters_valugaps.R index b156f44..9df3ee3 100644 --- a/Projects/ValuGaps/parameters_valugaps.R +++ b/Projects/ValuGaps/parameters_valugaps.R @@ -96,7 +96,7 @@ protected_areas <- mask(protected_areas, vect(germany_geo_c)) #### Define simulation parameters #### resps = 96 # number of respondents -nosim=2 # number of simulations to run (about 500 is minimum) +nosim=3 # number of simulations to run (about 500 is minimum) #buffer_dist <- 15000 buffer_dist_ls <- list(10000, 15000, 25000, 50000) # define radius for the respondents in meter @@ -110,6 +110,7 @@ bc= 1.4 bc2 = -0.0175 bp=-0.03 +mani1 = expr(left_join(final_set, by="ID")) manipulations = list( #expr(across(c("HNV_on_corine", "Protected_on_corine"), ~replace_na(.,0)) ), @@ -135,3 +136,11 @@ u<-list( # v1 =V.1~ basc + bb*alt1.b+ bc * alt1.c + bp * alt1.p , # v2 =V.2~ basc + bb*alt2.b+ bc * alt2.c + bp * alt2.p , # v3 =V.3~ 0) + + +## this function will be called before the data is simulated +sou_gis <- function() { + + source("Projects/ValuGaps/code/GIS_data.R") + +} diff --git a/functions.R b/functions.R index 1ff2fc9..1feaa7e 100644 --- a/functions.R +++ b/functions.R @@ -1,165 +1,192 @@ -sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u ) { - - - require("tictoc") - require("readr") - require("psych") - require("dplyr") - require("evd") - require("tidyr") - require("kableExtra") - require("gridExtra") - require("stringr") - require("mixl") - require("furrr") - require("purrr") - require("ggplot2") - require("formula.tools") - - - mnl_U <-paste(map_chr(u,as.character,keep.source.attr = TRUE),collapse = "",";") %>% - str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V."="U_", " alt"="$alt")) - - - - - - by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function - # //! cur_data_all may get deprecated in favor of pick - # pick(everything()) %>% - cur_data_all() %>% - transmute(!!lhs(equation) := !!rhs(equation) ) - } - - - - - - simulate_choices <- function(data=database) { #the part in dataset that needs to be repeated in each run - - - # browser() - - n=seq_along(1:length(utils)) - - print("DANGER") - - data <- data %>% - group_by(ID) %>% - mutate( - across(.cols=n,.fns = ~ rgumbel(setpp,loc=0, scale=1), .names = "{'e'}_{n}" ), - across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>% - mutate(CHOICE=max.col(.[,grep("U_",names(.))]) - ) %>% - as.data.frame() - - - - - return(data) - - } - - estimate_sim <- function(run=1) { #start loop - - cat(run) - - database <- simulate_choices() - - - - - model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,) - - return(model) - - } - - designs_all <- list() - - design <- read_delim(designfile,delim = "\t", - escape_double = FALSE, - trim_ws = TRUE , - col_select = c(-Design, -starts_with("...")) , - name_repair = "universal") %>% - filter(!is.na(Choice.situation)) - - - - nsets<-nrow(design) - nblocks<-max(design$Block) - setpp <- nsets/nblocks # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets - #respondents <- replications*nblocks - replications <- respondents/nblocks - #browser() - - if(!exists("manipulations")) manipulations=list() - - database<- design %>% - arrange(Block,Choice.situation) %>% - slice(rep(row_number(), replications)) %>% ## replicate design according to number of replications - mutate(RID = rep(1:respondents, each=setpp)) %>% # create Respondent ID. - relocate(RID,`Choice.situation`) %>% - mutate( !!! manipulations , - map_dfc(utils,by_formula) #our functions to create utility variables. They need to be entered as a formula list as an argument - - ) %>% - rename(ID ="RID") %>% - rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% - as.data.frame() - - - - - - database <- simulate_choices() - - model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F) - - est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names) - - - availabilities <- mixl::generate_default_availabilities( - database, model_spec$num_utility_functions) - - plan(multisession, workers = 8) - - output<- 1:no_sim %>% map(estimate_sim) - - coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>% - tibble::rownames_to_column() %>% - pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% - bind_rows(.id = "run") - - output[["summary"]] <-describe(coefs[,-1], fast = TRUE) - - output[["coefs"]] <-coefs - - pvals <- output[["coefs"]] %>% select(starts_with("rob_pval0")) - - output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals) - - - output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents) - - - print(kable(output[["summary"]],digits = 2, format = "rst")) - - - print(output[["power"]]) - - - return(output) - - -} - - -plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web - plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) + - #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") + - geom_density(alpha=0.5) + - geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) + ## this makes a vertical line of the mean - labs(x=feature, y = "Density") - plt + guides(fill=guide_legend(title=label_column)) +sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u ) { + + + require("tictoc") + require("readr") + require("psych") + require("dplyr") + require("evd") + require("tidyr") + require("kableExtra") + require("gridExtra") + require("stringr") + require("mixl") + require("furrr") + require("purrr") + require("ggplot2") + require("formula.tools") + + + mnl_U <-paste(map_chr(u,as.character,keep.source.attr = TRUE),collapse = "",";") %>% + str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"="$alt")) + + cat("mixl \n") + print(mnl_U) + + cat("\n Simulation \n") + + print(u) + + + + by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function + # //! cur_data_all may get deprecated in favor of pick + # pick(everything()) %>% + cur_data_all() %>% + transmute(!!lhs(equation) := !!rhs(equation) ) + } + + + + + + simulate_choices <- function(data=datadet) { #the part in dataset that needs to be repeated in each run +cat("does sou_gis exist") + print( exists("sou_gis")) + # browser() + + if (exists("sou_gis") && is.function(sou_gis)) { + sou_gis() + + cat("source of gis has been done") + } + + # source("Projects/ValuGaps/code/GIS_data.R") + + if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations + + n=seq_along(1:length(utils)) # number of utility functions + + + + print("DANGER") + + print(exists("final_set")) + + if(exists("final_set")) data = left_join(data,final_set, by = "ID") + + data <- data %>% + # rename(ID="RID") %>% + group_by(ID) %>% + + #rename(ID ="RID") %>% + #rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% + mutate(!!! manipulations , + map_dfc(utils,by_formula)) %>% #our functions to create utility variables. They need to be entered as a formula list as an argument + rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% + + mutate(across(.cols=n,.fns = ~ rgumbel(setpp,loc=0, scale=1), .names = "{'e'}_{n}" ), + across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>% + mutate(CHOICE=max.col(.[,grep("U_",names(.))]) + ) %>% + as.data.frame() + + +print("data has been made") + +cat(head(data$CHOICE)) + + return(data) + + } + + estimate_sim <- function(run=1) { #start loop + + cat("This is Run number ", run) + + database <- simulate_choices(datadet) + + + cat("This is the utility functions \n" , mnl_U) + + model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,) + + return(model) + + } + + designs_all <- list() + + design <- read_delim(designfile,delim = "\t", + escape_double = FALSE, + trim_ws = TRUE , + col_select = c(-Design, -starts_with("...")) , + name_repair = "universal") %>% + filter(!is.na(Choice.situation)) + + + + nsets<-nrow(design) + nblocks<-max(design$Block) + setpp <- nsets/nblocks # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets + #respondents <- replications*nblocks + replications <- respondents/nblocks + #browser() + + + + datadet<- design %>% + arrange(Block,Choice.situation) %>% + slice(rep(row_number(), replications)) %>% ## replicate design according to number of replications + mutate(ID = rep(1:respondents, each=setpp)) %>% # create Respondent ID. + relocate(ID,`Choice.situation`) %>% +as.data.frame() + + + + + + database <- simulate_choices() + + model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F) + + est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names) + + + availabilities <- mixl::generate_default_availabilities( + database, model_spec$num_utility_functions) + + plan(multisession, workers = 8) + + output<- 1:no_sim %>% map(estimate_sim) + + + + + coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>% + tibble::rownames_to_column() %>% + pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% + bind_rows(.id = "run") + + output[["summary"]] <-describe(coefs[,-1], fast = TRUE) + + output[["coefs"]] <-coefs + + pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0")) + + output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals) + + + output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents) + + + print(kable(output[["summary"]],digits = 2, format = "rst")) + + + print(output[["power"]]) + + + return(output) + + +} + + +plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web + plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) + + #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") + + geom_density(alpha=0.5) + + geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) + ## this makes a vertical line of the mean + labs(x=feature, y = "Density") + plt + guides(fill=guide_legend(title=label_column)) } \ No newline at end of file diff --git a/simulationcore_purrr.R b/simulationcore_purrr.R index b95ce75..67f02e0 100644 --- a/simulationcore_purrr.R +++ b/simulationcore_purrr.R @@ -6,8 +6,9 @@ library("formula.tools") library(ggplot2) library(stringr) -source("Projects/ValuGaps/parameters_valugaps.R") -source(params$file) +source("Projects/SE_AGRI/parameters_SE Design-Agri.R") +#source("Projects/ValuGaps/parameters_valugaps.R",echo = TRUE) +#source(params$file) designfile<-list.files(designpath,full.names = T) @@ -39,7 +40,7 @@ powa <- map(all_designs, ~ .x$power) summaryall <- as.data.frame(map(all_designs, ~.x$summary)) %>% - select(!ends_with("vars")) %>% + dplyr::select(!ends_with("vars")) %>% relocate(ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" ))) coefall <- map(all_designs, ~ .x$coefs) @@ -47,7 +48,7 @@ coefall <- map(all_designs, ~ .x$coefs) pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced s<-as.data.frame(coefall) %>% - select(!matches("pval|run")) %>% + dplyr::select(!matches("pval|run")) %>% rename_with(~ sub("est_b", "", .x), everything()) %>% # rename_with(~ sub("est_asc_", "asc", .x), everything()) %>% rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), everything() ) %>% # rename attributes for reshape part 1 @@ -57,7 +58,7 @@ s<-as.data.frame(coefall) %>% p=list() -for (att in names(select(s,-c("design","run")))) { +for (att in names(dplyr::select(s,-c("design","run")))) { p[[att]] <- plot_multi_histogram(s,att,"design") -- GitLab