Skip to content
Snippets Groups Projects
Commit b16a131d authored by dj44vuri's avatar dj44vuri
Browse files

many changes in function to make valugaps data work

parent 08b17c11
No related branches found
No related tags found
No related merge requests found
......@@ -14,6 +14,8 @@ bforeign = 0.3
bdamage = 0.6
bprice = 0.2
manipulations = list(alt1.professional= expr(alt1.initiator==1),
alt2.professional= expr(alt2.initiator==1),
alt1.expert = expr(alt1.initiator==2),
......
......@@ -27,7 +27,7 @@ process_data <- function(data, name_prefix, cover_data) {
#### Draw respondents ####
# Load population density data for Germany
density_raster <- rast("data/gis/BBSR_Landnutzung_Bevoelkerung/ewz250_2011_v1_multi.tif")
density_raster <- rast("Projects/ValuGaps/data/gis/BBSR_Landnutzung_Bevoelkerung/ewz250_2011_v1_multi.tif")
##### Do plot of the pop density ####
......
......@@ -96,7 +96,7 @@ protected_areas <- mask(protected_areas, vect(germany_geo_c))
#### Define simulation parameters ####
resps = 96 # number of respondents
nosim=2 # number of simulations to run (about 500 is minimum)
nosim=3 # number of simulations to run (about 500 is minimum)
#buffer_dist <- 15000
buffer_dist_ls <- list(10000, 15000, 25000, 50000) # define radius for the respondents in meter
......@@ -110,6 +110,7 @@ bc= 1.4
bc2 = -0.0175
bp=-0.03
mani1 = expr(left_join(final_set, by="ID"))
manipulations = list(
#expr(across(c("HNV_on_corine", "Protected_on_corine"), ~replace_na(.,0)) ),
......@@ -135,3 +136,11 @@ u<-list(
# v1 =V.1~ basc + bb*alt1.b+ bc * alt1.c + bp * alt1.p ,
# v2 =V.2~ basc + bb*alt2.b+ bc * alt2.c + bp * alt2.p ,
# v3 =V.3~ 0)
## this function will be called before the data is simulated
sou_gis <- function() {
source("Projects/ValuGaps/code/GIS_data.R")
}
sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u ) {
require("tictoc")
require("readr")
require("psych")
require("dplyr")
require("evd")
require("tidyr")
require("kableExtra")
require("gridExtra")
require("stringr")
require("mixl")
require("furrr")
require("purrr")
require("ggplot2")
require("formula.tools")
mnl_U <-paste(map_chr(u,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V."="U_", " alt"="$alt"))
by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
# //! cur_data_all may get deprecated in favor of pick
# pick(everything()) %>%
cur_data_all() %>%
transmute(!!lhs(equation) := !!rhs(equation) )
}
simulate_choices <- function(data=database) { #the part in dataset that needs to be repeated in each run
# browser()
n=seq_along(1:length(utils))
print("DANGER")
data <- data %>%
group_by(ID) %>%
mutate(
across(.cols=n,.fns = ~ rgumbel(setpp,loc=0, scale=1), .names = "{'e'}_{n}" ),
across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>%
mutate(CHOICE=max.col(.[,grep("U_",names(.))])
) %>%
as.data.frame()
return(data)
}
estimate_sim <- function(run=1) { #start loop
cat(run)
database <- simulate_choices()
model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
return(model)
}
designs_all <- list()
design <- read_delim(designfile,delim = "\t",
escape_double = FALSE,
trim_ws = TRUE ,
col_select = c(-Design, -starts_with("...")) ,
name_repair = "universal") %>%
filter(!is.na(Choice.situation))
nsets<-nrow(design)
nblocks<-max(design$Block)
setpp <- nsets/nblocks # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
#respondents <- replications*nblocks
replications <- respondents/nblocks
#browser()
if(!exists("manipulations")) manipulations=list()
database<- design %>%
arrange(Block,Choice.situation) %>%
slice(rep(row_number(), replications)) %>% ## replicate design according to number of replications
mutate(RID = rep(1:respondents, each=setpp)) %>% # create Respondent ID.
relocate(RID,`Choice.situation`) %>%
mutate( !!! manipulations ,
map_dfc(utils,by_formula) #our functions to create utility variables. They need to be entered as a formula list as an argument
) %>%
rename(ID ="RID") %>%
rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>%
as.data.frame()
database <- simulate_choices()
model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
availabilities <- mixl::generate_default_availabilities(
database, model_spec$num_utility_functions)
plan(multisession, workers = 8)
output<- 1:no_sim %>% map(estimate_sim)
coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>%
tibble::rownames_to_column() %>%
pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>%
bind_rows(.id = "run")
output[["summary"]] <-describe(coefs[,-1], fast = TRUE)
output[["coefs"]] <-coefs
pvals <- output[["coefs"]] %>% select(starts_with("rob_pval0"))
output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals)
output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
print(kable(output[["summary"]],digits = 2, format = "rst"))
print(output[["power"]])
return(output)
}
plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
#geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
geom_density(alpha=0.5) +
geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) + ## this makes a vertical line of the mean
labs(x=feature, y = "Density")
plt + guides(fill=guide_legend(title=label_column))
sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u ) {
require("tictoc")
require("readr")
require("psych")
require("dplyr")
require("evd")
require("tidyr")
require("kableExtra")
require("gridExtra")
require("stringr")
require("mixl")
require("furrr")
require("purrr")
require("ggplot2")
require("formula.tools")
mnl_U <-paste(map_chr(u,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" , "~" = "=", "\\." = "_" , " b" = " @b" , "V_"="U_", " alt"="$alt"))
cat("mixl \n")
print(mnl_U)
cat("\n Simulation \n")
print(u)
by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
# //! cur_data_all may get deprecated in favor of pick
# pick(everything()) %>%
cur_data_all() %>%
transmute(!!lhs(equation) := !!rhs(equation) )
}
simulate_choices <- function(data=datadet) { #the part in dataset that needs to be repeated in each run
cat("does sou_gis exist")
print( exists("sou_gis"))
# browser()
if (exists("sou_gis") && is.function(sou_gis)) {
sou_gis()
cat("source of gis has been done")
}
# source("Projects/ValuGaps/code/GIS_data.R")
if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations
n=seq_along(1:length(utils)) # number of utility functions
print("DANGER")
print(exists("final_set"))
if(exists("final_set")) data = left_join(data,final_set, by = "ID")
data <- data %>%
# rename(ID="RID") %>%
group_by(ID) %>%
#rename(ID ="RID") %>%
#rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>%
mutate(!!! manipulations ,
map_dfc(utils,by_formula)) %>% #our functions to create utility variables. They need to be entered as a formula list as an argument
rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>%
mutate(across(.cols=n,.fns = ~ rgumbel(setpp,loc=0, scale=1), .names = "{'e'}_{n}" ),
across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>%
mutate(CHOICE=max.col(.[,grep("U_",names(.))])
) %>%
as.data.frame()
print("data has been made")
cat(head(data$CHOICE))
return(data)
}
estimate_sim <- function(run=1) { #start loop
cat("This is Run number ", run)
database <- simulate_choices(datadet)
cat("This is the utility functions \n" , mnl_U)
model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
return(model)
}
designs_all <- list()
design <- read_delim(designfile,delim = "\t",
escape_double = FALSE,
trim_ws = TRUE ,
col_select = c(-Design, -starts_with("...")) ,
name_repair = "universal") %>%
filter(!is.na(Choice.situation))
nsets<-nrow(design)
nblocks<-max(design$Block)
setpp <- nsets/nblocks # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
#respondents <- replications*nblocks
replications <- respondents/nblocks
#browser()
datadet<- design %>%
arrange(Block,Choice.situation) %>%
slice(rep(row_number(), replications)) %>% ## replicate design according to number of replications
mutate(ID = rep(1:respondents, each=setpp)) %>% # create Respondent ID.
relocate(ID,`Choice.situation`) %>%
as.data.frame()
database <- simulate_choices()
model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
availabilities <- mixl::generate_default_availabilities(
database, model_spec$num_utility_functions)
plan(multisession, workers = 8)
output<- 1:no_sim %>% map(estimate_sim)
coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)] %>%
tibble::rownames_to_column() %>%
pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>%
bind_rows(.id = "run")
output[["summary"]] <-describe(coefs[,-1], fast = TRUE)
output[["coefs"]] <-coefs
pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0"))
output[["power"]] <- 100*table(apply(pvals,1, function(x) all(x<0.05)))/nrow(pvals)
output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
print(kable(output[["summary"]],digits = 2, format = "rst"))
print(output[["power"]])
return(output)
}
plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
#geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
geom_density(alpha=0.5) +
geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) + ## this makes a vertical line of the mean
labs(x=feature, y = "Density")
plt + guides(fill=guide_legend(title=label_column))
}
\ No newline at end of file
......@@ -6,8 +6,9 @@ library("formula.tools")
library(ggplot2)
library(stringr)
source("Projects/ValuGaps/parameters_valugaps.R")
source(params$file)
source("Projects/SE_AGRI/parameters_SE Design-Agri.R")
#source("Projects/ValuGaps/parameters_valugaps.R",echo = TRUE)
#source(params$file)
designfile<-list.files(designpath,full.names = T)
......@@ -39,7 +40,7 @@ powa <- map(all_designs, ~ .x$power)
summaryall <- as.data.frame(map(all_designs, ~.x$summary)) %>%
select(!ends_with("vars")) %>%
dplyr::select(!ends_with("vars")) %>%
relocate(ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" )))
coefall <- map(all_designs, ~ .x$coefs)
......@@ -47,7 +48,7 @@ coefall <- map(all_designs, ~ .x$coefs)
pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced
s<-as.data.frame(coefall) %>%
select(!matches("pval|run")) %>%
dplyr::select(!matches("pval|run")) %>%
rename_with(~ sub("est_b", "", .x), everything()) %>%
# rename_with(~ sub("est_asc_", "asc", .x), everything()) %>%
rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), everything() ) %>% # rename attributes for reshape part 1
......@@ -57,7 +58,7 @@ s<-as.data.frame(coefall) %>%
p=list()
for (att in names(select(s,-c("design","run")))) {
for (att in names(dplyr::select(s,-c("design","run")))) {
p[[att]] <- plot_multi_histogram(s,att,"design")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment