From 98d3c134b1e2569f5691b9029a7772727708a46c Mon Sep 17 00:00:00 2001
From: Nino Cavallaro <nino.cavallaro@idiv.de>
Date: Fri, 21 Mar 2025 12:10:52 +0100
Subject: [PATCH] lapage test etc.

---
 Scripts/create_tables.R              |  58 +++-
 Scripts/data_prep.R                  | 272 ++++++++---------
 Scripts/descriptives.R               | 439 +++++++++++++++++++++++++++
 Scripts/lapage_results.R             | 210 +++++++++++++
 Scripts/lapage_results_naturalness.R | 372 +++++++++++++++++++++++
 Scripts/logit/chr_vol_treat.R        | 372 ++++++++++++-----------
 Scripts/ols/ols_consequentiality.R   |   5 +
 Scripts/ols/ols_time_spent.R         |   4 +-
 Scripts/visualize_distr_spli.R       | 231 +++++++++-----
 9 files changed, 1565 insertions(+), 398 deletions(-)
 create mode 100644 Scripts/descriptives.R
 create mode 100644 Scripts/lapage_results.R
 create mode 100644 Scripts/lapage_results_naturalness.R

diff --git a/Scripts/create_tables.R b/Scripts/create_tables.R
index a56556e..8f0c0ba 100644
--- a/Scripts/create_tables.R
+++ b/Scripts/create_tables.R
@@ -11,6 +11,9 @@ list_ols <- list("(Intercept)" = "Intercept", "as.factor(Treatment_A)Treated" =
                  "as.factor(Treatment_C)No Video 2" = "Text 2", "as.factor(Treatment_C)Video 1" = "Video 1",
                  "as.factor(Treatment_C)Video 2" = "Video 2", "as.factor(Treatment_D)Treated" = "Treated", "as.factor(Treatment_D)Vol. Treated" = "Vol. Treated",
                  "as.factor(Treatment_D)No Info 2" = "No Info", 
+                 "as.factor(Matching_Group)Control Pred" = "Control Predicted", "as.factor(Matching_Group)Opt Not Pred" = "Opt. Treatment Not Predicted",
+                 "as.factor(Matching_Group)Opt Pred" = "Opt. Treatment Predicted", "as.factor(Matching_Group)Treat Not Pred" = "Treated Not Predicted",
+                 "as.factor(Matching_Group)Treat Pred" = "Treated Predicted",
                  "Z_Mean_NR" = "NR-Index", "as.factor(Gender)2" = "Female",
                  "Age_mean" = "Age", "QFIncome" = "Income", "Uni_degree" = "University Degree")
 # OLS A
@@ -49,6 +52,25 @@ texreg(l=list(ols_time_spent_control_D, ols_time_cc_control_D, ols_percentage_co
        caption = "Results of OLS regressions for Scenario Case B.",
        file="Tables/ols/ols_D.tex")
 
+# OLS G
+texreg(l=list(ols_time_spent_control_G, ols_time_cc_control_G, ols_percentage_correct_control_G,  conseq_model_control_G),
+       custom.model.names = c("Interview Time",  "CC Time", "Quiz",  "Cons. Score"),
+       custom.header = list("Model 1B" = 1:1, "Model 2B" = 2:2, "Model 3B" = 3:3, "Model 4B" = 4:4),
+       custom.coef.map = list_ols,  stars = c(0.01, 0.05, 0.1), float.pos="tb", 
+       custom.note = "Notes: (i) The dependent variables examined in this regression analysis are as follows: 
+       Model 1B refers to net interview time, 
+       Model 2B denotes choice card time, Model 3B represents the percentage of correct quiz questions, and Model 4B represents consequentiality score.
+       (ii) The variables included in the analysis are as follows: Treated is a dummy variable indicating membership 
+       in the obligatory treated group, Vol. Treated is a dummy variable indicating the group that voluntarily chose the optional treatment, 
+       while No Info indicates the group that did not opt for the treatment, with the reference group being Non-Treated. NR-Index represents the z-standardized
+       natural relatedness index. Female is a dummy variable denoting gender, Age has been mean-centered and measured in years, 
+       Income is a continuous variable indicating a transition from one income group to the next higher, and University Degree is 
+       a dummy variable indicating whether an individual holds a university degree; (iii) %stars and standard errors in parentheses.",
+       label = "tab:olsG",
+       caption = "Results of OLS regressions for Scenario Case B.",
+       file="Tables/ols/ols_G.tex")
+
+
 # Manipulation check
 texreg(l=list(ols_percentage_correct_A,  ols_percentage_correct_control_A, ols_percentage_correct_C, ols_percentage_correct_control_C),
        custom.model.names = c("Case A", "with Controls", "Case B", "with Controls"),
@@ -60,6 +82,7 @@ texreg(l=list(ols_percentage_correct_A,  ols_percentage_correct_control_A, ols_p
        file="Tables/ols/manipulation.tex")
 
 
+
 # Net interview time 
 texreg(l=list(ols_time_spent_A,  ols_time_spent_control_A, ols_time_spent_C,  ols_time_spent_control_C),
        custom.model.names = c("Case A", "with Controls", "Case B", "with Controls"),
@@ -241,7 +264,7 @@ texreg(c(case_B_cols_NR[1], remGOF(case_B_cols_NR[2:6])),
        caption = "Results of mixed logit model with treatment interactions for Case B including NR-index.",
        file="Tables/mxl/case_B_rent_INT_NR.tex")
 
-
+#### Case A split samples #####
 mxl_tr <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Treated A")
 mxl_vol_tr <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Vol_Treated A")
 mxl_not_tr <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Not_Treated A")
@@ -257,12 +280,41 @@ texreg(c(not_tr, treated, opt_tr),
                               "ASC_sq_info" = "ASC SQ", "rent_info" = "Rent", "nat_info" = "Naturalness", "walking_info" = "Walking Distance",
                               "sig_natural" = "Naturalness SD", "sig_walking" = "Walking Distance SD", "sig_rent" = "Rent SD",
                               "sig_ASC_1" = "ASC 1 SD", "sig_ASC_2" = "ASC 2 SD"),
-       custom.model.names = c("Not Treated", "Treated", "Optional Treatment"), custom.note = "%stars (one-sided). Robust standard errors in parentheses.",
+       custom.model.names = c("Control", "Treated", "Optional Treatment"), custom.note = "%stars (one-sided). Robust standard errors in parentheses.",
        stars = c(0.01, 0.05, 0.1), float.pos="tb",
-       label = "tab:mxl_C_NR",
+       label = "tab:mxl_A_split",
        caption = "Results of mixed logit models for case A.",
        file="Tables/mxl/case_A_splits.tex")
 
+
+#### Matching split samples #####
+MXL_wtp_Control_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Control_Not_Pred")
+MXL_wtp_Control_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Control_Pred")
+MXL_wtp_Opt_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Opt_Not_Pred")
+MXL_wtp_Opt_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Opt_Pred")
+MXL_wtp_Treated_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Treated_Not_Pred")
+MXL_wtp_Treated_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Treated_Pred")
+
+control_not_pred <- quicktexregapollo(MXL_wtp_Control_Not_Pred_model)
+control_pred <- quicktexregapollo(MXL_wtp_Control_Pred_model)
+treat_not_pred <- quicktexregapollo(MXL_wtp_Treated_Not_Pred_model)
+treat_pred <- quicktexregapollo(MXL_wtp_Treated_Pred_model)
+opt_not_pred <- quicktexregapollo(MXL_wtp_Opt_Not_Pred_model)
+opt_pred <- quicktexregapollo(MXL_wtp_Opt_Pred_model)
+
+texreg(c(control_not_pred, control_pred, treat_not_pred, treat_pred, opt_not_pred, opt_pred),
+       custom.coef.map = list("mu_natural" = "Naturalness", "mu_walking" = "Walking Distance", "mu_rent" = "Rent",
+                              "ASC_1" = "ASC 1","ASC_2" = "ASC 2", "_natural" = "Naturalness", "nat" = "Naturalness",
+                              "wd" = "Walking Distance", "asc" = "ASC SQ", "ASC" ="ASC SQ",
+                              "ASC_sq_info" = "ASC SQ", "rent_info" = "Rent", "nat_info" = "Naturalness", "walking_info" = "Walking Distance",
+                              "sig_natural" = "Naturalness SD", "sig_walking" = "Walking Distance SD", "sig_rent" = "Rent SD",
+                              "sig_ASC_1" = "ASC 1 SD", "sig_ASC_2" = "ASC 2 SD"),
+       custom.model.names = c("Control Not Predicted", "Control Predicted", "Treated Not Predicted", "Treated Predicted",  "Opt. Treatment Not Predicted", "Opt. Treatment Predicted"), custom.note = "%stars (one-sided). Robust standard errors in parentheses.",
+       stars = c(0.01, 0.05, 0.1), float.pos="tb",
+       label = "tab:mxl_B_Match",
+       caption = "Results of mixed logit models for case B.",
+       file="Tables/mxl/case_B_splits.tex")
+
 # Main model
 # texreg(l=list(mxl_wtp_case_a_rentINT),
 #        custom.coef.map = list("mu_natural" = "Naturalness", "mu_walking" = "Walking Distance", "mu_rent" = "Rent",
diff --git a/Scripts/data_prep.R b/Scripts/data_prep.R
index 9094278..8c88b1f 100644
--- a/Scripts/data_prep.R
+++ b/Scripts/data_prep.R
@@ -1,136 +1,136 @@
-# Prepare variables that we want to use
-
-database_full <- database_full %>% rename(Gender = "Q03W123", Education = "Q06W123", HHSize = "Q41W123",
-                                  WorkingTime = "Q44W123", Birthyear = "Q01W123", Rent_net = "Q07W123",
-                                  Number_Kids = "Q42W123", Employment_type = "Q43W123", Conseq_UGS = "Q28W3",
-                                  Conseq_Money = "Q29W3", UGS_visits = "Q23W123") 
-
-
-database_full <- database_full %>% mutate(Gender = dplyr::recode(Gender, "A1" = 1, "A2" = 2, "A3"=3),
-                                Education = dplyr::recode(Education, "A1" = 1, "A2" = 2, "A3"=3, "A4" = 4, "A5" = 5),
-                                Employment_type = dplyr::recode(Employment_type, "A1" = 1, "A2" = 2, "A3"=3, "A4" = 4, 
-                                                         "A5" = 5, "A6" = 6),
-                                Conseq_UGS = dplyr::recode(Conseq_UGS, "A1" = 5, "A2" = 4, "A3"=3, "A4" = 2, "A5" = 1, "A6" = NA_real_),
-                                Conseq_Money = dplyr::recode(Conseq_Money, "A1" = 5, "A2" = 4, "A3"=3, "A4" = 2, "A5" = 1, "A6" = NA_real_),
-                                UGS_visits = dplyr::recode(UGS_visits, "A1" = 1, "A2" = 2, "A3"=3, "A4" = 4, "A5" = 5, "A6" = 6,
-                                                           "A7" = 7, "A8" = 8, "A9" = 9, "A10"= 10))
-
-database_full <- database_full %>% mutate(Gender_female = case_when(Gender == 2 ~1, TRUE~0),
-                                          Age = 2023-Birthyear,
-                                          Uni_degree = case_when(Education == 5 ~1, TRUE~0),
-                                          Kids_Dummy = case_when(Number_Kids > 0 ~ 1, TRUE ~0),
-                                          Employment_full = case_when(Employment_type == 1 ~ 1, TRUE~0),
-                                          Pensioner = case_when(Employment_type == 6 ~ 1, TRUE~0),
-                                          Age_mean = Age - mean(Age),
-                                          Income_mean = QFIncome - mean(QFIncome))
-
-# Data cleaning 
-
-
-
-database_full <- database_full %>% filter(Rent_SQ <= 10000 & Rent_SQ >=50) %>%
-  filter(WalkingDistance_SQ > 0 & WalkingDistance_SQ <= 300) %>% 
-  filter(Gender!=3)
-
-database <- database %>% filter(Rent_SQ <= 10000 & Rent_SQ >=50) %>% 
-  filter(WalkingDistance_SQ > 0 & WalkingDistance_SQ <= 300)
-
-
-summary(database_full$interviewtime)
-
-database_full <- database_full %>% filter(interviewtime >= 300) # make change time to 10 seconds?
-
-
-database_full <- database_full %>%
-  filter(!is.na(Treatment_new)) %>%
-  mutate(Treatment_A = case_when(
-    Treatment == 1 ~ "Treated",
-    Treatment == 2 ~ "Vol_Treated",
-    Treatment == 3 ~ "Not_Treated",
-    TRUE ~ NA_character_
-  )) %>% 
-  mutate(Treatment_B = case_when(
-    Treatment_new == 1 | Treatment_new == 2 | Treatment_new == 4 | Treatment_new == 5 ~ "Treated",
-    Treatment_new == 3 | Treatment_new == 6 ~ "Not_Treated"
-  )) %>% 
-  mutate(Treatment_C = case_when(
-    Treatment_new == 1 ~ 'Video 1',
-    Treatment_new == 2 ~ 'No Video 1',
-    Treatment_new == 3 ~ 'No Info 2',
-    Treatment_new == 4 ~ 'No Video 2',
-    Treatment_new == 5 ~ 'Video 2',
-    Treatment_new == 6 ~ 'No Treatment 3',
-    TRUE ~ NA_character_
-  )) %>% 
-  mutate(Treatment_D = case_when(
-    Treatment_new == 1 | Treatment_new == 2 ~ 'Treated',
-    Treatment_new == 3 ~ 'No Info 2',
-    Treatment_new == 4 | Treatment_new == 5 ~ 'Vol. Treated',
-    Treatment_new == 6 ~ 'No Treatment 3',
-    TRUE ~ NA_character_
-  ))
-
-id_list <- unique(database_full$id)
-
-# Do we sill want to use this? or only database full?
-database <- database %>% filter(id %in% id_list) %>% filter(!is.na(Treatment_new))
-# Building NR Index
-
-for (i in 1:21) {
-  variable_name <- paste0("Q38S", sprintf("%02d", i), "W3")  # Generate variable name
-  cat("Table for", variable_name, ":\n")
-  print(table(database_full[[variable_name]]))
-  cat("\n")
-  database_full[[variable_name]] <- as.numeric(factor(database_full[[variable_name]], levels = c("A1", "A2", "A3", "A4", "A5")))
-  cat("Table for", variable_name, ":\n")
-  print(table(database_full[[variable_name]]))
-  cat("\n")
-}
-
-variables_to_reverse <- c("Q38S02W3", "Q38S03W3", "Q38S10W3", "Q38S11W3", "Q38S13W3", "Q38S14W3", "Q38S15W3", "Q38S18W3")
-for (variable_name in variables_to_reverse) {
-  cat("Table for", variable_name, ":\n")
-  
-  # Convert the variable to a factor with numerical levels and reverse the scores
-  database_full[[variable_name]] <- 6 - as.numeric(database_full[[variable_name]])
-  
-  # Print the table
-  print(table(database_full[[variable_name]]))
-  cat("\n")
-}
-q38_variables <- grep("^Q38", names(database_full), value = TRUE)
-database_full$Total_NR <- rowSums(database_full[q38_variables])
-hist(database_full$Total_NR)
-database_full <- database_full %>% 
-  mutate(Mean_NR=Total_NR/21) 
-mean_nr<-mean(database_full$Mean_NR, na.rm = TRUE)
-sd_nr<-sd(database_full$Mean_NR, na.rm = TRUE)
-database_full <- database_full %>% 
-  mutate(Z_Mean_NR=(Mean_NR-mean_nr)/sd_nr)
-database$Z_Mean_NR<- database_full$Z_Mean_NR
-summary(database$Z_Mean_NR)
-
-#Self-Reference Index
-
-for (i in 8:10) {
-  variable_name <- paste0("TV", sprintf("%02d", i), "W3")  # Generate variable name
-  cat("Table for", variable_name, ":\n")
-  print(table(database_full[[variable_name]]))
-  cat("\n")
-  database_full[[variable_name]] <- as.numeric(factor(database_full[[variable_name]], levels = c("A1", "A2", "A3", "A4", "A5")))
-  cat("Table for", variable_name, ":\n")
-  print(table(database_full[[variable_name]]))
-  cat("\n")
-}
-
-
-database_full$Total_SR <- database_full$TV08W3+database_full$TV09W3+database_full$TV10W3
-hist(database_full$Total_SR)
-database_full <- database_full %>% 
-  mutate(Mean_SR=Total_SR/3) 
-mean_sr<-mean(database_full$Mean_SR, na.rm = TRUE)
-sd_sr<-sd(database_full$Mean_SR, na.rm = TRUE)
-database_full <- database_full %>% 
-  mutate(Z_Mean_SR=(Mean_SR-mean_sr)/sd_sr)
-database$Z_Mean_SR<- database_full$Z_Mean_SR
-summary(database$Z_Mean_SR)
+# Prepare variables that we want to use
+
+database_full <- database_full %>% rename(Gender = "Q03W123", Education = "Q06W123", HHSize = "Q41W123",
+                                  WorkingTime = "Q44W123", Birthyear = "Q01W123", Rent_net = "Q07W123",
+                                  Number_Kids = "Q42W123", Employment_type = "Q43W123", Conseq_UGS = "Q28W3",
+                                  Conseq_Money = "Q29W3", UGS_visits = "Q23W123") 
+
+
+database_full <- database_full %>% mutate(Gender = dplyr::recode(Gender, "A1" = 1, "A2" = 2, "A3"=3),
+                                Education = dplyr::recode(Education, "A1" = 1, "A2" = 2, "A3"=3, "A4" = 4, "A5" = 5),
+                                Employment_type = dplyr::recode(Employment_type, "A1" = 1, "A2" = 2, "A3"=3, "A4" = 4, 
+                                                         "A5" = 5, "A6" = 6),
+                                Conseq_UGS = dplyr::recode(Conseq_UGS, "A1" = 5, "A2" = 4, "A3"=3, "A4" = 2, "A5" = 1, "A6" = NA_real_),
+                                Conseq_Money = dplyr::recode(Conseq_Money, "A1" = 5, "A2" = 4, "A3"=3, "A4" = 2, "A5" = 1, "A6" = NA_real_),
+                                UGS_visits = dplyr::recode(UGS_visits, "A1" = 1, "A2" = 2, "A3"=3, "A4" = 4, "A5" = 5, "A6" = 6,
+                                                           "A7" = 7, "A8" = 8, "A9" = 9, "A10"= 10))
+
+database_full <- database_full %>% mutate(Gender_female = case_when(Gender == 2 ~1, TRUE~0),
+                                          Age = 2023-Birthyear,
+                                          Uni_degree = case_when(Education == 5 ~1, TRUE~0),
+                                          Kids_Dummy = case_when(Number_Kids > 0 ~ 1, TRUE ~0),
+                                          Employment_full = case_when(Employment_type == 1 ~ 1, TRUE~0),
+                                          Pensioner = case_when(Employment_type == 6 ~ 1, TRUE~0),
+                                          Age_mean = Age - mean(Age),
+                                          Income_mean = QFIncome - mean(QFIncome))
+
+# Data cleaning 
+
+
+
+database_full <- database_full %>% filter(Rent_SQ <= 10000 & Rent_SQ >=50) %>%
+  filter(WalkingDistance_SQ > 0 & WalkingDistance_SQ <= 300) %>% 
+  filter(Gender!=3)
+
+database <- database %>% filter(Rent_SQ <= 10000 & Rent_SQ >=50) %>% 
+  filter(WalkingDistance_SQ > 0 & WalkingDistance_SQ <= 300)
+
+
+summary(database_full$interviewtime)
+
+database_full <- database_full %>% filter(interviewtime >= 300) # make change time to 10 seconds?
+
+
+database_full <- database_full %>%
+  filter(!is.na(Treatment_new)) %>%
+  mutate(Treatment_A = case_when(
+    Treatment == 1 ~ "Treated",
+    Treatment == 2 ~ "Vol_Treated",
+    Treatment == 3 ~ "Not_Treated",
+    TRUE ~ NA_character_
+  )) %>% 
+  mutate(Treatment_B = case_when(
+    Treatment_new == 1 | Treatment_new == 2 | Treatment_new == 4 | Treatment_new == 5 ~ "Treated",
+    Treatment_new == 3 | Treatment_new == 6 ~ "Not_Treated"
+  )) %>% 
+  mutate(Treatment_C = case_when(
+    Treatment_new == 1 ~ 'Video 1',
+    Treatment_new == 2 ~ 'No Video 1',
+    Treatment_new == 3 ~ 'No Info 2',
+    Treatment_new == 4 ~ 'No Video 2',
+    Treatment_new == 5 ~ 'Video 2',
+    Treatment_new == 6 ~ 'No Treatment 3',
+    TRUE ~ NA_character_
+  )) %>% 
+  mutate(Treatment_D = case_when(
+    Treatment_new == 1 | Treatment_new == 2 ~ 'Treated',
+    Treatment_new == 3 ~ 'No Info 2',
+    Treatment_new == 4 | Treatment_new == 5 ~ 'Vol. Treated',
+    Treatment_new == 6 ~ 'No Treatment 3',
+    TRUE ~ NA_character_
+  ))
+
+id_list <- unique(database_full$id)
+
+# Do we still want to use this? or only database full?
+database <- database %>% filter(id %in% id_list) %>% filter(!is.na(Treatment_new))
+# Building NR Index
+
+for (i in 1:21) {
+  variable_name <- paste0("Q38S", sprintf("%02d", i), "W3")  # Generate variable name
+  cat("Table for", variable_name, ":\n")
+  print(table(database_full[[variable_name]]))
+  cat("\n")
+  database_full[[variable_name]] <- as.numeric(factor(database_full[[variable_name]], levels = c("A1", "A2", "A3", "A4", "A5")))
+  cat("Table for", variable_name, ":\n")
+  print(table(database_full[[variable_name]]))
+  cat("\n")
+}
+
+variables_to_reverse <- c("Q38S02W3", "Q38S03W3", "Q38S10W3", "Q38S11W3", "Q38S13W3", "Q38S14W3", "Q38S15W3", "Q38S18W3")
+for (variable_name in variables_to_reverse) {
+  cat("Table for", variable_name, ":\n")
+  
+  # Convert the variable to a factor with numerical levels and reverse the scores
+  database_full[[variable_name]] <- 6 - as.numeric(database_full[[variable_name]])
+  
+  # Print the table
+  print(table(database_full[[variable_name]]))
+  cat("\n")
+}
+q38_variables <- grep("^Q38", names(database_full), value = TRUE)
+database_full$Total_NR <- rowSums(database_full[q38_variables])
+hist(database_full$Total_NR)
+database_full <- database_full %>% 
+  mutate(Mean_NR=Total_NR/21) 
+mean_nr<-mean(database_full$Mean_NR, na.rm = TRUE)
+sd_nr<-sd(database_full$Mean_NR, na.rm = TRUE)
+database_full <- database_full %>% 
+  mutate(Z_Mean_NR=(Mean_NR-mean_nr)/sd_nr)
+database$Z_Mean_NR<- database_full$Z_Mean_NR
+summary(database$Z_Mean_NR)
+
+#Self-Reference Index
+
+for (i in 8:10) {
+  variable_name <- paste0("TV", sprintf("%02d", i), "W3")  # Generate variable name
+  cat("Table for", variable_name, ":\n")
+  print(table(database_full[[variable_name]]))
+  cat("\n")
+  database_full[[variable_name]] <- as.numeric(factor(database_full[[variable_name]], levels = c("A1", "A2", "A3", "A4", "A5")))
+  cat("Table for", variable_name, ":\n")
+  print(table(database_full[[variable_name]]))
+  cat("\n")
+}
+
+
+database_full$Total_SR <- database_full$TV08W3+database_full$TV09W3+database_full$TV10W3
+hist(database_full$Total_SR)
+database_full <- database_full %>% 
+  mutate(Mean_SR=Total_SR/3) 
+mean_sr<-mean(database_full$Mean_SR, na.rm = TRUE)
+sd_sr<-sd(database_full$Mean_SR, na.rm = TRUE)
+database_full <- database_full %>% 
+  mutate(Z_Mean_SR=(Mean_SR-mean_sr)/sd_sr)
+database$Z_Mean_SR<- database_full$Z_Mean_SR
+summary(database$Z_Mean_SR)
diff --git a/Scripts/descriptives.R b/Scripts/descriptives.R
new file mode 100644
index 0000000..96497de
--- /dev/null
+++ b/Scripts/descriptives.R
@@ -0,0 +1,439 @@
+# Descriptives
+
+source("Scripts/ols/ols_time_spent.R")
+source("Scripts/ols/ols_quiz.R")
+data <- left_join(data, select(quiz_data, id, percentage_correct), by = "id")
+
+data <- data %>%
+  mutate(
+    Control_Not_Pred = ifelse(Matching_Group == "Control Not Pred", 1, 0),
+    Control_Pred = ifelse(Matching_Group == "Control Pred", 1, 0),
+    Opt_Not_Pred = ifelse(Matching_Group == "Opt Not Pred", 1, 0),
+    Opt_Pred = ifelse(Matching_Group == "Opt Pred", 1, 0),
+    Treat_Not_Pred = ifelse(Matching_Group == "Treat Not Pred", 1, 0),
+    Treat_Pred = ifelse(Matching_Group == "Treat Pred", 1, 0),
+    Opt_Treatment = Dummy_Vol_Treated+Dummy_no_info,
+    Control = ifelse(Treatment_A=="Not_Treated",1,0)
+  )
+
+data$Conseq_score<- data$Conseq_Money+data$Conseq_UGS
+table(data$QFIncome)
+data <- data %>%
+  mutate(income = ifelse(QFIncome == 0, NA, QFIncome))
+table(data$Dummy_Vol_Treated)
+table(data$Dummy_no_info)
+# Compute summary statistics for each group
+summary_stats <- data %>%
+  summarise(
+    # Number of respondents per group
+    Full_Sample_n = n(),
+    Dummy_Treated_n = sum(Dummy_Treated, na.rm = TRUE),
+    Dummy_Vol_Treated_n = sum(Dummy_Vol_Treated, na.rm = TRUE),
+    Dummy_no_info_n = sum(Dummy_no_info, na.rm = TRUE),
+    Control_Not_Pred_n = sum(Control_Not_Pred, na.rm = TRUE),
+    Control_Pred_n = sum(Control_Pred, na.rm = TRUE),
+    Opt_Not_Pred_n = sum(Opt_Not_Pred, na.rm = TRUE),
+    Opt_Pred_n = sum(Opt_Pred, na.rm = TRUE),
+    Treat_Not_Pred_n = sum(Treat_Not_Pred, na.rm = TRUE),
+    Treat_Pred_n = sum(Treat_Pred, na.rm = TRUE),
+    Opt_Treatment_n = sum(Opt_Treatment, na.rm = TRUE),
+    Control_n = sum(Control, na.rm = TRUE),
+    
+    
+    # Gender (% Female)
+    Full_Sample_Gender = mean(Gender_female, na.rm = TRUE) * 100,
+    Dummy_Treated_Gender = mean(Gender_female[Dummy_Treated == 1], na.rm = TRUE) * 100,
+    Dummy_Vol_Treated_Gender = mean(Gender_female[Dummy_Vol_Treated == 1], na.rm = TRUE) * 100,
+    Dummy_no_info_Gender = mean(Gender_female[Dummy_no_info == 1], na.rm = TRUE) * 100,
+    Control_Not_Pred_Gender = mean(Gender_female[Control_Not_Pred == 1], na.rm = TRUE) * 100,
+    Control_Pred_Gender = mean(Gender_female[Control_Pred == 1], na.rm = TRUE) * 100,
+    Opt_Not_Pred_Gender = mean(Gender_female[Opt_Not_Pred == 1], na.rm = TRUE) * 100,
+    Opt_Pred_Gender = mean(Gender_female[Opt_Pred == 1], na.rm = TRUE) * 100,
+    Treat_Not_Pred_Gender = mean(Gender_female[Treat_Not_Pred == 1], na.rm = TRUE) * 100,
+    Treat_Pred_Gender = mean(Gender_female[Treat_Pred == 1], na.rm = TRUE) * 100,
+    Opt_Treatment_Gender = mean(Gender_female[Opt_Treatment == 1], na.rm = TRUE) * 100,
+    Control_Gender = mean(Gender_female[Control == 1], na.rm = TRUE) * 100,
+    
+  
+    # University Degree (%)
+    Full_Sample_UniDegree = mean(Uni_degree, na.rm = TRUE) * 100,
+    Dummy_Treated_UniDegree = mean(Uni_degree[Dummy_Treated == 1], na.rm = TRUE) * 100,
+    Dummy_Vol_Treated_UniDegree = mean(Uni_degree[Dummy_Vol_Treated == 1], na.rm = TRUE) * 100,
+    Dummy_no_info_UniDegree = mean(Uni_degree[Dummy_no_info == 1], na.rm = TRUE) * 100,
+    Control_Not_Pred_UniDegree = mean(Uni_degree[Control_Not_Pred == 1], na.rm = TRUE) * 100,
+    Control_Pred_UniDegree = mean(Uni_degree[Control_Pred == 1], na.rm = TRUE) * 100,
+    Opt_Not_Pred_UniDegree = mean(Uni_degree[Opt_Not_Pred == 1], na.rm = TRUE) * 100,
+    Opt_Pred_UniDegree = mean(Uni_degree[Opt_Pred == 1], na.rm = TRUE) * 100,
+    Treat_Not_Pred_UniDegree = mean(Uni_degree[Treat_Not_Pred == 1], na.rm = TRUE) * 100,
+    Treat_Pred_UniDegree = mean(Uni_degree[Treat_Pred == 1], na.rm = TRUE) * 100,
+    Opt_Treatment_UniDegree = mean(Uni_degree[Opt_Treatment == 1], na.rm = TRUE) * 100,
+    Control_UniDegree = mean(Uni_degree[Control == 1], na.rm = TRUE) * 100,
+    
+    # Income Proportion (% for each category 1,2,3,4) for all groups
+    Full_Sample_Income_1 = mean(income == 1, na.rm = TRUE) * 100,
+    Full_Sample_Income_2 = mean(income == 2, na.rm = TRUE) * 100,
+    Full_Sample_Income_3 = mean(income == 3, na.rm = TRUE) * 100,
+    Full_Sample_Income_4 = mean(income == 4, na.rm = TRUE) * 100,
+    
+    Dummy_Treated_Income_1 = mean(income[Dummy_Treated == 1] == 1, na.rm = TRUE) * 100,
+    Dummy_Treated_Income_2 = mean(income[Dummy_Treated == 1] == 2, na.rm = TRUE) * 100,
+    Dummy_Treated_Income_3 = mean(income[Dummy_Treated == 1] == 3, na.rm = TRUE) * 100,
+    Dummy_Treated_Income_4 = mean(income[Dummy_Treated == 1] == 4, na.rm = TRUE) * 100,
+    
+    Dummy_no_info_Income_1 = mean(income[Dummy_no_info == 1] == 1, na.rm = TRUE) * 100,
+    Dummy_no_info_Income_2 = mean(income[Dummy_no_info == 1] == 2, na.rm = TRUE) * 100,
+    Dummy_no_info_Income_3 = mean(income[Dummy_no_info == 1] == 3, na.rm = TRUE) * 100,
+    Dummy_no_info_Income_4 = mean(income[Dummy_no_info == 1] == 4, na.rm = TRUE) * 100,
+    
+    Dummy_Vol_Treated_Income_1 = mean(income[Dummy_Vol_Treated == 1] == 1, na.rm = TRUE) * 100,
+    Dummy_Vol_Treated_Income_2 = mean(income[Dummy_Vol_Treated == 1] == 2, na.rm = TRUE) * 100,
+    Dummy_Vol_Treated_Income_3 = mean(income[Dummy_Vol_Treated == 1] == 3, na.rm = TRUE) * 100,
+    Dummy_Vol_Treated_Income_4 = mean(income[Dummy_Vol_Treated == 1] == 4, na.rm = TRUE) * 100,
+    
+    Control_Not_Pred_Income_1 = mean(income[Control_Not_Pred == 1] == 1, na.rm = TRUE) * 100,
+    Control_Not_Pred_Income_2 = mean(income[Control_Not_Pred == 1] == 2, na.rm = TRUE) * 100,
+    Control_Not_Pred_Income_3 = mean(income[Control_Not_Pred == 1] == 3, na.rm = TRUE) * 100,
+    Control_Not_Pred_Income_4 = mean(income[Control_Not_Pred == 1] == 4, na.rm = TRUE) * 100,
+    
+    Control_Pred_Income_1 = mean(income[Control_Pred == 1] == 1, na.rm = TRUE) * 100,
+    Control_Pred_Income_2 = mean(income[Control_Pred == 1] == 2, na.rm = TRUE) * 100,
+    Control_Pred_Income_3 = mean(income[Control_Pred == 1] == 3, na.rm = TRUE) * 100,
+    Control_Pred_Income_4 = mean(income[Control_Pred == 1] == 4, na.rm = TRUE) * 100,
+    
+    Opt_Not_Pred_Income_1 = mean(income[Opt_Not_Pred == 1] == 1, na.rm = TRUE) * 100,
+    Opt_Not_Pred_Income_2 = mean(income[Opt_Not_Pred == 1] == 2, na.rm = TRUE) * 100,
+    Opt_Not_Pred_Income_3 = mean(income[Opt_Not_Pred == 1] == 3, na.rm = TRUE) * 100,
+    Opt_Not_Pred_Income_4 = mean(income[Opt_Not_Pred == 1] == 4, na.rm = TRUE) * 100,
+    
+    Opt_Pred_Income_1 = mean(income[Opt_Pred == 1] == 1, na.rm = TRUE) * 100,
+    Opt_Pred_Income_2 = mean(income[Opt_Pred == 1] == 2, na.rm = TRUE) * 100,
+    Opt_Pred_Income_3 = mean(income[Opt_Pred == 1] == 3, na.rm = TRUE) * 100,
+    Opt_Pred_Income_4 = mean(income[Opt_Pred == 1] == 4, na.rm = TRUE) * 100,
+    
+    Treat_Not_Pred_Income_1 = mean(income[Treat_Not_Pred == 1] == 1, na.rm = TRUE) * 100,
+    Treat_Not_Pred_Income_2 = mean(income[Treat_Not_Pred == 1] == 2, na.rm = TRUE) * 100,
+    Treat_Not_Pred_Income_3 = mean(income[Treat_Not_Pred == 1] == 3, na.rm = TRUE) * 100,
+    Treat_Not_Pred_Income_4 = mean(income[Treat_Not_Pred == 1] == 4, na.rm = TRUE) * 100,
+    
+    Treat_Pred_Income_1 = mean(income[Treat_Pred == 1] == 1, na.rm = TRUE) * 100,
+    Treat_Pred_Income_2 = mean(income[Treat_Pred == 1] == 2, na.rm = TRUE) * 100,
+    Treat_Pred_Income_3 = mean(income[Treat_Pred == 1] == 3, na.rm = TRUE) * 100,
+    Treat_Pred_Income_4 = mean(income[Treat_Pred == 1] == 4, na.rm = TRUE) * 100,
+    
+    Opt_Treatment_Income_1 = mean(income[Opt_Treatment == 1] == 1, na.rm = TRUE) * 100,
+    Opt_Treatment_Income_2 = mean(income[Opt_Treatment == 1] == 2, na.rm = TRUE) * 100,
+    Opt_Treatment_Income_3 = mean(income[Opt_Treatment == 1] == 3, na.rm = TRUE) * 100,
+    Opt_Treatment_Income_4 = mean(income[Opt_Treatment == 1] == 4, na.rm = TRUE) * 100,
+    
+    Control_Income_1 = mean(income[Control == 1] == 1, na.rm = TRUE) * 100,
+    Control_Income_2 = mean(income[Control == 1] == 2, na.rm = TRUE) * 100,
+    Control_Income_3 = mean(income[Control == 1] == 3, na.rm = TRUE) * 100,
+    Control_Income_4 = mean(income[Control == 1] == 4, na.rm = TRUE) * 100,
+    
+    # Continuous Variables (Mean)
+    
+    Full_Sample_Age = mean(Age, na.rm = TRUE),
+    Dummy_Treated_Age = mean(Age[Dummy_Treated == 1], na.rm = TRUE),
+    Dummy_Vol_Treated_Age = mean(Age[Dummy_Vol_Treated == 1], na.rm = TRUE),
+    Dummy_no_info_Age = mean(Age[Dummy_no_info == 1], na.rm = TRUE),
+    Control_Not_Pred_Age = mean(Age[Control_Not_Pred == 1], na.rm = TRUE),
+    Control_Pred_Age = mean(Age[Control_Pred == 1], na.rm = TRUE),
+    Opt_Not_Pred_Age = mean(Age[Opt_Not_Pred == 1], na.rm = TRUE),
+    Opt_Pred_Age = mean(Age[Opt_Pred == 1], na.rm = TRUE),
+    Treat_Not_Pred_Age = mean(Age[Treat_Not_Pred == 1], na.rm = TRUE),
+    Treat_Pred_Age = mean(Age[Treat_Pred == 1], na.rm = TRUE),
+    Opt_Treatment_Age = mean(Age[Opt_Treatment == 1], na.rm = TRUE),
+    Control_Age = mean(Age[Control == 1], na.rm = TRUE),
+    
+    Full_Sample_NR = mean(Z_Mean_NR, na.rm = TRUE),
+    Dummy_Treated_NR = mean(Z_Mean_NR[Dummy_Treated == 1], na.rm = TRUE),
+    Dummy_Vol_Treated_NR = mean(Z_Mean_NR[Dummy_Vol_Treated == 1], na.rm = TRUE),
+    Dummy_no_info_NR = mean(Z_Mean_NR[Dummy_no_info == 1], na.rm = TRUE),
+    Control_Not_Pred_NR = mean(Z_Mean_NR[Control_Not_Pred == 1], na.rm = TRUE),
+    Control_Pred_NR = mean(Z_Mean_NR[Control_Pred == 1], na.rm = TRUE),
+    Opt_Not_Pred_NR = mean(Z_Mean_NR[Opt_Not_Pred == 1], na.rm = TRUE),
+    Opt_Pred_NR = mean(Z_Mean_NR[Opt_Pred == 1], na.rm = TRUE),
+    Treat_Not_Pred_NR = mean(Z_Mean_NR[Treat_Not_Pred == 1], na.rm = TRUE),
+    Treat_Pred_NR = mean(Z_Mean_NR[Treat_Pred == 1], na.rm = TRUE),
+    Opt_Treatment_NR = mean(Z_Mean_NR[Opt_Treatment == 1], na.rm = TRUE),
+    Control_NR = mean(Z_Mean_NR[Control == 1], na.rm = TRUE),
+    
+    Full_Sample_Conseq = mean(Conseq_score, na.rm = TRUE),
+    Dummy_Treated_Conseq = mean(Conseq_score[Dummy_Treated == 1], na.rm = TRUE),
+    Dummy_Vol_Treated_Conseq = mean(Conseq_score[Dummy_Vol_Treated == 1], na.rm = TRUE),
+    Dummy_no_info_Conseq = mean(Conseq_score[Dummy_no_info == 1], na.rm = TRUE),
+    Control_Not_Pred_Conseq = mean(Conseq_score[Control_Not_Pred == 1], na.rm = TRUE),
+    Control_Pred_Conseq = mean(Conseq_score[Control_Pred == 1], na.rm = TRUE),
+    Opt_Not_Pred_Conseq = mean(Conseq_score[Opt_Not_Pred == 1], na.rm = TRUE),
+    Opt_Pred_Conseq = mean(Conseq_score[Opt_Pred == 1], na.rm = TRUE),
+    Treat_Not_Pred_Conseq = mean(Conseq_score[Treat_Not_Pred == 1], na.rm = TRUE),
+    Treat_Pred_Conseq = mean(Conseq_score[Treat_Pred == 1], na.rm = TRUE),
+    Opt_Treatment_Conseq = mean(Conseq_score[Opt_Treatment == 1], na.rm = TRUE),
+    Control_Conseq = mean(Conseq_score[Control == 1], na.rm = TRUE),
+    
+    Full_Sample_Correct = mean(percentage_correct, na.rm = TRUE),
+    Dummy_Treated_Correct = mean(percentage_correct[Dummy_Treated == 1], na.rm = TRUE),
+    Dummy_Vol_Treated_Correct = mean(percentage_correct[Dummy_Vol_Treated == 1], na.rm = TRUE),
+    Dummy_no_info_Correct = mean(percentage_correct[Dummy_no_info == 1], na.rm = TRUE),
+    Control_Not_Pred_Correct = mean(percentage_correct[Control_Not_Pred == 1], na.rm = TRUE),
+    Control_Pred_Correct = mean(percentage_correct[Control_Pred == 1], na.rm = TRUE),
+    Opt_Not_Pred_Correct = mean(percentage_correct[Opt_Not_Pred == 1], na.rm = TRUE),
+    Opt_Pred_Correct = mean(percentage_correct[Opt_Pred == 1], na.rm = TRUE),
+    Treat_Not_Pred_Correct = mean(percentage_correct[Treat_Not_Pred == 1], na.rm = TRUE),
+    Treat_Pred_Correct = mean(percentage_correct[Treat_Pred == 1], na.rm = TRUE),
+    Opt_Treatment_Correct = mean(percentage_correct[Opt_Treatment == 1], na.rm = TRUE),
+    Control_Correct = mean(percentage_correct[Control == 1], na.rm = TRUE),
+    
+    Full_Sample_InterviewTime  = mean(interviewtime_net_clean, na.rm = TRUE),
+    Dummy_Treated_InterviewTime  = mean(interviewtime_net_clean[Dummy_Treated == 1], na.rm = TRUE),
+    Dummy_Vol_Treated_InterviewTime  = mean(interviewtime_net_clean[Dummy_Vol_Treated == 1], na.rm = TRUE),
+    Dummy_no_info_InterviewTime  = mean(interviewtime_net_clean[Dummy_no_info == 1], na.rm = TRUE),
+    Control_Not_Pred_InterviewTime  = mean(interviewtime_net_clean[Control_Not_Pred == 1], na.rm = TRUE),
+    Control_Pred_InterviewTime  = mean(interviewtime_net_clean[Control_Pred == 1], na.rm = TRUE),
+    Opt_Not_Pred_InterviewTime  = mean(interviewtime_net_clean[Opt_Not_Pred == 1], na.rm = TRUE),
+    Opt_Pred_InterviewTime  = mean(interviewtime_net_clean[Opt_Pred == 1], na.rm = TRUE),
+    Treat_Not_Pred_InterviewTime  = mean(interviewtime_net_clean[Treat_Not_Pred == 1], na.rm = TRUE),
+    Treat_Pred_InterviewTime  = mean(interviewtime_net_clean[Treat_Pred == 1], na.rm = TRUE),
+    Opt_Treatment_InterviewTime  = mean(interviewtime_net_clean[Opt_Treatment == 1], na.rm = TRUE),
+    Control_InterviewTime  = mean(interviewtime_net_clean[Control == 1], na.rm = TRUE),
+    
+
+    Full_Sample_CC_time  = mean(CC_time_mean_clean, na.rm = TRUE),
+    Dummy_Treated_CC_time  = mean(CC_time_mean_clean[Dummy_Treated == 1], na.rm = TRUE),
+    Dummy_Vol_Treated_CC_time  = mean(CC_time_mean_clean[Dummy_Vol_Treated == 1], na.rm = TRUE),
+    Dummy_no_info_CC_time  = mean(CC_time_mean_clean[Dummy_no_info == 1], na.rm = TRUE),
+    Control_Not_Pred_CC_time  = mean(CC_time_mean_clean[Control_Not_Pred == 1], na.rm = TRUE),
+    Control_Pred_CC_time  = mean(CC_time_mean_clean[Control_Pred == 1], na.rm = TRUE),
+    Opt_Not_Pred_CC_time  = mean(CC_time_mean_clean[Opt_Not_Pred == 1], na.rm = TRUE),
+    Opt_Pred_CC_time  = mean(CC_time_mean_clean[Opt_Pred == 1], na.rm = TRUE),
+    Treat_Not_Pred_CC_time  = mean(CC_time_mean_clean[Treat_Not_Pred == 1], na.rm = TRUE),
+    Treat_Pred_CC_time  = mean(CC_time_mean_clean[Treat_Pred == 1], na.rm = TRUE),
+    Opt_Treatment_CC_time  = mean(CC_time_mean_clean[Opt_Treatment == 1], na.rm = TRUE),
+    Control_CC_time  = mean(CC_time_mean_clean[Control == 1], na.rm = TRUE)
+    
+  )
+
+
# Persist the raw summary statistics both as CSV and as a tab-separated file.
# (FALSE spelled out: `F` is a reassignable binding, not a reserved word.)
write.csv(summary_stats, "Data/descriptives.csv", row.names = FALSE)
write.table(summary_stats, "Data/descriptives.txt", sep = "\t")
+
+
+
# Row labels for the descriptives table (first column). Rows 1, 4 and 9 are
# section headers ("Gender", "Net Household Income", "Education") and stay NA
# in every statistics column.
# Fixed label typos from the original: "Houshold" -> "Household" and
# "300€- 4000€" -> "3000€ - 4000€" (third income bracket).
categories <- c("Gender", "Proportion Female:", "Age", "Net Household Income",
                "Less than 1500€", "1500€ - 3000€",
                "3000€ - 4000€", "More than 4000€",
                "Education", "University degree", "No University degree",
                "NR-Index", "Correct quiz question", "Consequentiality score",
                "Net interview time", "Mean Choice Card time", "Number of respondents")

# Skeleton of the descriptives table: one row per category, one column per
# (sub)sample; all statistic cells start as NA and are filled in below.
table_df <- data.frame(Category = categories,
                       Sample = rep(NA, length(categories)),
                       Treated = rep(NA, length(categories)),
                       Opt_Treatment = rep(NA, length(categories)),
                       Control = rep(NA, length(categories)),
                       Vol_Treated = rep(NA, length(categories)),
                       No_Info = rep(NA, length(categories)),
                       Treated_Predicted = rep(NA, length(categories)),
                       Treated_Not_Predicted = rep(NA, length(categories)),
                       Opt_Treatment_Predicted = rep(NA, length(categories)),
                       Opt_Treatment_Not_Predicted = rep(NA, length(categories)),
                       Control_Predicted = rep(NA, length(categories)),
                       Control_Not_Predicted = rep(NA, length(categories)))
+
+
# The original filled each of the 12 sample columns with 14 nearly identical
# assignment lines. The maps below drive a single loop instead: each table_df
# column corresponds to one name prefix in summary_stats, and each filled row
# corresponds to one name suffix plus its rounding precision.
column_prefixes <- c(
  Sample                      = "Full_Sample",
  Treated                     = "Dummy_Treated",
  Opt_Treatment               = "Opt_Treatment",
  Control                     = "Control",
  Vol_Treated                 = "Dummy_Vol_Treated",
  No_Info                     = "Dummy_no_info",
  Treated_Predicted           = "Treat_Pred",
  Treated_Not_Predicted       = "Treat_Not_Pred",
  Opt_Treatment_Predicted     = "Opt_Pred",
  Opt_Treatment_Not_Predicted = "Opt_Not_Pred",
  Control_Predicted           = "Control_Pred",
  Control_Not_Predicted       = "Control_Not_Pred"
)

# Rows 1, 4 and 9 are section headers and stay NA.
fill_rows     <- c(2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17)
stat_suffixes <- c("Gender", "Age", "Income_1", "Income_2", "Income_3",
                   "Income_4", "UniDegree", "UniDegree", "NR", "Correct",
                   "Conseq", "InterviewTime", "CC_time", "n")
# Net interview time (row 15) and respondent counts (row 17) are rounded to
# whole numbers; all other statistics keep two decimals (as in the original).
round_digits  <- c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0)

for (col in names(column_prefixes)) {
  prefix <- column_prefixes[[col]]
  for (k in seq_along(fill_rows)) {
    value <- summary_stats[[paste(prefix, stat_suffixes[k], sep = "_")]][1]
    # Row 11 ("No University degree") is the complement of the degree share.
    if (fill_rows[k] == 11) {
      value <- 100 - value
    }
    table_df[[col]][fill_rows[k]] <- round(value, round_digits[k])
  }
}

write.table(table_df, "Data/descriptives_long.txt", sep = "\t")
# NOTE(review): include.rownames is an argument of print.xtable(), not
# xtable() itself — presumably intended for the subsequent print; confirm.
xtable(table_df, include.rownames = FALSE)
diff --git a/Scripts/lapage_results.R b/Scripts/lapage_results.R
new file mode 100644
index 0000000..fa2e855
--- /dev/null
+++ b/Scripts/lapage_results.R
@@ -0,0 +1,210 @@
+#********************************************************************
+### LEPAGE TEST FUNCTIONS (FROM NSM3 PACKAGE)
+
#' Lepage two-sample location-scale test (adapted from the NSM3 package).
#'
#' Combines the Wilcoxon rank-sum statistic (location) with the
#' Ansari-Bradley statistic (scale) into D = W*^2 + C*^2, which is
#' asymptotically chi-squared with 2 degrees of freedom under the null
#' hypothesis of equal locations and scales.
#'
#' @param x numeric vector for sample 1, or a list whose first two elements
#'   are the two samples.
#' @param y numeric vector for sample 2 (ignored when x is a list).
#' @param g optional group labels (1/2) when a single pooled vector is given
#'   in x and y is left NA.
#' @param method "Exact", "Asymptotic" or "Monte Carlo". When NA, "Exact" is
#'   chosen if there are at most 10000 combinations, otherwise "Monte Carlo".
#' @param n.mc number of resamples when method = "Monte Carlo".
#' @return list with sample sizes (m, n), n.mc, a ties flag, the method used,
#'   the observed statistic (obs.stat) and the p-value. Also prints a summary.
lepage.test <- function(x, y = NA, g = NA, method = NA, n.mc = 10000){

  ## Adapted from kruskal.test(): accept either a list of two samples or a
  ## single pooled vector x with group labels g.
  if (is.list(x)) {
    if (length(x) < 2L)
      stop("'x' must be a list with at least 2 elements")
    y <- x[[2]]
    x <- x[[1]]
  }
  else {
    ## all(is.na(y)) replaces the original min(is.na(y)) != 0 (equivalent,
    ## but explicit): split the pooled vector by the group labels.
    if (all(is.na(y))) {
      k <- length(unique(g))
      if (length(x) != length(g))
        stop("'x' and 'g' must have the same length")
      if (k < 2)
        stop("all observations are in the same group")
      y <- x[g == 2]
      x <- x[g == 1]
    }
  }

  outp <- list()
  outp$m <- m <- length(x)
  outp$n <- n <- length(y)
  outp$n.mc <- n.mc
  N <- outp$m + outp$n
  outp$ties <- (length(c(x, y)) != length(unique(c(x, y))))
  even <- (outp$m + outp$n + 1) %% 2   # 1 (TRUE) when N is even
  outp$stat.name <- "Lepage D"

  ## When the caller gives no method, pick one: exact enumeration is feasible
  ## for at most 10000 combinations, otherwise fall back to Monte Carlo.
  if (is.na(method)) {
    if (choose(outp$m + outp$n, outp$n) <= 10000) {
      method <- "Exact"
    } else {
      method <- "Monte Carlo"
    }
  }
  outp$method <- method

  ## Wilcoxon component: midranks of the pooled sample.
  tmp.W <- rank(c(x, y))

  our.data <- rbind(c(x, y), c(rep(1, length(x)), rep(0, length(y))))
  sorted <- our.data[1, order(our.data[1, ])]
  x.labels <- our.data[2, order(our.data[1, ])]

  ## Ansari-Bradley scores over the sorted pool: 1, 2, ..., med, ..., 2, 1.
  med <- ceiling(N / 2)
  if (even) {no.ties <- c(1:med, med:1)}
  if (!even) {no.ties <- c(1:med, (med - 1):1)}

  ## Group equal observations so tied values can share the average score.
  obs.group <- numeric(N)
  group.num <- 1
  for (i in seq_len(N)) {
    if (obs.group[i] == 0) {
      obs.group[i] <- group.num
      for (j in i:N) {
        if (sorted[i] == sorted[j]) {
          obs.group[j] <- obs.group[i]
        }
      }
      group.num <- group.num + 1
    }
  }

  group.ranks <- tapply(no.ties, obs.group, mean)

  tied.ranks <- numeric(N)
  ## seq_along() fixes an off-by-one in the original (1:group.num), which
  ## indexed one past the last group; that extra pass was a silent no-op.
  for (i in seq_along(group.ranks)) {
    tied.ranks[which(obs.group == as.numeric(names(group.ranks)[i]))] <- group.ranks[i]
  }

  tmp.C <- c(tied.ranks[x.labels == 1], tied.ranks[x.labels == 0])

  ## D only needs the scores belonging to the second sample (the y values).
  D.calc <- function(C.vals, W.vals){
    if (even) {
      exp.C <- n * (N + 2) / 4
      var.C <- m * n * (N + 2) * (N - 2) / (48 * (N - 1))
    }
    if (!even) {
      exp.C <- n * (N + 1)^2 / (4 * N)
      var.C <- m * n * (N + 1) * (3 + N^2) / (48 * N^2)
    }
    W.obs <- sum(W.vals)
    W.star <- (W.obs - n * (N + 1) / 2) / sqrt(m * n * (N + 1) / 12)
    C.star <- (sum(C.vals) - exp.C) / sqrt(var.C)
    return(W.star^2 + C.star^2)
  }

  outp$obs.stat <- D.calc(tmp.C[(m + 1):N], tmp.W[(m + 1):N])

  if (outp$method == "Exact") {
    ## Enumerate all placements of the second sample among the N positions.
    possible.orders <- gtools::combinations(outp$m + outp$n, outp$n)

    possible.C <- t(apply(possible.orders, 1, function(x) tmp.C[x]))
    possible.W <- t(apply(possible.orders, 1, function(x) tmp.W[x]))

    theor.dist <- numeric(nrow(possible.C))
    for (i in seq_len(nrow(possible.C))) {
      theor.dist[i] <- D.calc(possible.C[i, ], possible.W[i, ])
    }

    outp$p.value <- mean(theor.dist >= outp$obs.stat)
  }

  if (outp$method == "Asymptotic") {
    ## D ~ chi-squared with 2 df under H0.
    outp$p.value <- (1 - pchisq(outp$obs.stat, 2))
  }

  if (outp$method == "Monte Carlo") {
    outp$p.value <- 0
    for (i in seq_len(n.mc)) {
      mc.sample <- sample(1:N, n)
      if (D.calc(tmp.C[mc.sample], tmp.W[mc.sample]) >= outp$obs.stat) {
        outp$p.value <- outp$p.value + 1 / n.mc
      }
    }
  }

  cat("\nLepage two-sample location-scale test\n")
  cat("\nNull hypothesis: The locations and scales of the two population distributions are equal.\n")
  cat("Alternative hypothesis: The locations and/or scales of the two population distributions differ.\n")
  cat(paste("\nD = ", round(outp$obs.stat, 3), ", p-value = ", round(outp$p.value, 4), "\n\n", sep=""))

  #class(outp)="NSM3Ch5p"
  return(outp)
}
# Example usage
set.seed(123)
# NOTE(review): the next two draws are immediately overwritten below, but they
# still advance the RNG stream, so the final x/y values depend on them being
# executed — confirm this is intentional before removing the dead assignments.
x <- rnorm(244, mean = 38.63, sd = 26.68)
y <- rnorm(240, mean = 31.87, sd = 17.81)
x <- rnorm(300, mean = 1, sd = 1)
y <- rnorm(300, mean = 0.963, sd = 0.827)
lepage.test(x, y)
+
+
set.seed(123) # Ensure reproducibility

# Simulation 1: draw each group's mean and SD independently from their
# sampling distributions, generate two samples of the observed sizes
# (244 / 240), run the Lepage test, and store p-value and statistic.
n_sim <- 500
values <- data.frame(p = double(n_sim), D = double(n_sim))

for (i in seq_len(n_sim)) {
  # Sample means and standard deviations from their respective distributions
  mean_x <- rnorm(1, mean = 38.63, sd = 7.05)
  mean_y <- rnorm(1, mean = 31.87, sd = 5.12)

  sd_x <- rnorm(1, mean = 26.68, sd = 5.48)
  sd_y <- rnorm(1, mean = 22.38, sd = 3.63)

  # Ensure standard deviations are positive
  sd_x <- max(sd_x, 0.01)
  sd_y <- max(sd_y, 0.01)

  # Generate data
  x <- rnorm(244, mean = mean_x, sd = sd_x)
  y <- rnorm(240, mean = mean_y, sd = sd_y)

  # Perform Lepage test
  test <- lepage.test(x, y)
  values$p[i] <- test$p.value
  values$D[i] <- test$obs.stat
}

# Proportion of p-values below 0.10 (the original comment said 0.05,
# which did not match the code; the code's 0.10 threshold is kept).
prop_reject <- mean(values$p < 0.10)

# Output result
print(prop_reject)
+
set.seed(123) # Ensure reproducibility

# Simulation 2: keep the coefficient of variation fixed, so each group's SD
# scales with its drawn mean instead of being drawn independently.
n_sim <- 500
values_fixed <- data.frame(p = double(n_sim), D = double(n_sim))
var_coef_x <- 26.68 / 38.63   # coefficient of variation, group x
var_coef_y <- 22.38 / 31.87   # coefficient of variation, group y

for (i in seq_len(n_sim)) {
  # Sample means from their respective distributions
  mean_x <- rnorm(1, mean = 38.63, sd = 7.05)
  mean_y <- rnorm(1, mean = 31.87, sd = 5.12)

  # SDs follow the fixed coefficient of variation
  sd_x <- abs(mean_x * var_coef_x)
  sd_y <- abs(mean_y * var_coef_y)
  # Generate data
  x <- rnorm(244, mean = mean_x, sd = sd_x)
  y <- rnorm(240, mean = mean_y, sd = sd_y)

  # Perform Lepage test. The original wrapped this in invisible(), which has
  # no effect on an assigned result, so it is dropped.
  test <- lepage.test(x, y)
  values_fixed$p[i] <- test$p.value
  values_fixed$D[i] <- test$obs.stat
}

# Proportion of p-values below 0.10 (the original comment said 0.05,
# which did not match the code; the code's 0.10 threshold is kept).
prop_reject <- mean(values_fixed$p < 0.10)

# Output result
print(prop_reject)
hist(values_fixed$D)
diff --git a/Scripts/lapage_results_naturalness.R b/Scripts/lapage_results_naturalness.R
new file mode 100644
index 0000000..8cd1475
--- /dev/null
+++ b/Scripts/lapage_results_naturalness.R
@@ -0,0 +1,372 @@
+set.seed(123) # Ensure reproducibility
+
+lepage.test <- function(x, y = NA, g = NA, method = NA, n.mc = 10000){  # Lepage two-sample location-scale test (adapted from the NSM3 package)
+  
+  ##Adapted from kruskal.test()##
+  if (is.list(x)) {
+    if (length(x) < 2L) 
+      stop("'x' must be a list with at least 2 elements")
+    y <- x[[2]]
+    x <- x[[1]]
+  }
+  else {
+    if(min(is.na(y)) != 0){  # TRUE only when every y is NA, i.e. samples were supplied via x plus group vector g
+      k <- length(unique(g))
+      if (length(x) != length(g)) 
+        stop("'x' and 'g' must have the same length")
+      if (k < 2) 
+        stop("all observations are in the same group")
+      y <- x[g == 2]
+      x <- x[g == 1]
+    }
+  }
+  #####################
+  
+  outp <- list()
+  outp$m <- m <- length(x)
+  outp$n <- n <- length(y)
+  outp$n.mc <- n.mc
+  N <- outp$m + outp$n
+  outp$ties <- (length(c(x, y)) != length(unique(c(x, y))))
+  even <- (outp$m + outp$n + 1)%%2  # 1 when the combined sample size N is even
+  outp$stat.name <- "Lepage D"
+  
+  
+  ##When the user doesn't give us any indication of which method to use, try to pick one.
+  if(is.na(method)){
+    if(choose(outp$m + outp$n, outp$n) <= 10000){
+      method <- "Exact"
+    }
+    if(choose(outp$m + outp$n, outp$n) > 10000){
+      method <- "Monte Carlo"
+    }
+  }
+  #####################################################################
+  outp$method <- method
+  
+  tmp.W <- rank(c(x, y))  # Wilcoxon ranks of the pooled sample (location component)
+  
+  our.data <- rbind(c(x, y), c(rep(1, length(x)), rep(0, length(y))))
+  sorted <- our.data[1, order(our.data[1, ]) ]
+  x.labels <-our.data[2, order(our.data[1, ]) ]
+  
+  med <- ceiling(N / 2)
+  if(even){no.ties <- c(1:med, med:1)}  # symmetric scores 1,2,...,med,med,...,2,1 used for the scale component
+  if(!even){no.ties <- c(1:med, (med - 1):1)}
+  
+  obs.group <- numeric(N)
+  group.num <- 1
+  for(i in 1:N){  # assign one group id to each run of tied values in the sorted pooled sample
+    if(obs.group[i] == 0){
+      obs.group[i] <- group.num
+      for(j in i:N){
+        if(sorted[i] == sorted[j]){
+          obs.group[j] <- obs.group[i]
+        }
+      }
+      group.num <- group.num + 1;
+    }
+  }
+  
+  group.ranks <- tapply(no.ties, obs.group, mean)  # average the scale scores within each tie group (mid-scores)
+  
+  tied.ranks <- numeric(N)
+  for(i in 1:group.num){
+    tied.ranks[which(obs.group == as.numeric(names(group.ranks)[i]))] <- group.ranks[i]
+  }
+  
+  tmp.C <- c(tied.ranks[x.labels == 1], tied.ranks[x.labels == 0])  # scale scores reordered: x sample first, then y
+  
+  ##Only needs to depend on y values
+  D.calc <- function(C.vals, W.vals){
+    
+    if(even){
+      exp.C <- n * (N + 2) / 4
+      var.C <- m * n * (N + 2) * (N - 2) / (48 * (N - 1))
+    }
+    if(!even){
+      exp.C <- n * (N + 1)^2 / (4 * N)
+      var.C <- m * n * (N + 1) * (3 + N^2) / (48 * N^2)
+    }
+    W.obs <- sum(W.vals)
+    W.star <- (W.obs - n * (N + 1) / 2) / sqrt(m * n * (N + 1) / 12)  # standardized rank-sum (location) part
+    C.star <- (sum(C.vals) - exp.C) / sqrt(var.C)  # standardized scale part
+    return(W.star^2 + C.star^2)  # Lepage D = sum of the two squared standardized statistics
+  }
+  
+  outp$obs.stat <- D.calc(tmp.C[(m + 1):N], tmp.W[(m + 1):N])  # evaluated on the y sample's scores (last n entries)
+  
+  if(outp$method == "Exact"){
+    possible.orders <- gtools::combinations(outp$m + outp$n, outp$n)  # every way to assign n pooled positions to y
+    
+    possible.C <- t(apply(possible.orders, 1, function(x) tmp.C[x]))
+    possible.W <- t(apply(possible.orders, 1, function(x) tmp.W[x]))
+    
+    theor.dist <- numeric(nrow(possible.C))
+    for(i in 1:nrow(possible.C)){
+      theor.dist[i] <- D.calc(possible.C[i, ], possible.W[i, ])
+    }
+    
+    outp$p.value <- mean(theor.dist >= outp$obs.stat)
+  }
+  
+  if(outp$method == "Asymptotic"){
+    outp$p.value <- (1 - pchisq(outp$obs.stat, 2))  # D is referred to a chi-squared distribution with 2 df
+  }
+  
+  if(outp$method == "Monte Carlo"){
+    outp$p.value <- 0
+    for(i in 1:n.mc){
+      mc.sample <- sample(1:N, n)  # random reassignment of n pooled positions to y
+      
+      if(D.calc(tmp.C[mc.sample], tmp.W[mc.sample]) >= outp$obs.stat){
+        outp$p.value = outp$p.value + 1 / n.mc
+      }
+    }
+  }
+  
+  cat("\nLepage two-sample location-scale test\n")
+  cat("\nNull hypothesis: The locations and scales of the two population distributions are equal.\n")
+  cat("Alternative hypothesis: The locations and/or scales of the two population distributions differ.\n")
+  cat(paste("\nD = ", round(outp$obs.stat, 3), ", p-value = ", round(outp$p.value, 4), "\n\n", sep=""))
+  
+  #class(outp)="NSM3Ch5p"
+  return(outp)
+}
+lapage <- function(mu_x, se_mu_x, sigma_x, se_sigma_x, n_x, 
+                   mu_y, se_mu_y, sigma_y, se_sigma_y, n_y, 
+                   model_x, model_y, n_sim) {  # Monte Carlo rejection-rate check of lepage.test given estimated moments and their standard errors
+  
+  values <- data.frame(p = double(n_sim), D = double(n_sim))
+  
+  for (i in seq_len(n_sim)) {  # seq_len() is safe for n_sim = 0, where 1:n_sim would iterate over c(1, 0)
+    # Sample means and standard deviations from their respective distributions
+    mean_x <- rnorm(1, mean = mu_x, sd = se_mu_x)
+    mean_y <- rnorm(1, mean = mu_y, sd = se_mu_y)
+    
+    sd_x <- rnorm(1, mean = sigma_x, sd = se_sigma_x)
+    sd_y <- rnorm(1, mean = sigma_y, sd = se_sigma_y)
+    
+    # Ensure standard deviations are positive
+    sd_x <- max(sd_x, 0.01)
+    sd_y <- max(sd_y, 0.01)
+    
+    # Generate data
+    x <- rnorm(n_x, mean = mean_x, sd = sd_x)
+    y <- rnorm(n_y, mean = mean_y, sd = sd_y)
+    
+    # Perform Lepage test
+    test <- lepage.test(x, y)
+    values$p[i] <- test$p.value
+    values$D[i] <- test$obs.stat
+  }
+  
+  # Calculate the proportion of p-values < 0.05
+  prop_reject <- mean(values$p < 0.05)
+  
+  # Print the result
+  cat(paste0(model_x, " vs ", model_y, "\n"))
+  cat(paste0("Reject in ", prop_reject * 100, "%\n"))
+  
+  return(values)
+}
+
+
+
+# Load models
+MXL_wtp_Control_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Control_Not_Pred")
+MXL_wtp_Control_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Control_Pred")
+MXL_wtp_Opt_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Opt_Not_Pred")
+MXL_wtp_Opt_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Opt_Pred")
+MXL_wtp_Treated_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Treated_Not_Pred")
+MXL_wtp_Treated_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Treated_Pred")
+
+MXL_wtp_opt_treated <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Vol_Treat")
+MXL_wtp_opt_not_treated <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Vol_Not_Treat")
+
+mxl_tr <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Treated A")
+mxl_vol_tr <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Vol_Treated A")
+mxl_not_tr <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Not_Treated A")
+
+# List of models (keyed by the group labels used in model_pairs below)
+models <- list(
+  "Treated Not Pred" = MXL_wtp_Treated_Not_Pred_model,
+  "Opt Not Pred" = MXL_wtp_Opt_Not_Pred_model,
+  "Control Not Pred" = MXL_wtp_Control_Not_Pred_model,
+  "Treated Pred" = MXL_wtp_Treated_Pred_model,
+  "Opt Pred" = MXL_wtp_Opt_Pred_model,
+  "Control Pred" = MXL_wtp_Control_Pred_model,
+  "Treated Split Sample" = mxl_tr,
+  "Opt Split Sample" = mxl_vol_tr,
+  "Control Split Sample" = mxl_not_tr
+)
+
+# Initialize an empty list to store results
+model_data_list <- list()
+
+# Loop through the models and extract coefficients
+for (model_name in names(models)) {  # one row per model: mu/sigma of the 'natural' coefficient plus their SEs
+  model <- models[[model_name]]
+  
+  # Extract mean and standard deviation for 'mu_natural' and 'sig_natural'
+  coef <- model$estimate
+  se <- model$se
+  
+  temp_data <- data.frame(
+    model = model_name,
+    mu = coef["mu_natural"],
+    sigma = coef["sig_natural"],
+    se_mu = se["mu_natural"],
+    se_sigma = se["sig_natural"],
+    n = model$nIndivs  # passed to lapage() as the simulated sample size
+  )
+  
+  # Store in list
+  model_data_list[[model_name]] <- temp_data
+}
+
+# Combine all data into a single data frame
+model_data <- bind_rows(model_data_list)
+
+# Define the pairs of models for comparison
+model_pairs <- list(
+  c("Treated Split Sample", "Control Split Sample"),
+  c("Treated Split Sample", "Opt Split Sample"),
+  c("Opt Split Sample", "Control Split Sample"),
+  c("Treated Not Pred", "Treated Pred"),
+  c("Opt Not Pred", "Opt Pred"),
+  c("Control Not Pred" , "Control Pred" ),
+  c("Treated Not Pred", "Control Not Pred"),
+  c("Opt Not Pred", "Control Not Pred"),
+  c("Treated Not Pred", "Opt Not Pred"),
+  c("Treated Pred", "Control Pred"),
+  c("Opt Pred", "Control Pred"),
+  c("Treated Pred", "Opt Pred")
+)
+
+# Initialize an empty list to store results
+results_list <- list()
+
+# Number of simulations
+n_sim <- 1000  # Adjust as needed
+
+# Loop over model pairs
+for (pair in model_pairs) {
+  model_x <- pair[1]
+  model_y <- pair[2]
+  
+  # Extract relevant data
+  x_data <- model_data %>% filter(model == model_x)
+  y_data <- model_data %>% filter(model == model_y)
+  
+  # Run the Lapage function
+  result <- lapage(
+    x_data$mu, x_data$se_mu, x_data$sigma, x_data$se_sigma, x_data$n,
+    y_data$mu, y_data$se_mu, y_data$sigma, y_data$se_sigma, y_data$n,
+    model_x, model_y, n_sim
+  )
+  
+  # Store results (share of simulations rejecting at the 5% level, in percent)
+  results_list[[paste(model_x, "vs", model_y)]] <- data.frame(
+    model_x = model_x,
+    model_y = model_y,
+    prop_reject = mean(result$p < 0.05) * 100.00
+  )
+}
+
+# Combine results into a single data frame
+final_results <- bind_rows(results_list)
+
+# Print or save results
+print(final_results)
+
+
+
+
+
+
+
+
+
+
+
+# Initialize an empty list to store results (second pass: walking-distance coefficient)
+model_data_list <- list()
+
+# Loop through the models and extract coefficients
+for (model_name in names(models)) {
+  model <- models[[model_name]]
+  
+  # Extract mean and standard deviation for 'mu_walking' and 'sig_walking' (comment fixed: previously said 'natural')
+  coef <- model$estimate
+  se <- model$se
+  
+  temp_data <- data.frame(
+    model = model_name,
+    mu = coef["mu_walking"],
+    sigma = coef["sig_walking"],
+    se_mu = se["mu_walking"],
+    se_sigma = se["sig_walking"],
+    n = model$nIndivs
+  )
+  
+  # Store in list
+  model_data_list[[model_name]] <- temp_data
+}
+
+# Combine all data into a single data frame
+model_data <- bind_rows(model_data_list)
+
+# Define the pairs of models for comparison
+model_pairs <- list(
+  c("Treated Split Sample", "Control Split Sample"),
+  c("Treated Split Sample", "Opt Split Sample"),
+  c("Opt Split Sample", "Control Split Sample"),
+  c("Treated Not Pred", "Treated Pred"),
+  c("Opt Not Pred", "Opt Pred"),
+  c("Control Not Pred" , "Control Pred" ),
+  c("Treated Not Pred", "Control Not Pred"),
+  c("Opt Not Pred", "Control Not Pred"),
+  c("Treated Not Pred", "Opt Not Pred"),
+  c("Treated Pred", "Control Pred"),
+  c("Opt Pred", "Control Pred"),
+  c("Treated Pred", "Opt Pred")
+)
+
+# Initialize an empty list to store results
+results_list <- list()
+
+# Number of simulations
+n_sim <- 1000  # Adjust as needed
+
+# Loop over model pairs
+for (pair in model_pairs) {
+  model_x <- pair[1]
+  model_y <- pair[2]
+  
+  # Extract relevant data
+  x_data <- model_data %>% filter(model == model_x)
+  y_data <- model_data %>% filter(model == model_y)
+  
+  # Run the Lapage function
+  result <- lapage(
+    x_data$mu, x_data$se_mu, x_data$sigma, x_data$se_sigma, x_data$n,
+    y_data$mu, y_data$se_mu, y_data$sigma, y_data$se_sigma, y_data$n,
+    model_x, model_y, n_sim
+  )
+  
+  # Store results
+  results_list[[paste(model_x, "vs", model_y)]] <- data.frame(
+    model_x = model_x,
+    model_y = model_y,
+    prop_reject = mean(result$p < 0.05) * 100.00
+  )
+}
+
+# Combine results into a single data frame
+final_results_wd <- bind_rows(results_list)
+
+# Print or save results
+print(final_results_wd)
+saveRDS(final_results, "Data/Lapage_Nat.RDS")
+saveRDS(final_results_wd, "Data/Lapage_Walking.RDS")  # BUGFIX: was "Data/Lapage_Nat.RDS", which overwrote the naturalness results saved above
diff --git a/Scripts/logit/chr_vol_treat.R b/Scripts/logit/chr_vol_treat.R
index caff63e..bfe28f7 100644
--- a/Scripts/logit/chr_vol_treat.R
+++ b/Scripts/logit/chr_vol_treat.R
@@ -1,186 +1,188 @@
-library(dplyr)
-library(tidyr)
-library(margins)
-library(lubridate)
-library(caret)
-library(randomForest)
-library(pROC)
-
-library(xgboost)
-# Test treatment effect
-
-database_full <- database_full %>%
-  filter(!is.na(Treatment_new)) %>%
-  mutate(Dummy_Video_1 = case_when(Treatment_new == 1 ~ 1, TRUE ~ 0),
-         Dummy_Video_2 = case_when(Treatment_new == 5 ~ 1, TRUE ~ 0),
-         Dummy_no_info = case_when(Treatment_new == 3 ~ 1, TRUE~0),
-         Dummy_Info_nv1 = case_when(Treatment_new == 2 ~1, TRUE~0),
-         Dummy_Info_nv2 = case_when(Treatment_new == 4 ~1 , TRUE~0)) %>% 
-  mutate(Engagement_ugs = groupTime1731 + groupTime1726 + groupTime1769 + groupTime1727 + groupTime1770 + groupTime1771 + groupTime1768 +
-           groupTime1738)
-
-
-
-
-data <- database_full %>%
-  group_by(id) %>%
-  dplyr::slice(1) %>%
-  ungroup()
-data <- data %>% 
-  mutate(Choice_Treat = ifelse( Dummy_Video_2 == 1 | Dummy_Info_nv2 == 1, 1, 
-                                ifelse(Dummy_no_info==1 ,0,NA))) 
-
-
-
-table(data$Choice_Treat)  
-
-
-
-logit_choice_treat<-glm(Choice_Treat ~  as.factor(Gender)+Z_Mean_NR+Age_mean + QFIncome +
-                          as.factor(Education), data, family=binomial)
-summary(logit_choice_treat)
-
-
-logit_choice_treat_uni<-glm(Choice_Treat ~  as.factor(Gender)+Z_Mean_NR+Age_mean + QFIncome +
-                              Uni_degree + Kids_Dummy + Engagement_ugs + UGS_visits, data, family=binomial)
-summary(logit_choice_treat_uni)
-
-
-logit_sig <- glm(Choice_Treat ~  Z_Mean_NR + Age_mean + QFIncome, data, family=binomial)
-
-# Calculate marginal effects
-marginal_effects <- margins(logit_choice_treat_uni)
-
-# Display the marginal effects
-summary(marginal_effects)               
-
-
-cut_off <- 0.45
-
-
-# Make predictions
-data$probabilities <- logit_sig %>% predict(data, type = "response")
-data$predicted.classes <- ifelse(data$probabilities  >= cut_off, 1, 0)
-# Model accuracy
-mean(data$predicted.classes == data$Choice_Treat, na.rm = T)
-
-
-
-
-
-
-
-# Assuming your data is in a data frame called 'data'
-set.seed(123)  # For reproducibility
-data<-select(data, Choice_Treat, id, Age, Q02W123, Uni_degree, QFIncome,Z_Mean_NR, City, Screen,
-             Uni_degree, Q02W123,Q04W123,Q05W123, Q08W123,Q09W123,Q10W123,Q11W3,Q12W123,Q13W23,Q14S01W123,Q14S02W23,
-             Q14S03W123,Q14S04W123,Q14S05W123,Q14S06W23,Q14S07W3,Q14S08W2,Q14W23,
-             Q15S01W3,Q15S02W3,Q16W3,Q17W13,Q18W123,Q19W3,C02W23,Q20W23,Q21W23,
-             Q22S01W123,Q22S02W23,UGS_visits,Q24S01W123,
-             Q24S02W123,Q24S03W123,Q24S04W23,Q24S05W123,Q25W23,Q26S01W123,Q26S02W123,
-             Q26S03W23,Q26S04W123,Q26S05W123,Q26S06W123,Q26S07W23,Q26S08W23,Q26S99W23,
-             Q27W123,Q30W23,Q31S01W23,Q31S02W23,Q31S03W23,Q31S04W23,
-             Q31S05W23,Q31S06W23,Q31S08W23,Q32S01W23,Q32S02W23,Q32S03W23,
-             Q33S01W3,Q33S02W3,Q33S03W3,Q33S04W3,Q33S05W3,Q33S06W3,Q33S07W3,Q33S99W3,
-             Q34W23,Q35W23,Q36W23,Q37W23,Q38S01W3,Q38S02W3,Q38S03W3,Q38S04W3,Q38S05W3,
-             Q38S06W3,Q38S07W3,Q38S08W3,Q38S09W3,Q38S10W3,Q38S11W3,Q38S12W3,Q38S13W3,
-             Q38S14W3,Q38S15W3,Q38S16W3,Q38S17W3,Q38S18W3,Q38S19W3,Q38S20W3,Q38S21W3,
-             V01W3,Q39S01W3,Q39S02W3,Q39S03W3,Q39S04W3,Q39S05W3,Q40W3
-             ,Q45W123,Q46S01W3,Q46S02W3,Q46S03W3,Q46S04W3,
-             Q47S01W23,Q47S02W3,Q47S03W3,Q47S04W3,Q47S05W3,Q47S06W3,Q47S07W3,Q47S08W3,
-             Q47S09W3,Q48S01W3,Q48S02W3,Q48S03W3,Q48S04W3,Q48S05W3,Q49W23,Q50W123)
-data <- data %>%
-  select(where(~ all(!is.na(.)) & all(. != "")) | any_of("Choice_Treat"))
-# Split the data into labeled and unlabeled sets
-labeled_data <- filter(data, Choice_Treat==1| Choice_Treat==0)
-unlabeled_data <- filter(data, is.na(Choice_Treat))
-labeled_data_id<-labeled_data
-labeled_data<-select(labeled_data,-id)
-# Assuming the group information is in the column called 'Group'
-labeled_data$Choice_Treat<- as.factor(labeled_data$Choice_Treat)
-unlabeled_data$Choice_Treat<- as.factor(unlabeled_data$Choice_Treat)
-
-trainIndex <- createDataPartition(labeled_data$Choice_Treat, p = 0.8, list = FALSE)
-trainData <- labeled_data[trainIndex, ]
-testData <- labeled_data[-trainIndex, ]
-
-
-
-
-
-
-# tuneGrid <- expand.grid(
-#   nrounds = c(300),
-#   max_depth = c(15),
-#   eta = c(0.05),
-#   gamma = c(0, 0.1),
-#   colsample_bytree = c(0.6, 0.8),     # Fraction of features to be used for each tree
-#   min_child_weight = c(1, 3, 5),      # Minimum sum of instance weight (hessian) needed in a child
-#   subsample = c(0.7, 0.8)             # Fraction of samples to be used for each tree
-# )
-# 
-# 
-# model3 <- train(Choice_Treat ~ ., 
-#                 data = trainData, 
-#                 method = "xgbTree", 
-#                 tuneGrid = tuneGrid,
-#                 trControl = trainControl(method = "cv", number = 5))
-# 
-# 
-# # Get variable importance
-# varImp(model3)
-# 
-# 
-# 
-# # Evaluate the model
-# predictions <- predict(model3, newdata = testData)
-# confusionMatrix(predictions, testData$Choice_Treat)
-# labeled_predictions <- predict(model3, newdata = labeled_data)
-# labeled_data$PredictedGroup <- labeled_predictions
-# 
-# table(labeled_data$Choice_Treat, labeled_data$PredictedGroup)
-# 
-# unlabeled_predictions <- predict(model3, newdata = unlabeled_data)
-# labeled_data_id$PredictedGroup <- labeled_predictions
-# data_prediction_labeled<-select(labeled_data_id, c("id", "PredictedGroup"))
-# saveRDS(data_prediction_labeled, "Data/predictions_labeled.RDS")
-# 
-# unlabeled_data$PredictedGroup <- unlabeled_predictions
-# data_prediction<-select(unlabeled_data, c("id", "PredictedGroup"))
-# saveRDS(data_prediction, "Data/predictions.RDS")
-# 
-# print(model3$bestTune)
-# 
-# 
-# table(data$predicted.classes ,data$Choice_Treat)
-# 
-# calculate_metrics <- function(confusion_matrix) {
-#   # Extract values from the confusion matrix
-#   TN <- confusion_matrix[1, 1]
-#   FP <- confusion_matrix[2, 1]
-#   FN <- confusion_matrix[1, 2]
-#   TP <- confusion_matrix[2, 2]
-#   
-#   # Calculate sensitivity
-#   sensitivity <- round(TP / (TP + FN), 2)
-#   
-#   # Calculate specificity
-#   specificity <- round(TN / (TN + FP), 2)
-#   
-#   # Return the results as a list
-#   return(list(sensitivity = sensitivity, specificity = specificity))
-# }
-# 
-# # Calculate metrics
-# metrics <- calculate_metrics(table(data$predicted.classes ,data$Choice_Treat))
-# 
-# # Print results
-# print(paste("Sensitivity:", metrics$sensitivity))
-# print(paste("Specificity:", metrics$specificity))
-# roc_obj <- roc(data$Choice_Treat, data$probabilities)
-# plot(roc_obj, main = "ROC Curve", col = "blue", lwd = 2)
-# auc_value <- auc(roc_obj)
-# best_coords <- coords(roc_obj, "best", best.method="youden")
-# 
-# 
+library(dplyr)
+library(tidyr)
+library(margins)
+library(lubridate)
+library(caret)
+library(randomForest)
+library(pROC)
+
+library(xgboost)
+# Test treatment effect
+
+database_full <- database_full %>%
+  filter(!is.na(Treatment_new)) %>%
+  mutate(Dummy_Video_1 = case_when(Treatment_new == 1 ~ 1, TRUE ~ 0),
+         Dummy_Video_2 = case_when(Treatment_new == 5 ~ 1, TRUE ~ 0),
+         Dummy_no_info = case_when(Treatment_new == 3 ~ 1, TRUE~0),
+         Dummy_Info_nv1 = case_when(Treatment_new == 2 ~1, TRUE~0),
+         Dummy_Info_nv2 = case_when(Treatment_new == 4 ~1 , TRUE~0)) %>% 
+  mutate(Engagement_ugs = groupTime1731 + groupTime1726 + groupTime1769 + groupTime1727 + groupTime1770 + groupTime1771 + groupTime1768 +
+           groupTime1738)
+
+
+
+
+data <- database_full %>%
+  group_by(id) %>%
+  dplyr::slice(1) %>%  # one row per respondent
+  ungroup()
+data <- data %>% 
+  mutate(Choice_Treat = ifelse( Dummy_Video_2 == 1 | Dummy_Info_nv2 == 1, 1, 
+                                ifelse(Dummy_no_info==1 ,0,NA)))  # 1 if Treatment_new is 4 or 5, 0 if 3, NA otherwise
+
+
+
+table(data$Choice_Treat)  
+
+
+
+logit_choice_treat<-glm(Choice_Treat ~  as.factor(Gender)+Z_Mean_NR+Age_mean + QFIncome +
+                          as.factor(Education), data, family=binomial)
+summary(logit_choice_treat)
+
+
+logit_choice_treat_uni<-glm(Choice_Treat ~  as.factor(Gender)+Z_Mean_NR+Age_mean + QFIncome +
+                              Uni_degree + Kids_Dummy + Engagement_ugs + UGS_visits, data, family=binomial)
+summary(logit_choice_treat_uni)
+
+
+logit_sig <- glm(Choice_Treat ~  Z_Mean_NR + Uni_degree+as.factor(Gender) + Age_mean + QFIncome, data, family=binomial)
+summary(logit_sig)
+1 - (logLik(logit_sig) / logLik(update(logit_sig, . ~ 1)))  # McFadden's pseudo R-squared vs the intercept-only model
+
+# Calculate marginal effects
+marginal_effects <- margins(logit_choice_treat_uni)
+
+# Display the marginal effects
+summary(marginal_effects)               
+
+
+cut_off <- 0.45
+
+
+# Make predictions
+data$probabilities <- logit_sig %>% predict(data, type = "response")
+data$predicted.classes <- ifelse(data$probabilities  >= cut_off, 1, 0)
+# Model accuracy
+mean(data$predicted.classes == data$Choice_Treat, na.rm = TRUE)  # TRUE spelled out (T can be reassigned)
+
+
+
+
+
+
+
+# Assuming your data is in a data frame called 'data'
+set.seed(123)  # For reproducibility
+data<-select(data, Choice_Treat, id, Age, Q02W123, Uni_degree, QFIncome,Z_Mean_NR, City, Screen,
+             Uni_degree, Q02W123,Q04W123,Q05W123, Q08W123,Q09W123,Q10W123,Q11W3,Q12W123,Q13W23,Q14S01W123,Q14S02W23,
+             Q14S03W123,Q14S04W123,Q14S05W123,Q14S06W23,Q14S07W3,Q14S08W2,Q14W23,
+             Q15S01W3,Q15S02W3,Q16W3,Q17W13,Q18W123,Q19W3,C02W23,Q20W23,Q21W23,
+             Q22S01W123,Q22S02W23,UGS_visits,Q24S01W123,
+             Q24S02W123,Q24S03W123,Q24S04W23,Q24S05W123,Q25W23,Q26S01W123,Q26S02W123,
+             Q26S03W23,Q26S04W123,Q26S05W123,Q26S06W123,Q26S07W23,Q26S08W23,Q26S99W23,
+             Q27W123,Q30W23,Q31S01W23,Q31S02W23,Q31S03W23,Q31S04W23,
+             Q31S05W23,Q31S06W23,Q31S08W23,Q32S01W23,Q32S02W23,Q32S03W23,
+             Q33S01W3,Q33S02W3,Q33S03W3,Q33S04W3,Q33S05W3,Q33S06W3,Q33S07W3,Q33S99W3,
+             Q34W23,Q35W23,Q36W23,Q37W23,Q38S01W3,Q38S02W3,Q38S03W3,Q38S04W3,Q38S05W3,
+             Q38S06W3,Q38S07W3,Q38S08W3,Q38S09W3,Q38S10W3,Q38S11W3,Q38S12W3,Q38S13W3,
+             Q38S14W3,Q38S15W3,Q38S16W3,Q38S17W3,Q38S18W3,Q38S19W3,Q38S20W3,Q38S21W3,
+             V01W3,Q39S01W3,Q39S02W3,Q39S03W3,Q39S04W3,Q39S05W3,Q40W3
+             ,Q45W123,Q46S01W3,Q46S02W3,Q46S03W3,Q46S04W3,
+             Q47S01W23,Q47S02W3,Q47S03W3,Q47S04W3,Q47S05W3,Q47S06W3,Q47S07W3,Q47S08W3,
+             Q47S09W3,Q48S01W3,Q48S02W3,Q48S03W3,Q48S04W3,Q48S05W3,Q49W23,Q50W123)
+data <- data %>%
+  select(where(~ all(!is.na(.)) & all(. != "")) | any_of("Choice_Treat"))  # keep only fully observed, non-empty columns plus the label
+# Split the data into labeled and unlabeled sets
+labeled_data <- filter(data, Choice_Treat==1| Choice_Treat==0)
+unlabeled_data <- filter(data, is.na(Choice_Treat))
+labeled_data_id<-labeled_data  # copy that retains 'id' so predictions can be mapped back to respondents
+labeled_data<-select(labeled_data,-id)
+# Convert the label to a factor so caret treats this as classification
+labeled_data$Choice_Treat<- as.factor(labeled_data$Choice_Treat)
+unlabeled_data$Choice_Treat<- as.factor(unlabeled_data$Choice_Treat)
+
+trainIndex <- createDataPartition(labeled_data$Choice_Treat, p = 0.8, list = FALSE)
+trainData <- labeled_data[trainIndex, ]
+testData <- labeled_data[-trainIndex, ]
+
+
+
+
+
+
+ tuneGrid <- expand.grid(  # hyperparameter grid for xgbTree: 2*2*3*2 = 24 combinations at fixed nrounds/depth/eta
+   nrounds = c(300),
+   max_depth = c(15),
+   eta = c(0.05),
+   gamma = c(0, 0.1),
+   colsample_bytree = c(0.6, 0.8),     # Fraction of features to be used for each tree
+   min_child_weight = c(1, 3, 5),      # Minimum sum of instance weight (hessian) needed in a child
+   subsample = c(0.7, 0.8)             # Fraction of samples to be used for each tree
+ )
+ 
+ 
+ model3 <- train(Choice_Treat ~ ., 
+                 data = trainData, 
+                 method = "xgbTree", 
+                 tuneGrid = tuneGrid,
+                 trControl = trainControl(method = "cv", number = 5))  # 5-fold cross-validation over the grid
+ 
+ 
+ # Get variable importance
+ varImp(model3)
+ 
+ 
+ 
+ # Evaluate the model
+ predictions <- predict(model3, newdata = testData)
+ confusionMatrix(predictions, testData$Choice_Treat)
+ labeled_predictions <- predict(model3, newdata = labeled_data)
+ labeled_data$PredictedGroup <- labeled_predictions
+ 
+ table(labeled_data$Choice_Treat, labeled_data$PredictedGroup)
+ 
+ unlabeled_predictions <- predict(model3, newdata = unlabeled_data)
+ labeled_data_id$PredictedGroup <- labeled_predictions  # id-bearing copy gets the same predictions for export
+ data_prediction_labeled<-select(labeled_data_id, c("id", "PredictedGroup"))
+ saveRDS(data_prediction_labeled, "Data/predictions_labeled.RDS")
+ 
+ unlabeled_data$PredictedGroup <- unlabeled_predictions
+ data_prediction<-select(unlabeled_data, c("id", "PredictedGroup"))
+ saveRDS(data_prediction, "Data/predictions.RDS")
+ 
+ print(model3$bestTune)
+ 
+ 
+# table(data$predicted.classes ,data$Choice_Treat)
+# 
+# calculate_metrics <- function(confusion_matrix) {
+#   # Extract values from the confusion matrix
+#   TN <- confusion_matrix[1, 1]
+#   FP <- confusion_matrix[2, 1]
+#   FN <- confusion_matrix[1, 2]
+#   TP <- confusion_matrix[2, 2]
+#   
+#   # Calculate sensitivity
+#   sensitivity <- round(TP / (TP + FN), 2)
+#   
+#   # Calculate specificity
+#   specificity <- round(TN / (TN + FP), 2)
+#   
+#   # Return the results as a list
+#   return(list(sensitivity = sensitivity, specificity = specificity))
+# }
+# 
+# # Calculate metrics
+# metrics <- calculate_metrics(table(data$predicted.classes ,data$Choice_Treat))
+# 
+# # Print results
+# print(paste("Sensitivity:", metrics$sensitivity))
+# print(paste("Specificity:", metrics$specificity))
+# roc_obj <- roc(data$Choice_Treat, data$probabilities)
+# plot(roc_obj, main = "ROC Curve", col = "blue", lwd = 2)
+# auc_value <- auc(roc_obj)
+# best_coords <- coords(roc_obj, "best", best.method="youden")
+# 
+# 
 # cut_off <- best_coords$threshold
\ No newline at end of file
diff --git a/Scripts/ols/ols_consequentiality.R b/Scripts/ols/ols_consequentiality.R
index 981a901..d7ab761 100644
--- a/Scripts/ols/ols_consequentiality.R
+++ b/Scripts/ols/ols_consequentiality.R
@@ -34,6 +34,11 @@ summary(conseq_model_D)
 conseq_model_control_D <- lm(Conseq_score ~ as.factor(Treatment_D) + Z_Mean_NR + as.factor(Gender)+Age_mean+ QFIncome + Uni_degree,data)
 summary(conseq_model_control_D)
 
+cons_data <- left_join(data, matching_group, by="id")  # attach matching-group labels (matching_group defined elsewhere) to respondents
+
+conseq_model_control_G <- lm(Conseq_score ~ as.factor(Matching_Group) + Z_Mean_NR + as.factor(Gender)+Age_mean+ QFIncome + Uni_degree,cons_data)
+summary(conseq_model_control_G)
+
 
 # conseq_model_control_G <- lm(Conseq_score ~ as.factor(Matching_Group) + Z_Mean_NR + as.factor(Gender)+Age_mean+ QFIncome + Uni_degree,data)
 # summary(conseq_model_control_G)
diff --git a/Scripts/ols/ols_time_spent.R b/Scripts/ols/ols_time_spent.R
index fe6de07..bb00c30 100644
--- a/Scripts/ols/ols_time_spent.R
+++ b/Scripts/ols/ols_time_spent.R
@@ -19,8 +19,8 @@ database <- database %>%
                                     Dummy_Treated == 1 & PredictedGroup == 0 ~"Treat Not Pred",
                                     Treatment_A == "Vol_Treated" & PredictedGroup == 1 ~ "Opt Pred",
                                     Treatment_A == "Vol_Treated" & PredictedGroup == 0 ~ "Opt Not Pred",
-                                    Treatment_new == 6 & PredictedGroup == 0 ~ "Control Pred",
-                                    TRUE ~ "Control Not Pred"))
+                                    Treatment_new == 6 & PredictedGroup == 0 ~ "Control Not Pred",
+                                    TRUE ~ "Control Pred"))
 
 data <- database %>%
   group_by(id) %>%
diff --git a/Scripts/visualize_distr_spli.R b/Scripts/visualize_distr_spli.R
index 9fc1bdf..0c152b8 100644
--- a/Scripts/visualize_distr_spli.R
+++ b/Scripts/visualize_distr_spli.R
@@ -16,16 +16,16 @@ mxl_not_tr <- apollo_loadModel("Estimation_results/mxl/Split_samples/MXL_wtp Not
 # List of models
 models <- list(
   "Treated Not Pred" = MXL_wtp_Treated_Not_Pred_model,
-  "Control Not Pred" = MXL_wtp_Control_Not_Pred_model,
-  "Control Pred" = MXL_wtp_Control_Pred_model,
   "Opt Not Pred" = MXL_wtp_Opt_Not_Pred_model,
+  "Control Not Pred" = MXL_wtp_Control_Not_Pred_model,
+  "Treated Pred" = MXL_wtp_Treated_Pred_model,
   "Opt Pred" = MXL_wtp_Opt_Pred_model,
+  "Control Pred" = MXL_wtp_Control_Pred_model,
   "Opt No Info" = MXL_wtp_opt_not_treated,
   "Opt Info" = MXL_wtp_opt_treated,
-  "Treated Pred" = MXL_wtp_Treated_Pred_model,
-  "Control Split Sample" = mxl_not_tr,
+  "Treated Split Sample" = mxl_tr,
   "Opt Split Sample" = mxl_vol_tr,
-  "Treated Split Sample" = mxl_tr
+  "Control Split Sample" = mxl_not_tr
 )
 
 group_color_mapping <- c(
@@ -118,27 +118,6 @@ ggplot(plot_data, aes(x = x, y = density, color = Group, linetype = Prediction))
   theme(legend.position = "right")
 
 
-#### Z-test if distributions are different
-
-z_value <- (MXL_wtp_Treated_Pred_model$estimate["mu_natural"] - MXL_wtp_Control_Pred_model$estimate["mu_natural"])/
-  (sqrt((MXL_wtp_Treated_Pred_model$estimate["sig_natural"]/sqrt(MXL_wtp_Treated_Pred_model$nObs)) + 
-     (MXL_wtp_Control_Pred_model$estimate["sig_natural"]/sqrt(MXL_wtp_Control_Pred_model$nObs))))
-
-
-# Function to calculate z-value using your exact formula
-calculate_z_value <- function(model_1, model_2) {
-  # Calculate the z-value using your specific formula
-  z_value <- (model_1$estimate["mu_natural"] - model_2$estimate["mu_natural"]) /
-    (sqrt((model_1$robse["mu_natural"]^2 + model_2$robse["mu_natural"]^2)))
-  
-  return(z_value)
-}
-
-# Example of using the function
-z_value <- calculate_z_value(MXL_wtp_opt_treated, MXL_wtp_opt_not_treated)
-
-# Print the z-value
-z_value
 
 
 #### Plot underneath
@@ -267,12 +246,12 @@ split_sample_models <- models[names(models) %in% c("Treated Split Sample", "Opt
 
   
   # Function to perform Z-tests within a group while ensuring the correct order
-  perform_z_tests_within_group <- function(model_group, group_name) {
+  perform_z_tests_within_group <- function(model_group, group_name, coef="mu_natural") {
     model_names <- names(model_group)
     z_test_results <- data.frame()
     
     # Define the correct comparison order
-    comparison_order <- c("Treated", "Optional", "Control")  # Order: Treated vs Control, Treated vs Optional, Optional vs Control
+    comparison_order <- c("Treated",  "Opt", "Control")  # Order: Treated vs Control, Treated vs Optional, Optional vs Control
     
     # Reorder the models according to the desired comparison order
     ordered_models <- model_group[order(match(sub(" .*", "", model_names), comparison_order))]
@@ -281,7 +260,7 @@ split_sample_models <- models[names(models) %in% c("Treated Split Sample", "Opt
     for (i in seq_along(ordered_models)) {
       for (j in seq_along(ordered_models)) {
         if (i < j) {  # Avoid duplicate comparisons
-          test_result <- calculate_z_test(ordered_models[[i]], ordered_models[[j]], coef="mu_natural")
+          test_result <- calculate_z_test(ordered_models[[i]], ordered_models[[j]], coef=coef)
           test_result$Comparison <- paste(names(ordered_models)[i], "vs", names(ordered_models)[j])  # Add model pair label
           test_result$Group <- group_name  # Label the group
           z_test_results <- bind_rows(z_test_results, test_result)
@@ -292,9 +271,9 @@ split_sample_models <- models[names(models) %in% c("Treated Split Sample", "Opt
   }
   
   # Function to perform cross-group tests while ensuring "Predicted vs Not Predicted"
-  perform_cross_group_tests <- function(not_predicted, predicted) {
+  perform_cross_group_tests <- function(not_predicted, predicted, coef="mu_natural") {
     z_test_results <- data.frame()
-    comparison_order <- c("Treated", "Optional", "Control")  # Ensure correct order
+    comparison_order <- c("Treated",  "Optional", "Control")  # Ensure correct order
     
     category_map <- list(
       "Treated Pred" = "Treated Not Pred",
@@ -310,7 +289,7 @@ split_sample_models <- models[names(models) %in% c("Treated Split Sample", "Opt
       cat_not_pred <- category_map[[cat_pred]]
       
       if (cat_pred %in% names(ordered_predicted) & cat_not_pred %in% names(ordered_not_predicted)) {
-        test_result <- calculate_z_test(ordered_predicted[[cat_pred]], ordered_not_predicted[[cat_not_pred]], coef="mu_natural")
+        test_result <- calculate_z_test(ordered_predicted[[cat_pred]], ordered_not_predicted[[cat_not_pred]], coef=coef)
         test_result$Comparison <- paste(cat_pred, "vs", cat_not_pred)  # Now "Predicted vs Not Predicted"
         test_result$Group <- "Predicted vs Not Predicted"
         z_test_results <- bind_rows(z_test_results, test_result)
@@ -321,13 +300,20 @@ split_sample_models <- models[names(models) %in% c("Treated Split Sample", "Opt
   }
   
   # Perform Z-tests within each group while enforcing the correct order
-  z_tests_not_predicted <- perform_z_tests_within_group(not_predicted_models, "Not Predicted")
-  z_tests_predicted <- perform_z_tests_within_group(predicted_models, "Predicted")
-  z_tests_split_sample <- perform_z_tests_within_group(split_sample_models, "Split Sample")
+  coefficients_to_test <- c("mu_natural", "mu_walking", "mu_rent", "ASC_1", "ASC_2")
+  #coefficients_to_test <- c("sig_natural", "sig_walking", "sig_rent", "sig_ASC_1", "sig_ASC_2") # test sds
+
   
-  # Perform cross-group tests
-  z_tests_cross_group <- perform_cross_group_tests(not_predicted_models, predicted_models)
+  ##### For all 
+  # Within groups
+  z_tests_not_predicted <- perform_z_tests_within_group(not_predicted_models, "Not Predicted", coefficients_to_test)
+  z_tests_predicted <- perform_z_tests_within_group(predicted_models, "Predicted", coefficients_to_test)
+  z_tests_split_sample <- perform_z_tests_within_group(split_sample_models, "Split Sample", coefficients_to_test)
   
+  # Cross-group
+  z_tests_cross_group <- perform_cross_group_tests(not_predicted_models, predicted_models, coefficients_to_test)  # NOTE(review): coef default is a single string ("mu_natural") but a 5-element vector is passed -- confirm calculate_z_test handles vector input
+  
+################ Do Plots #########
   # Combine all Z-test results
   z_test_results_all <- bind_rows(
     z_tests_not_predicted, 
@@ -347,69 +333,170 @@ split_sample_models <- models[names(models) %in% c("Treated Split Sample", "Opt
   z_test_results_splits <- z_test_results_all %>% filter(Group %in% c("Split Sample"))
   z_test_results_cross <- z_test_results_all %>% filter(Group == "Predicted vs Not Predicted")
   
-  ### **Visualization 1: Within-Group Comparisons (Ordered by Group)** ###
-  ggplot(z_test_results_within, aes(x = reorder(Comparison, Group), y = Mean_Difference, color = Significant)) +
+
+  
+
+
+  
+  z_test_results_all <- z_test_results_all %>%
+    mutate(
+      Coefficient = sub("\\.+\\d+", "", rownames(z_test_results_all)),  # Strip ".<n>" suffixes -- NOTE(review): dplyr::bind_rows normally resets rownames to "1","2",...; verify these are actually coefficient names
+      Significant = ifelse(P_Value < 0.05, "Statistically significant difference", "No statistically significant difference")
+    )  
+  
+  
+  ggplot(z_test_results_all, aes(x = reorder(Comparison, Mean_Difference), y = Mean_Difference, color = Significant)) +
     geom_point(size = 3) +
     geom_errorbar(aes(ymin = Lower_CI, ymax = Upper_CI), width = 0.2) +
-    scale_color_manual(values = c("Significant" = "red", "Not Significant" = "blue")) +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +  # Line at zero
-    coord_flip() +  # Flip for readability
+    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
+    scale_color_manual(values = c("Statistically significant difference" = "red", "No statistically significant difference" = "grey")) +
+    coord_flip() +
+    facet_grid(Group ~ Coefficient, scales = "free", space = "free_y") +
     labs(
-      #title = "Within-Group Z-Test Results for Mean Differences",
       x = "Model Comparisons",
-      y = "Mean Difference (Naturalness)",
+      y = "Mean Difference",
       color = "Significance"
     ) +
-    facet_wrap(~ Group, scales = "free_y", ncol = 1) +  # Separate groups visually
-    theme_minimal() +
-    theme(legend.position = "right")
+    theme_bw() +
+    theme(
+      strip.text = element_text(face = "bold"),
+      legend.position = "bottom",
+      panel.spacing = unit(0.8, "lines")
+    )  # NOTE(review): this combined all-coefficient plot is never passed to ggsave() -- intentional?
   
-  ggsave("Figures/z_test_predictions_between.png", width=8, height=5, dpi="print")
+#### Only ASC and rent for appendix
+  z_test_results_appendix <- z_test_results_all %>%
+    filter(!Coefficient %in% c("mu_natural", "mu_walking"))
   
-  #### Only split samples
+  ggplot(z_test_results_appendix, aes(x = factor(Comparison, level=c("Treated Not Pred vs Opt Not Pred", "Opt Not Pred vs Control Not Pred",
+                                                                      "Treated Not Pred vs Control Not Pred", "Treated Pred vs Opt Pred",
+                                                                        "Opt Pred vs Control Pred", "Treated Pred vs Control Pred",
+                                                                        "Treated Split Sample vs Opt Split Sample", "Opt Split Sample vs Control Split Sample",
+                                                                     "Treated Split Sample vs Control Split Sample", "Control Pred vs Control Not Pred",
+                                                                     "Opt Pred vs Opt Not Pred","Treated Pred vs Treated Not Pred"
+                                                                     )), y = Mean_Difference, color = Significant)) +
+    geom_point(size = 3) +
+    geom_errorbar(aes(ymin = Lower_CI, ymax = Upper_CI), width = 0.2) +
+    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
+    scale_color_manual(values = c("Statistically significant difference" = "red", "No statistically significant difference" = "grey")) +
+    coord_flip() +
+    facet_grid(Group ~ Coefficient, scales = "free", space = "free_y") +
+    labs(
+      x = "Model Comparisons",
+      y = "Mean Difference",
+      color = "Significance"
+    ) +
+    theme_bw() +
+    theme(
+      strip.text = element_text(face = "bold"),
+      legend.position = "bottom",
+      panel.spacing = unit(0.8, "lines")
+    )
+  
+  
+  ggsave("Figures/z_test_appendix.png", width=12, height=10, dpi="print")  # NOTE(review): relies on last_plot(), which is only set when the plot is printed; when sourced non-interactively, pass plot = explicitly
+
+  z_test_results_split_nat_wd <- z_test_results_all %>%
+   filter(Coefficient %in% c("mu_natural", "mu_walking")) %>% filter(Group=="Split Sample") %>%  mutate(Comparison = case_when(
+     grepl("Treated Split Sample vs Control Split Sample", Comparison) ~ "Treated vs Control",
+     grepl("Opt Split Sample vs Control Split Sample", Comparison) ~ "Optional vs Control",
+     grepl("Treated Split Sample vs Opt Split Sample", Comparison) ~ "Treated vs Optional",
+     TRUE ~ Comparison  # Keep original if not matched
+   ))
+  
+  
+  ggplot(z_test_results_split_nat_wd, aes(x = factor(Comparison, level=c("Treated vs Optional", "Optional vs Control","Treated vs Control")), y = Mean_Difference, color = Significant)) +
+    geom_point(size = 3) +
+    geom_errorbar(aes(ymin = Lower_CI, ymax = Upper_CI), width = 0.2) +
+    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
+    scale_color_manual(values = c("Statistically significant difference" = "red", "No statistically significant difference" = "grey")) +
+    coord_flip() +
+    facet_grid(Group ~ Coefficient, scales = "free", space = "free_y",
+               labeller = labeller(
+                 Coefficient = c(
+                   "mu_natural" = "Naturalness",
+                   "mu_walking" = "Walking Distance"))) +
+    labs(
+      x = "",
+      y = "Mean Difference",
+      color = "Significance"
+    ) +
+    theme_bw() +
+    theme(
+      strip.text = element_text(face = "bold"),
+      legend.position = "bottom",
+      panel.spacing = unit(0.8, "lines")
+    )
+  
+  ggsave("Figures/z_test_split_samples.png", width=8, height=5, dpi="print")
   
-  z_test_results_splits <- z_test_results_splits %>%
-    mutate(Comparison = case_when(
+  
+  z_test_results_pred_nat_wd <- z_test_results_all %>%
+    filter(Coefficient %in% c("mu_natural", "mu_walking")) %>% filter(Group %in% c("Predicted", "Not Predicted")) %>%  mutate(Comparison = case_when(
       grepl("Treated Split Sample vs Control Split Sample", Comparison) ~ "Treated vs Control",
       grepl("Treated Split Sample vs Opt Split Sample", Comparison) ~ "Treated vs Optional",
       grepl("Control Split Sample vs Opt Split Sample", Comparison) ~ "Control vs Optional",
       TRUE ~ Comparison  # Keep original if not matched
     ))
   
-  ggplot(z_test_results_splits, aes(x = reorder(Comparison, Group), y = Mean_Difference, color = Significant)) +
+  ggplot(z_test_results_pred_nat_wd, aes(x = factor(Comparison, level=c("Treated Not Pred vs Opt Not Pred", "Opt Not Pred vs Control Not Pred",
+                                                                        "Treated Not Pred vs Control Not Pred", "Treated Pred vs Opt Pred",
+                                                                        "Opt Pred vs Control Pred", "Treated Pred vs Control Pred")), y = Mean_Difference, color = Significant)) +
     geom_point(size = 3) +
     geom_errorbar(aes(ymin = Lower_CI, ymax = Upper_CI), width = 0.2) +
-    scale_color_manual(values = c("Significant" = "red", "Not Significant" = "blue")) +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +  # Line at zero
-    coord_flip() +  # Flip for readability
+    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
+    scale_color_manual(values = c("Statistically significant difference" = "red", "No statistically significant difference" = "grey")) +
+    coord_flip() +
+    facet_grid(Group ~ Coefficient, scales = "free", space = "free_y",
+               labeller = labeller(
+                 Coefficient = c(
+                   "mu_natural" = "Naturalness",
+                   "mu_walking" = "Walking Distance"))) +
     labs(
-      # title = "Within-Group Z-Test Results for Mean Differences",
-      x = "Model Comparisons",
-      y = "Mean Difference (Naturalness)",
+      x = "",
+      y = "Mean Difference",
       color = "Significance"
     ) +
-    theme_minimal() +
-    theme(legend.position = "right")
+    theme_bw() +
+    theme(
+      strip.text = element_text(face = "bold"),
+      legend.position = "bottom",
+      panel.spacing = unit(0.8, "lines")
+    )
   
-  ggsave("Figures/z_test_split_samples.png", width=8, height=5, dpi="print")
+  ggsave("Figures/z_test_predictions_between.png", width=9, height=5, dpi="print")
   
   
-  ### **Visualization 2: Predicted vs Not Predicted Comparisons** ###
-  ggplot(z_test_results_cross, aes(x = Comparison, y = Mean_Difference, color = Significant)) +
+  z_test_results_pred_vs_nat_wd <- z_test_results_all %>%
+    filter(Coefficient %in% c("mu_natural", "mu_walking")) %>% filter(Group %in% c("Predicted vs Not Predicted")) %>%  mutate(Comparison = case_when(
+      grepl("Treated Split Sample vs Control Split Sample", Comparison) ~ "Treated vs Control",
+      grepl("Treated Split Sample vs Opt Split Sample", Comparison) ~ "Treated vs Optional",
+      grepl("Control Split Sample vs Opt Split Sample", Comparison) ~ "Control vs Optional",
+      TRUE ~ Comparison  # Keep original if not matched
+    ))  # NOTE(review): the grepl patterns above target "Split Sample" labels, which never occur in this group -- mutate appears to be a no-op copied from the split-sample block
+  
+  ggplot(z_test_results_pred_vs_nat_wd, aes(x = factor(Comparison, level=c("Control Pred vs Control Not Pred", "Opt Pred vs Opt Not Pred","Treated Pred vs Treated Not Pred")), y = Mean_Difference, color = Significant)) +
     geom_point(size = 3) +
     geom_errorbar(aes(ymin = Lower_CI, ymax = Upper_CI), width = 0.2) +
-    scale_color_manual(values = c("Significant" = "red", "Not Significant" = "blue")) +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +  # Line at zero
-    coord_flip() +  # Flip for readability
+    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
+    scale_color_manual(values = c("Statistically significant difference" = "red", "No statistically significant difference" = "grey")) +
+    coord_flip() +
+    facet_grid(Group ~ Coefficient, scales = "free", space = "free_y",
+               labeller = labeller(
+                 Coefficient = c(
+                   "mu_natural" = "Naturalness",
+                   "mu_walking" = "Walking Distance"))) +
     labs(
-      #title = "Predicted vs Not Predicted (Same Group) Z-Test Results",
-      x = "Model Comparisons",
-      y = "Mean Difference (Naturalness)",
+      x = "",
+      y = "Mean Difference",
       color = "Significance"
     ) +
-    theme_minimal() +
-    theme(legend.position = "right")
+    theme_bw() +
+    theme(
+      strip.text = element_text(face = "bold"),
+      legend.position = "bottom",
+      panel.spacing = unit(0.8, "lines")
+    )
   
   ggsave("Figures/z_test_predictions.png", width=8, height=5, dpi="print")
-  
   
\ No newline at end of file
-- 
GitLab