library(viridis)

data_grouped <- database %>%
  group_by(id) %>%
  summarise(across(c(Treatment, Treatment_new), list( gr= ~unique(.))))

data_grouped <- data_grouped %>%
  mutate(Treatment_name = case_when(
    Treatment_new_gr == 1 ~ 'Video 1',
    Treatment_new_gr == 2 ~ 'No Video 1',
    Treatment_new_gr == 3 ~ 'No Info 2',
    Treatment_new_gr == 4 ~ 'No Video 2',
    Treatment_new_gr == 5 ~ 'Video 2',
    Treatment_new_gr == 6 ~ 'No Treatment 3',
    TRUE ~ NA_character_
  ))
treatment_order<- c("Video 1", "No Video 1", "No Info 2","No Video 2", "Video 2"  ,"No Treatment 3")

data_grouped <- data_grouped %>% filter(!is.na(Treatment_gr))

ggplot(data=data_grouped) +
  geom_bar(aes(x=factor(Treatment_name, levels=c("No Video 1", "Video 1", "No Info 2", "No Video 2", "Video 2",
                                                 "No Treatment 3")),
                        group=as.factor(Treatment_gr), fill=as.factor(Treatment_gr))) +
  xlab("Treatment Group") +
  ylab("Count") + 
  scale_fill_viridis(option="D", discrete = T, labels=c("Always Info", "Optional Info", "Never Info")) +
  labs(fill="Treatment Group") 

ggsave("Figures/barplot_treatment.png", width=7, height=5, dpi="print")


#### Inspect socio-demographic differences ####


### Case A
treatment_socio_A <- database_full %>% 
  group_by(Treatment_A) %>% 
  summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ",
                 "Naturalness_SQ", "Employment_full", "Z_Mean_NR"),
               ~ round(mean(., na.rm = TRUE), 2))


# Export table as tex file 
print(xtable(treatment_socio_A, type ="latex"), 
      include.rownames = F, file ="Tables/socio_demos_A.tex")

### Case B

treatment_socio_B <- database_full %>% filter(Treatment_B == "Treated" | Treatment_A == "Not_Treated") %>% 
  group_by(Treatment_B) %>% 
  summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ",
                 "Naturalness_SQ", "Employment_full", "Pensioner"),
               ~ round(mean(., na.rm = TRUE), 2))


# Export table as tex file 
print(xtable(treatment_socio_B, type ="latex"), 
      include.rownames = F, file ="Tables/socio_demos_B.tex")

### Case C
treatment_socio <- database_full %>% filter(!is.na(Treatment)) %>% group_by(Treatment_C) %>% 
  summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ",
                 "Naturalness_SQ", "Employment_full", "Z_Mean_NR"),
               ~ round(mean(., na.rm = TRUE), 2))

treatment_socio_C <- database_full %>% filter(!is.na(Treatment_new)) %>% group_by(Treatment_C) %>% 
  summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ",
                 "Naturalness_SQ", "Employment_full",  "Z_Mean_NR"),
               ~ round(mean(., na.rm = TRUE), 2))
# 
# treatment_socio_C <- treatment_socio_C %>%  mutate(Treatment_name = case_when(
#   Treatment_new == 1 ~ 'Video 1',
#   Treatment_new == 2 ~ 'No Video 1',
#   Treatment_new == 3 ~ 'No Info 2',
#   Treatment_new == 4 ~ 'No Video 2',
#   Treatment_new == 5 ~ 'Video 2',
#   Treatment_new == 6 ~ 'No Treatment 3',
#   TRUE ~ NA_character_
# ))

# Export table as tex file 
print(xtable(treatment_socio_C, type ="latex"), 
      include.rownames = F, file ="Tables/socio_demos_C.tex")


### Create boxplot for text treatment page #

database_textpage <- database_full %>% filter(Treatment_new != 6)

ggplot(data=database_textpage) +
  geom_boxplot(aes(y=groupTime1774, x= Treatment_new, group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
  coord_cartesian(ylim = c(0, 180)) +
  labs(fill="Treatment") +
  xlab("Treatment") +
  ylab("Time on Info Text Page")

ggsave("Figures/treatment_time_bxplt.png", width=7, height = 5, dpi="print")



table(database_full$Treatment_C)
### Create boxplot for interview time per group #, remove quiz group 1776
database_full <- database_full %>% mutate(groupTime1774 = case_when(is.na(groupTime1774) ~ 0, TRUE ~ groupTime1774),
                                          groupTime1784 = case_when(is.na(groupTime1784) ~ 0, TRUE ~ groupTime1784),
                                          groupTime1775 = case_when(is.na(groupTime1775) ~ 0, TRUE ~ groupTime1775),
                                          groupTime1785 = case_when(is.na(groupTime1785) ~ 0, TRUE ~ groupTime1785),
                                          groupTime1786 = case_when(is.na(groupTime1786) ~ 0, TRUE ~ groupTime1786)) %>% 
  mutate(interviewtime_net = interviewtime - groupTime1774 - groupTime1784 - groupTime1775 - groupTime1785- groupTime1786 - groupTime1776)
# Calculate the cutoff values for the lowest and highest 1 percent
lower_cutoff <- quantile(database_full$interviewtime_net, 0.01)
upper_cutoff <- quantile(database_full$interviewtime_net, 0.95)

# Filter the data to keep only values within the specified range
database_full <- database_full %>%
  mutate(interviewtime_net_clean = ifelse(between(interviewtime_net, lower_cutoff, upper_cutoff), interviewtime_net, NA)) 

# Assuming 'database' is your data frame
video2_data <- subset(database_full, Treatment_C == "Video 2")

# Summary statistics for interview time in 'Video 2' cases
summary(video2_data$interviewtime_net_clean)
# Assuming 'database' is your data frame
sorted_database <- video2_data[order(video2_data$interviewtime_net_clean, decreasing = F), ]

# Display the ten highest values of interview time
top_10_highest <- tail(sorted_database$interviewtime_net_clean, 200)
print(top_10_highest)


bxplt_interview_time_A<-ggplot(data=database_full[!is.na(database_full$Treatment_A), ]) +
  geom_boxplot(aes(y=interviewtime_net_clean, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) +
  coord_cartesian(ylim = c(500, 2900)) +
  labs(fill="Treatment") +
  xlab("") +
  ylab("Net Interview Time (s)")+
  scale_x_discrete( guide = guide_axis(angle = 45))+
  theme_minimal() +  # Adjust the theme as needed
  theme(legend.position = "none")

bxplt_interview_time_B<-ggplot(data=database_full) +
  geom_boxplot(aes(y=interviewtime_net_clean, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) +
  coord_cartesian(ylim = c(500, 2900)) +
  labs(fill="Treatment") +
  xlab("") +
  ylab("Net Interview Time (s)")+
  scale_x_discrete( guide = guide_axis(angle = 45))+
  theme_minimal() +  # Adjust the theme as needed
  theme(legend.position = "none")

bxplt_interview_time_C<-ggplot(data=database_full) +
  geom_boxplot(aes(y=interviewtime_net_clean, x= factor(Treatment_new, labels = treatment_order), group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
  coord_cartesian(ylim = c(500, 2900)) +
  labs(fill="Treatment") +
  xlab("") +
  ylab("Net Interview Time (s)")+
  scale_x_discrete( guide = guide_axis(angle = 45))+
  theme_minimal() +  # Adjust the theme as needed
  theme(legend.position = "none") 

ggsave("Figures/interview_time_bxplt.png", width=7, height = 5, dpi="print")


# Boxplot Video Time

video_resp <- database_full %>% filter(Treatment_new == 1 | Treatment_new == 5)

ggplot(data=video_resp) +
  geom_boxplot(aes(y=groupTime1784, x= Treatment_name, group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
  coord_cartesian(ylim = c(100, 300)) +
  labs(fill="Treatment") +
  xlab("Treatment") +
  ylab("Time on Video Page")

ggsave("Figures/video_time_bxplt.png", width=7, height = 5, dpi="print")


#### Time per choice cards ####

database_full <- database_full %>% rowwise() %>% mutate(CC_time = sum(c(c_across(groupTime1740:groupTime1767), groupTime1735,
                                                                           groupTime1736), na.rm=TRUE),
                                                           CC_time_mean = CC_time/10)

# Calculate the cutoff values for the lowest and highest 1 percent
lower_cutoff <- quantile(database_full$CC_time_mean, 0.01)
upper_cutoff <- quantile(database_full$CC_time_mean, 0.99)

# Filter the data to keep only values within the specified range
database_full <- database_full %>%
  mutate(CC_time_mean_clean = ifelse(between(CC_time_mean, lower_cutoff, upper_cutoff), CC_time_mean, NA)) 
  
bxplt_cc_time_A<- ggplot(data=database_full[!is.na(database_full$Treatment_A), ]) +
  geom_boxplot(aes(y=CC_time_mean_clean, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) +
  coord_cartesian(ylim = c(5, 40)) +
  labs(fill="Treatment") +
  xlab("Treatment") +
  ylab("Mean Time CC (s)")+
  scale_x_discrete( guide = guide_axis(angle = 45))+
  theme_minimal() +  # Adjust the theme as needed
  theme(legend.position = "none") 

bxplt_cc_time_B<- ggplot(data=database_full) +
  geom_boxplot(aes(y=CC_time_mean_clean, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) +
  coord_cartesian(ylim = c(5, 40)) +
  labs(fill="Treatment") +
  xlab("Treatment") +
  ylab("Mean Time CC (s)")+
  scale_x_discrete( guide = guide_axis(angle = 45))+
  theme_minimal() +  # Adjust the theme as needed
  theme(legend.position = "none") 

 bxplt_cc_time_C<- ggplot(data=database_full) +
  geom_boxplot(aes(y=CC_time_mean_clean, x= factor(Treatment_new, labels = treatment_order), group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
 coord_cartesian(ylim = c(5, 40)) +
  labs(fill="Treatment") +
  xlab("Treatment") +
  ylab("Mean Time CC (s)")+
    scale_x_discrete( guide = guide_axis(angle = 45))+
    theme_minimal() +  # Adjust the theme as needed
    theme(legend.position = "none") 
  
  ggsave("Figures/cc_time_bxplt.png", width=7, height = 5, dpi="print")
  
#### Regression Treatment Time ####
  

no_panel <- database_full %>% filter(!is.na(Treatment)) %>% group_by(id) %>% 
  summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "groupTime1774", "Treatment_new", 
                 "WorkingTime", "interviewtime", "groupTime1733", "groupTime1784", "groupTime1775",
                 "groupTime1785", "Number_Kids", "Kids_Dummy", 
                 "Screen", "Naturalness_SQ", "Employment_full", "Pensioner"),
               ~ round(mean(., na.rm = TRUE), 2))

#### Number opt-out choices
  
  database_full <- database_full %>%
    group_by(id) %>%
    mutate(count_choosen_3 = sum(choice == 3, na.rm = TRUE)) %>%
    ungroup()
  
  bxplt_opt_A <-ggplot(data=database_full[!is.na(database_full$Treatment_A), ])  +
    geom_boxplot(aes(y=count_choosen_3, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) +
    stat_summary(
      aes(x = Treatment_A, y = count_choosen_3, group = Treatment_A),
      fun = mean, geom = "point", shape = 18, size = 3,
      position = position_dodge(width = 0.75) )+
    coord_cartesian(ylim = c(0, 10)) +
    labs(fill="Treatment") +
    xlab("") +
    ylab("No. of Opt-Out Choices")+
    scale_x_discrete( guide = guide_axis(angle = 45))+
    theme_minimal() +  # Adjust the theme as needed
    theme(legend.position = "none") 
  
  bxplt_opt_B <-ggplot(data=database_full) +
    geom_boxplot(aes(y=count_choosen_3, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) +
    stat_summary(
      aes(x = Treatment_B, y = count_choosen_3, group = Treatment_B),
      fun = mean, geom = "point", shape = 18, size = 3,
      position = position_dodge(width = 0.75) )+
    coord_cartesian(ylim = c(0, 10)) +
    labs(fill="Treatment") +
    xlab("") +
    ylab("No. of Opt-Out Choices")+
    scale_x_discrete( guide = guide_axis(angle = 45))+
    theme_minimal() +  # Adjust the theme as needed
    theme(legend.position = "none") 
  
  bxplt_opt_C <-ggplot(data=database_full) +
    geom_boxplot(aes(y=count_choosen_3, x= factor(Treatment_new, labels = treatment_order) , group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
    stat_summary(
      aes(x = factor(Treatment_new, labels = treatment_order), y = count_choosen_3, group = Treatment_name),
      fun = mean, geom = "point", shape = 18, size = 3,
      position = position_dodge(width = 0.75) )+
    coord_cartesian(ylim = c(0, 10)) +
    labs(fill="Treatment") +
    xlab("") +
    ylab("No. of Opt-Out Choices")+
    scale_x_discrete( guide = guide_axis(angle = 45))+
    theme_minimal() +  # Adjust the theme as needed
    theme(legend.position = "none") 
  
  ggsave("Figures/number_opt_out_bxplt.png", width=7, height = 5, dpi="print")
  
# Take a look at always opt out people   
always_opt_out <- database_full %>% filter(count_choosen_3 == 10)

table(always_opt_out$Treatment)
table(always_opt_out$Treatment_C)
summary(always_opt_out$Z_Mean_NR)
  
  
#### Quiz questions
  
  data <- database_full %>%
    group_by(id) %>%
    dplyr::slice(1) %>%
    ungroup()
  
  ## Count correct answers after Treatment for Treatment Group (before CE)
  data <- data %>%
    mutate(correct_q1 = ifelse(TV01W3 == "A1", 1, 0))     %>%
    mutate(correct_q2 = ifelse(TV02W3 == "A2", 1, 0)) %>%
    mutate(correct_q3 = ifelse(TV03W3 == "AO01", 1, 0)) %>%
    mutate(correct_q4 = ifelse(TV04W3 == "AO01", 1, 0)) %>%
    mutate(correct_q5 = ifelse(TV05W3 == "AO02", 1, 0)) %>%
    mutate(correct_q6 = ifelse(TV06W3 == "AO02", 1, 0)) %>%
    mutate(correct_q7 = ifelse(TV07W3 == "AO01", 1, 0)) %>%
    mutate(number_correct = rowSums(select(., starts_with("correct_q")))) %>%
    mutate(number_correct = ifelse(TV01W3=="",NA , number_correct))
  
  table(data$number_correct, useNA = "always" )
  ## Count correct answers for all groups (after DCE)
  data <- data %>%
    mutate(b_correct_q1 = ifelse(TV11W3 == "A1", 1, 0))     %>%
    mutate(b_correct_q2 = ifelse(TV12W3 == "A2", 1, 0)) %>%
    mutate(b_correct_q3 = ifelse(TV15W3 == "AO01", 1, 0)) %>%
    mutate(b_correct_q4 = ifelse(TV16W3 == "AO01", 1, 0)) %>%
    mutate(b_correct_q5 = ifelse(TV17W3 == "AO02", 1, 0)) %>%
    mutate(b_correct_q6 = ifelse(TV18W3 == "AO02", 1, 0)) %>%
    mutate(b_correct_q7 = ifelse(TV19W3 == "AO01", 1, 0)) %>%
    mutate(b_number_correct = rowSums(select(., starts_with("b_correct_q"))))%>%
    mutate(number_correct = ifelse(is.na(number_correct), b_number_correct, number_correct)) %>% 
    mutate(percentage_correct = (number_correct/7)*100)
  
  bxplt_quiz_A <- ggplot(data=data[!is.na(data$Treatment_A), ])  +
    geom_boxplot(aes(y=percentage_correct, x= Treatment_A , group=Treatment_A, fill=Treatment_A), outlier.shape = NA) +
    stat_summary(
      aes(x = Treatment_A, y = percentage_correct, group = Treatment_A),
      fun = mean, geom = "point", shape = 18, size = 3,
      position = position_dodge(width = 0.75) )+
    coord_cartesian(ylim = c(20, 100)) +
    labs(fill="Treatment") +
    xlab("") +
    ylab("%. of Correct Statements")+
    scale_x_discrete( guide = guide_axis(angle = 45))+
    theme_minimal() +  # Adjust the theme as needed
    theme(legend.position = "none") 
  
  bxplt_quiz_B <- ggplot(data=data) +
    geom_boxplot(aes(y=percentage_correct, x= Treatment_B , group=Treatment_B, fill=Treatment_B), outlier.shape = NA) +
    stat_summary(
      aes(x = Treatment_B, y = percentage_correct, group = Treatment_B),
      fun = mean, geom = "point", shape = 18, size = 3,
      position = position_dodge(width = 0.75) )+
    coord_cartesian(ylim = c(20, 100)) +
    labs(fill="Treatment") +
    xlab("") +
    ylab("%. of Correct Statements")+
    scale_x_discrete( guide = guide_axis(angle = 45))+
    theme_minimal() +  # Adjust the theme as needed
    theme(legend.position = "none") 
  
  bxplt_quiz_C <- ggplot(data=data) +
    geom_boxplot(aes(y=percentage_correct, x= factor(Treatment_new, labels = treatment_order) , group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
    stat_summary(
    aes(x = factor(Treatment_new, labels = treatment_order), y = percentage_correct, group = Treatment_name),
    fun = mean, geom = "point", shape = 18, size = 3,
    position = position_dodge(width = 0.75) )+
    coord_cartesian(ylim = c(20, 100)) +
    labs(fill="Treatment") +
    xlab("") +
    ylab("%. of Correct Statements")+
    scale_x_discrete( guide = guide_axis(angle = 45))+
    theme_minimal() +  # Adjust the theme as needed
    theme(legend.position = "none") 
  
  ggsave("Figures/correct_statements_bxplt.png", width=7, height = 5, dpi="print")
  quiz_data<-data
# Maybe need to add other groups, ask Fabian
no_panel <- no_panel %>% mutate(groupTime1774 = case_when(is.na(groupTime1774) ~ 0, TRUE ~ groupTime1774),
                                groupTime1784 = case_when(is.na(groupTime1784) ~ 0, TRUE ~ groupTime1784),
                                groupTime1775 = case_when(is.na(groupTime1775) ~ 0, TRUE ~ groupTime1775),
                                groupTime1785 = case_when(is.na(groupTime1785) ~ 0, TRUE ~ groupTime1785)) %>% 
  mutate(interviewtime_net = interviewtime - groupTime1774 - groupTime1784 - groupTime1775 - groupTime1785)

no_panel_filt <- no_panel %>% filter(Treatment_new != 3 & Treatment_new != 6) %>% 
  mutate(Dummy_Group_2 = case_when(Treatment_new > 2 ~ 1, TRUE ~0))

treatment_model <- lm(data=no_panel_filt, log(groupTime1774) ~ Age + Gender_female + 
                        Kids_Dummy + Uni_degree + log(interviewtime_net) + log(Screen) + Dummy_Group_2 +
                        Employment_full + Pensioner)

summary(treatment_model)


interviewtime_model <- lm(data=no_panel, log(interviewtime_net) ~ Age + Gender_female + 
                            log(Rent_SQ) + Kids_Dummy + Uni_degree + Employment_full +
                            Pensioner + log(Screen))

summary(interviewtime_model)