library(viridis) data_grouped <- database %>% group_by(id) %>% summarise(across(c(Treatment, Treatment_new), list( gr= ~unique(.)))) data_grouped <- data_grouped %>% mutate(Treatment_name = case_when( Treatment_new_gr == 1 ~ 'Video 1', Treatment_new_gr == 2 ~ 'No Video 1', Treatment_new_gr == 3 ~ 'No Info 2', Treatment_new_gr == 4 ~ 'No Video 2', Treatment_new_gr == 5 ~ 'Video 2', Treatment_new_gr == 6 ~ 'No Treatment 3', TRUE ~ NA_character_ )) treatment_order<- c("Video 1", "No Video 1", "No Info 2","No Video 2", "Video 2" ,"No Treatment 3") data_grouped <- data_grouped %>% filter(!is.na(Treatment_gr)) ggplot(data=data_grouped) + geom_bar(aes(x=factor(Treatment_name, levels=c("No Video 1", "Video 1", "No Info 2", "No Video 2", "Video 2", "No Treatment 3")), group=as.factor(Treatment_gr), fill=as.factor(Treatment_gr))) + xlab("Treatment Group") + ylab("Count") + scale_fill_viridis(option="D", discrete = T, labels=c("Always Info", "Optional Info", "Never Info")) + labs(fill="Treatment Group") ggsave("Figures/barplot_treatment.png", width=7, height=5, dpi="print") #### Inspect socio-demographic differences #### ### Case A treatment_socio_A <- database_full %>% group_by(Treatment_A) %>% summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ", "Naturalness_SQ", "Employment_full", "Z_Mean_NR"), ~ round(mean(., na.rm = TRUE), 2)) # Export table as tex file print(xtable(treatment_socio_A, type ="latex"), include.rownames = F, file ="Tables/socio_demos_A.tex") ### Case B treatment_socio_B <- database_full %>% filter(Treatment_B == "Treated" | Treatment_A == "Not_Treated") %>% group_by(Treatment_B) %>% summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ", "Naturalness_SQ", "Employment_full", "Pensioner"), ~ round(mean(., na.rm = TRUE), 2)) # Export table as tex file print(xtable(treatment_socio_B, type ="latex"), include.rownames = F, file ="Tables/socio_demos_B.tex") ### Case C treatment_socio <- database_full %>% filter(!is.na(Treatment)) %>% group_by(Treatment_C) %>% summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ", "Naturalness_SQ", "Employment_full", "Z_Mean_NR"), ~ round(mean(., na.rm = TRUE), 2)) treatment_socio_C <- database_full %>% filter(!is.na(Treatment_new)) %>% group_by(Treatment_C) %>% summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ", "Naturalness_SQ", "Employment_full", "Z_Mean_NR"), ~ round(mean(., na.rm = TRUE), 2)) # # treatment_socio_C <- treatment_socio_C %>% mutate(Treatment_name = case_when( # Treatment_new == 1 ~ 'Video 1', # Treatment_new == 2 ~ 'No Video 1', # Treatment_new == 3 ~ 'No Info 2', # Treatment_new == 4 ~ 'No Video 2', # Treatment_new == 5 ~ 'Video 2', # Treatment_new == 6 ~ 'No Treatment 3', # TRUE ~ NA_character_ # )) # Export table as tex file print(xtable(treatment_socio_C, type ="latex"), include.rownames = F, file ="Tables/socio_demos_C.tex") ### Create boxplot for text treatment page # database_textpage <- database_full %>% filter(Treatment_new != 6) ggplot(data=database_textpage) + geom_boxplot(aes(y=groupTime1774, x= Treatment_new, group=Treatment_name, fill=Treatment_name), outlier.shape = NA) + coord_cartesian(ylim = c(0, 180)) + labs(fill="Treatment") + xlab("Treatment") + ylab("Time on Info Text Page") ggsave("Figures/treatment_time_bxplt.png", width=7, height = 5, dpi="print") table(database_full$Treatment_C) ### Create boxplot for interview time per group #, remove quiz group 1776 database_full <- database_full %>% mutate(groupTime1774 = case_when(is.na(groupTime1774) ~ 0, TRUE ~ groupTime1774), groupTime1784 = case_when(is.na(groupTime1784) ~ 0, TRUE ~ groupTime1784), groupTime1775 = case_when(is.na(groupTime1775) ~ 0, TRUE ~ groupTime1775), groupTime1785 = case_when(is.na(groupTime1785) ~ 0, TRUE ~ groupTime1785), groupTime1786 = case_when(is.na(groupTime1786) ~ 0, TRUE ~ groupTime1786)) %>% mutate(interviewtime_net = interviewtime - groupTime1774 - groupTime1784 - groupTime1775 - groupTime1785- groupTime1786 - groupTime1776) # Calculate the cutoff values for the lowest and highest 1 percent lower_cutoff <- quantile(database_full$interviewtime_net, 0.01) upper_cutoff <- quantile(database_full$interviewtime_net, 0.95) # Filter the data to keep only values within the specified range database_full <- database_full %>% mutate(interviewtime_net_clean = ifelse(between(interviewtime_net, lower_cutoff, upper_cutoff), interviewtime_net, NA)) # Assuming 'database' is your data frame video2_data <- subset(database_full, Treatment_C == "Video 2") # Summary statistics for interview time in 'Video 2' cases summary(video2_data$interviewtime_net_clean) # Assuming 'database' is your data frame sorted_database <- video2_data[order(video2_data$interviewtime_net_clean, decreasing = F), ] # Display the ten highest values of interview time top_10_highest <- tail(sorted_database$interviewtime_net_clean, 200) print(top_10_highest) bxplt_interview_time_A<-ggplot(data=database_full[!is.na(database_full$Treatment_A), ]) + geom_boxplot(aes(y=interviewtime_net_clean, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) + coord_cartesian(ylim = c(500, 2900)) + labs(fill="Treatment") + xlab("") + ylab("Net Interview Time (s)")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") bxplt_interview_time_B<-ggplot(data=database_full) + geom_boxplot(aes(y=interviewtime_net_clean, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) + coord_cartesian(ylim = c(500, 2900)) + labs(fill="Treatment") + xlab("") + ylab("Net Interview Time (s)")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") bxplt_interview_time_C<-ggplot(data=database_full) + geom_boxplot(aes(y=interviewtime_net_clean, x= factor(Treatment_new, labels = treatment_order), group=Treatment_name, fill=Treatment_name), outlier.shape = NA) + coord_cartesian(ylim = c(500, 2900)) + labs(fill="Treatment") + xlab("") + ylab("Net Interview Time (s)")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") ggsave("Figures/interview_time_bxplt.png", width=7, height = 5, dpi="print") # Boxplot Video Time video_resp <- database_full %>% filter(Treatment_new == 1 | Treatment_new == 5) ggplot(data=video_resp) + geom_boxplot(aes(y=groupTime1784, x= Treatment_name, group=Treatment_name, fill=Treatment_name), outlier.shape = NA) + coord_cartesian(ylim = c(100, 300)) + labs(fill="Treatment") + xlab("Treatment") + ylab("Time on Video Page") ggsave("Figures/video_time_bxplt.png", width=7, height = 5, dpi="print") #### Time per choice cards #### database_full <- database_full %>% rowwise() %>% mutate(CC_time = sum(c(c_across(groupTime1740:groupTime1767), groupTime1735, groupTime1736), na.rm=TRUE), CC_time_mean = CC_time/10) # Calculate the cutoff values for the lowest and highest 1 percent lower_cutoff <- quantile(database_full$CC_time_mean, 0.01) upper_cutoff <- quantile(database_full$CC_time_mean, 0.99) # Filter the data to keep only values within the specified range database_full <- database_full %>% mutate(CC_time_mean_clean = ifelse(between(CC_time_mean, lower_cutoff, upper_cutoff), CC_time_mean, NA)) bxplt_cc_time_A<- ggplot(data=database_full[!is.na(database_full$Treatment_A), ]) + geom_boxplot(aes(y=CC_time_mean_clean, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) + coord_cartesian(ylim = c(5, 40)) + labs(fill="Treatment") + xlab("Treatment") + ylab("Mean Time CC (s)")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") bxplt_cc_time_B<- ggplot(data=database_full) + geom_boxplot(aes(y=CC_time_mean_clean, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) + coord_cartesian(ylim = c(5, 40)) + labs(fill="Treatment") + xlab("Treatment") + ylab("Mean Time CC (s)")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") bxplt_cc_time_C<- ggplot(data=database_full) + geom_boxplot(aes(y=CC_time_mean_clean, x= factor(Treatment_new, labels = treatment_order), group=Treatment_name, fill=Treatment_name), outlier.shape = NA) + coord_cartesian(ylim = c(5, 40)) + labs(fill="Treatment") + xlab("Treatment") + ylab("Mean Time CC (s)")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") ggsave("Figures/cc_time_bxplt.png", width=7, height = 5, dpi="print") #### Regression Treatment Time #### no_panel <- database_full %>% filter(!is.na(Treatment)) %>% group_by(id) %>% summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "groupTime1774", "Treatment_new", "WorkingTime", "interviewtime", "groupTime1733", "groupTime1784", "groupTime1775", "groupTime1785", "Number_Kids", "Kids_Dummy", "Screen", "Naturalness_SQ", "Employment_full", "Pensioner"), ~ round(mean(., na.rm = TRUE), 2)) #### Number opt-out choices database_full <- database_full %>% group_by(id) %>% mutate(count_choosen_3 = sum(choice == 3, na.rm = TRUE)) %>% ungroup() bxplt_opt_A <-ggplot(data=database_full[!is.na(database_full$Treatment_A), ]) + geom_boxplot(aes(y=count_choosen_3, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) + stat_summary( aes(x = Treatment_A, y = count_choosen_3, group = Treatment_A), fun = mean, geom = "point", shape = 18, size = 3, position = position_dodge(width = 0.75) )+ coord_cartesian(ylim = c(0, 10)) + labs(fill="Treatment") + xlab("") + ylab("No. of Opt-Out Choices")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") bxplt_opt_B <-ggplot(data=database_full) + geom_boxplot(aes(y=count_choosen_3, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) + stat_summary( aes(x = Treatment_B, y = count_choosen_3, group = Treatment_B), fun = mean, geom = "point", shape = 18, size = 3, position = position_dodge(width = 0.75) )+ coord_cartesian(ylim = c(0, 10)) + labs(fill="Treatment") + xlab("") + ylab("No. of Opt-Out Choices")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") bxplt_opt_C <-ggplot(data=database_full) + geom_boxplot(aes(y=count_choosen_3, x= factor(Treatment_new, labels = treatment_order) , group=Treatment_name, fill=Treatment_name), outlier.shape = NA) + stat_summary( aes(x = factor(Treatment_new, labels = treatment_order), y = count_choosen_3, group = Treatment_name), fun = mean, geom = "point", shape = 18, size = 3, position = position_dodge(width = 0.75) )+ coord_cartesian(ylim = c(0, 10)) + labs(fill="Treatment") + xlab("") + ylab("No. of Opt-Out Choices")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") ggsave("Figures/number_opt_out_bxplt.png", width=7, height = 5, dpi="print") # Take a look at always opt out people always_opt_out <- database_full %>% filter(count_choosen_3 == 10) table(always_opt_out$Treatment) table(always_opt_out$Treatment_C) summary(always_opt_out$Z_Mean_NR) #### Quiz questions data <- database_full %>% group_by(id) %>% dplyr::slice(1) %>% ungroup() ## Count correct answers after Treatment for Treatment Group (before CE) data <- data %>% mutate(correct_q1 = ifelse(TV01W3 == "A1", 1, 0)) %>% mutate(correct_q2 = ifelse(TV02W3 == "A2", 1, 0)) %>% mutate(correct_q3 = ifelse(TV03W3 == "AO01", 1, 0)) %>% mutate(correct_q4 = ifelse(TV04W3 == "AO01", 1, 0)) %>% mutate(correct_q5 = ifelse(TV05W3 == "AO02", 1, 0)) %>% mutate(correct_q6 = ifelse(TV06W3 == "AO02", 1, 0)) %>% mutate(correct_q7 = ifelse(TV07W3 == "AO01", 1, 0)) %>% mutate(number_correct = rowSums(select(., starts_with("correct_q")))) %>% mutate(number_correct = ifelse(TV01W3=="",NA , number_correct)) table(data$number_correct, useNA = "always" ) ## Count correct answers for all groups (after DCE) data <- data %>% mutate(b_correct_q1 = ifelse(TV11W3 == "A1", 1, 0)) %>% mutate(b_correct_q2 = ifelse(TV12W3 == "A2", 1, 0)) %>% mutate(b_correct_q3 = ifelse(TV15W3 == "AO01", 1, 0)) %>% mutate(b_correct_q4 = ifelse(TV16W3 == "AO01", 1, 0)) %>% mutate(b_correct_q5 = ifelse(TV17W3 == "AO02", 1, 0)) %>% mutate(b_correct_q6 = ifelse(TV18W3 == "AO02", 1, 0)) %>% mutate(b_correct_q7 = ifelse(TV19W3 == "AO01", 1, 0)) %>% mutate(b_number_correct = rowSums(select(., starts_with("b_correct_q"))))%>% mutate(number_correct = ifelse(is.na(number_correct), b_number_correct, number_correct)) %>% mutate(percentage_correct = (number_correct/7)*100) bxplt_quiz_A <- ggplot(data=data[!is.na(data$Treatment_A), ]) + geom_boxplot(aes(y=percentage_correct, x= Treatment_A , group=Treatment_A, fill=Treatment_A), outlier.shape = NA) + stat_summary( aes(x = Treatment_A, y = percentage_correct, group = Treatment_A), fun = mean, geom = "point", shape = 18, size = 3, position = position_dodge(width = 0.75) )+ coord_cartesian(ylim = c(20, 100)) + labs(fill="Treatment") + xlab("") + ylab("%. of Correct Statements")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") bxplt_quiz_B <- ggplot(data=data) + geom_boxplot(aes(y=percentage_correct, x= Treatment_B , group=Treatment_B, fill=Treatment_B), outlier.shape = NA) + stat_summary( aes(x = Treatment_B, y = percentage_correct, group = Treatment_B), fun = mean, geom = "point", shape = 18, size = 3, position = position_dodge(width = 0.75) )+ coord_cartesian(ylim = c(20, 100)) + labs(fill="Treatment") + xlab("") + ylab("%. of Correct Statements")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") bxplt_quiz_C <- ggplot(data=data) + geom_boxplot(aes(y=percentage_correct, x= factor(Treatment_new, labels = treatment_order) , group=Treatment_name, fill=Treatment_name), outlier.shape = NA) + stat_summary( aes(x = factor(Treatment_new, labels = treatment_order), y = percentage_correct, group = Treatment_name), fun = mean, geom = "point", shape = 18, size = 3, position = position_dodge(width = 0.75) )+ coord_cartesian(ylim = c(20, 100)) + labs(fill="Treatment") + xlab("") + ylab("%. of Correct Statements")+ scale_x_discrete( guide = guide_axis(angle = 45))+ theme_minimal() + # Adjust the theme as needed theme(legend.position = "none") ggsave("Figures/correct_statements_bxplt.png", width=7, height = 5, dpi="print") quiz_data<-data # Maybe need to add other groups, ask Fabian no_panel <- no_panel %>% mutate(groupTime1774 = case_when(is.na(groupTime1774) ~ 0, TRUE ~ groupTime1774), groupTime1784 = case_when(is.na(groupTime1784) ~ 0, TRUE ~ groupTime1784), groupTime1775 = case_when(is.na(groupTime1775) ~ 0, TRUE ~ groupTime1775), groupTime1785 = case_when(is.na(groupTime1785) ~ 0, TRUE ~ groupTime1785)) %>% mutate(interviewtime_net = interviewtime - groupTime1774 - groupTime1784 - groupTime1775 - groupTime1785) no_panel_filt <- no_panel %>% filter(Treatment_new != 3 & Treatment_new != 6) %>% mutate(Dummy_Group_2 = case_when(Treatment_new > 2 ~ 1, TRUE ~0)) treatment_model <- lm(data=no_panel_filt, log(groupTime1774) ~ Age + Gender_female + Kids_Dummy + Uni_degree + log(interviewtime_net) + log(Screen) + Dummy_Group_2 + Employment_full + Pensioner) summary(treatment_model) interviewtime_model <- lm(data=no_panel, log(interviewtime_net) ~ Age + Gender_female + log(Rent_SQ) + Kids_Dummy + Uni_degree + Employment_full + Pensioner + log(Screen)) summary(interviewtime_model)