Code owners
Assign users and groups as approvers for specific file changes. Learn more.
treatment.R 17.12 KiB
library(viridis)
data_grouped <- database %>%
group_by(id) %>%
summarise(across(c(Treatment, Treatment_new), list( gr= ~unique(.))))
data_grouped <- data_grouped %>%
mutate(Treatment_name = case_when(
Treatment_new_gr == 1 ~ 'Video 1',
Treatment_new_gr == 2 ~ 'No Video 1',
Treatment_new_gr == 3 ~ 'No Info 2',
Treatment_new_gr == 4 ~ 'No Video 2',
Treatment_new_gr == 5 ~ 'Video 2',
Treatment_new_gr == 6 ~ 'No Treatment 3',
TRUE ~ NA_character_
))
treatment_order<- c("Video 1", "No Video 1", "No Info 2","No Video 2", "Video 2" ,"No Treatment 3")
data_grouped <- data_grouped %>% filter(!is.na(Treatment_gr))
ggplot(data=data_grouped) +
geom_bar(aes(x=factor(Treatment_name, levels=c("No Video 1", "Video 1", "No Info 2", "No Video 2", "Video 2",
"No Treatment 3")),
group=as.factor(Treatment_gr), fill=as.factor(Treatment_gr))) +
xlab("Treatment Group") +
ylab("Count") +
scale_fill_viridis(option="D", discrete = T, labels=c("Always Info", "Optional Info", "Never Info")) +
labs(fill="Treatment Group")
ggsave("Figures/barplot_treatment.png", width=7, height=5, dpi="print")
#### Inspect socio-demographic differences ####
### Case A
treatment_socio_A <- database_full %>%
group_by(Treatment_A) %>%
summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ",
"Naturalness_SQ", "Employment_full", "Z_Mean_NR"),
~ round(mean(., na.rm = TRUE), 2))
# Export table as tex file
print(xtable(treatment_socio_A, type ="latex"),
include.rownames = F, file ="Tables/socio_demos_A.tex")
### Case B
treatment_socio_B <- database_full %>% filter(Treatment_B == "Treated" | Treatment_A == "Not_Treated") %>%
group_by(Treatment_B) %>%
summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ",
"Naturalness_SQ", "Employment_full", "Pensioner"),
~ round(mean(., na.rm = TRUE), 2))
# Export table as tex file
print(xtable(treatment_socio_B, type ="latex"),
include.rownames = F, file ="Tables/socio_demos_B.tex")
### Case C
treatment_socio <- database_full %>% filter(!is.na(Treatment)) %>% group_by(Treatment_C) %>%
summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ",
"Naturalness_SQ", "Employment_full", "Z_Mean_NR"),
~ round(mean(., na.rm = TRUE), 2))
treatment_socio_C <- database_full %>% filter(!is.na(Treatment_new)) %>% group_by(Treatment_C) %>%
summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "Kids_Dummy", "WalkingDistance_SQ",
"Naturalness_SQ", "Employment_full", "Z_Mean_NR"),
~ round(mean(., na.rm = TRUE), 2))
#
# treatment_socio_C <- treatment_socio_C %>% mutate(Treatment_name = case_when(
# Treatment_new == 1 ~ 'Video 1',
# Treatment_new == 2 ~ 'No Video 1',
# Treatment_new == 3 ~ 'No Info 2',
# Treatment_new == 4 ~ 'No Video 2',
# Treatment_new == 5 ~ 'Video 2',
# Treatment_new == 6 ~ 'No Treatment 3',
# TRUE ~ NA_character_
# ))
# Export table as tex file
print(xtable(treatment_socio_C, type ="latex"),
include.rownames = F, file ="Tables/socio_demos_C.tex")
### Create boxplot for text treatment page #
database_textpage <- database_full %>% filter(Treatment_new != 6)
ggplot(data=database_textpage) +
geom_boxplot(aes(y=groupTime1774, x= Treatment_new, group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
coord_cartesian(ylim = c(0, 180)) +
labs(fill="Treatment") +
xlab("Treatment") +
ylab("Time on Info Text Page")
ggsave("Figures/treatment_time_bxplt.png", width=7, height = 5, dpi="print")
table(database_full$Treatment_C)
### Create boxplot for interview time per group #, remove quiz group 1776
database_full <- database_full %>% mutate(groupTime1774 = case_when(is.na(groupTime1774) ~ 0, TRUE ~ groupTime1774),
groupTime1784 = case_when(is.na(groupTime1784) ~ 0, TRUE ~ groupTime1784),
groupTime1775 = case_when(is.na(groupTime1775) ~ 0, TRUE ~ groupTime1775),
groupTime1785 = case_when(is.na(groupTime1785) ~ 0, TRUE ~ groupTime1785),
groupTime1786 = case_when(is.na(groupTime1786) ~ 0, TRUE ~ groupTime1786)) %>%
mutate(interviewtime_net = interviewtime - groupTime1774 - groupTime1784 - groupTime1775 - groupTime1785- groupTime1786 - groupTime1776)
# Calculate the cutoff values for the lowest and highest 1 percent
lower_cutoff <- quantile(database_full$interviewtime_net, 0.01)
upper_cutoff <- quantile(database_full$interviewtime_net, 0.95)
# Filter the data to keep only values within the specified range
database_full <- database_full %>%
mutate(interviewtime_net_clean = ifelse(between(interviewtime_net, lower_cutoff, upper_cutoff), interviewtime_net, NA))
# Assuming 'database' is your data frame
video2_data <- subset(database_full, Treatment_C == "Video 2")
# Summary statistics for interview time in 'Video 2' cases
summary(video2_data$interviewtime_net_clean)
# Assuming 'database' is your data frame
sorted_database <- video2_data[order(video2_data$interviewtime_net_clean, decreasing = F), ]
# Display the ten highest values of interview time
top_10_highest <- tail(sorted_database$interviewtime_net_clean, 200)
print(top_10_highest)
bxplt_interview_time_A<-ggplot(data=database_full[!is.na(database_full$Treatment_A), ]) +
geom_boxplot(aes(y=interviewtime_net_clean, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) +
coord_cartesian(ylim = c(500, 2900)) +
labs(fill="Treatment") +
xlab("") +
ylab("Net Interview Time (s)")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
bxplt_interview_time_B<-ggplot(data=database_full) +
geom_boxplot(aes(y=interviewtime_net_clean, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) +
coord_cartesian(ylim = c(500, 2900)) +
labs(fill="Treatment") +
xlab("") +
ylab("Net Interview Time (s)")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
bxplt_interview_time_C<-ggplot(data=database_full) +
geom_boxplot(aes(y=interviewtime_net_clean, x= factor(Treatment_new, labels = treatment_order), group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
coord_cartesian(ylim = c(500, 2900)) +
labs(fill="Treatment") +
xlab("") +
ylab("Net Interview Time (s)")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
ggsave("Figures/interview_time_bxplt.png", width=7, height = 5, dpi="print")
# Boxplot Video Time
video_resp <- database_full %>% filter(Treatment_new == 1 | Treatment_new == 5)
ggplot(data=video_resp) +
geom_boxplot(aes(y=groupTime1784, x= Treatment_name, group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
coord_cartesian(ylim = c(100, 300)) +
labs(fill="Treatment") +
xlab("Treatment") +
ylab("Time on Video Page")
ggsave("Figures/video_time_bxplt.png", width=7, height = 5, dpi="print")
#### Time per choice cards ####
database_full <- database_full %>% rowwise() %>% mutate(CC_time = sum(c(c_across(groupTime1740:groupTime1767), groupTime1735,
groupTime1736), na.rm=TRUE),
CC_time_mean = CC_time/10)
# Calculate the cutoff values for the lowest and highest 1 percent
lower_cutoff <- quantile(database_full$CC_time_mean, 0.01)
upper_cutoff <- quantile(database_full$CC_time_mean, 0.99)
# Filter the data to keep only values within the specified range
database_full <- database_full %>%
mutate(CC_time_mean_clean = ifelse(between(CC_time_mean, lower_cutoff, upper_cutoff), CC_time_mean, NA))
bxplt_cc_time_A<- ggplot(data=database_full[!is.na(database_full$Treatment_A), ]) +
geom_boxplot(aes(y=CC_time_mean_clean, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) +
coord_cartesian(ylim = c(5, 40)) +
labs(fill="Treatment") +
xlab("Treatment") +
ylab("Mean Time CC (s)")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
bxplt_cc_time_B<- ggplot(data=database_full) +
geom_boxplot(aes(y=CC_time_mean_clean, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) +
coord_cartesian(ylim = c(5, 40)) +
labs(fill="Treatment") +
xlab("Treatment") +
ylab("Mean Time CC (s)")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
bxplt_cc_time_C<- ggplot(data=database_full) +
geom_boxplot(aes(y=CC_time_mean_clean, x= factor(Treatment_new, labels = treatment_order), group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
coord_cartesian(ylim = c(5, 40)) +
labs(fill="Treatment") +
xlab("Treatment") +
ylab("Mean Time CC (s)")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
ggsave("Figures/cc_time_bxplt.png", width=7, height = 5, dpi="print")
#### Regression Treatment Time ####
no_panel <- database_full %>% filter(!is.na(Treatment)) %>% group_by(id) %>%
summarize_at(c('Gender_female', 'Uni_degree', 'Age', 'HHSize', "Rent_SQ", "groupTime1774", "Treatment_new",
"WorkingTime", "interviewtime", "groupTime1733", "groupTime1784", "groupTime1775",
"groupTime1785", "Number_Kids", "Kids_Dummy",
"Screen", "Naturalness_SQ", "Employment_full", "Pensioner"),
~ round(mean(., na.rm = TRUE), 2))
#### Number opt-out choices
database_full <- database_full %>%
group_by(id) %>%
mutate(count_choosen_3 = sum(choice == 3, na.rm = TRUE)) %>%
ungroup()
bxplt_opt_A <-ggplot(data=database_full[!is.na(database_full$Treatment_A), ]) +
geom_boxplot(aes(y=count_choosen_3, x= Treatment_A, group=Treatment_A, fill=Treatment_A), outlier.shape = NA) +
stat_summary(
aes(x = Treatment_A, y = count_choosen_3, group = Treatment_A),
fun = mean, geom = "point", shape = 18, size = 3,
position = position_dodge(width = 0.75) )+
coord_cartesian(ylim = c(0, 10)) +
labs(fill="Treatment") +
xlab("") +
ylab("No. of Opt-Out Choices")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
bxplt_opt_B <-ggplot(data=database_full) +
geom_boxplot(aes(y=count_choosen_3, x= Treatment_B, group=Treatment_B, fill=Treatment_B), outlier.shape = NA) +
stat_summary(
aes(x = Treatment_B, y = count_choosen_3, group = Treatment_B),
fun = mean, geom = "point", shape = 18, size = 3,
position = position_dodge(width = 0.75) )+
coord_cartesian(ylim = c(0, 10)) +
labs(fill="Treatment") +
xlab("") +
ylab("No. of Opt-Out Choices")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
bxplt_opt_C <-ggplot(data=database_full) +
geom_boxplot(aes(y=count_choosen_3, x= factor(Treatment_new, labels = treatment_order) , group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
stat_summary(
aes(x = factor(Treatment_new, labels = treatment_order), y = count_choosen_3, group = Treatment_name),
fun = mean, geom = "point", shape = 18, size = 3,
position = position_dodge(width = 0.75) )+
coord_cartesian(ylim = c(0, 10)) +
labs(fill="Treatment") +
xlab("") +
ylab("No. of Opt-Out Choices")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
ggsave("Figures/number_opt_out_bxplt.png", width=7, height = 5, dpi="print")
# Take a look at always opt out people
always_opt_out <- database_full %>% filter(count_choosen_3 == 10)
table(always_opt_out$Treatment)
table(always_opt_out$Treatment_C)
summary(always_opt_out$Z_Mean_NR)
#### Quiz questions
data <- database_full %>%
group_by(id) %>%
dplyr::slice(1) %>%
ungroup()
## Count correct answers after Treatment for Treatment Group (before CE)
data <- data %>%
mutate(correct_q1 = ifelse(TV01W3 == "A1", 1, 0)) %>%
mutate(correct_q2 = ifelse(TV02W3 == "A2", 1, 0)) %>%
mutate(correct_q3 = ifelse(TV03W3 == "AO01", 1, 0)) %>%
mutate(correct_q4 = ifelse(TV04W3 == "AO01", 1, 0)) %>%
mutate(correct_q5 = ifelse(TV05W3 == "AO02", 1, 0)) %>%
mutate(correct_q6 = ifelse(TV06W3 == "AO02", 1, 0)) %>%
mutate(correct_q7 = ifelse(TV07W3 == "AO01", 1, 0)) %>%
mutate(number_correct = rowSums(select(., starts_with("correct_q")))) %>%
mutate(number_correct = ifelse(TV01W3=="",NA , number_correct))
table(data$number_correct, useNA = "always" )
## Count correct answers for all groups (after DCE)
data <- data %>%
mutate(b_correct_q1 = ifelse(TV11W3 == "A1", 1, 0)) %>%
mutate(b_correct_q2 = ifelse(TV12W3 == "A2", 1, 0)) %>%
mutate(b_correct_q3 = ifelse(TV15W3 == "AO01", 1, 0)) %>%
mutate(b_correct_q4 = ifelse(TV16W3 == "AO01", 1, 0)) %>%
mutate(b_correct_q5 = ifelse(TV17W3 == "AO02", 1, 0)) %>%
mutate(b_correct_q6 = ifelse(TV18W3 == "AO02", 1, 0)) %>%
mutate(b_correct_q7 = ifelse(TV19W3 == "AO01", 1, 0)) %>%
mutate(b_number_correct = rowSums(select(., starts_with("b_correct_q"))))%>%
mutate(number_correct = ifelse(is.na(number_correct), b_number_correct, number_correct)) %>%
mutate(percentage_correct = (number_correct/7)*100)
bxplt_quiz_A <- ggplot(data=data[!is.na(data$Treatment_A), ]) +
geom_boxplot(aes(y=percentage_correct, x= Treatment_A , group=Treatment_A, fill=Treatment_A), outlier.shape = NA) +
stat_summary(
aes(x = Treatment_A, y = percentage_correct, group = Treatment_A),
fun = mean, geom = "point", shape = 18, size = 3,
position = position_dodge(width = 0.75) )+
coord_cartesian(ylim = c(20, 100)) +
labs(fill="Treatment") +
xlab("") +
ylab("%. of Correct Statements")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
bxplt_quiz_B <- ggplot(data=data) +
geom_boxplot(aes(y=percentage_correct, x= Treatment_B , group=Treatment_B, fill=Treatment_B), outlier.shape = NA) +
stat_summary(
aes(x = Treatment_B, y = percentage_correct, group = Treatment_B),
fun = mean, geom = "point", shape = 18, size = 3,
position = position_dodge(width = 0.75) )+
coord_cartesian(ylim = c(20, 100)) +
labs(fill="Treatment") +
xlab("") +
ylab("%. of Correct Statements")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
bxplt_quiz_C <- ggplot(data=data) +
geom_boxplot(aes(y=percentage_correct, x= factor(Treatment_new, labels = treatment_order) , group=Treatment_name, fill=Treatment_name), outlier.shape = NA) +
stat_summary(
aes(x = factor(Treatment_new, labels = treatment_order), y = percentage_correct, group = Treatment_name),
fun = mean, geom = "point", shape = 18, size = 3,
position = position_dodge(width = 0.75) )+
coord_cartesian(ylim = c(20, 100)) +
labs(fill="Treatment") +
xlab("") +
ylab("%. of Correct Statements")+
scale_x_discrete( guide = guide_axis(angle = 45))+
theme_minimal() + # Adjust the theme as needed
theme(legend.position = "none")
ggsave("Figures/correct_statements_bxplt.png", width=7, height = 5, dpi="print")
quiz_data<-data
# Maybe need to add other groups, ask Fabian
no_panel <- no_panel %>% mutate(groupTime1774 = case_when(is.na(groupTime1774) ~ 0, TRUE ~ groupTime1774),
groupTime1784 = case_when(is.na(groupTime1784) ~ 0, TRUE ~ groupTime1784),
groupTime1775 = case_when(is.na(groupTime1775) ~ 0, TRUE ~ groupTime1775),
groupTime1785 = case_when(is.na(groupTime1785) ~ 0, TRUE ~ groupTime1785)) %>%
mutate(interviewtime_net = interviewtime - groupTime1774 - groupTime1784 - groupTime1775 - groupTime1785)
no_panel_filt <- no_panel %>% filter(Treatment_new != 3 & Treatment_new != 6) %>%
mutate(Dummy_Group_2 = case_when(Treatment_new > 2 ~ 1, TRUE ~0))
treatment_model <- lm(data=no_panel_filt, log(groupTime1774) ~ Age + Gender_female +
Kids_Dummy + Uni_degree + log(interviewtime_net) + log(Screen) + Dummy_Group_2 +
Employment_full + Pensioner)
summary(treatment_model)
interviewtime_model <- lm(data=no_panel, log(interviewtime_net) ~ Age + Gender_female +
log(Rent_SQ) + Kids_Dummy + Uni_degree + Employment_full +
Pensioner + log(Screen))
summary(interviewtime_model)