# Prepare variables that we want to use ----

# Give the raw survey columns readable names.
database_full <- database_full %>%
  rename(
    Gender = "Q03W123",
    Education = "Q06W123",
    HHSize = "Q41W123",
    WorkingTime = "Q44W123",
    Birthyear = "Q01W123",
    Rent_net = "Q07W123",
    Number_Kids = "Q42W123",
    Employment_type = "Q43W123",
    Conseq_UGS = "Q28W3",
    Conseq_Money = "Q29W3"
  )

# Turn the answer codes ("A1", "A2", ...) into numbers. Codes that are not
# listed become NA (dplyr::recode warns about them).
database_full <- database_full %>%
  mutate(
    Gender = dplyr::recode(Gender, "A1" = 1, "A2" = 2, "A3" = 3),
    Education = dplyr::recode(
      Education,
      "A1" = 1, "A2" = 2, "A3" = 3, "A4" = 4, "A5" = 5
    ),
    Employment_type = dplyr::recode(
      Employment_type,
      "A1" = 1, "A2" = 2, "A3" = 3, "A4" = 4, "A5" = 5, "A6" = 6
    ),
    # The two Conseq_* items are reverse-scored (A1 -> 5 ... A5 -> 1) and
    # A6 is set to missing.
    Conseq_UGS = dplyr::recode(
      Conseq_UGS,
      "A1" = 5, "A2" = 4, "A3" = 3, "A4" = 2, "A5" = 1, "A6" = NA_real_
    ),
    Conseq_Money = dplyr::recode(
      Conseq_Money,
      "A1" = 5, "A2" = 4, "A3" = 3, "A4" = 2, "A5" = 1, "A6" = NA_real_
    )
  )

# Derived covariates.
# NOTE(review): the dummies use `TRUE ~ 0`, so a missing source value is coded
# as 0 rather than NA -- confirm that this is intended.
database_full <- database_full %>%
  mutate(
    Gender_female = case_when(Gender == 2 ~ 1, TRUE ~ 0),
    # Age relative to the hard-coded reference year 2023; assumes Birthyear is
    # numeric -- TODO confirm.
    Age = 2023 - Birthyear,
    Uni_degree = case_when(Education == 5 ~ 1, TRUE ~ 0),
    Kids_Dummy = case_when(Number_Kids > 0 ~ 1, TRUE ~ 0),
    Employment_full = case_when(Employment_type == 1 ~ 1, TRUE ~ 0),
    Pensioner = case_when(Employment_type == 6 ~ 1, TRUE ~ 0),
    # Mean-centred age. na.rm = TRUE keeps a single missing Birthyear from
    # turning the whole column into NA; the mean is taken before the cleaning
    # filters below are applied.
    Age_mean = Age - mean(Age, na.rm = TRUE)
  )

# Data cleaning ----

# Keep plausible rents and walking distances and drop Gender == 3. filter()
# also drops rows where a condition evaluates to NA.
database_full <- database_full %>%
  filter(Rent_SQ <= 10000 & Rent_SQ >= 50) %>%
  filter(WalkingDistance_SQ > 0 & WalkingDistance_SQ <= 300) %>%
  filter(Gender != 3)

# Same rent / walking-distance limits for `database` (no gender filter here).
database <- database %>%
  filter(Rent_SQ <= 10000 & Rent_SQ >= 50) %>%
  filter(WalkingDistance_SQ > 0 & WalkingDistance_SQ <= 300)

summary(database_full$interviewtime)

# Drop interviews with interviewtime below 300.
database_full <- database_full %>%
  filter(interviewtime >= 300) # make change time to 10 seconds?
# Treatment labels ----

# Keep rows with a known Treatment_new and add three treatment labelings:
#   Treatment_A: from `Treatment` (1 / 2 / 3).
#   Treatment_B: Treatment_new collapsed to treated vs. not treated.
#   Treatment_C: one label per Treatment_new value (1-6).
# Values not listed below end up as NA.
database_full <- database_full %>%
  filter(!is.na(Treatment_new)) %>%
  mutate(
    Treatment_A = case_when(
      Treatment == 1 ~ "Treated",
      Treatment == 2 ~ "Vol_Treated",
      Treatment == 3 ~ "Not_Treated",
      TRUE ~ NA_character_
    ),
    Treatment_B = case_when(
      Treatment_new %in% c(1, 2, 4, 5) ~ "Treated",
      Treatment_new %in% c(3, 6) ~ "Not_Treated"
    ),
    Treatment_C = case_when(
      Treatment_new == 1 ~ "Video 1",
      Treatment_new == 2 ~ "No Video 1",
      Treatment_new == 3 ~ "No Info 2",
      Treatment_new == 4 ~ "No Video 2",
      Treatment_new == 5 ~ "Video 2",
      Treatment_new == 6 ~ "No Treatment 3",
      TRUE ~ NA_character_
    )
  )

id_list <- unique(database_full$id)

# Do we still want to use this? or only database_full?
# Restrict `database` to the ids that remain in database_full.
database <- database %>%
  filter(id %in% id_list) %>%
  filter(!is.na(Treatment_new))

# Building NR Index ----

# The 21 items Q38S01W3 ... Q38S21W3. Answer codes "A1".."A5" become 1..5;
# any other value becomes NA. A frequency table is printed before and after
# the conversion as a visual check.
q38_variables <- paste0("Q38S", sprintf("%02d", 1:21), "W3")

for (variable_name in q38_variables) {
  cat("Table for", variable_name, ":\n")
  print(table(database_full[[variable_name]]))
  cat("\n")

  database_full[[variable_name]] <- as.numeric(
    factor(
      database_full[[variable_name]],
      levels = c("A1", "A2", "A3", "A4", "A5")
    )
  )

  cat("Table for", variable_name, ":\n")
  print(table(database_full[[variable_name]]))
  cat("\n")
}

# Reverse-score these items (1 <-> 5, 2 <-> 4).
variables_to_reverse <- c(
  "Q38S02W3", "Q38S03W3", "Q38S10W3", "Q38S11W3",
  "Q38S13W3", "Q38S14W3", "Q38S15W3", "Q38S18W3"
)

for (variable_name in variables_to_reverse) {
  cat("Table for", variable_name, ":\n")
  database_full[[variable_name]] <-
    6 - as.numeric(database_full[[variable_name]])
  print(table(database_full[[variable_name]]))
  cat("\n")
}

# Sum exactly the 21 recoded items, so the numerator always matches the
# divisor used for the mean (other columns whose names start with "Q38" are
# not included). A row with any missing item gets NA.
database_full$Total_NR <- rowSums(database_full[q38_variables])
hist(database_full$Total_NR)

database_full <- database_full %>%
  mutate(Mean_NR = Total_NR / 21)

mean_nr <- mean(database_full$Mean_NR, na.rm = TRUE)
sd_nr <- sd(database_full$Mean_NR, na.rm = TRUE)

database_full <- database_full %>%
  mutate(Z_Mean_NR = (Mean_NR - mean_nr) / sd_nr)

# Copy the standardized index to `database` by respondent id instead of by
# row position: the two data frames are filtered separately, so equal length
# and row order are not guaranteed.
# NOTE(review): assumes Z_Mean_NR is constant within id -- confirm.
database$Z_Mean_NR <-
  database_full$Z_Mean_NR[match(database$id, database_full$id)]
summary(database$Z_Mean_NR)

# Self-Reference Index ----

# Items TV08W3 ... TV10W3, converted from "A1".."A5" to 1..5 like the NR items.
sr_variables <- paste0("TV", sprintf("%02d", 8:10), "W3")

for (variable_name in sr_variables) {
  cat("Table for", variable_name, ":\n")
  print(table(database_full[[variable_name]]))
  cat("\n")

  database_full[[variable_name]] <- as.numeric(
    factor(
      database_full[[variable_name]],
      levels = c("A1", "A2", "A3", "A4", "A5")
    )
  )

  cat("Table for", variable_name, ":\n")
  print(table(database_full[[variable_name]]))
  cat("\n")
}

database_full$Total_SR <- database_full$TV08W3 +
  database_full$TV09W3 +
  database_full$TV10W3
hist(database_full$Total_SR)

database_full <- database_full %>%
  mutate(Mean_SR = Total_SR / 3)

mean_sr <- mean(database_full$Mean_SR, na.rm = TRUE)
sd_sr <- sd(database_full$Mean_SR, na.rm = TRUE)

database_full <- database_full %>%
  mutate(Z_Mean_SR = (Mean_SR - mean_sr) / sd_sr)

# Same id-based lookup as for Z_Mean_NR.
# NOTE(review): assumes Z_Mean_SR is constant within id -- confirm.
database$Z_Mean_SR <-
  database_full$Z_Mean_SR[match(database$id, database_full$id)]
summary(database$Z_Mean_SR)