# Code owners
# Assign users and groups as approvers for specific file changes. Learn more.
# data_prep.R 5.28 KiB
# Prepare variables that we want to use ----

# Give the raw survey columns readable names.
database_full <- database_full %>%
  rename(
    Gender = "Q03W123",
    Education = "Q06W123",
    HHSize = "Q41W123",
    WorkingTime = "Q44W123",
    Birthyear = "Q01W123",
    Rent_net = "Q07W123",
    Number_Kids = "Q42W123",
    Employment_type = "Q43W123",
    Conseq_UGS = "Q28W3",
    Conseq_Money = "Q29W3"
  )

# Turn the "A<k>" answer codes into numbers.
# Conseq_UGS and Conseq_Money are reverse-scored (A1 -> 5, ..., A5 -> 1) and
# their sixth answer option (A6) is treated as missing.
database_full <- database_full %>%
  mutate(
    Gender = dplyr::recode(Gender, "A1" = 1, "A2" = 2, "A3" = 3),
    Education = dplyr::recode(
      Education,
      "A1" = 1, "A2" = 2, "A3" = 3, "A4" = 4, "A5" = 5
    ),
    Employment_type = dplyr::recode(
      Employment_type,
      "A1" = 1, "A2" = 2, "A3" = 3, "A4" = 4, "A5" = 5, "A6" = 6
    ),
    Conseq_UGS = dplyr::recode(
      Conseq_UGS,
      "A1" = 5, "A2" = 4, "A3" = 3, "A4" = 2, "A5" = 1, "A6" = NA_real_
    ),
    Conseq_Money = dplyr::recode(
      Conseq_Money,
      "A1" = 5, "A2" = 4, "A3" = 3, "A4" = 2, "A5" = 1, "A6" = NA_real_
    )
  )

# Derived covariates.
# NOTE(review): the 0/1 dummies below fall through to `TRUE ~ 0`, so a missing
# answer is coded 0 rather than NA -- confirm that this is intended.
database_full <- database_full %>%
  mutate(
    Gender_female = case_when(Gender == 2 ~ 1, TRUE ~ 0),
    # Age relative to the hard-coded reference year 2023.
    Age = 2023 - Birthyear,
    Uni_degree = case_when(Education == 5 ~ 1, TRUE ~ 0),
    Kids_Dummy = case_when(Number_Kids > 0 ~ 1, TRUE ~ 0),
    Employment_full = case_when(Employment_type == 1 ~ 1, TRUE ~ 0),
    Pensioner = case_when(Employment_type == 6 ~ 1, TRUE ~ 0),
    # Mean-centred age. na.rm = TRUE keeps a single missing birth year from
    # turning Age_mean into NA for every respondent. The mean is taken over
    # the rows present at this point, i.e. before the cleaning filters below.
    Age_mean = Age - mean(Age, na.rm = TRUE)
  )
# Data cleaning ----

# database_full: keep rows with Rent_SQ in [50, 10000] and WalkingDistance_SQ
# in (0, 300], and drop rows where Gender is 3. filter() also drops rows for
# which any of these conditions evaluates to NA.
database_full <- database_full %>%
  filter(
    Rent_SQ >= 50, Rent_SQ <= 10000,
    WalkingDistance_SQ > 0, WalkingDistance_SQ <= 300,
    Gender != 3
  )

# database: same rent and walking-distance bounds, no gender restriction.
database <- database %>%
  filter(
    Rent_SQ >= 50, Rent_SQ <= 10000,
    WalkingDistance_SQ > 0, WalkingDistance_SQ <= 300
  )

# Inspect the interview durations before cutting off the fastest respondents.
summary(database_full$interviewtime)

# Keep interviews with interviewtime of at least 300.
# Open question from the original author: make change time to 10 seconds?
database_full <- database_full %>%
  filter(interviewtime >= 300)
# Treatment labels ----
# Drop rows without a treatment assignment, then derive three labelled
# versions of the treatment:
#   Treatment_A - from `Treatment` (codes 1-3), NA for any other value
#   Treatment_B - from `Treatment_new`, collapsed to treated / not treated
#   Treatment_C - from `Treatment_new`, one label per arm (codes 1-6)
database_full <- database_full %>%
  filter(!is.na(Treatment_new)) %>%
  mutate(
    Treatment_A = case_when(
      Treatment == 1 ~ "Treated",
      Treatment == 2 ~ "Vol_Treated",
      Treatment == 3 ~ "Not_Treated",
      TRUE ~ NA_character_
    ),
    # No default branch: values outside 1-6 become NA.
    Treatment_B = case_when(
      Treatment_new %in% c(1, 2, 4, 5) ~ "Treated",
      Treatment_new %in% c(3, 6) ~ "Not_Treated"
    ),
    Treatment_C = case_when(
      Treatment_new == 1 ~ "Video 1",
      Treatment_new == 2 ~ "No Video 1",
      Treatment_new == 3 ~ "No Info 2",
      Treatment_new == 4 ~ "No Video 2",
      Treatment_new == 5 ~ "Video 2",
      Treatment_new == 6 ~ "No Treatment 3",
      TRUE ~ NA_character_
    )
  )

# Ids that are still present in database_full after cleaning.
id_list <- unique(database_full$id)

# Do we still want to use this? Or only database_full?
# Restrict `database` to those ids and to rows with a treatment assignment.
database <- database %>%
  filter(id %in% id_list, !is.na(Treatment_new))
# Building NR Index ----
# The index is built from the 21 items Q38S01W3 ... Q38S21W3. The item names
# are spelled out once and reused for recoding, summing and averaging, so the
# sum and the divisor always refer to exactly the same set of columns.
q38_variables <- sprintf("Q38S%02dW3", 1:21)

missing_q38 <- setdiff(q38_variables, names(database_full))
if (length(missing_q38) > 0) {
  stop(
    "NR index items missing from database_full: ",
    paste(missing_q38, collapse = ", "),
    call. = FALSE
  )
}

# Recode the answer codes "A1".."A5" to 1..5; any other value becomes NA.
# This step is not idempotent: running it on already-numeric items would
# turn every value into NA.
for (variable_name in q38_variables) {
  cat("Table for", variable_name, ":\n")
  print(table(database_full[[variable_name]]))
  cat("\n")
  database_full[[variable_name]] <- as.numeric(
    factor(
      database_full[[variable_name]],
      levels = c("A1", "A2", "A3", "A4", "A5")
    )
  )
  cat("Table for", variable_name, ":\n")
  print(table(database_full[[variable_name]]))
  cat("\n")
}

# Reverse-score the following items on the 1..5 scale (x -> 6 - x).
variables_to_reverse <- c(
  "Q38S02W3", "Q38S03W3", "Q38S10W3", "Q38S11W3",
  "Q38S13W3", "Q38S14W3", "Q38S15W3", "Q38S18W3"
)
for (variable_name in variables_to_reverse) {
  cat("Table for", variable_name, ":\n")
  database_full[[variable_name]] <- 6 - as.numeric(database_full[[variable_name]])
  print(table(database_full[[variable_name]]))
  cat("\n")
}

# Sum and mean score per row; a row with any missing item gets NA.
database_full$Total_NR <- rowSums(database_full[q38_variables])
hist(database_full$Total_NR)
database_full <- database_full %>%
  mutate(Mean_NR = Total_NR / length(q38_variables))

# Standardise the mean score (z-score over the rows of database_full).
mean_nr <- mean(database_full$Mean_NR, na.rm = TRUE)
sd_nr <- sd(database_full$Mean_NR, na.rm = TRUE)
database_full <- database_full %>%
  mutate(Z_Mean_NR = (Mean_NR - mean_nr) / sd_nr)

# Copy the z-score into `database`, aligned by respondent id. The two data
# frames went through different filters above, so a positional copy could
# fail or attach scores to the wrong rows. Ids without a match get NA.
database$Z_Mean_NR <-
  database_full$Z_Mean_NR[match(database$id, database_full$id)]
summary(database$Z_Mean_NR)
# Self-Reference Index ----
# Built from the three items TV08W3, TV09W3 and TV10W3.
sr_variables <- sprintf("TV%02dW3", 8:10)

missing_sr <- setdiff(sr_variables, names(database_full))
if (length(missing_sr) > 0) {
  stop(
    "Self-reference items missing from database_full: ",
    paste(missing_sr, collapse = ", "),
    call. = FALSE
  )
}

# Recode the answer codes "A1".."A5" to 1..5; any other value becomes NA.
# This step is not idempotent: running it on already-numeric items would
# turn every value into NA.
for (variable_name in sr_variables) {
  cat("Table for", variable_name, ":\n")
  print(table(database_full[[variable_name]]))
  cat("\n")
  database_full[[variable_name]] <- as.numeric(
    factor(
      database_full[[variable_name]],
      levels = c("A1", "A2", "A3", "A4", "A5")
    )
  )
  cat("Table for", variable_name, ":\n")
  print(table(database_full[[variable_name]]))
  cat("\n")
}

# Sum and mean score per row; a row with any missing item gets NA.
database_full$Total_SR <- rowSums(database_full[sr_variables])
hist(database_full$Total_SR)
database_full <- database_full %>%
  mutate(Mean_SR = Total_SR / length(sr_variables))

# Standardise the mean score (z-score over the rows of database_full).
mean_sr <- mean(database_full$Mean_SR, na.rm = TRUE)
sd_sr <- sd(database_full$Mean_SR, na.rm = TRUE)
database_full <- database_full %>%
  mutate(Z_Mean_SR = (Mean_SR - mean_sr) / sd_sr)

# Copy the z-score into `database`, aligned by respondent id. The two data
# frames went through different filters earlier in the script, so a
# positional copy could fail or attach scores to the wrong rows. Ids without
# a match get NA.
database$Z_Mean_SR <-
  database_full$Z_Mean_SR[match(database$id, database_full$id)]
summary(database$Z_Mean_SR)