Commit 004a1e65 authored by nc71qaxa

ols matching

parent 74712a44
Showing 449 additions and 384 deletions
......@@ -109,78 +109,78 @@ testData <- labeled_data[-trainIndex, ]
tuneGrid <- expand.grid(
nrounds = c(300),
max_depth = c(15),
eta = c(0.05),
gamma = c(0, 0.1),
colsample_bytree = c(0.6, 0.8), # Fraction of features to be used for each tree
min_child_weight = c(1, 3, 5), # Minimum sum of instance weight (hessian) needed in a child
subsample = c(0.7, 0.8) # Fraction of samples to be used for each tree
)
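# The grid above spans 1 x 1 x 1 x 2 x 2 x 3 x 2 = 24 hyperparameter combinations,
# each evaluated by the 5-fold cross-validation configured below.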
model3 <- train(Choice_Treat ~ .,
data = trainData,
method = "xgbTree",
tuneGrid = tuneGrid,
trControl = trainControl(method = "cv", number = 5))
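# caret::train() fits one model per grid row and fold; for a factor outcome it keeps
# the combination with the highest mean cross-validated accuracy by default.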
# Get variable importance
varImp(model3)
# Evaluate the model
predictions <- predict(model3, newdata = testData)
confusionMatrix(predictions, testData$Choice_Treat)
labeled_predictions <- predict(model3, newdata = labeled_data)
labeled_data$PredictedGroup <- labeled_predictions
table(labeled_data$Choice_Treat, labeled_data$PredictedGroup)
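# Rows are the observed Choice_Treat levels, columns the predictions (table() takes
# the row variable first).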
unlabeled_predictions <- predict(model3, newdata = unlabeled_data)
labeled_data_id$PredictedGroup <- labeled_predictions
data_prediction_labeled <- select(labeled_data_id, c("id", "PredictedGroup"))
saveRDS(data_prediction_labeled, "Data/predictions_labeled.RDS")
unlabeled_data$PredictedGroup <- unlabeled_predictions
data_prediction <- select(unlabeled_data, c("id", "PredictedGroup"))
saveRDS(data_prediction, "Data/predictions.RDS")
print(model3$bestTune)
table(data$predicted.classes, data$Choice_Treat)
calculate_metrics <- function(confusion_matrix) {
# Extract values from the confusion matrix
TN <- confusion_matrix[1, 1]
FP <- confusion_matrix[2, 1]
FN <- confusion_matrix[1, 2]
TP <- confusion_matrix[2, 2]
# Calculate sensitivity
sensitivity <- round(TP / (TP + FN), 2)
# Calculate specificity
specificity <- round(TN / (TN + FP), 2)
# Return the results as a list
return(list(sensitivity = sensitivity, specificity = specificity))
}
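# The indexing above assumes a 2x2 table(predicted, observed): rows are predicted
# classes and columns observed classes, so [2, 1] counts false positives.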
# Calculate metrics
metrics <- calculate_metrics(table(data$predicted.classes, data$Choice_Treat))
# Print results
print(paste("Sensitivity:", metrics$sensitivity))
print(paste("Specificity:", metrics$specificity))
roc_obj <- roc(data$Choice_Treat, data$probabilities)
plot(roc_obj, main = "ROC Curve", col = "blue", lwd = 2)
auc_value <- auc(roc_obj)
best_coords <- coords(roc_obj, "best", best.method="youden")
cut_off <- best_coords$threshold
\ No newline at end of file
# tuneGrid <- expand.grid(
# nrounds = c(300),
# max_depth = c(15),
# eta = c(0.05),
# gamma = c(0, 0.1),
# colsample_bytree = c(0.6, 0.8), # Fraction of features to be used for each tree
# min_child_weight = c(1, 3, 5), # Minimum sum of instance weight (hessian) needed in a child
# subsample = c(0.7, 0.8) # Fraction of samples to be used for each tree
# )
#
#
# model3 <- train(Choice_Treat ~ .,
# data = trainData,
# method = "xgbTree",
# tuneGrid = tuneGrid,
# trControl = trainControl(method = "cv", number = 5))
#
#
# # Get variable importance
# varImp(model3)
#
#
#
# # Evaluate the model
# predictions <- predict(model3, newdata = testData)
# confusionMatrix(predictions, testData$Choice_Treat)
# labeled_predictions <- predict(model3, newdata = labeled_data)
# labeled_data$PredictedGroup <- labeled_predictions
#
# table(labeled_data$Choice_Treat, labeled_data$PredictedGroup)
#
# unlabeled_predictions <- predict(model3, newdata = unlabeled_data)
# labeled_data_id$PredictedGroup <- labeled_predictions
# data_prediction_labeled<-select(labeled_data_id, c("id", "PredictedGroup"))
# saveRDS(data_prediction_labeled, "Data/predictions_labeled.RDS")
#
# unlabeled_data$PredictedGroup <- unlabeled_predictions
# data_prediction<-select(unlabeled_data, c("id", "PredictedGroup"))
# saveRDS(data_prediction, "Data/predictions.RDS")
#
# print(model3$bestTune)
#
#
# table(data$predicted.classes ,data$Choice_Treat)
#
# calculate_metrics <- function(confusion_matrix) {
# # Extract values from the confusion matrix
# TN <- confusion_matrix[1, 1]
# FP <- confusion_matrix[2, 1]
# FN <- confusion_matrix[1, 2]
# TP <- confusion_matrix[2, 2]
#
# # Calculate sensitivity
# sensitivity <- round(TP / (TP + FN), 2)
#
# # Calculate specificity
# specificity <- round(TN / (TN + FP), 2)
#
# # Return the results as a list
# return(list(sensitivity = sensitivity, specificity = specificity))
# }
#
# # Calculate metrics
# metrics <- calculate_metrics(table(data$predicted.classes ,data$Choice_Treat))
#
# # Print results
# print(paste("Sensitivity:", metrics$sensitivity))
# print(paste("Specificity:", metrics$specificity))
# roc_obj <- roc(data$Choice_Treat, data$probabilities)
# plot(roc_obj, main = "ROC Curve", col = "blue", lwd = 2)
# auc_value <- auc(roc_obj)
# best_coords <- coords(roc_obj, "best", best.method="youden")
#
#
# cut_off <- best_coords$threshold
\ No newline at end of file
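# A minimal sketch (not in the original script) of how the Youden-optimal threshold
# computed above could replace the default 0.5 cutoff; assumes data$probabilities
# holds the predicted probability of the positive class:
# data$predicted.classes <- ifelse(data$probabilities >= cut_off, 1, 0)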
......@@ -16,7 +16,7 @@ database_full <- database_full %>%
data <- database_full %>%
group_by(id) %>%
-  slice(1) %>%
+  dplyr::slice(1) %>%
ungroup()
data <- data %>%
......
......@@ -142,7 +142,7 @@ apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimat
model = apollo_estimate(apollo_beta, apollo_fixed,
apollo_probabilities, apollo_inputs,
estimate_settings=list(maxIterations=400,
estimationRoutine="bfgs",
estimationRoutine="bgw",
hessianRoutine="analytic"))
......
......@@ -142,7 +142,7 @@ apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimat
model = apollo_estimate(apollo_beta, apollo_fixed,
apollo_probabilities, apollo_inputs,
estimate_settings=list(maxIterations=400,
estimationRoutine="bfgs",
estimationRoutine="bgw",
hessianRoutine="analytic"))
......
......@@ -142,7 +142,7 @@ apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimat
model = apollo_estimate(apollo_beta, apollo_fixed,
apollo_probabilities, apollo_inputs,
estimate_settings=list(maxIterations=400,
estimationRoutine="bfgs",
estimationRoutine="bgw",
hessianRoutine="analytic"))
......
......@@ -142,7 +142,7 @@ apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimat
model = apollo_estimate(apollo_beta, apollo_fixed,
apollo_probabilities, apollo_inputs,
estimate_settings=list(maxIterations=400,
estimationRoutine="bfgs",
estimationRoutine="bgw",
hessianRoutine="analytic"))
......
......@@ -142,7 +142,7 @@ apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimat
model = apollo_estimate(apollo_beta, apollo_fixed,
apollo_probabilities, apollo_inputs,
estimate_settings=list(maxIterations=400,
estimationRoutine="bfgs",
estimationRoutine="bgw",
hessianRoutine="analytic"))
......
......@@ -33,3 +33,7 @@ summary(conseq_model_D)
conseq_model_control_D <- lm(Conseq_score ~ as.factor(Treatment_D) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, data)
summary(conseq_model_control_D)
conseq_model_control_G <- lm(Conseq_score ~ as.factor(Matching_Group) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, data)
summary(conseq_model_control_G)
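# R orders factor levels alphabetically, so "Control Not Pred" is the omitted
# reference category of Matching_Group in the model above.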
data <- database_full %>%
group_by(id) %>%
-  slice(1) %>%
+  dplyr::slice(1) %>%
ungroup()
data$Treatment_C <- as.factor(data$Treatment_C)
......
matching_group <- data %>% dplyr::select(id, Matching_Group)
quiz_data <- left_join(quiz_data, matching_group, by="id")
quiz_data$Treatment_C <- as.factor(quiz_data$Treatment_C)
quiz_data$Treatment_D <- as.factor(quiz_data$Treatment_D)
......@@ -27,6 +31,9 @@ ols_percentage_correct_D<- lm( percentage_correct ~ as.factor(Treatment_D) ,quiz
summary(ols_percentage_correct_D)
ols_percentage_correct_control_D <- lm(percentage_correct ~ as.factor(Treatment_D) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, quiz_data)
summary(ols_percentage_correct_control_D)
ols_percentage_correct_control_G <- lm(percentage_correct ~ as.factor(Matching_Group) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, quiz_data)
summary(ols_percentage_correct_control_G)
#
# # Create an HTML results table with customized names and stars
# results_table_3 <- stargazer(
......
library(car)
-data <- database_full %>%
data_predictions1 <- readRDS("Data/predictions.RDS")
data_predictions2 <- readRDS("Data/predictions_labeled.RDS")
data_predictions <- bind_rows(data_predictions1, data_predictions2)
database <- left_join(database_full, data_predictions, by="id")
database <- database %>%
filter(!is.na(Treatment_new)) %>%
mutate(Dummy_Treated = case_when(Treatment_new == 1 | Treatment_new == 2 ~ 1, TRUE ~ 0),
       Dummy_Vol_Treated = case_when(Treatment_new == 5 | Treatment_new == 4 ~ 1, TRUE ~ 0),
       Dummy_no_info = case_when(Treatment_new == 3 ~ 1, TRUE ~ 0)) %>%
mutate(Matching_Group = case_when(Dummy_Treated == 1 & PredictedGroup == 1 ~"Treat Pred",
Dummy_Treated == 1 & PredictedGroup == 0 ~"Treat Not Pred",
Treatment_A == "Vol_Treated" & PredictedGroup == 1 ~ "Opt Pred",
Treatment_A == "Vol_Treated" & PredictedGroup == 0 ~ "Opt Not Pred",
Treatment_new == 6 & PredictedGroup == 0 ~ "Control Pred",
TRUE ~ "Control Not Pred"))
data <- database %>%
group_by(id) %>%
dplyr::slice(1) %>%
ungroup()
......@@ -28,6 +50,11 @@ summary(ols_time_spent_D)
ols_time_spent_control_D <- lm(interviewtime_net_clean ~ as.factor(Treatment_D) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, data)
summary(ols_time_spent_control_D)
ols_time_spent_control_G <- lm(interviewtime_net_clean ~ as.factor(Matching_Group) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, data)
summary(ols_time_spent_control_G)
ols_time_cc_A <- lm(CC_time_mean_clean ~ as.factor(Treatment_A), data)
summary(ols_time_cc_A)
ols_time_cc_control_A <- lm(CC_time_mean_clean ~ as.factor(Treatment_A) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, data)
......@@ -45,6 +72,10 @@ ols_time_cc_D<- lm( CC_time_mean_clean ~ as.factor(Treatment_D) ,data)
summary(ols_time_cc_D)
ols_time_cc_control_D <- lm(CC_time_mean_clean ~ as.factor(Treatment_D) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, data)
summary(ols_time_cc_control_D)
ols_time_cc_control_G <- lm(CC_time_mean_clean ~ as.factor(Matching_Group) + Z_Mean_NR + as.factor(Gender) + Age_mean + QFIncome + Uni_degree, data)
summary(ols_time_cc_control_G)
# # Create an HTML results table with customized names and stars
# results_table_5 <- stargazer(
# ols_tme_spent_1, ols_tme_spent_control_1,
......
# Load models
-MXL_wtp_Control_Not_Pred_model <- readRDS("C:/nextcloud/Hot_Topic_Cool_Choices/Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Control_Not_Pred_model.rds")
-MXL_wtp_Control_Pred_model <- readRDS("C:/nextcloud/Hot_Topic_Cool_Choices/Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Control_Pred_model.rds")
-MXL_wtp_Opt_Not_Pred_model <- readRDS("C:/nextcloud/Hot_Topic_Cool_Choices/Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Opt_Not_Pred_model.rds")
-MXL_wtp_Opt_Pred_model <- readRDS("C:/nextcloud/Hot_Topic_Cool_Choices/Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Opt_Pred_model.rds")
-MXL_wtp_Treated_Not_Pred_model <- readRDS("C:/nextcloud/Hot_Topic_Cool_Choices/Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Treated_Not_Pred_model.rds")
-MXL_wtp_Treated_Pred_model <- readRDS("C:/nextcloud/Hot_Topic_Cool_Choices/Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Treated_Pred_model.rds")
+MXL_wtp_Control_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Control_Not_Pred")
+MXL_wtp_Control_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Control_Pred")
+MXL_wtp_Opt_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Opt_Not_Pred")
+MXL_wtp_Opt_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Opt_Pred")
+MXL_wtp_Treated_Not_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Treated_Not_Pred")
+MXL_wtp_Treated_Pred_model <- apollo_loadModel("Estimation_results/mxl/prediction/Split_samples/MXL_wtp_Treated_Pred")
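# apollo_loadModel() restores a model saved with apollo_saveOutput() from its modelName
# prefix, replacing the hard-coded absolute readRDS() paths above with relative ones.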
# List of models
models <- list(
......@@ -17,39 +17,62 @@ models <- list(
)
# Define the x-range for plotting (1000 values)
-x_range <- seq(-10, 100, length.out = 1000)
+x_range <- seq(-20, 100, length.out = 1000)
# Define colors and line types for each model
colors <- c("blue", "red", "green", "purple", "orange", "brown")
line_types <- 1:6
+# Define an empty data frame to store the results
+plot_data <- data.frame()
-# Initialize the plot with the first model
-model <- models[["Treated Not Pred"]]
-coef <- summary(model)$estimate
-mean <- coef["mu_natural", "Estimate"]
-sd <- coef["sig_natural", "Estimate"]
-y <- dnorm(x_range, mean, sd)
-plot(x_range, y, type = "l", col = colors[1], lwd = 2, lty = line_types[1],
-     ylim = c(0, max(y)), xlab = "x", ylab = "Density",
-     main = "Normal Distributions from Multiple MXL Models")
-# Loop through the remaining models and add their distributions to the plot
-for (i in 2:length(models)) {
+# Loop through the models and calculate the density for each
+for (i in seq_along(models)) {
+  model_name <- names(models)[i]
model <- models[[i]]
# Extract mean and standard deviation for 'mu_natural' and 'sig_natural'
-  coef <- summary(model)$estimate
-  mean <- coef["mu_natural", "Estimate"]
-  sd <- coef["sig_natural", "Estimate"]
+  coef <- model$estimate
+  mean <- coef["mu_natural"]
+  sd <- coef["sig_natural"]
-  # Compute the normal distribution based on extracted mean and sd
+  # Compute the normal distribution based on the extracted mean and sd
y <- dnorm(x_range, mean, sd)
-  # Add the distribution to the existing plot
-  lines(x_range, y, col = colors[i], lwd = 2, lty = line_types[i])
+  # Create a data frame for this model and append it to the main data frame
+  temp_data <- data.frame(x = x_range, density = y, model = model_name)
+  plot_data <- bind_rows(plot_data, temp_data)
}
+# Plot using ggplot2
+ggplot(plot_data, aes(x = x, y = density, color = model)) +
+  geom_line() +
+  labs(
+    title = "Normal Distributions from Multiple MXL Models",
+    x = "x",
+    y = "Density",
+    color = "Model"
+  ) +
+  theme_minimal() +
+  theme(legend.position = "right") +
+  scale_color_brewer(palette = "Set1")
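# Optional export of the ggplot figure (a usage sketch; path and size are hypothetical):
# ggsave("Figures/mxl_mu_natural_densities.png", width = 8, height = 5)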
#### Z-test if distributions are different
-z_value <- (MXL_wtp_Treated_Pred_model$estimate["mu_natural"] - MXL_wtp_Control_Pred_model$estimate["mu_natural"])/
-  (sqrt((MXL_wtp_Treated_Pred_model$estimate["sig_natural"]/sqrt(MXL_wtp_Treated_Pred_model$nObs)) +
-          (MXL_wtp_Control_Pred_model$estimate["sig_natural"]/sqrt(MXL_wtp_Control_Pred_model$nObs))))
+# Function to calculate the z-value for the difference in mu_natural between two models
+calculate_z_value <- function(model_1, model_2) {
+  z_value <- (model_1$estimate["mu_natural"] - model_2$estimate["mu_natural"]) /
+    (sqrt((model_1$estimate["sig_natural"] / sqrt(model_1$nObs / 10)) +
+            (model_2$estimate["sig_natural"] / sqrt(model_2$nObs / 10))))
+  return(z_value)
+}
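# In words: z = (mu_1 - mu_2) / sqrt(sig_1 / sqrt(n_1 / 10) + sig_2 / sqrt(n_2 / 10)),
# where dividing nObs by 10 presumably converts choice observations into respondents
# (assumption: 10 choice tasks per person).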
-# Add a legend
-legend("topright", inset = c(-0.25, 0), legend = names(models), col = colors,
-       lty = line_types, lwd = 2, xpd = TRUE)
+# Example of using the function
+z_value <- calculate_z_value(MXL_wtp_Treated_Pred_model, MXL_wtp_Control_Pred_model)
+# Print the z-value
+z_value
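# Two-sided p-value under a standard-normal reference (a usage sketch, not part of the
# original commit):
p_value <- 2 * pnorm(-abs(z_value))
p_value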