From cbc47e17c5441cb3e950b6954c2580d770d7eae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=B6nig?= <ye87zine@usr.idiv.de> Date: Thu, 12 Dec 2024 14:55:36 +0100 Subject: [PATCH] additions to msdm analysis --- R/05_02_MSDM_comparison.qmd | 143 +++++++++++++++++++++++++++++++----- 1 file changed, 125 insertions(+), 18 deletions(-) diff --git a/R/05_02_MSDM_comparison.qmd b/R/05_02_MSDM_comparison.qmd index 0adece2..5b14c8a 100644 --- a/R/05_02_MSDM_comparison.qmd +++ b/R/05_02_MSDM_comparison.qmd @@ -277,28 +277,15 @@ bslib::card(plot, full_screen = T) ## *Relative Performance* -### *Ranking* - - ```{r delta, echo = FALSE, message=FALSE, warnings=FALSE} -results_ranked = results_final_long %>% - group_by(species, metric) %>% - mutate(rank = rev(rank(value))) %>% - group_by(model, metric) %>% - summarize(mean_rank = mean(rank)) %>% - group_by(metric) %>% - mutate(position = rank(mean_rank)) - results_ranked_obs = results_final_long %>% group_by(species, metric) %>% mutate(rank = rev(rank(value))) -ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) + - geom_point(alpha = 0.1) + - scale_y_continuous(name = "rank (lower is better)") + - scale_x_log10() + - geom_smooth() + - theme_minimal() +reglines = results_ranked_obs %>% + group_by(model, metric) %>% + group_modify(~as.data.frame(loess.smooth(.x$obs, .x$rank))) + # The table below summarizes the relative performance of the models across different observation frequency ranges. The `rank` column indicates the model's performance rank compared to all other models for a given combination of model and metric. The subsequent columns, `(1,10]`, `(10,25]`, ..., `(5000, Inf]`, represent bins of observation frequency. The values in these columns show how many times the model's performance was ranked at the specified `rank` within the respective frequency range. @@ -320,7 +307,127 @@ ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) + # DT::datatable(df_print) ``` -### *Trait space* +::: panel-tabset +### *AUC* + +```{r echo = FALSE} +df_plot = dplyr::filter(results_ranked_obs, metric == "auc") +reglines_plot = dplyr::filter(reglines, metric == "auc") + +plot = plot_ly( + data = df_plot, + x = ~obs, + y = ~rank, + color = ~model, + type = 'scatter', + mode = 'markers', + opacity = 0.5 +) %>% + layout( + yaxis = list(title = "rank (lower is better)"), + xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis + showlegend = TRUE + ) + + +for(model_name in unique(df_plot$model)){ + reg_data = dplyr::filter(reglines_plot, model == model_name) + plot = plot %>% + add_lines( + data = reg_data, + x = ~x, + y = ~y, + color = model_name, # Set color to match legendgroup + legendgroup = model_name, + name = paste(model_name, '(smooth)'), + showlegend = FALSE, + ) +} + +bslib::card(plot, full_screen = T) +``` + +### *Accuracy* + +```{r echo = FALSE} +df_plot = dplyr::filter(results_ranked_obs, metric == "accuracy") +reglines_plot = dplyr::filter(reglines, metric == "accuracy") + +plot = plot_ly( + data = df_plot, + x = ~obs, + y = ~rank, + color = ~model, + type = 'scatter', + mode = 'markers', + opacity = 0.5 +) %>% + layout( + yaxis = list(title = "rank (lower is better)"), + xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis + showlegend = TRUE + ) + + +for(model_name in unique(df_plot$model)){ + reg_data = dplyr::filter(reglines_plot, model == model_name) + plot = plot %>% + add_lines( + data = reg_data, + x = ~x, + y = ~y, + color = model_name, # Set color to match legendgroup + legendgroup = model_name, + name = paste(model_name, '(smooth)'), + showlegend = FALSE, + ) +} + +bslib::card(plot, full_screen = T) +``` + +### *F1 score* + +```{r echo = FALSE} +df_plot = dplyr::filter(results_ranked_obs, metric == "f1") +reglines_plot = dplyr::filter(reglines, metric == "f1") + +plot = plot_ly( + data = df_plot, + x = ~obs, + y = ~rank, + color = ~model, + type = 'scatter', + mode = 'markers', + opacity = 0.5 +) %>% + layout( + yaxis = list(title = "rank (lower is better)"), + xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis + showlegend = TRUE + ) + + +for(model_name in unique(df_plot$model)){ + reg_data = dplyr::filter(reglines_plot, model == model_name) + plot = plot %>% + add_lines( + data = reg_data, + x = ~x, + y = ~y, + color = model_name, # Set color to match legendgroup + legendgroup = model_name, + name = paste(model_name, '(smooth)'), + showlegend = FALSE, + ) +} + +bslib::card(plot, full_screen = T) +``` +::: + + +## *Trait space* ```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE} load("../data/r_objects/traits_proc.RData") -- GitLab