From cbc47e17c5441cb3e950b6954c2580d770d7eae0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?K=C3=B6nig?= <ye87zine@usr.idiv.de>
Date: Thu, 12 Dec 2024 14:55:36 +0100
Subject: [PATCH] additions to msdm analysis

---
 R/05_02_MSDM_comparison.qmd | 143 +++++++++++++++++++++++++++++++-----
 1 file changed, 125 insertions(+), 18 deletions(-)

diff --git a/R/05_02_MSDM_comparison.qmd b/R/05_02_MSDM_comparison.qmd
index 0adece2..5b14c8a 100644
--- a/R/05_02_MSDM_comparison.qmd
+++ b/R/05_02_MSDM_comparison.qmd
@@ -277,28 +277,15 @@ bslib::card(plot, full_screen = T)
 
 ## *Relative Performance*
 
-### *Ranking*
-
-
 ```{r delta, echo = FALSE, message=FALSE, warnings=FALSE}
-results_ranked = results_final_long %>% 
-  group_by(species, metric) %>% 
-  mutate(rank = rev(rank(value))) %>% 
-  group_by(model, metric) %>% 
-  summarize(mean_rank = mean(rank)) %>% 
-  group_by(metric) %>% 
-  mutate(position = rank(mean_rank))
-
 results_ranked_obs = results_final_long %>% 
   group_by(species,  metric) %>% 
   mutate(rank = rev(rank(value)))
 
-ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) +
-  geom_point(alpha = 0.1) +
-  scale_y_continuous(name = "rank (lower is better)") +
-  scale_x_log10() +
-  geom_smooth() +
-  theme_minimal()
+reglines = results_ranked_obs %>%
+  group_by(model, metric) %>%
+  group_modify(~as.data.frame(loess.smooth(.x$obs, .x$rank)))
+
 
 # The table below summarizes the relative performance of the models across different observation frequency ranges. The `rank` column indicates the model's performance rank compared to all other models for a given combination of model and metric. The subsequent columns, `(1,10]`, `(10,25]`, ..., `(5000, Inf]`, represent bins of observation frequency. The values in these columns show how many times the model's performance was ranked at the specified `rank` within the respective frequency range.
 
@@ -320,7 +307,127 @@ ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) +
 # DT::datatable(df_print)
 ```
 
-### *Trait space*
+::: panel-tabset
+### *AUC*
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(results_ranked_obs, metric == "auc")
+reglines_plot = dplyr::filter(reglines, metric == "auc")
+
+plot = plot_ly(
+  data = df_plot,
+  x = ~obs, 
+  y = ~rank, 
+  color = ~model,
+  type = 'scatter', 
+  mode = 'markers', 
+  opacity = 0.5
+) %>%
+  layout(
+    yaxis = list(title = "rank (lower is better)"),
+    xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
+    showlegend = TRUE
+  ) 
+
+
+for(model_name in unique(df_plot$model)){
+  reg_data = dplyr::filter(reglines_plot, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~y,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(smooth)'),
+      showlegend = FALSE,
+    )
+}
+
+bslib::card(plot, full_screen = T)
+```
+
+### *Accuracy*
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(results_ranked_obs, metric == "accuracy")
+reglines_plot = dplyr::filter(reglines, metric == "accuracy")
+
+plot = plot_ly(
+  data = df_plot,
+  x = ~obs, 
+  y = ~rank, 
+  color = ~model,
+  type = 'scatter', 
+  mode = 'markers', 
+  opacity = 0.5
+) %>%
+  layout(
+    yaxis = list(title = "rank (lower is better)"),
+    xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
+    showlegend = TRUE
+  ) 
+
+
+for(model_name in unique(df_plot$model)){
+  reg_data = dplyr::filter(reglines_plot, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~y,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(smooth)'),
+      showlegend = FALSE,
+    )
+}
+
+bslib::card(plot, full_screen = T)
+```
+
+### *F1 score*
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(results_ranked_obs, metric == "f1")
+reglines_plot = dplyr::filter(reglines, metric == "f1")
+
+plot = plot_ly(
+  data = df_plot,
+  x = ~obs, 
+  y = ~rank, 
+  color = ~model,
+  type = 'scatter', 
+  mode = 'markers', 
+  opacity = 0.5
+) %>%
+  layout(
+    yaxis = list(title = "rank (lower is better)"),
+    xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
+    showlegend = TRUE
+  ) 
+
+
+for(model_name in unique(df_plot$model)){
+  reg_data = dplyr::filter(reglines_plot, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~y,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(smooth)'),
+      showlegend = FALSE,
+    )
+}
+
+bslib::card(plot, full_screen = T)
+```
+:::
+
+
+## *Trait space*
 
 ```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE}
 load("../data/r_objects/traits_proc.RData")
-- 
GitLab