additions to msdm analysis

cbc47e17 · ye87zine · c49fa18f · cbc47e17
Commit cbc47e17 authored 7 months ago by ye87zine
--- a/R/05_02_MSDM_comparison.qmd
+++ b/R/05_02_MSDM_comparison.qmd
@@ -277,28 +277,15 @@ bslib::card(plot, full_screen = T)
 ## *Relative Performance*
-### *Ranking*
 ```{r delta, echo = FALSE, message=FALSE, warnings=FALSE}
-results_ranked = results_final_long %>% 
-  group_by(species, metric) %>% 
-  mutate(rank = rev(rank(value))) %>% 
-  group_by(model, metric) %>% 
-  summarize(mean_rank = mean(rank)) %>% 
-  group_by(metric) %>% 
-  mutate(position = rank(mean_rank))
 results_ranked_obs = results_final_long %>% 
  group_by(species,  metric) %>% 
  mutate(rank = rev(rank(value)))
-ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) +
+reglines = results_ranked_obs %>%
-  geom_point(alpha = 0.1) +
+  group_by(model, metric) %>%
-  scale_y_continuous(name = "rank (lower is better)") +
+  group_modify(~as.data.frame(loess.smooth(.x$obs, .x$rank)))
-  scale_x_log10() +
-  geom_smooth() +
-  theme_minimal()
 # The table below summarizes the relative performance of the models across different observation frequency ranges. The `rank` column indicates the model's performance rank compared to all other models for a given combination of model and metric. The subsequent columns, `(1,10]`, `(10,25]`, ..., `(5000, Inf]`, represent bins of observation frequency. The values in these columns show how many times the model's performance was ranked at the specified `rank` within the respective frequency range.
@@ -320,7 +307,127 @@ ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) +
 # DT::datatable(df_print)
 ```
-### *Trait space*
+::: panel-tabset
+### *AUC*
+```{r echo = FALSE}
+df_plot = dplyr::filter(results_ranked_obs, metric == "auc")
+reglines_plot = dplyr::filter(reglines, metric == "auc")
+plot = plot_ly(
+  data = df_plot,
+  x = ~obs, 
+  y = ~rank, 
+  color = ~model,
+  type = 'scatter', 
+  mode = 'markers', 
+  opacity = 0.5
+) %>%
+  layout(
+    yaxis = list(title = "rank (lower is better)"),
+    xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
+    showlegend = TRUE
+  ) 
+for(model_name in unique(df_plot$model)){
+  reg_data = dplyr::filter(reglines_plot, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~y,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(smooth)'),
+      showlegend = FALSE,
+    )
+}
+bslib::card(plot, full_screen = T)
+```
+### *Accuracy*
+```{r echo = FALSE}
+df_plot = dplyr::filter(results_ranked_obs, metric == "accuracy")
+reglines_plot = dplyr::filter(reglines, metric == "accuracy")
+plot = plot_ly(
+  data = df_plot,
+  x = ~obs, 
+  y = ~rank, 
+  color = ~model,
+  type = 'scatter', 
+  mode = 'markers', 
+  opacity = 0.5
+) %>%
+  layout(
+    yaxis = list(title = "rank (lower is better)"),
+    xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
+    showlegend = TRUE
+  ) 
+for(model_name in unique(df_plot$model)){
+  reg_data = dplyr::filter(reglines_plot, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~y,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(smooth)'),
+      showlegend = FALSE,
+    )
+}
+bslib::card(plot, full_screen = T)
+```
+### *F1 score*
+```{r echo = FALSE}
+df_plot = dplyr::filter(results_ranked_obs, metric == "f1")
+reglines_plot = dplyr::filter(reglines, metric == "f1")
+plot = plot_ly(
+  data = df_plot,
+  x = ~obs, 
+  y = ~rank, 
+  color = ~model,
+  type = 'scatter', 
+  mode = 'markers', 
+  opacity = 0.5
+) %>%
+  layout(
+    yaxis = list(title = "rank (lower is better)"),
+    xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
+    showlegend = TRUE
+  ) 
+for(model_name in unique(df_plot$model)){
+  reg_data = dplyr::filter(reglines_plot, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~y,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(smooth)'),
+      showlegend = FALSE,
+    )
+}
+bslib::card(plot, full_screen = T)
+```
+:::
+## *Trait space*
 ```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE}
 load("../data/r_objects/traits_proc.RData")