Skip to content
Snippets Groups Projects
Commit cbc47e17 authored by ye87zine's avatar ye87zine
Browse files

additions to msdm analysis

parent c49fa18f
No related branches found
No related tags found
No related merge requests found
...@@ -277,28 +277,15 @@ bslib::card(plot, full_screen = T) ...@@ -277,28 +277,15 @@ bslib::card(plot, full_screen = T)
## *Relative Performance* ## *Relative Performance*
### *Ranking*
```{r delta, echo = FALSE, message=FALSE, warnings=FALSE} ```{r delta, echo = FALSE, message=FALSE, warnings=FALSE}
results_ranked = results_final_long %>%
group_by(species, metric) %>%
mutate(rank = rev(rank(value))) %>%
group_by(model, metric) %>%
summarize(mean_rank = mean(rank)) %>%
group_by(metric) %>%
mutate(position = rank(mean_rank))
results_ranked_obs = results_final_long %>% results_ranked_obs = results_final_long %>%
group_by(species, metric) %>% group_by(species, metric) %>%
mutate(rank = rev(rank(value))) mutate(rank = rev(rank(value)))
ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) + reglines = results_ranked_obs %>%
geom_point(alpha = 0.1) + group_by(model, metric) %>%
scale_y_continuous(name = "rank (lower is better)") + group_modify(~as.data.frame(loess.smooth(.x$obs, .x$rank)))
scale_x_log10() +
geom_smooth() +
theme_minimal()
# The table below summarizes the relative performance of the models across different observation frequency ranges. The `rank` column indicates the model's performance rank compared to all other models for a given combination of model and metric. The subsequent columns, `(1,10]`, `(10,25]`, ..., `(5000, Inf]`, represent bins of observation frequency. The values in these columns show how many times the model's performance was ranked at the specified `rank` within the respective frequency range. # The table below summarizes the relative performance of the models across different observation frequency ranges. The `rank` column indicates the model's performance rank compared to all other models for a given combination of model and metric. The subsequent columns, `(1,10]`, `(10,25]`, ..., `(5000, Inf]`, represent bins of observation frequency. The values in these columns show how many times the model's performance was ranked at the specified `rank` within the respective frequency range.
...@@ -320,7 +307,127 @@ ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) + ...@@ -320,7 +307,127 @@ ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) +
# DT::datatable(df_print) # DT::datatable(df_print)
``` ```
### *Trait space* ::: panel-tabset
### *AUC*
```{r echo = FALSE}
df_plot = dplyr::filter(results_ranked_obs, metric == "auc")
reglines_plot = dplyr::filter(reglines, metric == "auc")
plot = plot_ly(
data = df_plot,
x = ~obs,
y = ~rank,
color = ~model,
type = 'scatter',
mode = 'markers',
opacity = 0.5
) %>%
layout(
yaxis = list(title = "rank (lower is better)"),
xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
showlegend = TRUE
)
for(model_name in unique(df_plot$model)){
reg_data = dplyr::filter(reglines_plot, model == model_name)
plot = plot %>%
add_lines(
data = reg_data,
x = ~x,
y = ~y,
color = model_name, # Set color to match legendgroup
legendgroup = model_name,
name = paste(model_name, '(smooth)'),
showlegend = FALSE,
)
}
bslib::card(plot, full_screen = T)
```
### *Accuracy*
```{r echo = FALSE}
df_plot = dplyr::filter(results_ranked_obs, metric == "accuracy")
reglines_plot = dplyr::filter(reglines, metric == "accuracy")
plot = plot_ly(
data = df_plot,
x = ~obs,
y = ~rank,
color = ~model,
type = 'scatter',
mode = 'markers',
opacity = 0.5
) %>%
layout(
yaxis = list(title = "rank (lower is better)"),
xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
showlegend = TRUE
)
for(model_name in unique(df_plot$model)){
reg_data = dplyr::filter(reglines_plot, model == model_name)
plot = plot %>%
add_lines(
data = reg_data,
x = ~x,
y = ~y,
color = model_name, # Set color to match legendgroup
legendgroup = model_name,
name = paste(model_name, '(smooth)'),
showlegend = FALSE,
)
}
bslib::card(plot, full_screen = T)
```
### *F1 score*
```{r echo = FALSE}
df_plot = dplyr::filter(results_ranked_obs, metric == "f1")
reglines_plot = dplyr::filter(reglines, metric == "f1")
plot = plot_ly(
data = df_plot,
x = ~obs,
y = ~rank,
color = ~model,
type = 'scatter',
mode = 'markers',
opacity = 0.5
) %>%
layout(
yaxis = list(title = "rank (lower is better)"),
xaxis = list(title = "number of observations", type = "log"), # log scale for x-axis
showlegend = TRUE
)
for(model_name in unique(df_plot$model)){
reg_data = dplyr::filter(reglines_plot, model == model_name)
plot = plot %>%
add_lines(
data = reg_data,
x = ~x,
y = ~y,
color = model_name, # Set color to match legendgroup
legendgroup = model_name,
name = paste(model_name, '(smooth)'),
showlegend = FALSE,
)
}
bslib::card(plot, full_screen = T)
```
:::
## *Trait space*
```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE} ```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE}
load("../data/r_objects/traits_proc.RData") load("../data/r_objects/traits_proc.RData")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment