Skip to content
Snippets Groups Projects
Commit cbc47e17 authored by ye87zine's avatar ye87zine
Browse files

additions to msdm analysis

parent c49fa18f
No related merge requests found
...@@ -277,28 +277,15 @@ bslib::card(plot, full_screen = T) ...@@ -277,28 +277,15 @@ bslib::card(plot, full_screen = T)
## *Relative Performance*
### *Ranking*
```{r delta, echo = FALSE, message=FALSE, warnings=FALSE}
# Rank models within each species/metric combination (rank 1 = best model),
# then summarize the mean rank per model and metric, and derive each model's
# overall position per metric.
#
# Bug fix: the original used rev(rank(value)), which reverses the rank vector
# by row position rather than ranking in descending order of value. That is
# only correct when rows happen to be sorted by value. rank(-value) is the
# proper descending rank (higher metric value -> lower, i.e. better, rank).
results_ranked <- results_final_long %>%
  group_by(species, metric) %>%
  mutate(rank = rank(-value)) %>%
  group_by(model, metric) %>%
  summarize(mean_rank = mean(rank), .groups = "drop") %>%
  group_by(metric) %>%
  mutate(position = rank(mean_rank)) %>%
  ungroup()  # return an ungrouped frame so later verbs are not silently grouped
# Observation-level ranks: within each species/metric combination, rank the
# models so that rank 1 = best (higher metric value is better).
#
# Bug fix: rev(rank(value)) does not compute a descending rank — it assigns
# row i the rank of row n+1-i. rank(-value) gives the intended ordering.
results_ranked_obs <- results_final_long %>%
  group_by(species, metric) %>%
  mutate(rank = rank(-value)) %>%
  ungroup()

# Loess regression of rank against observation count, one smooth per
# model/metric combination, used as overlay lines in the plots below.
reglines <- results_ranked_obs %>%
  group_by(model, metric) %>%
  group_modify(~ as.data.frame(loess.smooth(.x$obs, .x$rank))) %>%
  ungroup()
# The table below summarizes the relative performance of the models across different observation frequency ranges. The `rank` column indicates the model's performance rank compared to all other models for a given combination of model and metric. The subsequent columns, `(1,10]`, `(10,25]`, ..., `(5000, Inf]`, represent bins of observation frequency. The values in these columns show how many times the model's performance was ranked at the specified `rank` within the respective frequency range.
...@@ -320,7 +307,127 @@ ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) + ...@@ -320,7 +307,127 @@ ggplot(data = results_ranked_obs, aes(x = obs, y = rank, color = model)) +
# DT::datatable(df_print) # DT::datatable(df_print)
``` ```
### *Trait space* ::: panel-tabset
### *AUC*
```{r echo = FALSE}
# Interactive scatter of per-observation model rank vs. number of
# observations (log x-axis), restricted to the AUC metric, with one loess
# smooth line overlaid per model.
df_plot <- dplyr::filter(results_ranked_obs, metric == "auc")
reglines_plot <- dplyr::filter(reglines, metric == "auc")

plot <- plot_ly(
  data = df_plot,
  x = ~obs,
  y = ~rank,
  color = ~model,
  type = "scatter",
  mode = "markers",
  opacity = 0.5
) %>%
  layout(
    yaxis = list(title = "rank (lower is better)"),
    xaxis = list(title = "number of observations", type = "log"),  # log scale for x-axis
    showlegend = TRUE
  )

# Overlay the per-model loess smooth, colored to match that model's markers.
for (model_name in unique(df_plot$model)) {
  reg_data <- dplyr::filter(reglines_plot, model == model_name)
  plot <- plot %>%
    add_lines(
      data = reg_data,
      x = ~x,
      y = ~y,
      color = model_name,        # set color to match legendgroup
      legendgroup = model_name,
      name = paste(model_name, "(smooth)"),
      showlegend = FALSE         # bug fix: removed trailing comma (empty argument error in R)
    )
}

bslib::card(plot, full_screen = TRUE)  # TRUE, not T (T is reassignable)
```
### *Accuracy*
```{r echo = FALSE}
# Interactive scatter of per-observation model rank vs. number of
# observations (log x-axis), restricted to the accuracy metric, with one
# loess smooth line overlaid per model.
df_plot <- dplyr::filter(results_ranked_obs, metric == "accuracy")
reglines_plot <- dplyr::filter(reglines, metric == "accuracy")

plot <- plot_ly(
  data = df_plot,
  x = ~obs,
  y = ~rank,
  color = ~model,
  type = "scatter",
  mode = "markers",
  opacity = 0.5
) %>%
  layout(
    yaxis = list(title = "rank (lower is better)"),
    xaxis = list(title = "number of observations", type = "log"),  # log scale for x-axis
    showlegend = TRUE
  )

# Overlay the per-model loess smooth, colored to match that model's markers.
for (model_name in unique(df_plot$model)) {
  reg_data <- dplyr::filter(reglines_plot, model == model_name)
  plot <- plot %>%
    add_lines(
      data = reg_data,
      x = ~x,
      y = ~y,
      color = model_name,        # set color to match legendgroup
      legendgroup = model_name,
      name = paste(model_name, "(smooth)"),
      showlegend = FALSE         # bug fix: removed trailing comma (empty argument error in R)
    )
}

bslib::card(plot, full_screen = TRUE)  # TRUE, not T (T is reassignable)
```
### *F1 score*
```{r echo = FALSE}
# Interactive scatter of per-observation model rank vs. number of
# observations (log x-axis), restricted to the F1 metric, with one loess
# smooth line overlaid per model.
df_plot <- dplyr::filter(results_ranked_obs, metric == "f1")
reglines_plot <- dplyr::filter(reglines, metric == "f1")

plot <- plot_ly(
  data = df_plot,
  x = ~obs,
  y = ~rank,
  color = ~model,
  type = "scatter",
  mode = "markers",
  opacity = 0.5
) %>%
  layout(
    yaxis = list(title = "rank (lower is better)"),
    xaxis = list(title = "number of observations", type = "log"),  # log scale for x-axis
    showlegend = TRUE
  )

# Overlay the per-model loess smooth, colored to match that model's markers.
for (model_name in unique(df_plot$model)) {
  reg_data <- dplyr::filter(reglines_plot, model == model_name)
  plot <- plot %>%
    add_lines(
      data = reg_data,
      x = ~x,
      y = ~y,
      color = model_name,        # set color to match legendgroup
      legendgroup = model_name,
      name = paste(model_name, "(smooth)"),
      showlegend = FALSE         # bug fix: removed trailing comma (empty argument error in R)
    )
}

bslib::card(plot, full_screen = TRUE)  # TRUE, not T (T is reassignable)
```
:::
## *Trait space*
```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE} ```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE}
load("../data/r_objects/traits_proc.RData") load("../data/r_objects/traits_proc.RData")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment