diff --git a/R/04_02_msdm_modeling.R b/R/04_02_msdm_modeling.R
index 84f9ba5c2cd4faab3ce35ebc45b2331397ac0fd4..a9e5eea27027b37d37845d18320ac91ae19990ec 100644
--- a/R/04_02_msdm_modeling.R
+++ b/R/04_02_msdm_modeling.R
@@ -23,7 +23,7 @@ msdm_fit = dnn(
   hidden = c(500L, 1000L, 1000L),
   loss = "binomial",
   activation = c("sigmoid", "leaky_relu", "leaky_relu"),
-  epochs = 10000L,
+  epochs = 25000L,
   lr = 0.01,
   baseloss = 1,
   batchsize = nrow(train_data)/5,
@@ -36,12 +36,12 @@ msdm_fit = dnn(
   device = "cuda"
 )
 
-save(msdm_fit, file = "data/r_objects/msdm_fit.RData")
+save(msdm_fit, file = "data/r_objects/msdm_fit2.RData")
 
 # ----------------------------------------------------------------------#
 # Evaluate model                                                      ####
 # ----------------------------------------------------------------------#
-load("data/r_objects/msdm_fit.RData")
+load("data/r_objects/msdm_fit2.RData")
 
 # Overall
 preds_train = predict(msdm_fit, newdata = as.matrix(train_data), type = "response")
@@ -56,7 +56,8 @@ eval_overall = evaluate_model(msdm_fit, test_data)
 data_split = split(model_data, model_data$species)
 
 msdm_results = lapply(data_split, function(data_spec){
-  test_data = dplyr::filter(data_spec, train == 0)
+  test_data = dplyr::filter(data_spec, train == 0) %>%
+    dplyr::select(-species)
 
   msdm_performance = tryCatch({
     evaluate_model(msdm_fit, test_data)
@@ -77,4 +78,4 @@ msdm_results = lapply(data_split, function(data_spec){
   )
 }) %>% bind_rows()
 
-save(msdm_results, file = "data/r_objects/msdm_results.RData")
+save(msdm_results, file = "data/r_objects/msdm_results2.RData")
diff --git a/R/05_performance_analysis.qmd b/R/05_performance_analysis.qmd
index c2d7d442fe8689080c38bbbdc8acf5e926ab8c2a..7eb33f11dc84d6568cbb81df38cabb2cd9bd4a9a 100644
--- a/R/05_performance_analysis.qmd
+++ b/R/05_performance_analysis.qmd
@@ -12,8 +12,10 @@ library(sf)
 library(plotly)
 library(DT)
 
+
 load("../data/r_objects/ssdm_results.RData")
-load("../data/r_objects/msdm_results.RData")
+load("../data/r_objects/msdm_fit2.RData")
+load("../data/r_objects/msdm_results2.RData")
 load("../data/r_objects/range_maps.RData")
 load("../data/r_objects/range_maps_gridded.RData")
 load("../data/r_objects/occs_final.RData")
@@ -54,7 +56,7 @@ lin_regression = function(x, y){
 
 ## Summary
 
-This document summarizes the performance of four SDM algorithms (Random Forest, Gradient Boosting Machine, Generalized Linear Model, Deep Neural Network) for `r I(length(unique(ssdm_results$species)))` South American mammal species. We use `r I(length(focal_metrics))` metrics (`r I(paste(focal_metrics, collapse = ', '))`) to evaluate model performance and look at how performance varies with five factors (number of records, range size, range coverage, range coverage bias, and functional group).
+This document summarizes the performance of four SDM algorithms (Random Forest, Gradient Boosting Machine, Generalized Linear Model, Deep Neural Network) for `r I(length(unique(ssdm_results$species)))` South American mammal species. We use `r I(xfun::numbers_to_words(length(focal_metrics)))` metrics (`r I(paste(focal_metrics, collapse = ', '))`) to evaluate model performance and look at how performance varies with five factors (number of records, range size, range coverage, range coverage bias, and functional group).
 
 ### Modeling decisions:
 
@@ -70,31 +72,30 @@ This document summarizes the performance of four SDM algorithms (Random Forest,
 Random Forest
 
 - Spatial block cross-validation during training
-
 - Hyperparameter tuning of `mtry`
 
-Generalized boosted maching
+Generalized boosted machine
 
 - Spatial block cross-validation during training
-
 - Hyperparameter tuning across `n.trees` , `interaction.depth` , `shrinkage`, `n.minobsinnode`
 
 Generalized Linear Model
 
 - Spatial block cross-validation during training
-
 - Logistic model with binomial link function
 
-Neural Netwok (single-species)
+Neural Network (single-species)
 
 - Three hidden layers (sigmoid - 500, leaky_relu - 200, leaky_relu - 50)
-
-- Binomial loss
-
-- Adam optimizer
+- Binomial loss, ADAM optimizer
+- Poor convergence
 
 Neural Network (multi-species)
 
+- Three hidden layers (sigmoid - 500, leaky_relu - 1000, leaky_relu - 1000)
+- Binomial loss, ADAM optimizer
+- Very slow convergence (`r I(length(which(!is.na(msdm_fit$losses$train_l))))` epochs)
+
 ### Key findings:
 
 - Performance: RF \> GBM \> GLM \> NN
@@ -507,7 +508,7 @@ bslib::card(plot, full_screen = T)
 Functional groups were assigned based on taxonomic order. The following groupings were used:
 
-| Functional group | Taxomic orders |
-|------------------------|------------------------------------------------|
+| Functional group | Taxonomic orders |
+|------------------------|-----------------------------------------------|
 | large ground-dwelling | Carnivora, Artiodactyla, Cingulata, Perissodactyla |
 | small ground-dwelling | Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha |
 | arboreal | Primates, Pilosa |
@@ -560,3 +561,26 @@ plot <- plot %>%
 
 bslib::card(plot, full_screen = T)
 ```
+
+### Relative performance
+
+The table below summarizes the relative performance of the models across different observation frequency ranges. The `rank` column indicates the model's performance rank compared to all other models for a given species and metric. The subsequent columns, `(1,10]`, `(10,25]`, ..., `(5000, Inf]`, represent bins of observation frequency. The values in these columns show how many times the model's performance achieved the specified `rank` within the respective frequency range.
+
+```{r msdm_vs_ssdm, echo = FALSE, message=FALSE, warning=FALSE}
+freq_thresholds = c(1, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, Inf)
+
+df_print = performance %>%
+  mutate(freq_class = cut(obs, freq_thresholds, dig.lab = 5)) %>%
+  group_by(species, metric, freq_class) %>%
+  dplyr::mutate(rank = dplyr::min_rank(dplyr::desc(value))) %>% # rank models per species and metric (order() would return a permutation, not ranks)
+  group_by(model, metric, rank, freq_class) %>%
+  tally() %>%
+  pivot_wider(names_from = freq_class, values_from = n) %>%
+  dplyr::select("model", "metric", "rank", "(1,10]", "(10,25]",
+                "(25,50]", "(50,100]", "(100,250]", "(250,500]",
+                "(500,1000]", "(1000,2500]", "(2500,5000]", "(5000,Inf]") %>%
+  dplyr::arrange(metric, rank, model) %>%
+  replace(is.na(.), 0)
+
+DT::datatable(df_print)
+```
diff --git a/R/05_performance_analysis.rmarkdown b/R/05_performance_analysis.rmarkdown
deleted file mode 100644
index d82a62297406fbfbed633ecbdc7ecba1cbc828bf..0000000000000000000000000000000000000000
--- a/R/05_performance_analysis.rmarkdown
+++ /dev/null
@@ -1,575 +0,0 @@
----
-title: "sSDM Performance analysis"
-format: html
-editor: visual
-engine: knitr
----
-
-```{r init, echo = FALSE, include = FALSE}
-library(tidyverse)
-library(Symobio)
-library(sf)
-library(plotly)
-library(DT)
-
-load("../data/r_objects/ssdm_results.RData")
-load("../data/r_objects/msdm_results.RData")
-load("../data/r_objects/range_maps.RData")
-load("../data/r_objects/range_maps_gridded.RData")
-load("../data/r_objects/occs_final.RData")
-load("../data/r_objects/functional_groups.RData")
-sf::sf_use_s2(use_s2 = FALSE)
-```
-
-```{r globals, echo = FALSE, include = FALSE}
-# Select metrics
-focal_metrics = c("auc", "f1", "accuracy") # There's a weird bug in plotly that scrambles up lines when using more then three groups
-
-# Dropdown options
-plotly_buttons = list()
-for(metric in focal_metrics){
-  plotly_buttons[[length(plotly_buttons) + 1]] = list(method = "restyle", args = list("transforms[0].value", metric), label = metric)
-}
-
-# Regression functions
-asym_regression = function(x, y){
-  nls_fit = nls(y ~ 1 - (1-b) * exp(-c * log(x)), start = list(b = 0.1, c = 0.1))
-  new_x = exp(seq(log(min(x)), log(max(x)), length.out = 100))
-  data.frame(
-    x = new_x,
-    fit = predict(nls_fit, newdata = data.frame(x = new_x))
-  )
-}
-
-lin_regression = function(x, y){
-  glm_fit = suppressWarnings(glm(y~x, family = "binomial"))
-  new_x = seq(min(x), max(x), length.out = 100)
-  data.frame(
-    x = new_x,
-    fit = predict(glm_fit, newdata = data.frame(x = new_x), type = "response")
-  )
-}
-
-```
-
-
-## Summary
-
-This document summarizes the performance of four SDM algorithms (Random Forest, Gradient Boosting Machine, Generalized Linear Model, Deep Neural Network) for `r I(length(unique(ssdm_results$species)))` South American mammal species. We use `r I(length(focal_metrics))` metrics (`r I(paste(focal_metrics, collapse = ', '))`) to evaluate model performance and look at how performance varies with five factors (number of records, range size, range coverage, range coverage bias, and functional group).
-
-### Modeling decisions:
-
-#### Data
-
-- Randomly sampled pseudo-absences from expanded area of extent of occurrence records (×1.25)
-- Balanced presences and absences for each species
-- Predictors: all 19 CHELSA bioclim variables
-- 70/30 Split of training vs. test data
-
-#### Algorithms
-
-Random Forest
-
-- Spatial block cross-validation during training
-
-- Hyperparameter tuning of `mtry`
-
-Generalized boosted maching
-
-- Spatial block cross-validation during training
-
-- Hyperparameter tuning across `n.trees` , `interaction.depth` , `shrinkage`, `n.minobsinnode`
-
-Generalized Linear Model
-
-- Spatial block cross-validation during training
-
-- Logistic model with binomial link function
-
-Neural Netwok (single-species)
-
-- Three hidden layers (sigmoid - 500, leaky_relu - 200, leaky_relu - 50)
-
-- Binomial loss
-
-- Adam optimizer
-
-Neural Network (multi-species)
-
-### Key findings:
-
-- Performance: RF \> GBM \> GLM \> NN
-- Convergence problems with Neural Notwork Models
-- More occurrence records and larger range sizes tended to improve model performance
-- Higher range coverage correlated with better performance.
-- Range coverage bias and functional group showed some impact but were less consistent
-
-## Analysis
-
-The table below shows the analysed modeling results.
-
-
-```{r performance, echo = FALSE, message=FALSE, warnings=FALSE}
-performance = bind_rows(ssdm_results, msdm_results) %>%
-  pivot_longer(c(auc, accuracy, kappa, precision, recall, f1), names_to = "metric") %>%
-  dplyr::filter(!is.na(value)) %>%
-  dplyr::mutate(
-    metric = factor(metric, levels = c("auc", "kappa", "f1", "accuracy", "precision", "recall")),
-    value = round(pmax(value, 0, na.rm = T), 3) # Fix one weird instance of f1 < 0
-  ) %>%
-  dplyr::filter(metric %in% focal_metrics)
-
-DT::datatable(performance)
-```
-
-
-### Number of records
-
-- Model performance was generally better for species with more observations
-- Very poor performance below 50-100 observations
-
-
-```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE}
-df_plot = performance
-
-# Calculate regression lines for each model and metric combination
-suppressWarnings({
-  regression_lines = df_plot %>%
-    group_by(model, metric) %>%
-    group_modify(~asym_regression(.x$obs, .x$value))
-})
-
-# Create base plot
-plot <- plot_ly() %>%
-  layout(
-    title = "Model Performance vs. Number of observations",
-    xaxis = list(title = "Number of observations", type = "log"),
-    yaxis = list(title = "Value"),
-    legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
-    margin = list(r = 150), # Add right margin to accommodate legend
-    hovermode = 'closest',
-    updatemenus = list(
-      list(
-        type = "dropdown",
-        active = 0,
-        buttons = plotly_buttons
-      )
-    )
-  )
-
-# Points
-for (model_name in unique(df_plot$model)) {
-  plot = plot %>%
-    add_markers(
-      data = filter(df_plot, model == model_name),
-      x = ~obs,
-      y = ~value,
-      color = model_name, # Set color to match legendgroup
-      legendgroup = model_name,
-      opacity = 0.6,
-      name = ~model,
-      hoverinfo = 'text',
-      text = ~paste("Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3)),
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
-      )
-    )
-}
-
-# Add regression lines
-for(model_name in unique(df_plot$model)){
-  reg_data = dplyr::filter(regression_lines, model == model_name)
-  plot = plot %>%
-    add_lines(
-      data = reg_data,
-      x = ~x,
-      y = ~fit,
-      color = model_name, # Set color to match legendgroup
-      legendgroup = model_name,
-      name = paste(model_name, '(fit)'),
-      showlegend = FALSE,
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
-      )
-    )
-}
-
-bslib::card(plot, full_screen = T)
-```
-
-
-### Range characteristics
-
-#### Range size
-
-Range size was calculated based on polygon layers from the IUCN Red List of Threatened Species (2016).
-
-- Model performance tended to be slightly higher for species with larger range size
-- Only RF shows continuous performance improvements beyond range sizes of \~5M km²
-
-
-```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE}
-
-df_join = range_maps %>%
-  dplyr::mutate(range_size = as.numeric(st_area(range_maps) / 1000000)) %>% # range in sqkm
-  sf::st_drop_geometry()
-
-df_plot = performance %>%
-  inner_join(df_join, by = c("species" = "name_matched"))
-
-# Calculate regression lines for each model and metric combination
-suppressWarnings({
-  regression_lines = df_plot %>%
-    group_by(model, metric) %>%
-    group_modify(~asym_regression(.x$range_size, .x$value))
-})
-
-# Create base plot
-plot <- plot_ly() %>%
-  layout(
-    title = "Model Performance vs. Range size",
-    xaxis = list(title = "Range size"),
-    yaxis = list(title = "Value"),
-    legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
-    margin = list(r = 150), # Add right margin to accommodate legend
-    hovermode = 'closest',
-    updatemenus = list(
-      list(
-        type = "dropdown",
-        active = 0,
-        buttons = plotly_buttons
-      )
-    )
-  )
-
-# Add regression lines and confidence intervals for each model
-for (model_name in unique(df_plot$model)) {
-  plot <- plot %>%
-    add_markers(
-      data = filter(df_plot, model == model_name),
-      x = ~range_size,
-      y = ~value,
-      color = model_name, # Set color to match legendgroup
-      legendgroup = model_name,
-      opacity = 0.6,
-      name = ~model,
-      hoverinfo = 'text',
-      text = ~paste("Species:", species, "<br>Range size:", range_size, "<br>Value:", round(value, 3)),
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
-      )
-    )
-}
-
-for (model_name in unique(df_plot$model)) {
-  reg_data = dplyr::filter(regression_lines, model == model_name)
-  plot = plot %>%
-    add_lines(
-      data = reg_data,
-      x = ~x,
-      y = ~fit,
-      color = model_name, # Set color to match legendgroup
-      legendgroup = model_name,
-      name = paste(model_name, '(fit)'),
-      showlegend = FALSE,
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
-      )
-    )
-}
-
-bslib::card(plot, full_screen = T)
-```
-
-
-#### Range coverage
-
-Species ranges were split into continuous hexagonal grid cells of 1 degree diameter. Range coverage was then calculated as the number of grid cells containing at least one occurrence record divided by the number of total grid cells.
-
-$$
-RangeCoverage = \frac{N_{cells\_occ}}{N_{cells\_total}}
-$$
-
-- Models for species with higher range coverage showed slightly better performance
-
-
-```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE}
-df_cells_total = range_maps_gridded %>%
-  dplyr::rename("species" = name_matched) %>%
-  group_by(species) %>%
-  summarise(cells_total = n()) %>%
-  st_drop_geometry()
-
-df_cells_occ <- range_maps_gridded %>%
-  st_join(occs_final, join = st_intersects) %>%
-  filter(name_matched == species) %>% # Filter only intersections of the same species
-  group_by(species) %>%
-  summarise(cells_occupied = n_distinct(geometry)) %>%
-  st_drop_geometry()
-
-df_join = df_cells_total %>%
-  dplyr::inner_join(df_cells_occ, by = "species") %>%
-  dplyr::mutate(range_cov = cells_occupied / cells_total) %>%
-  dplyr::select(species, range_cov)
-
-df_plot = performance %>%
-  inner_join(df_join, by = "species")
-
-# Calculate regression lines for each model and metric combination
-suppressWarnings({
-  regression_lines = df_plot %>%
-    group_by(model, metric) %>%
-    group_modify(~lin_regression(.x$range_cov, .x$value))
-})
-
-# Create base plot
-plot <- plot_ly() %>%
-  layout(
-    title = "Model Performance vs. Range coverage",
-    xaxis = list(title = "Range coverage"),
-    yaxis = list(title = "Value"),
-    legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
-    margin = list(r = 150), # Add right margin to accommodate legend
-    hovermode = 'closest',
-    updatemenus = list(
-      list(
-        type = "dropdown",
-        active = 0,
-        buttons = plotly_buttons
-      )
-    )
-  )
-
-# Add regression lines and confidence intervals for each model
-for (model_name in unique(df_plot$model)) {
-  plot <- plot %>%
-    add_markers(
-      data = filter(df_plot, model == model_name),
-      x = ~range_cov,
-      y = ~value,
-      color = model_name, # Set color to match legendgroup
-      legendgroup = model_name,
-      opacity = 0.6,
-      name = ~model,
-      hoverinfo = 'text',
-      text = ~paste("Species:", species, "<br>Range coverage:", range_cov, "<br>Value:", round(value, 3)),
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
-      )
-    )
-}
-
-for (model_name in unique(df_plot$model)) {
-  reg_data = dplyr::filter(regression_lines, model == model_name)
-  plot = plot %>%
-    add_lines(
-      data = reg_data,
-      x = ~x,
-      y = ~fit,
-      color = model_name, # Set color to match legendgroup
-      legendgroup = model_name,
-      name = paste(model_name, '(fit)'),
-      showlegend = FALSE,
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
-      )
-    )
-}
-
-bslib::card(plot, full_screen = T)
-```
-
-
-#### Range coverage bias
-
-Range coverage bias was calculated as 1 minus the ratio of the actual range coverage and the hypothetical range coverage if all observations were maximally spread out across the range.
-
-$$
-RangeCoverageBias = 1 - \frac{RangeCoverage}{min({N_{obs\_total}} / {N_{cells\_total}}, 1)}
-$$
-
-Higher bias values indicate that occurrence records are spatially more clustered within the range of the species.
-
-- There was no strong relationship between range coverage bias and model performance
-
-
-```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE}
-df_occs_total = occs_final %>%
-  st_drop_geometry() %>%
-  group_by(species) %>%
-  summarise(occs_total = n())
-
-df_join = df_occs_total %>%
-  dplyr::inner_join(df_cells_total, by = "species") %>%
-  dplyr::inner_join(df_cells_occ, by = "species") %>%
-  dplyr::mutate(range_bias = 1-((cells_occupied / cells_total) / pmin(occs_total / cells_total, 1)))
-
-df_plot = performance %>%
-  inner_join(df_join, by = "species")
-
-# Calculate regression lines for each model and metric combination
-suppressWarnings({
-  regression_lines = df_plot %>%
-    group_by(model, metric) %>%
-    group_modify(~lin_regression(.x$range_bias, .x$value))
-})
-
-# Create base plot
-plot <- plot_ly() %>%
-  layout(
-    title = "Model Performance vs. Range coverage bias",
-    xaxis = list(title = "Range coverage bias"),
-    yaxis = list(title = "Value"),
-    legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
-    margin = list(r = 150), # Add right margin to accommodate legend
-    hovermode = 'closest',
-    updatemenus = list(
-      list(
-        type = "dropdown",
-        active = 0,
-        buttons = plotly_buttons
-      )
-    )
-  )
-
-# Add regression lines and confidence intervals for each model
-for (model_name in unique(df_plot$model)) {
-  plot <- plot %>%
-    add_markers(
-      data = filter(df_plot, model == model_name),
-      x = ~range_bias,
-      y = ~value,
-      color = model_name, # Set color to match legendgroup
-      legendgroup = model_name,
-      opacity = 0.6,
-      name = ~model,
-      hoverinfo = 'text',
-      text = ~paste("Species:", species, "<br>Range coverage bias:", range_bias, "<br>Value:", round(value, 3)),
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
-      )
-    )
-}
-
-for (model_name in unique(df_plot$model)) {
-  reg_data = dplyr::filter(regression_lines, model == model_name)
-  plot = plot %>%
-    add_lines(
-      data = reg_data,
-      x = ~x,
-      y = ~fit,
-      color = model_name, # Set color to match legendgroup
-      legendgroup = model_name,
-      name = paste(model_name, '(fit)'),
-      showlegend = FALSE,
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
-      )
-    )
-}
-
-
-bslib::card(plot, full_screen = T)
-```
-
-
-### Functional group
-
-Functional groups were assigned based on taxonomic order. The following groupings were used:
-
-| Functional group | Taxomic orders |
-|------------------------|------------------------------------------------|
-| large ground-dwelling | Carnivora, Artiodactyla, Cingulata, Perissodactyla |
-| small ground-dwelling | Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha |
-| arboreal | Primates, Pilosa |
-| flying | Chiroptera |
-
-- Models for bats tended to perform slightly worse than for other groups.
-
-
-```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE}
-df_plot = performance %>%
-  dplyr::left_join(functional_groups, by = c("species" = "name_matched"))
-
-plot <- plot_ly(
-  data = df_plot,
-  x = ~functional_group,
-  y = ~value,
-  color = ~model,
-  type = 'box',
-  boxpoints = "all",
-  jitter = 1,
-  pointpos = 0,
-  hoverinfo = 'text',
-  text = ~paste("Species:", species, "<br>Functional group:", functional_group, "<br>Value:", round(value, 3)),
-  transforms = list(
-    list(
-      type = 'filter',
-      target = ~metric,
-      operation = '=',
-      value = focal_metrics[1] # default value
-    )
-  )
-)
-
-plot <- plot %>%
-  layout(
-    title = "Model Performance vs. Functional Group",
-    xaxis = list(title = "Functional group"),
-    yaxis = list(title = "Value"),
-    legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
-    margin = list(r = 150), # Add right margin to accommodate legend
-    hovermode = 'closest',
-    boxmode = "group",
-    updatemenus = list(
-      list(
-        type = "dropdown",
-        active = 0,
-        buttons = plotly_buttons
-      )
-    )
-  )
-
-bslib::card(plot, full_screen = T)
-```
-
diff --git a/R/utils.R b/R/utils.R
index 5539cfd5a9eed4da5e7aea07de93fac44ac676e2..edde20d758b1d33e81cf00ef13caa997496b6fa3 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -40,13 +40,6 @@ evaluate_model <- function(model, test_data) {
   
   # Predict probabilities
   if(class(model) %in% c("citodnn", "citodnnBootstrap")){
-    # This is a hack to fix naming issue during initial training run. Fix for later runs:
-    # ----
-    test_data = test_data %>%
-      dplyr::select(-species) %>%
-      dplyr::rename(species = species_int)
-    # ----
-    
     probs <- predict(model, as.matrix(test_data), type = "response")[,1]
     preds <- factor(round(probs), levels = c("0", "1"), labels = c("A", "P"))
   } else {
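
Context for the `utils.R` change: `evaluate_model()` no longer renames `species_int` internally, so callers must drop the `species` id column themselves before predicting, as `R/04_02_msdm_modeling.R` now does. Below is a minimal sketch of the resulting per-species evaluation flow, assuming `model_data`, `msdm_fit`, and `evaluate_model()` as defined in this repo (the focal species name is hypothetical):

```r
library(dplyr)

# Test split for a single species; the species id column must be removed
# before prediction, since evaluate_model() no longer strips or renames it.
test_data = model_data %>%
  filter(species == "Panthera onca", train == 0) %>%  # hypothetical focal species
  select(-species)

eval_spec = evaluate_model(msdm_fit, test_data)
```

Relatedly, the `msdm_vs_ssdm` chunk ranks models with `dplyr::min_rank()` rather than `order()`, because `order()` returns the permutation that sorts a vector, not each element's rank. A small illustration with made-up metric values:

```r
value = c(0.81, 0.94, 0.88)           # e.g. one metric for three models (hypothetical)
order(value, decreasing = TRUE)       # 2 3 1 -> indices of the sorted order
dplyr::min_rank(dplyr::desc(value))   # 3 1 2 -> rank of each value, 1 = best
```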