diff --git a/.gitignore b/.gitignore
index ac6cec0ae6148953f7080d9e8bdbc5772b017dda..3acf079a9890636921e00602124682ed4a556f26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,5 +11,6 @@ renv/cache/
 
 # Data files
 data/
+plots/
 R/*/
 R/*.html
\ No newline at end of file
diff --git a/R/04_01_ssdm_modeling.R b/R/04_01_ssdm_modeling.R
index efc6107302149c4b33aa8b0afc698e1e3fedcfd9..5a0de5e406d9b56940057a748520b12c6c4f7d3d 100644
--- a/R/04_01_ssdm_modeling.R
+++ b/R/04_01_ssdm_modeling.R
@@ -191,4 +191,15 @@ handlers("progress")
 
 data_split = group_split(model_data, species)
 
-train_models(data_split)
\ No newline at end of file
+train_models(data_split)
+
+# ----------------------------------------------------------------------#
+# Combine results                                                    ####
+# ----------------------------------------------------------------------#
+files = list.files("data/r_objects/nested_cv/model_results/", full.names = T)
+ssdm_results = lapply(files, function(f){load(f); return(performance_spec)}) %>% 
+  bind_rows() %>% 
+  dplyr::group_by(species, model, metric) %>% 
+  dplyr::summarize(value = mean(value, na.rm =T))
+
+save(ssdm_results, file = "data/r_objects/nested_cv/ssdm_results.RData")
diff --git a/R/05_01_performance_analysis.qmd b/R/05_01_performance_analysis.qmd
index e2c2f330a395e7f3bdabb992b950e2284d648033..44b9b8eaf83523ff260fc41018a6c7fb63eed089 100644
--- a/R/05_01_performance_analysis.qmd
+++ b/R/05_01_performance_analysis.qmd
@@ -11,12 +11,12 @@ library(sf)
 library(plotly)
 library(DT)
 
-
-load("../data/r_objects/ssdm_results.RData")
-load("../data/r_objects/msdm_fit.RData")
-load("../data/r_objects/msdm_results.RData")
-load("../data/r_objects/msdm_results_embedding_trained.RData")
-load("../data/r_objects/msdm_results_multiclass.RData.")
+load("../data/r_objects/nested_cv/model_data.RData")
+load("../data/r_objects/nested_cv/ssdm_results.RData")
+#load("../data/r_objects/msdm_fit.RData")
+#load("../data/r_objects/msdm_results.RData")
+#load("../data/r_objects/msdm_results_embedding_trained.RData")
+#load("../data/r_objects/msdm_results_multiclass.RData.")
 load("../data/r_objects/range_maps.RData")
 load("../data/r_objects/range_maps_gridded.RData")
 load("../data/r_objects/occs_final.RData")
@@ -25,14 +25,13 @@ sf::sf_use_s2(use_s2 = FALSE)
 ```
 
 ```{r globals, echo = FALSE, include = FALSE}
-# Select metrics
-focal_metrics = c("auc", "f1", "accuracy")  # There's a weird bug in plotly that scrambles up lines when using more than three groups
+# Count occs per species
+obs_count = model_data %>% 
+  sf::st_drop_geometry() %>% 
+  dplyr::filter(present == 1) %>% 
+  dplyr::group_by(species) %>% 
+  dplyr::summarise(obs = n())
 
-# Dropdown options
-plotly_buttons = list()
-for(metric in focal_metrics){
-  plotly_buttons[[length(plotly_buttons) + 1]] = list(method = "restyle", args = list("transforms[0].value", metric), label = metric)
-}
 
 # Regression functions
 asym_regression = function(x, y){
@@ -44,8 +43,8 @@ asym_regression = function(x, y){
   )
 }
 
-lin_regression = function(x, y){
-  glm_fit = suppressWarnings(glm(y~x, family = "binomial"))
+lin_regression = function(x, y, family = "binomial"){
+  glm_fit = suppressWarnings(glm(y~x, family = family))
   new_x = seq(min(x), max(x), length.out = 100)
   data.frame(
     x = new_x,
@@ -54,187 +53,220 @@ lin_regression = function(x, y){
 }
 
 # Performance table
-performance = bind_rows(ssdm_results, msdm_results, msdm_results_embedding_trained, msdm_results_multiclass) %>% 
-  pivot_longer(c(auc, accuracy, kappa, precision, recall, f1), names_to = "metric") %>% 
-  dplyr::filter(!is.na(value)) %>% 
+performance = bind_rows(ssdm_results) %>%
+  ungroup() %>% 
   dplyr::mutate(
-    metric = factor(metric, levels = c("auc", "kappa", "f1", "accuracy", "precision", "recall")),
-    value = round(pmax(value, 0, na.rm = T), 3) # Fix one weird instance of f1 < 0
-  ) %>% 
-  dplyr::filter(metric %in% focal_metrics)
+    value = case_when(
+      ((is.na(value) | is.nan(value)) & metric %in% c("auc", "f1", "accurracy", "precision", "recall")) ~ 0.5,
+      ((is.na(value) | is.nan(value)) & metric %in% c("kappa")) ~ 0,
+      .default = value
+    )
+  )
+
+focal_metrics = unique(performance$metric)
 ```
 
-## Summary
+## *Summary*
 
-This document summarizes the performance of different sSDM and mSDM algorithms for `r I(length(unique(performance$species)))` South American mammal species. Model performance is evaluated on `r I(xfun::numbers_to_words(length(focal_metrics)))` metrics (`r I(paste(focal_metrics, collapse = ', '))`) and analyzed along five potential influence factors (number of records, range size, range coverage, range coverage bias, and functional group). The comparison of sSDM vs mSDM approaches is of particular interest.
+*This document summarizes the performance of different sSDM and mSDM algorithms for `r I(length(unique(performance$species)))` South American mammal species. Model performance is evaluated on `r I(xfun::numbers_to_words(length(focal_metrics)))` metrics (`r I(paste(focal_metrics, collapse = ', '))`) and analyzed along five potential influence factors (number of records, range size, range coverage, range coverage bias, and functional group). The comparison of sSDM vs mSDM approaches is of particular interest.*
 
-Code can be found on [GitLab](https://git.idiv.de/ye87zine/symobio-modeling).
+*Code can be found on [GitLab](https://git.idiv.de/ye87zine/symobio-modeling).*
 
-### Modeling overview:
+### *Modeling overview:*
 
-#### General decisions
+#### *General decisions*
 
--   Randomly sampled pseudo-absences from expanded area of extent of occurrence records (×1.25)
--   Balanced presences and absences for each species
--   Predictors: all 19 CHELSA bioclim variables
--   70/30 Split of training vs. test data (except for NN models)
+-   *Randomly sampled pseudo-absences from expanded area of extent of occurrence records (×1.25)*
+-   *Balanced presences and absences for each species*
+-   *Predictors: all 19 CHELSA bioclim variables*
+-   *70/30 Split of training vs. test data (except for NN models)*
 
-#### sSDM Algorithms
+#### *sSDM Algorithms*
 
-Random Forest (**SSDM_RF**)
+*Random Forest (**SSDM_RF**)*
 
--   Hyperparameter tuning of `mtry`
--   Spatial block cross-validation during training
+-   *Hyperparameter tuning of `mtry`*
+-   *Spatial block cross-validation during training*
 
-Generalized boosted machine (**SSDM_GBM**)
+*Generalized boosted machine (**SSDM_GBM**)*
 
--   Hyperparameter tuning across `n.trees` , `interaction.depth` , `shrinkage`, `n.minobsinnode`
--   Spatial block cross-validation during training
+-   *Hyperparameter tuning across `n.trees` , `interaction.depth` , `shrinkage`, `n.minobsinnode`*
+-   *Spatial block cross-validation during training*
 
-Generalized Linear Model (**SSDM_GLM**)
+*Generalized Linear Model (**SSDM_GLM**)*
 
--   Logistic model with binomial link function
--   Spatial block cross-validation during training
+-   *Logistic model with binomial link function*
+-   *Spatial block cross-validation during training*
 
-Neural Netwok (**SSDM_NN**)
+*Neural Netwok (**SSDM_NN**)*
 
--   Three hidden layers, leaky ReLu activations, binomial loss
--   no spatial block cross-validation during training
+-   *Three hidden layers, leaky ReLu activations, binomial loss*
+-   *no spatial block cross-validation during training*
 
-#### mSDM Algorithms
+#### *mSDM Algorithms*
 
-Binary Neural Network with species embedding (**MSDM_embed**)
+*Binary Neural Network with species embedding (**MSDM_embed**)*
 
--   definition: presence \~ environment + embedding(species)
--   prediction: probability of occurrence given a set of (environmental) inputs and species identity
--   embedding initialized at random
--   three hidden layers, sigmoid + leaky ReLu activations, binomial loss
+-   *definition: presence \~ environment + embedding(species)*
+-   *prediction: probability of occurrence given a set of (environmental) inputs and species identity*
+-   *embedding initialized at random*
+-   *three hidden layers, sigmoid + leaky ReLu activations, binomial loss*
 
-Binary Neural Network with trait-informed species embedding (**MSDM_embed_informed_trained**)
+*Binary Neural Network with trait-informed species embedding (**MSDM_embed_informed_trained**)*
 
--   definition: presence \~ environment + embedding(species)
--   prediction: probability of occurrence given a set of (environmental) inputs and species identity
--   embedding initialized using eigenvectors of functional distance matrix, then further training on data
--   three hidden layers, sigmoid + leaky ReLu activations, binomial loss
+-   *definition: presence \~ environment + embedding(species)*
+-   *prediction: probability of occurrence given a set of (environmental) inputs and species identity*
+-   *embedding initialized using eigenvectors of functional distance matrix, then further training on data*
+-   *three hidden layers, sigmoid + leaky ReLu activations, binomial loss*
 
-Multi-Class Neural Network (**MSDM_multiclass**)
+*Multi-Class Neural Network (**MSDM_multiclass**)*
 
--   definition: species identity \~ environment
--   prediction: probability distribution across all observed species given a set of (environmental) inputs
--   presence-only data in training
--   three hidden layers, leaky ReLu activations, softmax loss
--   Top-k based evaluation (k=10, P/A \~ target species in / not among top 10 predictions)
+-   *definition: species identity \~ environment*
+-   *prediction: probability distribution across all observed species given a set of (environmental) inputs*
+-   *presence-only data in training*
+-   *three hidden layers, leaky ReLu activations, softmax loss*
+-   *Top-k based evaluation (k=10, P/A \~ target species in / not among top 10 predictions)*
 
-### Key findings:
+### *Key findings:*
 
--   sSDM algorithms (RF, GBM) outperformed mSDMs in most cases
--   mSDMs showed indications of better performance for rare species (\< 10-20 occurrences)
--   More occurrence records and larger range sizes tended to improve model performance
--   Higher range coverage correlated with better performance
--   Range coverage bias and functional group showed some impact but were less consistent
--   Convergence problems hampered NN sSDM performance
+-   *sSDM algorithms (RF, GBM) outperformed mSDMs in most cases*
+-   *mSDMs showed indications of better performance for rare species (\< 10-20 occurrences)*
+-   *More occurrence records and larger range sizes tended to improve model performance*
+-   *Higher range coverage correlated with better performance*
+-   *Range coverage bias and functional group showed some impact but were less consistent*
+-   *Convergence problems hampered NN sSDM performance*
 
-## Analysis
+## *Analysis*
 
-The table below shows the analysed modeling results.
+*The table below shows the analysed modeling results.*
 
 ```{r performance, echo = FALSE, message=FALSE, warnings=FALSE}
-DT::datatable(performance)
+DT::datatable(performance) %>% 
+  formatRound(columns="value", digits=3)
 ```
 
-### Number of records
+### *Number of records*
 
--   Model performance was generally better for species with more observations
--   Very poor performance below 50-100 observations
+-   *Model performance was generally better for species with more observations*
+-   *Very poor performance below 50-100 observations*
 
 ```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE}
-df_plot = performance
-
-# Calculate regression lines for each model and metric combination
-suppressWarnings({
-  regression_lines = df_plot %>%
-    group_by(model, metric) %>%
-    group_modify(~asym_regression(.x$obs, .x$value))
-})
-
-# Create base plot
-plot <- plot_ly() %>% 
-  layout(
-    title = "Model Performance vs. Number of observations",
-    xaxis = list(title = "Number of observations", type = "log"),
-    yaxis = list(title = "Value"),
-    legend = list(x = 1.1, y = 0.5),  # Move legend to the right of the plot
-    margin = list(r = 150),  # Add right margin to accommodate legend
-    hovermode = 'closest',
-    updatemenus = list(
-      list(
-        type = "dropdown",
-        active = 0,
-        buttons = plotly_buttons
-      )
+plot_performance_over_frequency = function(df_plot, metric) {
+  df_plot = dplyr::filter(df_plot, metric == !!metric)
+  
+  # Calculate regression lines for each model and metric combination
+  suppressWarnings({
+    regression_lines = df_plot %>%
+      group_by(model) %>%
+      group_modify( ~ asym_regression(.x$obs, .x$value))
+  })
+  
+  # Create base plot
+  plot <- plot_ly() %>%
+    layout(
+      title = "Model Performance vs. Number of observations",
+      xaxis = list(title = "Number of observations", type = "log"),
+      yaxis = list(title = metric),
+      legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
+      margin = list(r = 150), # Add right margin to accommodate legend
+      hovermode = 'closest'
     )
-  )
-
-# Points
-for (model_name in unique(df_plot$model)) {
-  plot = plot %>%
-    add_markers(
-      data = filter(df_plot, model == model_name),
-      x = ~obs,
-      y = ~value,
-      color = model_name,  # Set color to match legendgroup
-      legendgroup = model_name,
-      opacity = 0.6,
-      name = ~model,
-      hoverinfo = 'text',
-      text = ~paste("Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3)),
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
+  
+  # Points
+  for (model_name in unique(df_plot$model)) {
+    plot = plot %>%
+      add_markers(
+        data = filter(df_plot, model == model_name, metric %in% focal_metrics),
+        x = ~ obs,
+        y = ~ value,
+        color = model_name, # Set color to match legendgroup
+        legendgroup = model_name,
+        opacity = 0.6,
+        name = ~ model,
+        hoverinfo = 'text',
+        text = ~ paste(
+          "Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3)
         )
       )
-    )
-}
-
-# Add regression lines
-for(model_name in unique(df_plot$model)){
-  reg_data = dplyr::filter(regression_lines, model == model_name)
-  plot = plot %>% 
-    add_lines(
-      data = reg_data,
-      x = ~x,
-      y = ~fit,
-      color = model_name,  # Set color to match legendgroup
-      legendgroup = model_name,
-      name = paste(model_name, '(fit)'),
-      showlegend = FALSE,
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
+  }
+  
+  # Add regression lines
+  for (model_name in unique(df_plot$model)) {
+    reg_data = dplyr::filter(regression_lines, model == model_name)
+    plot = plot %>%
+      add_lines(
+        data = reg_data,
+        x = ~ x,
+        y = ~ fit,
+        color = model_name, # Set color to match legendgroup
+        legendgroup = model_name,
+        name = paste(model_name, '(fit)'),
+        showlegend = FALSE
       )
-    )
+  }
+  
+  return(plot)
 }
 
+df_plot = performance %>% dplyr::left_join(obs_count, by = "species")
+```
+
+::: panel-tabset
+#### *AUC*
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "auc")
+bslib::card(plot, full_screen = T)
+```
+
+#### *F1*
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "f1")
+bslib::card(plot, full_screen = T)
+```
+
+#### *Cohen's kappa*
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "kappa")
+bslib::card(plot, full_screen = T)
+```
+
+#### *Accurracy*
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "accuracy")
 bslib::card(plot, full_screen = T)
 ```
 
-### Range characteristics
+#### *Precision*
 
-#### Range size
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "precision")
+bslib::card(plot, full_screen = T)
+```
+
+#### *Recall*
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "recall")
+bslib::card(plot, full_screen = T)
+```
+:::
 
-Range size was calculated based on polygon layers from the IUCN Red List of Threatened Species (2016).
 
--   Model performance tended to be slightly higher for species with larger range size
--   Only RF shows continuous performance improvements beyond range sizes of \~5M km²
 
-```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE}
+### *Range characteristics*
+
+#### *Range size*
+
+*Range size was calculated based on polygon layers from the IUCN Red List of Threatened Species (2016).*
+
+-   *Model performance tended to be slightly higher for species with larger range size*
+-   *Only RF shows continuous performance improvements beyond range sizes of \~5M km²*
+
+```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE, eval=F}
 
 df_join = range_maps %>% 
   dplyr::mutate(range_size = as.numeric(st_area(range_maps) / 1000000)) %>%  # range in sqkm
@@ -317,17 +349,17 @@ for (model_name in unique(df_plot$model)) {
 bslib::card(plot, full_screen = T)
 ```
 
-#### Range coverage
+#### *Range coverage*
 
-Species ranges were split into continuous hexagonal grid cells of 1 degree diameter. Range coverage was then calculated as the number of grid cells containing at least one occurrence record divided by the number of total grid cells.
+*Species ranges were split into continuous hexagonal grid cells of 1 degree diameter. Range coverage was then calculated as the number of grid cells containing at least one occurrence record divided by the number of total grid cells.*
 
 $$
 RangeCoverage = \frac{N_{cells\_occ}}{N_{cells\_total}}
 $$
 
--   Models for species with higher range coverage showed slightly better performance
+-   *Models for species with higher range coverage showed slightly better performance*
 
-```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE}
+```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE, eval=F}
 df_cells_total = range_maps_gridded %>%
   dplyr::rename("species" = name_matched) %>% 
   group_by(species) %>%
@@ -423,19 +455,19 @@ for (model_name in unique(df_plot$model)) {
 bslib::card(plot, full_screen = T)
 ```
 
-#### Range coverage bias
+#### *Range coverage bias*
 
-Range coverage bias was calculated as 1 minus the ratio of the actual range coverage and the hypothetical range coverage if all observations were maximally spread out across the range.
+*Range coverage bias was calculated as 1 minus the ratio of the actual range coverage and the hypothetical range coverage if all observations were maximally spread out across the range.*
 
 $$
 RangeCoverageBias = 1 - \frac{RangeCoverage}{min({N_{obs\_total}} / {N_{cells\_total}}, 1)}
 $$
 
-Higher bias values indicate that occurrence records are spatially more clustered within the range of the species.
+*Higher bias values indicate that occurrence records are spatially more clustered within the range of the species.*
 
--   There was no strong relationship between range coverage bias and model performance
+-   *There was no strong relationship between range coverage bias and model performance*
 
-```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE}
+```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE, eval=F}
 df_occs_total = occs_final %>% 
   st_drop_geometry() %>% 
   group_by(species) %>% 
@@ -524,20 +556,20 @@ for (model_name in unique(df_plot$model)) {
 bslib::card(plot, full_screen = T)
 ```
 
-### Functional group
+### *Functional group*
 
-Functional groups were assigned based on taxonomic order. The following groupings were used:
+*Functional groups were assigned based on taxonomic order. The following groupings were used:*
 
-| Functional group      | Taxomic orders                                                        |
-|------------------|-----------------------------------------------------|
-| large ground-dwelling | Carnivora, Artiodactyla, Cingulata, Perissodactyla                    |
-| small ground-dwelling | Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha |
-| arboreal              | Primates, Pilosa                                                      |
-| flying                | Chiroptera                                                            |
+| *Functional group*      | *Taxomic orders*                                                        |
+|-------------------|-----------------------------------------------------|
+| *large ground-dwelling* | *Carnivora, Artiodactyla, Cingulata, Perissodactyla*                    |
+| *small ground-dwelling* | *Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha* |
+| *arboreal*              | *Primates, Pilosa*                                                      |
+| *flying*                | *Chiroptera*                                                            |
 
--   Models for bats tended to perform slightly worse than for other groups.
+-   *Models for bats tended to perform slightly worse than for other groups.*
 
-```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE}
+```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE, eval=F}
 df_plot = performance %>% 
   dplyr::left_join(functional_groups, by = c("species" = "name_matched"))
 
@@ -582,4 +614,3 @@ plot <- plot %>%
 
 bslib::card(plot, full_screen = T)
 ```
-