diff --git a/R/03_presence_absence_preparation.R b/R/03_presence_absence_preparation.R
index fe78ceaffe66603f17bb63acc246dd5704808ea4..9fd817c71fa460cf27c87aa9d023296999392b67 100644
--- a/R/03_presence_absence_preparation.R
+++ b/R/03_presence_absence_preparation.R
@@ -81,7 +81,7 @@ occs_final = occs %>%
 save(occs_final, file = "data/r_objects/occs_final.RData")
 
 # ---------------------------------------------------------------------------#
-# Create SSDM dataset                                                     ####
+# Create modeling dataset                                                 ####
 # ---------------------------------------------------------------------------#
 load("data/r_objects/occs_final.RData")
 load("data/r_objects/sa_polygon.RData")
@@ -89,13 +89,8 @@ sf::sf_use_s2(use_s2 = FALSE)
 
 occs_split = split(occs_final, occs_final$species)
 
-future::plan("multisession", workers = 8)
-ssdm_data = furrr::future_map(occs_split, .progress = TRUE, .options = furrr::furrr_options(seed = 42), .f = function(occs_spec){
-  # skip low information species
-  if(nrow(occs_spec) < 5){
-    return(NULL)
-  }
-  
+future::plan("multisession", workers = 16)
+model_data = furrr::future_map(occs_split, .progress = TRUE, .options = furrr::furrr_options(seed = 42), .f = function(occs_spec){
   # Define model/sampling region
   occs_bbox = occs_spec %>% 
     sf::st_bbox() %>% 
@@ -132,77 +127,34 @@ ssdm_data = furrr::future_map(occs_split, .progress = TRUE, .options = furrr::fu
     dplyr::mutate(present = 1) %>% 
     bind_rows(abs_spec) 
   
-  # Define cross-validation folds
-  spatial_folds = blockCV::cv_spatial(
-    pa_spec,
-    column = "present",
-    k = 5,
-    progress = F, plot = F, report = F
-  )
-  
-  pa_spec$fold = spatial_folds$folds_ids
-  pa_spec$geometry = NULL
-  
   # Split into train and test datasets
   train_index = createDataPartition(pa_spec$present, p = 0.7, list = FALSE)
   pa_spec$train = 0
   pa_spec$train[train_index] = 1
   
-  return(pa_spec)
-})
-
-ssdm_data = bind_rows(ssdm_data)
-save(ssdm_data, file = "data/r_objects/ssdm_data.RData")
-
-# ---------------------------------------------------------------------------#
-# Create MSDM dataset                                                     ####
-# ---------------------------------------------------------------------------#
-load("data/r_objects/occs_final.RData")
-load("data/r_objects/sa_polygon.RData")
-sf::sf_use_s2(use_s2 = FALSE)
-
-occs_split = split(occs_final, occs_final$species)
-
-future::plan("multisession", workers = 8)
-msdm_data = furrr::future_map(occs_split, .progress = TRUE, .options = furrr::furrr_options(seed = 42), .f = function(occs_spec){
-  # Define model/sampling region
-  occs_bbox = occs_spec %>% 
-    sf::st_bbox() %>% 
-    expand_bbox(min_span = 1, expansion = 0.25) %>% 
-    sf::st_as_sfc() %>% 
-    st_set_crs(st_crs(occs_spec)) 
-  
-  sample_region = suppressMessages(
-    st_as_sf(st_intersection(occs_bbox, sa_polygon))
-  )
-  
-  # Sample pseudo absence
-  sample_points = st_as_sf(st_sample(sample_region, nrow(occs_spec))) %>% 
-    dplyr::mutate(
-      species = occs_spec$species[1],
-      longitude = sf::st_coordinates(.)[,1],
-      latitude = sf::st_coordinates(.)[,2]
-    ) %>% 
-    dplyr::rename(geometry = "x")
-  
-  abs_spec = terra::rast(raster_filepaths) %>% 
-    setNames(paste0("layer_", 1:19)) %>% 
-    terra::extract(sample_points) %>% 
-    dplyr::select(-ID) %>% 
-    dplyr::mutate(
-      present = 0,
-      geometry = sample_points$x
-    ) %>% 
-    tibble() %>% 
-    bind_cols(sample_points)
+  # Define cross-validation folds
+  folds = tryCatch({
+    spatial_folds = suppressMessages(
+      blockCV::cv_spatial(
+        pa_spec,
+        column = "present",
+        k = 5,
+        progress = F, plot = F, report = F
+      )
+    )
+    
+    spatial_folds$folds_ids
+  }, warning = function(w){
+    NA
+  }, error = function(e){
+    NA
+  })
   
-  # Create presence-absence dataframe
-  pa_spec = occs_spec %>% 
-    dplyr::mutate(present = 1) %>% 
-    bind_rows(abs_spec) 
+  pa_spec$folds = folds
+  pa_spec$geometry = NULL
   
   return(pa_spec)
 })
 
-msdm_data = bind_rows(msdm_data)
-save(ssdm_data, file = "data/r_objects/msdm_data.RData")
+model_data = bind_rows(model_data)
+save(model_data, file = "data/r_objects/model_data.RData")
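
Note on the fold assignment above: wrapping `blockCV::cv_spatial()` in `tryCatch()` means species for which spatial blocking fails (e.g. too few or too clustered records) get `folds = NA` instead of aborting the whole `future_map()` run; the modeling scripts then detect and skip these species via `anyNA()`. A minimal, self-contained sketch of the same pattern, with a hypothetical `assign_folds()` standing in for the blockCV call:

```r
# Toy stand-in for the tryCatch() fold assignment: any warning or error
# collapses to a single NA, which is recycled across all rows of the
# species data frame and is detectable downstream via anyNA().
assign_folds = function(df, k = 5){
  tryCatch({
    if(nrow(df) < k) stop("too few records for spatial blocking")
    sample(rep(seq_len(k), length.out = nrow(df)))
  }, warning = function(w) NA, error = function(e) NA)
}

d_ok = data.frame(x = 1:100)
d_ok$folds = assign_folds(d_ok)      # fold ids 1..5

d_tiny = data.frame(x = 1:3)
d_tiny$folds = assign_folds(d_tiny)  # NA, recycled to every row
anyNA(d_tiny$folds)                  # TRUE -> species is skipped later
```
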
diff --git a/R/04_01_ssdm_modeling.R b/R/04_01_ssdm_modeling.R
index 22e35e6d3ecff5b2a26397c68074d29f583ea825..bdc7c9f0df3f6acd185a28f872cf5ce97d106cb9 100644
--- a/R/04_01_ssdm_modeling.R
+++ b/R/04_01_ssdm_modeling.R
@@ -7,23 +7,23 @@ library(pROC)
 
 source("R/utils.R")
 
-load("data/r_objects/ssdm_data.RData")
+load("data/r_objects/model_data.RData")
 
-pa_split = split(ssdm_data, ssdm_data$species)
+data_split = split(model_data, model_data$species)
 
 # ----------------------------------------------------------------------#
 # Train models                                                       ####
 # ----------------------------------------------------------------------#
 future::plan("multisession", workers = 8)
-ssdm_results = furrr::future_map(pa_split, .options = furrr::furrr_options(seed = 123), .f = function(pa_spec){
+ssdm_results = furrr::future_map(data_split, .options = furrr::furrr_options(seed = 123), .f = function(data_spec){
   # Initial check
-  if(nrow(pa_spec) < 10){
+  if(nrow(data_spec) < 10 || anyNA(data_spec$folds)){
     return(NULL)
   }
   
-  pa_spec$present_fct = factor(pa_spec$present, levels = c("0", "1"), labels = c("A", "P"))
-  train_data = dplyr::filter(pa_spec, train == 1)
-  test_data = dplyr::filter(pa_spec, train == 0)
+  data_spec$present_fct = factor(data_spec$present, levels = c("0", "1"), labels = c("A", "P"))
+  train_data = dplyr::filter(data_spec, train == 1)
+  test_data = dplyr::filter(data_spec, train == 0)
   
   # Define empty result for performance eval
   na_performance = list(    
@@ -137,8 +137,8 @@ ssdm_results = furrr::future_map(pa_split, .options = furrr::furrr_options(seed
   
   # Summarize results
   performance_summary = tibble(
-    species = pa_spec$species[1],
-    obs = nrow(pa_spec),
+    species = data_spec$species[1],
+    obs = nrow(data_spec),
     model = c("RF", "GBM", "GLM", "NN"),
     auc = c(rf_performance$AUC, gbm_performance$AUC, glm_performance$AUC, nn_performance$AUC),
     accuracy = c(rf_performance$Accuracy, gbm_performance$Accuracy, glm_performance$Accuracy, nn_performance$Accuracy),
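
Since species with fewer than 10 records or without valid folds now return `NULL`, it can be useful to check how many models were actually fit. A hypothetical post-hoc check, assuming `ssdm_results` is still the per-species list returned by `furrr::future_map()`:

```r
# Count and name the species skipped by the guard above
skipped = vapply(ssdm_results, is.null, logical(1))
sum(skipped)           # number of species without models
names(which(skipped))  # which species were skipped
```
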
diff --git a/R/04_02_msdm_modeling.R b/R/04_02_msdm_modeling.R
new file mode 100644
index 0000000000000000000000000000000000000000..84f9ba5c2cd4faab3ce35ebc45b2331397ac0fd4
--- /dev/null
+++ b/R/04_02_msdm_modeling.R
@@ -0,0 +1,82 @@
+library(dplyr)
+library(tidyr)
+library(cito)
+
+source("R/utils.R")
+
+load("data/r_objects/model_data.RData")
+
+model_data$species_int = as.integer(as.factor(model_data$species))
+
+train_data = dplyr::filter(model_data, train == 1)
+test_data = dplyr::filter(model_data, train == 0)
+
+# ----------------------------------------------------------------------#
+# Train model                                                       ####
+# ----------------------------------------------------------------------#
+predictors = paste0("layer_", 1:19)
+formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species_int, dim = 20, lambda = 0.000001)"))
+
+msdm_fit = dnn(
+  formula,
+  data = train_data,
+  hidden = c(500L, 1000L, 1000L),
+  loss = "binomial",
+  activation = c("sigmoid", "leaky_relu", "leaky_relu"),
+  epochs = 10000L, 
+  lr = 0.01,   
+  baseloss = 1,
+  batchsize = as.integer(nrow(train_data) / 5),
+  dropout = 0.25,
+  burnin = 100,
+  optimizer = config_optimizer("adam", weight_decay = 0.001),
+  lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
+  early_stopping = 250,
+  validation = 0.3,
+  device = "cuda"
+)
+
+save(msdm_fit, file = "data/r_objects/msdm_fit.RData")
+
+# ----------------------------------------------------------------------#
+# Evaluate model                                                     ####
+# ----------------------------------------------------------------------#
+load("data/r_objects/msdm_fit.RData")
+
+# Overall
+# Note: drop the character species column before as.matrix(), otherwise the
+# whole matrix is coerced to character; species_int carries the embedding input
+preds_train = predict(msdm_fit, newdata = as.matrix(dplyr::select(train_data, -species)), type = "response")
+preds_test = predict(msdm_fit, newdata = as.matrix(dplyr::select(test_data, -species)), type = "response")
+
+hist(preds_train)
+hist(preds_test)
+
+eval_overall = evaluate_model(msdm_fit, test_data)
+
+# Per species
+data_split = split(model_data, model_data$species)
+
+msdm_results = lapply(data_split, function(data_spec){
+  test_data = dplyr::filter(data_spec, train == 0)
+  
+  msdm_performance = tryCatch({
+    evaluate_model(msdm_fit, test_data)
+  }, error = function(e){
+    list(AUC = NA, Accuracy = NA, Kappa = NA, Precision = NA, Recall = NA, F1 = NA)
+  })
+  
+  performance_summary = tibble(
+    species = data_spec$species[1],
+    obs = nrow(data_spec),
+    model = "NN_MSDM",
+    auc = msdm_performance$AUC,
+    accuracy = msdm_performance$Accuracy,
+    kappa = msdm_performance$Kappa,
+    precision = msdm_performance$Precision,
+    recall = msdm_performance$Recall,
+    f1 = msdm_performance$F1
+  )
+}) %>% bind_rows()
+
+save(msdm_results, file = "data/r_objects/msdm_results.RData")
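
The `e(species_int, dim = 20, lambda = 0.000001)` term lets cito learn a 20-dimensional embedding vector per species alongside the weights shared across the 19 bioclim predictors, which is what makes this a single multi-species model. Predicting for one species then only requires pairing its integer id with new environmental data. A hedged sketch (the `predict_species()` helper and `new_env` argument are illustrative, not part of the script):

```r
# Predict occurrence probabilities for a single species from the joint model.
# new_env: data.frame/matrix with columns layer_1..layer_19 (as in this script)
predict_species = function(fit, new_env, species_id){
  newdata = cbind(as.data.frame(new_env), species_int = species_id)
  predict(fit, newdata = as.matrix(newdata), type = "response")[, 1]
}

# e.g. probabilities for species 1 on the held-out predictors:
# p1 = predict_species(msdm_fit, test_data[, paste0("layer_", 1:19)], species_id = 1L)
```
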
diff --git a/R/04_distribution_modeling.R b/R/04_distribution_modeling.R
deleted file mode 100644
index 9baa270cef154373dd3abab92af9360363afa635..0000000000000000000000000000000000000000
--- a/R/04_distribution_modeling.R
+++ /dev/null
@@ -1,217 +0,0 @@
-# ---------------------------------------------------------------------------#
-# Main loop                                                               ####
-# ---------------------------------------------------------------------------#
-load("data/r_objects/occs_final.RData")
-load("data/r_objects/sa_polygon.RData")
-
-occs_split = split(occs_final, occs_final$species)
-
-future::plan("multisession", workers = 20)
-
-ssdm_data = furrr::future_map(occs_split[1:20], .options = furrr::furrr_options(seed = 123), .f = function(occs_spec){
-  # Initial check
-  if(nrow(occs_spec) < 10){
-    return(NULL)
-  }
-  
-  species = occs_spec$species[1]
-  
-  # ------------------------------- #
-  # Define model/sampling region ####
-  # ------------------------------- #
-  sample_region = tryCatch({
-    sf::sf_use_s2(use_s2 = FALSE)
-    
-    occs_bbox = occs_spec %>% 
-      sf::st_bbox() %>% 
-      expand_bbox(0.25) %>% 
-      sf::st_as_sfc() %>% 
-      st_set_crs(st_crs(occs_spec)) 
-    
-    sample_region = suppressMessages(
-      st_as_sf(st_intersection(occs_bbox, sa_polygon))
-    )
-  }, error = function(e){
-    glue::glue("Skipping species {species}:\n{e}")
-  })
-  
-  if(is.character(sample_region)){ # There was an error
-    return(sample_region)
-  }
-  
-  # ------------------------------- #
-  # Create pseudo absence        ####
-  # ------------------------------- #
-  sample_points = st_as_sf(st_sample(sample_region, nrow(occs_spec)))  # TODO: Vary sample size?
-  
-  abs_spec = terra::rast(raster_filepaths) %>% 
-    setNames(paste0("layer_", 1:19)) %>% 
-    terra::extract(sample_points) %>% 
-    dplyr::select(-ID) %>% 
-    dplyr::mutate(
-      presence = 0,
-      geometry = sample_points$x
-    ) %>% 
-    tibble()
-  
-  # ------------------------------- #
-  # Create modeling dataset      ####
-  # ------------------------------- #
-  model_data = occs_spec %>% 
-    dplyr::mutate(presence = 1) %>% 
-    bind_rows(abs_spec) %>% 
-    dplyr::mutate(
-      presence_fct = as.factor(ifelse(presence == 0, "absent", "present"))
-    )
-  
-  # Define cross-validation folds
-  spatial_folds = blockCV::cv_spatial(
-    model_data,
-    column = "presence",
-    k = 5,
-    progress = F, plot = F, report = F
-  )
-  
-  model_data$fold = spatial_folds$folds_ids
-  model_data$geometry = NULL
-  
-  # Split into train and test datasets
-  train_index = createDataPartition(model_data$presence, p = 0.7, list = FALSE)
-  train_data = model_data[train_index, ]
-  test_data  = model_data[-train_index, ]
-  
-  # Define predictor columns
-  predictors = paste0("layer_", 1:19)
-  
-  # Define empty performance summary
-  na_performance = list(    
-    AUC = NA,
-    Accuracy = NA,
-    Kappa = NA,
-    Precision = NA,
-    Recall = NA,
-    F1 = NA
-  )
-  
-  # ------------------------------- #
-  # Train models                 ####
-  # ------------------------------- #
-  ## cito ####
-  config_lr_scheduler()
-  
-  nn_performance = tryCatch({
-    nn_fit = dnn(
-      X = train_data[, predictors],
-      Y = train_data$presence,
-      hidden = c(500L, 200L, 50L),
-      loss = "binomial",
-      activation = c("sigmoid", "leaky_relu", "leaky_relu"),
-      epochs = 500L, 
-      lr = 0.02,   
-      baseloss=10,
-      batchsize=nrow(train_data)/4,
-      dropout = 0.1,  # Regularization 
-      optimizer = config_optimizer("adam", weight_decay = 0.001),
-      lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
-      early_stopping = 200, # stop training when validation loss does not decrease anymore
-      validation = 0.3, # used for early stopping and lr_scheduler 
-      device = "cuda",
-      bootstrap = 5
-    )
-    
-    evaluate_model(nn_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  ## caret ####  
-  # Define training routine
-  index_train = lapply(unique(sort(train_data$fold)), function(x){
-    return(which(train_data$fold != x))
-  })
-  
-  train_ctrl = trainControl(
-    search = "grid",
-    classProbs = TRUE, 
-    index = index_train,
-    summaryFunction = twoClassSummary, 
-    savePredictions = "final"
-  )
-  
-  # Random Forest
-  rf_performance = tryCatch({
-    rf_grid = expand.grid(
-      mtry = c(3,7,11,15,19)                # Number of randomly selected predictors
-    )
-    
-    rf_fit = caret::train(
-      x = train_data[, predictors],
-      y = train_data$presence_fct,
-      method = "rf",
-      metric = "ROC",
-      tuneGrid = rf_grid,
-      trControl = train_ctrl
-    )
-    evaluate_model(rf_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Gradient Boosted Machine
-  gbm_performance = tryCatch({
-    gbm_grid <- expand.grid(
-      n.trees = c(100, 500, 1000, 1500),       # Higher number of boosting iterations
-      interaction.depth = c(3, 5, 7),          # Maximum depth of each tree
-      shrinkage = c(0.01, 0.005, 0.001),       # Lower learning rates
-      n.minobsinnode = c(10, 20)               # Minimum number of observations in nodes
-    )
-    
-    gbm_fit = train(
-      x = train_data[, predictors],
-      y = train_data$presence_fct,
-      method = "gbm",
-      metric = "ROC",
-      verbose = F,
-      tuneGrid = gbm_grid,
-      trControl = train_ctrl
-    )
-    evaluate_model(gbm_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Generalized additive Model
-  glm_performance = tryCatch({
-    glm_fit = train(
-      x = train_data[, predictors],
-      y = train_data$presence_fct,
-      method = "glm",
-      family=binomial, 
-      metric = "ROC",
-      preProcess = c("center", "scale"),
-      trControl = train_ctrl
-    )
-    evaluate_model(glm_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Summarize results
-  performance_summary = tibble(
-    species = species,
-    obs = nrow(occs_spec),
-    model = c("NN", "RF", "GBM", "GLM"),
-    auc = c(nn_performance$AUC, rf_performance$AUC, gbm_performance$AUC, glm_performance$AUC),
-    accuracy = c(nn_performance$Accuracy, rf_performance$Accuracy, gbm_performance$Accuracy, glm_performance$Accuracy),
-    kappa = c(nn_performance$Kappa, rf_performance$Kappa, gbm_performance$Kappa, glm_performance$Kappa),
-    precision = c(nn_performance$Precision, rf_performance$Precision, gbm_performance$Precision, glm_performance$Precision),
-    recall = c(nn_performance$Recall, rf_performance$Recall, gbm_performance$Recall, glm_performance$Recall),
-    f1 = c(nn_performance$F1, rf_performance$F1, gbm_performance$F1, glm_performance$F1)
-  )
-  
-  return(performance_summary)
-})
-
-model_data_df = bind_rows(model_data)
-
-save(model_results, file = "data/r_objects/model_results.RData")
\ No newline at end of file
diff --git a/R/05_performance_analysis.qmd b/R/05_performance_analysis.qmd
index a481b1d7032b42d65ce7cebb1a9b5f03aa5c83b4..c2d7d442fe8689080c38bbbdc8acf5e926ab8c2a 100644
--- a/R/05_performance_analysis.qmd
+++ b/R/05_performance_analysis.qmd
@@ -13,6 +13,7 @@ library(plotly)
 library(DT)
 
 load("../data/r_objects/ssdm_results.RData")
+load("../data/r_objects/msdm_results.RData")
 load("../data/r_objects/range_maps.RData")
 load("../data/r_objects/range_maps_gridded.RData")
 load("../data/r_objects/occs_final.RData")
@@ -84,7 +85,7 @@ Generalized Linear Model
 
 -   Logistic model with binomial link function
 
-Neural Netwok
+Neural Network (single-species)
 
 -   Three hidden layers (sigmoid - 500, leaky_relu - 200, leaky_relu - 50)
 
@@ -92,6 +93,16 @@ Neural Netwok
 
 -   Adam optimizer
 
+Neural Network (multi-species)
+
+-   Single model trained jointly across all species, with a species embedding (`e(species_int, dim = 20)`)
+
+-   Three hidden layers (sigmoid - 500, leaky_relu - 1000, leaky_relu - 1000)
+
+-   Binomial loss
+
+-   Adam optimizer
+
 ### Key findings:
 
 -   Performance: RF \> GBM \> GLM \> NN
@@ -105,7 +116,7 @@ Neural Netwok
 The table below shows the analysed modeling results.
 
 ```{r performance, echo = FALSE, message=FALSE, warnings=FALSE}
-performance = ssdm_results %>% 
+performance = bind_rows(ssdm_results, msdm_results) %>% 
   pivot_longer(c(auc, accuracy, kappa, precision, recall, f1), names_to = "metric") %>% 
   dplyr::filter(!is.na(value)) %>% 
   dplyr::mutate(
@@ -504,7 +515,7 @@
 Functional groups were assigned based on taxonomic order. The following groupings were used:
 
 | Functional group      | Taxomic orders                                                        |
-|-----------------------|-------------------------------------------------|
+|------------------------|------------------------------------------------|
 | large ground-dwelling | Carnivora, Artiodactyla, Cingulata, Perissodactyla                    |
 | small ground-dwelling | Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha |
 | arboreal              | Primates, Pilosa                                                      |
diff --git a/R/05_performance_analysis.rmarkdown b/R/05_performance_analysis.rmarkdown
new file mode 100644
index 0000000000000000000000000000000000000000..d82a62297406fbfbed633ecbdc7ecba1cbc828bf
--- /dev/null
+++ b/R/05_performance_analysis.rmarkdown
@@ -0,0 +1,583 @@
+---
+title: "sSDM Performance analysis"
+format: html
+editor: visual
+engine: knitr
+---
+
+```{r init, echo = FALSE, include = FALSE}
+library(tidyverse)
+library(Symobio)
+library(sf)
+library(plotly)
+library(DT)
+
+load("../data/r_objects/ssdm_results.RData")
+load("../data/r_objects/msdm_results.RData")
+load("../data/r_objects/range_maps.RData")
+load("../data/r_objects/range_maps_gridded.RData")
+load("../data/r_objects/occs_final.RData")
+load("../data/r_objects/functional_groups.RData")
+sf::sf_use_s2(use_s2 = FALSE)
+```
+
+```{r globals, echo = FALSE, include = FALSE}
+# Select metrics
+focal_metrics = c("auc", "f1", "accuracy")  # There's a weird bug in plotly that scrambles lines when using more than three groups
+
+# Dropdown options
+plotly_buttons = list()
+for(metric in focal_metrics){
+  plotly_buttons[[length(plotly_buttons) + 1]] = list(method = "restyle", args = list("transforms[0].value", metric), label = metric)
+}
+
+# Regression functions
+asym_regression = function(x, y){
+  nls_fit = nls(y ~ 1 - (1-b) * exp(-c * log(x)), start = list(b = 0.1, c = 0.1))
+  new_x = exp(seq(log(min(x)), log(max(x)), length.out = 100))
+  data.frame(
+    x = new_x,
+    fit = predict(nls_fit, newdata = data.frame(x = new_x))
+  )
+}
+
+lin_regression = function(x, y){
+  glm_fit = suppressWarnings(glm(y~x, family = "binomial"))
+  new_x = seq(min(x), max(x), length.out = 100)
+  data.frame(
+    x = new_x,
+    fit = predict(glm_fit, newdata = data.frame(x = new_x), type = "response")
+  )
+}
+
+```
+
+
+## Summary
+
+This document summarizes the performance of four SDM algorithms (Random Forest, Gradient Boosting Machine, Generalized Linear Model, Deep Neural Network) for `r I(length(unique(ssdm_results$species)))` South American mammal species. We use `r I(length(focal_metrics))` metrics (`r I(paste(focal_metrics, collapse = ', '))`) to evaluate model performance and look at how performance varies with five factors (number of records, range size, range coverage, range coverage bias, and functional group).
+
+### Modeling decisions:
+
+#### Data
+
+-   Randomly sampled pseudo-absences from expanded area of extent of occurrence records (×1.25)
+-   Balanced presences and absences for each species
+-   Predictors: all 19 CHELSA bioclim variables
+-   70/30 split of training vs. test data
+
+#### Algorithms
+
+Random Forest
+
+-   Spatial block cross-validation during training
+
+-   Hyperparameter tuning of `mtry`
+
+Generalized Boosted Machine
+
+-   Spatial block cross-validation during training
+
+-   Hyperparameter tuning across `n.trees`, `interaction.depth`, `shrinkage`, `n.minobsinnode`
+
+Generalized Linear Model
+
+-   Spatial block cross-validation during training
+
+-   Logistic model with binomial link function
+
+Neural Network (single-species)
+
+-   Three hidden layers (sigmoid - 500, leaky_relu - 200, leaky_relu - 50)
+
+-   Binomial loss
+
+-   Adam optimizer
+
+Neural Network (multi-species)
+
+-   Single model trained jointly across all species, with a species embedding (`e(species_int, dim = 20)`)
+
+-   Three hidden layers (sigmoid - 500, leaky_relu - 1000, leaky_relu - 1000)
+
+-   Binomial loss
+
+-   Adam optimizer
+
+### Key findings:
+
+-   Performance: RF \> GBM \> GLM \> NN
+-   Convergence problems with Neural Network models
+-   More occurrence records and larger range sizes tended to improve model performance
+-   Higher range coverage correlated with better performance
+-   Range coverage bias and functional group showed some impact but were less consistent
+
+## Analysis
+
+The table below shows the analysed modeling results.
+
+
+```{r performance, echo = FALSE, message=FALSE, warnings=FALSE}
+performance = bind_rows(ssdm_results, msdm_results) %>% 
+  pivot_longer(c(auc, accuracy, kappa, precision, recall, f1), names_to = "metric") %>% 
+  dplyr::filter(!is.na(value)) %>% 
+  dplyr::mutate(
+    metric = factor(metric, levels = c("auc", "kappa", "f1", "accuracy", "precision", "recall")),
+    value = round(pmax(value, 0, na.rm = T), 3) # Fix one weird instance of f1 < 0
+  ) %>% 
+  dplyr::filter(metric %in% focal_metrics)
+
+DT::datatable(performance)
+```
+
+
+### Number of records
+
+-   Model performance was generally better for species with more observations
+-   Very poor performance below 50-100 observations
+
+
+```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE}
+df_plot = performance
+
+# Calculate regression lines for each model and metric combination
+suppressWarnings({
+  regression_lines = df_plot %>%
+    group_by(model, metric) %>%
+    group_modify(~asym_regression(.x$obs, .x$value))
+})
+
+# Create base plot
+plot <- plot_ly() %>% 
+  layout(
+    title = "Model Performance vs. Number of observations",
+    xaxis = list(title = "Number of observations", type = "log"),
+    yaxis = list(title = "Value"),
+    legend = list(x = 1.1, y = 0.5),  # Move legend to the right of the plot
+    margin = list(r = 150),  # Add right margin to accommodate legend
+    hovermode = 'closest',
+    updatemenus = list(
+      list(
+        type = "dropdown",
+        active = 0,
+        buttons = plotly_buttons
+      )
+    )
+  )
+
+# Points
+for (model_name in unique(df_plot$model)) {
+  plot = plot %>%
+    add_markers(
+      data = filter(df_plot, model == model_name),
+      x = ~obs,
+      y = ~value,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      opacity = 0.6,
+      name = ~model,
+      hoverinfo = 'text',
+      text = ~paste("Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3)),
+      transforms = list(
+        list(
+          type = 'filter',
+          target = ~metric,
+          operation = '=',
+          value = focal_metrics[1]
+        )
+      )
+    )
+}
+
+# Add regression lines
+for(model_name in unique(df_plot$model)){
+  reg_data = dplyr::filter(regression_lines, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~fit,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(fit)'),
+      showlegend = FALSE,
+      transforms = list(
+        list(
+          type = 'filter',
+          target = ~metric,
+          operation = '=',
+          value = focal_metrics[1]
+        )
+      )
+    )
+}
+
+bslib::card(plot, full_screen = T)
+```
+
+
+### Range characteristics
+
+#### Range size
+
+Range size was calculated based on polygon layers from the IUCN Red List of Threatened Species (2016).
+
+-   Model performance tended to be slightly higher for species with larger range size
+-   Only RF shows continuous performance improvements beyond range sizes of \~5M km²
+
+
+```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE}
+
+df_join = range_maps %>% 
+  dplyr::mutate(range_size = as.numeric(st_area(range_maps) / 1000000)) %>%  # range in sqkm
+  sf::st_drop_geometry()
+
+df_plot = performance %>% 
+  inner_join(df_join, by = c("species" = "name_matched"))
+
+# Calculate regression lines for each model and metric combination
+suppressWarnings({
+  regression_lines = df_plot %>%
+    group_by(model, metric) %>%
+    group_modify(~asym_regression(.x$range_size, .x$value))
+})
+
+# Create base plot
+plot <- plot_ly() %>% 
+  layout(
+    title = "Model Performance vs. Range size",
+    xaxis = list(title = "Range size"),
+    yaxis = list(title = "Value"),
+    legend = list(x = 1.1, y = 0.5),  # Move legend to the right of the plot
+    margin = list(r = 150),  # Add right margin to accommodate legend
+    hovermode = 'closest',
+    updatemenus = list(
+      list(
+        type = "dropdown",
+        active = 0,
+        buttons = plotly_buttons
+      )
+    )
+  )
+
+# Add regression lines and confidence intervals for each model
+for (model_name in unique(df_plot$model)) {
+  plot <- plot %>%
+    add_markers(
+      data = filter(df_plot, model == model_name),
+      x = ~range_size,
+      y = ~value,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      opacity = 0.6,
+      name = ~model,
+      hoverinfo = 'text',
+      text = ~paste("Species:", species, "<br>Range size:", range_size, "<br>Value:", round(value, 3)),
+      transforms = list(
+        list(
+          type = 'filter',
+          target = ~metric,
+          operation = '=',
+          value = focal_metrics[1]
+        )
+      )
+    )
+}
+
+for (model_name in unique(df_plot$model)) {
+  reg_data = dplyr::filter(regression_lines, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~fit,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(fit)'),
+      showlegend = FALSE,
+      transforms = list(
+        list(
+          type = 'filter',
+          target = ~metric,
+          operation = '=',
+          value = focal_metrics[1]
+        )
+      )
+    )
+}
+
+bslib::card(plot, full_screen = T)
+```
+
+
+#### Range coverage
+
+Species ranges were split into contiguous hexagonal grid cells of 1 degree diameter. Range coverage was then calculated as the number of grid cells containing at least one occurrence record divided by the number of total grid cells.
+
+$$
+RangeCoverage = \frac{N_{cells\_occ}}{N_{cells\_total}}
+$$
+
+-   Models for species with higher range coverage showed slightly better performance
+
+
+```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE}
+df_cells_total = range_maps_gridded %>%
+  dplyr::rename("species" = name_matched) %>% 
+  group_by(species) %>%
+  summarise(cells_total = n()) %>% 
+  st_drop_geometry()
+
+df_cells_occ <- range_maps_gridded %>%
+  st_join(occs_final, join = st_intersects) %>%
+  filter(name_matched == species) %>%         # Filter only intersections of the same species
+  group_by(species) %>%
+  summarise(cells_occupied = n_distinct(geometry)) %>%
+  st_drop_geometry()
+
+df_join = df_cells_total %>% 
+  dplyr::inner_join(df_cells_occ, by = "species") %>% 
+  dplyr::mutate(range_cov = cells_occupied / cells_total) %>% 
+  dplyr::select(species, range_cov)
+
+df_plot = performance %>% 
+  inner_join(df_join, by = "species")
+
+# Calculate regression lines for each model and metric combination
+suppressWarnings({
+  regression_lines = df_plot %>%
+    group_by(model, metric) %>%
+    group_modify(~lin_regression(.x$range_cov, .x$value))
+})
+
+# Create base plot
+plot <- plot_ly() %>% 
+  layout(
+    title = "Model Performance vs. Range coverage",
+    xaxis = list(title = "Range coverage"),
+    yaxis = list(title = "Value"),
+    legend = list(x = 1.1, y = 0.5),  # Move legend to the right of the plot
+    margin = list(r = 150),  # Add right margin to accommodate legend
+    hovermode = 'closest',
+    updatemenus = list(
+      list(
+        type = "dropdown",
+        active = 0,
+        buttons = plotly_buttons
+      )
+    )
+  )
+
+# Add regression lines and confidence intervals for each model
+for (model_name in unique(df_plot$model)) {
+  plot <- plot %>%
+    add_markers(
+      data = filter(df_plot, model == model_name),
+      x = ~range_cov,
+      y = ~value,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      opacity = 0.6,
+      name = ~model,
+      hoverinfo = 'text',
+      text = ~paste("Species:", species, "<br>Range coverage:", range_cov, "<br>Value:", round(value, 3)),
+      transforms = list(
+        list(
+          type = 'filter',
+          target = ~metric,
+          operation = '=',
+          value = focal_metrics[1]
+        )
+      )
+    )
+}
+
+for (model_name in unique(df_plot$model)) {
+  reg_data = dplyr::filter(regression_lines, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~fit,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(fit)'),
+      showlegend = FALSE,
+      transforms = list(
+        list(
+          type = 'filter',
+          target = ~metric,
+          operation = '=',
+          value = focal_metrics[1]
+        )
+      )
+    )
+}
+
+bslib::card(plot, full_screen = T)
+```
+
+
+#### Range coverage bias
+
+Range coverage bias was calculated as 1 minus the ratio of the actual range coverage and the hypothetical range coverage if all observations were maximally spread out across the range.
+
+$$
+RangeCoverageBias = 1 - \frac{RangeCoverage}{\min({N_{obs\_total}} / {N_{cells\_total}}, 1)}
+$$
+
+Higher bias values indicate that occurrence records are spatially more clustered within the range of the species.
+
+-   There was no strong relationship between range coverage bias and model performance
+
+
+```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE}
+df_occs_total = occs_final %>% 
+  st_drop_geometry() %>% 
+  group_by(species) %>% 
+  summarise(occs_total = n())
+
+df_join = df_occs_total %>% 
+  dplyr::inner_join(df_cells_total, by = "species") %>% 
+  dplyr::inner_join(df_cells_occ, by = "species") %>% 
+  dplyr::mutate(range_bias = 1-((cells_occupied / cells_total) / pmin(occs_total / cells_total, 1)))
+
+df_plot = performance %>% 
+  inner_join(df_join, by = "species")
+
+# Calculate regression lines for each model and metric combination
+suppressWarnings({
+  regression_lines = df_plot %>%
+    group_by(model, metric) %>%
+    group_modify(~lin_regression(.x$range_bias, .x$value))
+})
+
+# Create base plot
+plot <- plot_ly() %>% 
+  layout(
+    title = "Model Performance vs. Range coverage bias",
+    xaxis = list(title = "Range coverage bias"),
+    yaxis = list(title = "Value"),
+    legend = list(x = 1.1, y = 0.5),  # Move legend to the right of the plot
+    margin = list(r = 150),  # Add right margin to accommodate legend
+    hovermode = 'closest',
+    updatemenus = list(
+      list(
+        type = "dropdown",
+        active = 0,
+        buttons = plotly_buttons
+      )
+    )
+  )
+
+# Add regression lines and confidence intervals for each model
+for (model_name in unique(df_plot$model)) {
+  plot <- plot %>%
+    add_markers(
+      data = filter(df_plot, model == model_name),
+      x = ~range_bias,
+      y = ~value,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      opacity = 0.6,
+      name = ~model,
+      hoverinfo = 'text',
+      text = ~paste("Species:", species, "<br>Range coverage bias:", range_bias, "<br>Value:", round(value, 3)),
+      transforms = list(
+        list(
+          type = 'filter',
+          target = ~metric,
+          operation = '=',
+          value = focal_metrics[1]
+        )
+      )
+    )
+}
+
+for (model_name in unique(df_plot$model)) {
+  reg_data = dplyr::filter(regression_lines, model == model_name)
+  plot = plot %>% 
+    add_lines(
+      data = reg_data,
+      x = ~x,
+      y = ~fit,
+      color = model_name,  # Set color to match legendgroup
+      legendgroup = model_name,
+      name = paste(model_name, '(fit)'),
+      showlegend = FALSE,
+      transforms = list(
+        list(
+          type = 'filter',
+          target = ~metric,
+          operation = '=',
+          value = focal_metrics[1]
+        )
+      )
+    )
+}
+
+
+bslib::card(plot, full_screen = T)
+```
+
+
+### Functional group
+
+Functional groups were assigned based on taxonomic order. The following groupings were used:
+
+| Functional group      | Taxonomic orders                                                      |
+|------------------------|------------------------------------------------|
+| large ground-dwelling | Carnivora, Artiodactyla, Cingulata, Perissodactyla                    |
+| small ground-dwelling | Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha |
+| arboreal              | Primates, Pilosa                                                      |
+| flying                | Chiroptera                                                            |
+
+-   Models for bats tended to perform slightly worse than for other groups.
+
+
+```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE}
+df_plot = performance %>% 
+  dplyr::left_join(functional_groups, by = c("species" = "name_matched"))
+
+plot <- plot_ly(
+  data = df_plot,
+  x = ~functional_group,
+  y = ~value,
+  color = ~model,
+  type = 'box',
+  boxpoints = "all",
+  jitter = 1,
+  pointpos = 0,
+  hoverinfo = 'text',
+  text = ~paste("Species:", species, "<br>Functional group:", functional_group, "<br>Value:", round(value, 3)),
+  transforms = list(
+    list(
+      type = 'filter',
+      target = ~metric,
+      operation = '=',
+      value = focal_metrics[1]  # default value
+    )
+  )
+)
+
+plot <- plot %>%
+  layout(
+    title = "Model Performance vs. Functional Group",
+    xaxis = list(title = "Functional group"),
+    yaxis = list(title = "Value"),
+    legend = list(x = 1.1, y = 0.5),  # Move legend to the right of the plot
+    margin = list(r = 150),  # Add right margin to accommodate legend
+    hovermode = 'closest',
+    boxmode = "group",
+    updatemenus = list(
+      list(
+        type = "dropdown",
+        active = 0,
+        buttons = plotly_buttons
+      )
+    )
+  )
+
+bslib::card(plot, full_screen = T)
+```
+
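
The `asym_regression()` helper defined in this report fits a saturating curve, y = 1 − (1 − b)·x^(−c), so fitted performance approaches 1 as the predictor (e.g. number of observations) grows, with b the expected value at x = 1. A toy illustration with simulated data (values are made up, not from the pipeline):

```r
# Simulate a saturating metric~observations relationship and recover it with
# the same nls() model and start values used by asym_regression()
set.seed(1)
x = round(exp(runif(200, log(10), log(5000))))                        # observation counts
y = pmin(pmax(1 - 0.6 * exp(-0.4 * log(x)) + rnorm(200, 0, 0.05), 0), 1)
nls_fit = nls(y ~ 1 - (1 - b) * exp(-c * log(x)), start = list(b = 0.1, c = 0.1))
coef(nls_fit)  # b and c should land near 0.4
```
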
diff --git a/R/utils.R b/R/utils.R
index ce483b7fe58487db918f6170046344f72e9ef304..5539cfd5a9eed4da5e7aea07de93fac44ac676e2 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -40,14 +40,21 @@ evaluate_model <- function(model, test_data) {
   
   # Predict probabilities
   if(class(model) %in% c("citodnn", "citodnnBootstrap")){
-    probs <- predict(model, test_data, type = "response")[,1]
+    # Hack: fixes a column naming issue from the initial training run; drop this block once the model is retrained:
+    # ----
+    test_data = test_data %>% 
+      dplyr::select(-species) %>% 
+      dplyr::rename(species = species_int)
+    # ----
+    
+    probs <- predict(model, as.matrix(test_data), type = "response")[,1]
     preds <- factor(round(probs), levels = c("0", "1"), labels = c("A", "P"))
   } else {
     probs <- predict(model, test_data, type = "prob")$P
     preds <- predict(model, test_data, type = "raw")
   }
   
-  actual <- test_data$present_fct
+  actual <- factor(test_data$present, levels = c("0", "1"), labels = c("A", "P"))
   
   # Calculate AUC
   auc <- pROC::roc(actual, probs, levels = c("P", "A"), direction = ">")$auc
@@ -56,12 +63,14 @@ evaluate_model <- function(model, test_data) {
   cm <- caret::confusionMatrix(preds, actual, positive = "P")
   
   # Return metrics
-  return(list(
-    AUC = auc,
-    Accuracy = cm$overall["Accuracy"],
-    Kappa = cm$overall["Kappa"],
-    Precision = cm$byClass["Precision"],
-    Recall = cm$byClass["Recall"],
-    F1 = cm$byClass["F1"]
-  ))
+  return(
+    list(
+      AUC = as.numeric(auc),
+      Accuracy = cm$overall["Accuracy"],
+      Kappa = cm$overall["Kappa"],
+      Precision = cm$byClass["Precision"],
+      Recall = cm$byClass["Recall"],
+      F1 = cm$byClass["F1"]
+    )
+  )
 }
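
One more note on `evaluate_model()`: the `levels` and `direction` arguments in the `pROC::roc()` call (unchanged above) pin which factor level is the control group, which keeps the AUC from being silently flipped when pROC would otherwise guess. A minimal, self-contained illustration with toy data (not from the pipeline):

```r
library(pROC)

set.seed(42)
actual = factor(sample(c("A", "P"), 100, replace = TRUE), levels = c("A", "P"))
probs  = ifelse(actual == "P", 0.7, 0.3) + rnorm(100, 0, 0.2)  # presences score higher

# As in evaluate_model(): control = "P", controls have the higher values
roc(actual, probs, levels = c("P", "A"), direction = ">")$auc
# Equivalent explicit form with "A" as control:
roc(actual, probs, levels = c("A", "P"), direction = "<")$auc
```
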