diff --git a/R/01_01_range_map_preparation.R b/R/01_01_range_map_preparation.R
index 0a0717690f5370dd4fa90ba24f8c60dde544688a..eb33fdc1b726ab35bf7fa5bd97ec638740f30600 100644
--- a/R/01_01_range_map_preparation.R
+++ b/R/01_01_range_map_preparation.R
@@ -79,8 +79,8 @@ geometries_unique = range_maps_gridded_id %>%
   group_by(geom_id) %>% 
   slice_head(n = 1)
 
-geom_dist = sf::st_distance(geometries_unique, geometries_unique)  # Takes ~ 10 mins
-  %>% as.matrix()
+geom_dist = sf::st_distance(geometries_unique, geometries_unique)  %>%  # Takes ~ 10 mins
+  as.matrix()
 
 range_maps_split = range_maps_gridded_id %>% 
   group_by(name_matched) %>% 
diff --git a/R/03_02_absence_preparation.R b/R/03_02_absence_preparation.R
index 914ddb726898e633499a027f9b82159925b5b2f3..b148c644cd3c799537e98bd6365449038a2db921 100644
--- a/R/03_02_absence_preparation.R
+++ b/R/03_02_absence_preparation.R
@@ -49,9 +49,9 @@ range_maps = st_transform(range_maps, proj_string)
 # geographically close (reproduce sampling biases) but environmentally       #
 # dissimilar (avoid false negatives) to the known occurrences                #
 # ---------------------------------------------------------------------------#
-future::plan("multisession", workers = 16)
+future::plan("multisession", workers = 24)
 
-model_data = furrr::future_walk(
+furrr::future_walk(
   .x = target_species,
   .options = furrr::furrr_options(seed = 42, scheduling = FALSE), # make sure workers get constant stream of work
   .env_globals = c(raster_filepaths, sa_polygon, occs_final, range_maps, proj_string),
@@ -126,13 +126,13 @@ model_data = furrr::future_walk(
                       y = y + runif(nrow(.), -5000, 5000)) %>%     # Add jitter (res/2) to cell centroids 
         st_as_sf(coords = c("x", "y"), crs = proj_string)
       
-      sample_abs = bind_cols(
-        terra::extract(raster_data, terra::vect(sample_points), ID = FALSE)
-      ) %>% 
-        bind_cols(sample_points) %>% 
+      sample_abs = sample_points %>% 
+        bind_cols(
+          terra::extract(raster_data, terra::vect(sample_points), ID = FALSE)
+        ) %>% 
         drop_na()
       
-      abs_spec_list[[length(abs_spec_list)+1]] = sample_points
+      abs_spec_list[[length(abs_spec_list)+1]] = sample_abs
       
       samples_required = samples_required - nrow(sample_points) # Sometimes there are no env data for sample points, so keep sampling
     }
@@ -154,7 +154,7 @@ model_data = furrr::future_walk(
     # Create presence-absence dataframe
     pa_spec = occs_spec %>% 
       dplyr::mutate(present = 1) %>% 
-      bind_rows(abs_spec) 
+      bind_rows(abs_spec)
     
     ggplot() +
       ggtitle(species) +
@@ -189,7 +189,6 @@ model_data = furrr::future_walk(
     })
     
     pa_spec$fold_eval = folds
-    pa_spec$geometry = NULL
     
     save(pa_spec, file = paste0("data/r_objects/pa_sampling/", species, ".RData"))
   }
diff --git a/R/04_01_ssdm_modeling.R b/R/04_01_ssdm_modeling.R
index 959f77c38589b6ad8594c0a3d465aaa6f9907f7a..914c1aa2990cacea42993a6cf5775d6954dc51b3 100644
--- a/R/04_01_ssdm_modeling.R
+++ b/R/04_01_ssdm_modeling.R
@@ -1,156 +1,208 @@
-library(dplyr)
-library(tidyr)
 library(furrr)
-library(caret)
+library(progressr)
+library(dplyr)
 library(cito)
-library(pROC)
 
 source("R/utils.R")
 
-load("data/r_objects/model_data.RData")
-
-data_split = split(model_data, model_data$species)
-
 # ----------------------------------------------------------------------#
-# Train models                                                       ####
+# Define Training function                                           ####
 # ----------------------------------------------------------------------#
-future::plan("multisession", workers = 8)
-ssdm_results = furrr::future_map(data_split, .options = furrr::furrr_options(seed = 123), .f = function(data_spec){
-  # Initial check
-  if(nrow(data_spec) < 10 | anyNA(data_spec$folds)){ 
-    return(NULL)
-  }
-  
-  data_spec$present_fct = factor(data_spec$present, levels = c("0", "1"), labels = c("A", "P"))
-  train_data = dplyr::filter(data_spec, train == 1)
-  test_data = dplyr::filter(data_spec, train == 0)
-  
-  # Define empty result for performance eval
-  na_performance = list(    
-    AUC = NA,
-    Accuracy = NA,
-    Kappa = NA,
-    Precision = NA,
-    Recall = NA,
-    F1 = NA
-  )
-  
-  # Define predictors
-  predictors = paste0("layer_", 1:19)
-  
-  # Define caret training routine #####
-  index_train = lapply(unique(sort(train_data$fold)), function(x){
-    return(which(train_data$fold != x))
-  })
+train_models = function(pa_files){
+  future::plan("multisession", workers = 4)
+  p = progressor(along = pa_files)
   
-  train_ctrl = trainControl(
-    search = "grid",
-    classProbs = TRUE, 
-    index = index_train,
-    summaryFunction = twoClassSummary, 
-    savePredictions = "final"
+  furrr::future_walk(
+    .x = pa_files, 
+    .options = furrr::furrr_options(
+      seed = 123,
+      packages = c("dplyr", "tidyr", "sf", "caret", "cito", "pROC"),
+      scheduling = FALSE  # make sure workers get constant stream of work
+    ),
+    .f = function(pa_file){
+      load(pa_file)
+      species = pa_spec$species[1]
+      p(sprintf("species=%s", species))
+      
+      if(all(is.na(pa_spec$fold_eval))){
+        warning("Too few samples")
+        return()
+      }
+      
+      # Define empty result for performance eval
+      na_performance = list(    
+        AUC = NA,
+        Accuracy = NA,
+        Kappa = NA,
+        Precision = NA,
+        Recall = NA,
+        F1 = NA
+      )
+      
+      # Create factor presence column 
+      pa_spec$present_fct = factor(pa_spec$present, levels = c("0", "1"), labels = c("A", "P"))
+      
+      # Outer CV loop (for averaging performance metrics)
+      performance_cv = lapply(sort(unique(pa_spec$fold_eval)), function(k){
+        data_test = dplyr::filter(pa_spec, fold_eval == k)
+        data_train = dplyr::filter(pa_spec, fold_eval != k)
+        
+        if(nrow(data_test) == 0 || nrow(data_train) == 0){
+          warning("Too few samples")
+          return(NULL)
+        }
+        
+        # Create inner CV folds for model training
+        cv_train = blockCV::cv_spatial(
+          data_train,
+          column = "present",
+          k = 5,
+          progress = F, plot = F, report = F
+        )
+        data_train$fold_train = cv_train$folds_ids
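+        # blockCV::cv_spatial() returns one spatial block id per training
+        # record (folds_ids); these ids are used below as the inner CV folds.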
+        
+        # Drop geometry
+        data_train$geometry = NULL
+        data_test$geometry = NULL
+        
+        # Define caret training routine #####
+        index_train = lapply(unique(sort(data_train$fold_train)), function(x){
+          return(which(data_train$fold_train != x))
+        })
+        
+        train_ctrl = trainControl(
+          search = "random",
+          classProbs = TRUE, 
+          index = index_train,
+          summaryFunction = twoClassSummary, 
+          savePredictions = "final"
+        )
+        
+        # Define predictors
+        predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness")
+        
+        # Random Forest #####
+        rf_performance = tryCatch({
+          # Fit model
+          rf_fit = caret::train(
+            x = data_train[, predictors],
+            y = data_train$present_fct,
+            method = "rf",
+            trControl = train_ctrl,
+            tuneLength = 4,
+            verbose = F
+          )
+          
+          evaluate_model(rf_fit, data_test)
+        }, error = function(e){
+          na_performance
+        })
+        
+        # Gradient Boosted Machine ####
+        gbm_performance = tryCatch({
+          gbm_fit = train(
+            x = data_train[, predictors],
+            y = data_train$present_fct,
+            method = "gbm",
+            trControl = train_ctrl,
+            tuneLength = 4,
+            verbose = F
+          )
+          evaluate_model(gbm_fit, data_test)
+        }, error = function(e){
+          na_performance
+        })
+        
+        # Generalized Additive Model ####
+        gam_performance = tryCatch({
+          gam_fit = train(
+            x = data_train[, predictors],
+            y = data_train$present_fct,
+            method = "gamSpline",
+            tuneLength = 4,
+            trControl = train_ctrl
+          )
+          evaluate_model(gam_fit, data_test)
+        }, error = function(e){
+          na_performance
+        })
+        
+        # Neural Network ####
+        nn_performance = tryCatch({
+          formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+')))
+          
+          nn_fit = dnn(
+            formula,
+            data = data_train,
+            hidden = c(100L, 100L, 100L),
+            loss = "binomial",
+            activation = c("leaky_relu", "leaky_relu", "leaky_relu"),
+            epochs = 500L, 
+            burnin = 100L,
+            lr = 0.001,   
+            batchsize = max(nrow(data_train)/10, 64),
+            dropout = 0.1,
+            optimizer = config_optimizer("adam", weight_decay = 0.001),
+            lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 50, factor = 0.7),
+            early_stopping = 100,
+            validation = 0.2,
+            device = "cuda",
+            verbose = F,
+            plot = F
+          )
+          
+          if(nn_fit$successfull == 1){
+            evaluate_model(nn_fit, data_test)  
+          } else {
+            na_performance
+          }
+        }, error = function(e){
+          na_performance
+        })
+        
+        # Summarize results
+        performance_summary = tibble(
+          species = !!species,
+          obs = nrow(data_train),
+          fold_eval = k,
+          model = c("RF", "GBM", "GAM", "NN"),
+          auc = c(rf_performance$AUC, gbm_performance$AUC, gam_performance$AUC, nn_performance$AUC),
+          accuracy = c(rf_performance$Accuracy, gbm_performance$Accuracy, gam_performance$Accuracy, nn_performance$Accuracy),
+          kappa = c(rf_performance$Kappa, gbm_performance$Kappa, gam_performance$Kappa, nn_performance$Kappa),
+          precision = c(rf_performance$Precision, gbm_performance$Precision, gam_performance$Precision, nn_performance$Precision),
+          recall = c(rf_performance$Recall, gbm_performance$Recall, gam_performance$Recall, nn_performance$Recall),
+          f1 = c(rf_performance$F1, gbm_performance$F1, gam_performance$F1, nn_performance$F1)
+        ) %>% 
+          pivot_longer(all_of(c("auc", "accuracy", "kappa", "precision", "recall", "f1")), names_to = "metric", values_to = "value")
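+        # One row per species x eval fold x model x metric (long format)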
+        
+        return(performance_summary)
+      })
+      
+      # Combine and save evaluation results
+      performance_spec = bind_rows(performance_cv)
+      save(performance_spec, file = paste0("data/r_objects/model_results/", species, ".RData"))
+    }
   )
-  
-  # Random Forest #####
-  rf_performance = tryCatch({
-    rf_grid = expand.grid(
-      mtry = c(3,7,11,15,19)                # Number of randomly selected predictors
-    )
-    
-    rf_fit = caret::train(
-      x = train_data[, predictors],
-      y = train_data$present_fct,
-      method = "rf",
-      metric = "ROC",
-      tuneGrid = rf_grid,
-      trControl = train_ctrl
-    )
-    evaluate_model(rf_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Gradient Boosted Machine ####
-  gbm_performance = tryCatch({
-    gbm_grid <- expand.grid(
-      n.trees = c(100, 500, 1000, 1500),       # number of trees
-      interaction.depth = c(3, 5, 7),          # Maximum depth of each tree
-      shrinkage = c(0.01, 0.005, 0.001),       # Lower learning rates
-      n.minobsinnode = c(10, 20)               # Minimum number of observations in nodes
-    )
-    
-    gbm_fit = train(
-      x = train_data[, predictors],
-      y = train_data$present_fct,
-      method = "gbm",
-      metric = "ROC",
-      verbose = F,
-      tuneGrid = gbm_grid,
-      trControl = train_ctrl
-    )
-    evaluate_model(gbm_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Generalized additive Model ####
-  glm_performance = tryCatch({
-    glm_fit = train(
-      x = train_data[, predictors],
-      y = train_data$present_fct,
-      method = "glm",
-      family=binomial, 
-      metric = "ROC",
-      preProcess = c("center", "scale"),
-      trControl = train_ctrl
-    )
-    evaluate_model(glm_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Neural Network ####
-  nn_performance = tryCatch({
-    nn_fit = dnn(
-      X = train_data[, predictors],
-      Y = train_data$present,
-      hidden = c(200L, 200L, 200L),
-      loss = "binomial",
-      activation = c("sigmoid", "leaky_relu", "leaky_relu"),
-      epochs = 500L, 
-      lr = 0.02,   
-      baseloss=10,
-      batchsize=nrow(train_data)/4,
-      dropout = 0.1,  # Regularization 
-      optimizer = config_optimizer("adam", weight_decay = 0.001),
-      lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
-      early_stopping = 200, # stop training when validation loss does not decrease anymore
-      validation = 0.3, # used for early stopping and lr_scheduler 
-      device = "cuda",
-      bootstrap = 5
-    )
-    
-    evaluate_model(nn_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Summarize results
-  performance_summary = tibble(
-    species = data_spec$species[1],
-    obs = nrow(data_spec),
-    model = c("SSDM_RF", "SSDM_GBM", "SSDM_GLM", "SSDM_NN"),
-    auc = c(rf_performance$AUC, gbm_performance$AUC, glm_performance$AUC, nn_performance$AUC),
-    accuracy = c(rf_performance$Accuracy, gbm_performance$Accuracy, glm_performance$Accuracy, nn_performance$Accuracy),
-    kappa = c(rf_performance$Kappa, gbm_performance$Kappa, glm_performance$Kappa, nn_performance$Kappa),
-    precision = c(rf_performance$Precision, gbm_performance$Precision, glm_performance$Precision, nn_performance$Precision),
-    recall = c(rf_performance$Recall, gbm_performance$Recall, glm_performance$Recall, nn_performance$Recall),
-    f1 = c(rf_performance$F1, gbm_performance$F1, glm_performance$F1, nn_performance$F1)
-  )
-  
-  return(performance_summary)
-})
+}
 
-ssdm_results = bind_rows(ssdm_results)
+# ----------------------------------------------------------------------#
+# Run training                                                       ####
+# ----------------------------------------------------------------------#
+handlers(global = TRUE)
+handlers("progress")
+
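+# Resume logic: only train species without an existing results file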
+pa_files = list.files("data/r_objects/pa_sampling/", full.names = T)
+species_processed = stringr::str_remove_all(list.files("data/r_objects/model_results/"), ".RData")
+pa_files_run = pa_files[!sapply(pa_files, function(x){any(stringr::str_detect(x, species_processed))})]
+
+train_models(pa_files_run)
+
+# ----------------------------------------------------------------------#
+# Combine results                                                    ####
+# ----------------------------------------------------------------------#
+files = list.files("data/r_objects/model_results/", full.names = T)
+ssdm_results = lapply(files, function(f){load(f); return(performance_spec)}) %>% 
+  bind_rows() 
+  #dplyr::group_by(species, model, metric) %>% 
+  #dplyr::summarize(value = mean(value, na.rm =T))
 
 save(ssdm_results, file = "data/r_objects/ssdm_results.RData")
diff --git a/R/04_03_msdm_embed_raw.R b/R/04_03_msdm_embed_raw.R
index 6e7924a172401b7f428a55183f580eafa8ec0bc5..6b2df020f31d9ad7f1a666bebd1e6675ff230a9f 100644
--- a/R/04_03_msdm_embed_raw.R
+++ b/R/04_03_msdm_embed_raw.R
@@ -4,37 +4,41 @@ library(cito)
 
 source("R/utils.R")
 
-load("data/r_objects/model_data.RData")
+load("data/r_objects/model_data_pa_sampling.RData")
 
-model_data$species_int = as.integer(as.factor(model_data$species))
-
-train_data = dplyr::filter(model_data, train == 1)
-test_data = dplyr::filter(model_data, train == 0)
+model_data = model_data %>% 
+  dplyr::mutate(species_int = as.integer(as.factor(species))) %>% 
+  sf::st_drop_geometry()
+  
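+# Hold out evaluation fold 1 (spatial blocks) as the test set; the remaining
+# folds form the training set.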
+test_data = dplyr::filter(model_data, fold_eval == 1) %>% 
+  dplyr::select(-fold_eval)
+train_data = dplyr::filter(model_data, fold_eval != 1) %>% 
+  dplyr::select(-fold_eval)
 
 # ----------------------------------------------------------------------#
 # Train model                                                        ####
 # ----------------------------------------------------------------------#
-predictors = paste0("layer_", 1:19)
-formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species_int, dim = 50, lambda = 0.000001)"))
+predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness")
+formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species_int, dim = 50, train = T, lambda = 0.0001)"))
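+# e() adds a species-level embedding to the network: dim sets the embedding
+# size, train = T makes it trainable, and lambda regularises the embedding
+# weights.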
 
-plot(1, type="n", xlab="", ylab="", xlim=c(0, 15000), ylim=c(0, 0.7)) # empty plot with better limits, draw points in there
+plot(1, type="n", xlab="", ylab="", xlim=c(0, 25000), ylim=c(0, 0.7)) # empty plot with better limits, draw points in there
 msdm_fit_embedding_raw = dnn(
   formula,
   data = train_data,
   hidden = c(500L, 500L, 500L),
   loss = "binomial",
   activation = c("sigmoid", "leaky_relu", "leaky_relu"),
-  epochs = 15000L, 
+  epochs = 10L, 
   lr = 0.01,   
   baseloss = 1,
   batchsize = nrow(train_data),
   dropout = 0.1,
-  burnin = 100,
+  burnin = 500,
   optimizer = config_optimizer("adam", weight_decay = 0.001),
-  lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
-  early_stopping = 250,
-  validation = 0.3,
-  device = "cuda",
+  lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 150, factor = 0.7),
+  early_stopping = 400,
+  validation = 0.2,
+  device = "cuda"
 )
 
 save(msdm_fit_embedding_raw, file = "data/r_objects/msdm_fit_embedding_raw.RData")
@@ -44,20 +48,22 @@ save(msdm_fit_embedding_raw, file = "data/r_objects/msdm_fit_embedding_raw.RData
 # ----------------------------------------------------------------------#
 load("data/r_objects/msdm_fit_embedding_raw.RData")
 
-data_split = split(model_data, model_data$species)
+data_split = test_data %>% 
+  split(test_data$species)
 
 msdm_results_embedding_raw = lapply(data_split, function(data_spec){
-  test_data = dplyr::filter(data_spec, train == 0) %>% 
+  species = data_spec$species[1]
+  data_spec = data_spec %>% 
     dplyr::select(-species)
   
   msdm_performance = tryCatch({
-    evaluate_model(msdm_fit_embedding_raw, test_data)
+    evaluate_model(msdm_fit_embedding_raw, data_spec)
   }, error = function(e){
     list(AUC = NA, Accuracy = NA, Kappa = NA, Precision = NA, Recall = NA, F1 = NA)
   })
   
   performance_summary = tibble(
-    species = data_spec$species[1],
+    species = !!species,
     obs = nrow(data_spec),
     model = "MSDM_embed",
     auc = msdm_performance$AUC,
diff --git a/R/04_07_msdm_embed_multi_nolonlat.R b/R/04_07_msdm_embed_multi_nolonlat.R
index 2044e0309227be71dd7a3dccac1f153d27ced97b..15d37e9828461494f0458566989968d75530ee64 100644
--- a/R/04_07_msdm_embed_multi_nolonlat.R
+++ b/R/04_07_msdm_embed_multi_nolonlat.R
@@ -1,10 +1,11 @@
 library(dplyr)
 library(tidyr)
 library(cito)
+library(sf)
 
 source("R/utils.R")
 
-load("data/r_objects/model_data.RData")
+load("data/r_objects/model_data_pa_sampling.RData")
 load("data/r_objects/func_dist.RData")
 load("data/r_objects/phylo_dist.RData")
 load("data/r_objects/range_dist.RData")
@@ -18,11 +19,13 @@ model_species = Reduce(
 ) 
 
 model_data_final = model_data %>%
+  sf::st_drop_geometry() %>% 
   dplyr::filter(species %in% !!model_species) %>% 
   dplyr::mutate(species_int = as.integer(as.factor(species)))
+  
 
-train_data = dplyr::filter(model_data_final, train == 1)
-test_data = dplyr::filter(model_data_final, train == 0)
+test_data = dplyr::filter(model_data_final, fold_eval == 1)
+train_data = dplyr::filter(model_data_final, fold_eval != 1)
 
 # Create embeddings
 func_ind = match(model_species, colnames(func_dist))
@@ -41,14 +44,15 @@ range_embeddings = eigen(range_dist)$vectors[,1:20]
 # ----------------------------------------------------------------------#
 # Train model                                                        ####
 # ----------------------------------------------------------------------#
-predictors = paste0("layer_", 1:19)
+# Define predictors
+predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness")
 
 formula = as.formula(
   paste0("present ~ ", 
          paste(predictors, collapse = '+'),  
-         " + e(species_int, weights = func_embeddings, lambda = 0.00001, train = F)",
-         " + e(species_int, weights = phylo_embeddings, lambda = 0.00001, train = F)",
-         " + e(species_int, weights = range_embeddings, lambda = 0.00001, train = F)"
+         " + e(species_int, weights = func_embeddings, lambda = 0.00001, train = T)",
+         " + e(species_int, weights = phylo_embeddings, lambda = 0.00001, train = T)",
+         " + e(species_int, weights = range_embeddings, lambda = 0.00001, train = T)"
   )
 )
 
@@ -56,7 +60,7 @@ plot(1, type="n", xlab="", ylab="", xlim=c(0, 25000), ylim=c(0, 0.7)) # empty pl
 msdm_fit_embedding_multi_nolonlat = dnn(
   formula,
   data = train_data,
-  hidden = c(500L, 500L, 500L),
+  hidden = c(200L, 200L, 200L),
   loss = "binomial",
   activation = c("sigmoid", "leaky_relu", "leaky_relu"),
   epochs = 30000L, 
diff --git a/R/04_07_msdm_oneoff.R b/R/04_07_msdm_oneoff.R
new file mode 100644
index 0000000000000000000000000000000000000000..4ef2a031a860a28d7f7f6596a94db2d617f85e17
--- /dev/null
+++ b/R/04_07_msdm_oneoff.R
@@ -0,0 +1,111 @@
+library(dplyr)
+library(tidyr)
+library(cito)
+library(sf)
+
+source("R/utils.R")
+
+load("data/r_objects/model_data_pa_sampling.RData")
+load("data/r_objects/func_dist.RData")
+load("data/r_objects/phylo_dist.RData")
+load("data/r_objects/range_dist.RData")
+
+# ----------------------------------------------------------------------#
+# Prepare data                                                       ####
+# ----------------------------------------------------------------------#
+model_species = Reduce(
+  intersect, 
+  list(unique(model_data$species), colnames(range_dist), colnames(phylo_dist), colnames(func_dist))
+) 
+
+model_data_final = model_data %>%
+  sf::st_drop_geometry() %>% 
+  dplyr::filter(species %in% !!model_species) %>% 
+  dplyr::mutate(species_int = as.integer(as.factor(species)))
+  
+
+test_data = dplyr::filter(model_data_final, fold_eval == 1)
+train_data = dplyr::filter(model_data_final, fold_eval != 1)
+
+# Create embeddings
+func_ind = match(model_species, colnames(func_dist))
+func_dist = func_dist[func_ind, func_ind]
+func_embeddings = eigen(func_dist)$vectors[,1:20]
+
+phylo_ind = match(model_species, colnames(phylo_dist))
+phylo_dist = phylo_dist[phylo_ind, phylo_ind]
+phylo_embeddings = eigen(phylo_dist)$vectors[,1:20]
+
+range_ind = match(model_species, colnames(range_dist))
+range_dist = range_dist[range_ind, range_ind]
+range_embeddings = eigen(range_dist)$vectors[,1:20]
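+# The leading 20 eigenvectors of each (symmetric) distance matrix act as
+# fixed-width species embeddings; their rows follow model_species, which is
+# assumed to match the integer coding of species_int above.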
+
+
+# ----------------------------------------------------------------------#
+# Train model                                                        ####
+# ----------------------------------------------------------------------#
+# Define predictors
+predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness")
+
+formula = as.formula(
+  paste0("present ~ ", 
+         paste(predictors, collapse = '+'),  
+         " + e(species_int, weights = func_embeddings, lambda = 0.00001, train = T)",
+         " + e(species_int, weights = phylo_embeddings, lambda = 0.00001, train = T)",
+         " + e(species_int, weights = range_embeddings, lambda = 0.00001, train = T)"
+  )
+)
+
+plot(1, type="n", xlab="", ylab="", xlim=c(0, 25000), ylim=c(0, 0.7)) # empty plot with better limits, draw points in there
+msdm_fit_embedding_multi_nolonlat = dnn(
+  formula,
+  data = train_data,
+  hidden = c(400L, 400L, 400L),
+  loss = "binomial",
+  activation = c("sigmoid", "leaky_relu", "leaky_relu"),
+  epochs = 30000L, 
+  lr = 0.01,   
+  baseloss = 1,
+  batchsize = nrow(train_data),
+  dropout = 0.1,
+  burnin = 100,
+  optimizer = config_optimizer("adam", weight_decay = 0.001),
+  lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 150, factor = 0.7),
+  early_stopping = 250,
+  validation = 0.3,
+  device = "cuda",
+)
+save(msdm_fit_embedding_multi_nolonlat, file = "data/r_objects/msdm_fit_embedding_multi_nolonlat.RData")
+
+# ----------------------------------------------------------------------#
+# Evaluate results                                                   ####
+# ----------------------------------------------------------------------#
+load("data/r_objects/msdm_fit_embedding_multi_nolonlat.RData")
+data_split = test_data %>% 
+  group_by(species_int) %>% 
+  group_split()
+
+msdm_results_embedding_multi_nolonlat = lapply(data_split, function(data_spec){
+  target_species =  data_spec$species[1]
+  data_spec = dplyr::select(data_spec, -species)
+  
+  msdm_performance = tryCatch({
+    evaluate_model(msdm_fit_embedding_multi_nolonlat, data_spec)
+  }, error = function(e){
+    list(AUC = NA, Accuracy = NA, Kappa = NA, Precision = NA, Recall = NA, F1 = NA)
+  })
+  
+  performance_summary = tibble(
+    species = !!target_species,
+    obs = length(which(model_data$species == target_species)),
+    model = "MSDM_embed_informed_multi_nolonlat",
+    auc = msdm_performance$AUC,
+    accuracy = msdm_performance$Accuracy,
+    kappa = msdm_performance$Kappa,
+    precision = msdm_performance$Precision,
+    recall = msdm_performance$Recall,
+    f1 = msdm_performance$F1
+  )
+}) %>% bind_rows()
+
+save(msdm_results_embedding_multi_nolonlat, file = "data/r_objects/msdm_results_embedding_multi_nolonlat.RData")
diff --git a/R/05_01_performance_analysis.qmd b/R/05_01_performance_analysis.qmd
index e2c2f330a395e7f3bdabb992b950e2284d648033..17be731c8ac03a0e4ae1cec80991f6b322a97be0 100644
--- a/R/05_01_performance_analysis.qmd
+++ b/R/05_01_performance_analysis.qmd
@@ -11,12 +11,10 @@ library(sf)
 library(plotly)
 library(DT)
 
-
+load("../data/r_objects/model_data_pa_sampling.RData")
 load("../data/r_objects/ssdm_results.RData")
-load("../data/r_objects/msdm_fit.RData")
-load("../data/r_objects/msdm_results.RData")
-load("../data/r_objects/msdm_results_embedding_trained.RData")
-load("../data/r_objects/msdm_results_multiclass.RData.")
+load("../data/r_objects/msdm_results_embedding_raw.RData")
+
 load("../data/r_objects/range_maps.RData")
 load("../data/r_objects/range_maps_gridded.RData")
 load("../data/r_objects/occs_final.RData")
@@ -25,14 +23,15 @@ sf::sf_use_s2(use_s2 = FALSE)
 ```
 
 ```{r globals, echo = FALSE, include = FALSE}
-# Select metrics
-focal_metrics = c("auc", "f1", "accuracy")  # There's a weird bug in plotly that scrambles up lines when using more than three groups
 
-# Dropdown options
-plotly_buttons = list()
-for(metric in focal_metrics){
-  plotly_buttons[[length(plotly_buttons) + 1]] = list(method = "restyle", args = list("transforms[0].value", metric), label = metric)
-}
+
+# Count occs per species
+obs_count = model_data %>% 
+  sf::st_drop_geometry() %>% 
+  dplyr::filter(present == 1) %>% 
+  dplyr::group_by(species) %>% 
+  dplyr::summarise(obs = n())
+
 
 # Regression functions
 asym_regression = function(x, y){
@@ -44,8 +43,8 @@ asym_regression = function(x, y){
   )
 }
 
-lin_regression = function(x, y){
-  glm_fit = suppressWarnings(glm(y~x, family = "binomial"))
+lin_regression = function(x, y, family = "binomial"){
+  glm_fit = suppressWarnings(glm(y~x, family = family))
   new_x = seq(min(x), max(x), length.out = 100)
   data.frame(
     x = new_x,
@@ -53,15 +52,29 @@ lin_regression = function(x, y){
   )
 }
 
-# Performance table
-performance = bind_rows(ssdm_results, msdm_results, msdm_results_embedding_trained, msdm_results_multiclass) %>% 
-  pivot_longer(c(auc, accuracy, kappa, precision, recall, f1), names_to = "metric") %>% 
-  dplyr::filter(!is.na(value)) %>% 
+msdm_results = msdm_results_embedding_raw %>% 
+  pivot_longer(all_of(c("auc", "accuracy", "kappa", "precision", "recall", "f1")), names_to = "metric", values_to = "value") %>% 
+  dplyr::select(-obs) %>% 
   dplyr::mutate(
-    metric = factor(metric, levels = c("auc", "kappa", "f1", "accuracy", "precision", "recall")),
-    value = round(pmax(value, 0, na.rm = T), 3) # Fix one weird instance of f1 < 0
+    fold_eval = 1
   ) %>% 
-  dplyr::filter(metric %in% focal_metrics)
+  drop_na()
+
+# Performance table
+performance = ssdm_results %>% 
+  dplyr::select(-obs) %>% 
+  dplyr::filter(fold_eval == 1, species %in% msdm_results$species) %>%  # Only look at first fold
+  bind_rows(msdm_results) %>% 
+  ungroup() %>% 
+  dplyr::mutate(
+    value = case_when(
+      ((is.na(value) | is.nan(value)) & metric %in% c("auc", "f1", "accuracy", "precision", "recall")) ~ 0.5,
+      ((is.na(value) | is.nan(value)) & metric %in% c("kappa")) ~ 0,
+      .default = value
+    )
+  )
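+# Missing metric values (failed fits) are imputed with neutral baselines:
+# 0.5 for the rate-type metrics and 0 for Cohen's kappa, so failed models
+# count as chance-level instead of being dropped.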
+
+focal_metrics = unique(performance$metric)
 ```
 
 ## Summary
@@ -139,7 +152,8 @@ Multi-Class Neural Network (**MSDM_multiclass**)
 The table below shows the analysed modeling results.
 
 ```{r performance, echo = FALSE, message=FALSE, warnings=FALSE}
-DT::datatable(performance)
+DT::datatable(performance) %>% 
+  formatRound(columns="value", digits=3)
 ```
 
 ### Number of records
@@ -148,83 +162,112 @@ DT::datatable(performance)
 -   Very poor performance below 50-100 observations
 
 ```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE}
-df_plot = performance
-
-# Calculate regression lines for each model and metric combination
-suppressWarnings({
-  regression_lines = df_plot %>%
-    group_by(model, metric) %>%
-    group_modify(~asym_regression(.x$obs, .x$value))
-})
-
-# Create base plot
-plot <- plot_ly() %>% 
-  layout(
-    title = "Model Performance vs. Number of observations",
-    xaxis = list(title = "Number of observations", type = "log"),
-    yaxis = list(title = "Value"),
-    legend = list(x = 1.1, y = 0.5),  # Move legend to the right of the plot
-    margin = list(r = 150),  # Add right margin to accommodate legend
-    hovermode = 'closest',
-    updatemenus = list(
-      list(
-        type = "dropdown",
-        active = 0,
-        buttons = plotly_buttons
-      )
+plot_performance_over_frequency = function(df_plot, metric) {
+
+  df_plot = dplyr::filter(df_plot, metric == !!metric) 
+  
+  # Calculate regression lines for each model and metric combination
+  suppressWarnings({
+    regression_lines = df_plot %>%
+      group_by(model) %>%
+      group_modify( ~ asym_regression(.x$obs, .x$value))
+  })
+  
+  # Create base plot
+  plot <- plot_ly() %>%
+    layout(
+      title = "Model Performance vs. Number of observations",
+      xaxis = list(title = "Number of observations", type = "log"),
+      yaxis = list(title = metric),
+      legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
+      margin = list(r = 150), # Add right margin to accommodate legend
+      hovermode = 'closest'
     )
-  )
-
-# Points
-for (model_name in unique(df_plot$model)) {
-  plot = plot %>%
-    add_markers(
-      data = filter(df_plot, model == model_name),
-      x = ~obs,
-      y = ~value,
-      color = model_name,  # Set color to match legendgroup
-      legendgroup = model_name,
-      opacity = 0.6,
-      name = ~model,
-      hoverinfo = 'text',
-      text = ~paste("Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3)),
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
+  
+  # Points
+  for (model_name in unique(df_plot$model)) {
+    plot = plot %>%
+      add_markers(
+        data = filter(df_plot, model == model_name, metric %in% focal_metrics),
+        x = ~ obs,
+        y = ~ value,
+        color = model_name, # Set color to match legendgroup
+        legendgroup = model_name,
+        opacity = 0.6,
+        name = ~ model,
+        hoverinfo = 'text',
+        text = ~ paste(
+          "Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3)
         )
       )
-    )
-}
-
-# Add regression lines
-for(model_name in unique(df_plot$model)){
-  reg_data = dplyr::filter(regression_lines, model == model_name)
-  plot = plot %>% 
-    add_lines(
-      data = reg_data,
-      x = ~x,
-      y = ~fit,
-      color = model_name,  # Set color to match legendgroup
-      legendgroup = model_name,
-      name = paste(model_name, '(fit)'),
-      showlegend = FALSE,
-      transforms = list(
-        list(
-          type = 'filter',
-          target = ~metric,
-          operation = '=',
-          value = focal_metrics[1]
-        )
+  }
+  
+  # Add regression lines
+  for (model_name in unique(df_plot$model)) {
+    reg_data = dplyr::filter(regression_lines, model == model_name)
+    plot = plot %>%
+      add_lines(
+        data = reg_data,
+        x = ~ x,
+        y = ~ fit,
+        color = model_name, # Set color to match legendgroup
+        legendgroup = model_name,
+        name = paste(model_name, '(fit)'),
+        showlegend = FALSE
       )
-    )
+  }
+  
+  return(plot)
 }
 
+df_plot = performance %>% dplyr::left_join(obs_count, by = "species") 
+  
+```
+
+::: panel-tabset
+#### AUC
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "auc")
+bslib::card(plot, full_screen = T)
+```
+
+#### F1
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "f1")
 bslib::card(plot, full_screen = T)
 ```
 
+#### Cohen's kappa
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "kappa")
+bslib::card(plot, full_screen = T)
+```
+
+#### Accuracy
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "accuracy")
+bslib::card(plot, full_screen = T)
+```
+
+#### Precision
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "precision")
+bslib::card(plot, full_screen = T)
+```
+
+#### Recall
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "recall")
+bslib::card(plot, full_screen = T)
+```
+:::
+
 ### Range characteristics
 
 #### Range size
@@ -234,7 +277,7 @@ Range size was calculated based on polygon layers from the IUCN Red List of Thre
 -   Model performance tended to be slightly higher for species with larger range size
 -   Only RF shows continuous performance improvements beyond range sizes of \~5M km²
 
-```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE}
+```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE, eval=F}
 
 df_join = range_maps %>% 
   dplyr::mutate(range_size = as.numeric(st_area(range_maps) / 1000000)) %>%  # range in sqkm
@@ -327,7 +370,7 @@ $$
 
 -   Models for species with higher range coverage showed slightly better performance
 
-```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE}
+```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE, eval=F}
 df_cells_total = range_maps_gridded %>%
   dplyr::rename("species" = name_matched) %>% 
   group_by(species) %>%
@@ -435,7 +478,7 @@ Higher bias values indicate that occurrence records are spatially more clustered
 
 -   There was no strong relationship between range coverage bias and model performance
 
-```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE}
+```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE, eval=F}
 df_occs_total = occs_final %>% 
   st_drop_geometry() %>% 
   group_by(species) %>% 
@@ -529,7 +572,7 @@ bslib::card(plot, full_screen = T)
 Functional groups were assigned based on taxonomic order. The following groupings were used:
 
 | Functional group      | Taxomic orders                                                        |
-|------------------|-----------------------------------------------------|
+|--------------------|----------------------------------------------------|
 | large ground-dwelling | Carnivora, Artiodactyla, Cingulata, Perissodactyla                    |
 | small ground-dwelling | Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha |
 | arboreal              | Primates, Pilosa                                                      |
@@ -537,7 +580,7 @@ Functional groups were assigned based on taxonomic order. The following grouping
 
 -   Models for bats tended to perform slightly worse than for other groups.
 
-```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE}
+```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE, eval=F}
 df_plot = performance %>% 
   dplyr::left_join(functional_groups, by = c("species" = "name_matched"))
 
@@ -582,4 +625,3 @@ plot <- plot %>%
 
 bslib::card(plot, full_screen = T)
 ```
-
diff --git a/R/05_01_performance_analysis_carsten.qmd b/R/05_01_performance_analysis_carsten.qmd
new file mode 100644
index 0000000000000000000000000000000000000000..7dee2c4fabee65f3b26905f8d549661df50c107b
--- /dev/null
+++ b/R/05_01_performance_analysis_carsten.qmd
@@ -0,0 +1,190 @@
+---
+title: "SDM Performance analysis"
+format: html
+editor: visual
+engine: knitr
+---
+
+```{r init, echo = FALSE, include = FALSE}
+library(tidyverse)
+library(sf)
+library(plotly)
+library(DT)
+
+load("../data/r_objects/model_data_pa_sampling.RData")
+load("../data/r_objects/ssdm_results.RData")
+load("../data/r_objects/msdm_results_embedding_raw.RData")
+
+load("../data/r_objects/range_maps.RData")
+load("../data/r_objects/range_maps_gridded.RData")
+load("../data/r_objects/occs_final.RData")
+load("../data/r_objects/functional_groups.RData")
+sf::sf_use_s2(use_s2 = FALSE)
+```
+
+```{r globals, echo = FALSE, include = FALSE}
+
+
+# Count occs per species
+obs_count = model_data %>% 
+  sf::st_drop_geometry() %>% 
+  dplyr::filter(present == 1) %>% 
+  dplyr::group_by(species) %>% 
+  dplyr::summarise(obs = n())
+
+
+# Regression functions
+asym_regression = function(x, y){
+  nls_fit = nls(y ~ 1 - (1-b) * exp(-c * log(x)), start = list(b = 0.1, c = 0.1))
+  new_x = exp(seq(log(min(x)), log(max(x)), length.out = 100))
+  data.frame(
+    x = new_x,
+    fit = predict(nls_fit, newdata = data.frame(x = new_x))
+  )
+}
+
+lin_regression = function(x, y, family = "binomial"){
+  glm_fit = suppressWarnings(glm(y~x, family = family))
+  new_x = seq(min(x), max(x), length.out = 100)
+  data.frame(
+    x = new_x,
+    fit = predict(glm_fit, newdata = data.frame(x = new_x), type = "response")
+  )
+}
+
+msdm_results = msdm_results_embedding_raw %>% 
+  pivot_longer(all_of(c("auc", "accuracy", "kappa", "precision", "recall", "f1")), names_to = "metric", values_to = "value") %>% 
+  dplyr::select(-obs) %>% 
+  dplyr::mutate(
+    fold_eval = 1
+  ) %>% 
+  drop_na()
+
+# Performance table
+performance = ssdm_results %>% 
+  dplyr::select(-obs) %>% 
+  dplyr::filter(fold_eval == 1, species %in% msdm_results$species) %>%  # Only look at first fold
+  bind_rows(msdm_results) %>% 
+  ungroup() %>% 
+  dplyr::mutate(
+    value = case_when(
+      ((is.na(value) | is.nan(value)) & metric %in% c("auc", "f1", "accuracy", "precision", "recall")) ~ 0.5,
+      ((is.na(value) | is.nan(value)) & metric %in% c("kappa")) ~ 0,
+      .default = value
+    )
+  )
+
+focal_metrics = unique(performance$metric)
+```
+
+
+## Analysis
+
+### Number of records
+
+```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE}
+plot_performance_over_frequency = function(df_plot, metric) {
+
+  df_plot = dplyr::filter(df_plot, metric == !!metric) 
+  
+  # Calculate regression lines for each model and metric combination
+  suppressWarnings({
+    regression_lines = df_plot %>%
+      group_by(model) %>%
+      group_modify( ~ asym_regression(.x$obs, .x$value))
+  })
+  
+  # Create base plot
+  plot <- plot_ly() %>%
+    layout(
+      title = "Model Performance vs. Number of observations",
+      xaxis = list(title = "Number of observations", type = "log"),
+      yaxis = list(title = metric),
+      legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
+      margin = list(r = 150), # Add right margin to accommodate legend
+      hovermode = 'closest'
+    )
+  
+  # Points
+  for (model_name in unique(df_plot$model)) {
+    plot = plot %>%
+      add_markers(
+        data = filter(df_plot, model == model_name, metric %in% focal_metrics),
+        x = ~ obs,
+        y = ~ value,
+        color = model_name, # Set color to match legendgroup
+        legendgroup = model_name,
+        opacity = 0.6,
+        name = ~ model,
+        hoverinfo = 'text',
+        text = ~ paste(
+          "Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3)
+        )
+      )
+  }
+  
+  # Add regression lines
+  for (model_name in unique(df_plot$model)) {
+    reg_data = dplyr::filter(regression_lines, model == model_name)
+    plot = plot %>%
+      add_lines(
+        data = reg_data,
+        x = ~ x,
+        y = ~ fit,
+        color = model_name, # Set color to match legendgroup
+        legendgroup = model_name,
+        name = paste(model_name, '(fit)'),
+        showlegend = FALSE
+      )
+  }
+  
+  return(plot)
+}
+
+df_plot = performance %>% dplyr::left_join(obs_count, by = "species") 
+  
+```
+
+::: panel-tabset
+#### AUC
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "auc")
+bslib::card(plot, full_screen = T)
+```
+
+#### F1
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "f1")
+bslib::card(plot, full_screen = T)
+```
+
+#### Cohen's kappa
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "kappa")
+bslib::card(plot, full_screen = T)
+```
+
+#### Accuracy
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "accuracy")
+bslib::card(plot, full_screen = T)
+```
+
+#### Precision
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "precision")
+bslib::card(plot, full_screen = T)
+```
+
+#### Recall
+
+```{r echo = FALSE}
+plot = plot_performance_over_frequency(df_plot, metric = "recall")
+bslib::card(plot, full_screen = T)
+```
+:::
diff --git a/R/05_02_MSDM_comparison.qmd b/R/05_02_MSDM_comparison.qmd
index 5b14c8a7d1241af5315de31327fbd91dc067e89b..b70af796777e659e9da4f3b62acda73a5ad89c2c 100644
--- a/R/05_02_MSDM_comparison.qmd
+++ b/R/05_02_MSDM_comparison.qmd
@@ -12,13 +12,15 @@ library(plotly)
 library(DT)
 library(shiny)
 
-load("../data/r_objects/msdm_results_embedding_raw.RData")
-load("../data/r_objects/msdm_results_embedding_traits_static.RData")
-load("../data/r_objects/msdm_results_embedding_traits_trained.RData")
-load("../data/r_objects/msdm_results_embedding_phylo_static.RData")
-load("../data/r_objects/msdm_results_embedding_phylo_trained.RData")
-load("../data/r_objects/msdm_results_embedding_range_static.RData")
-load("../data/r_objects/msdm_results_embedding_range_trained.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_raw.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_traits_static.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_traits_trained.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_phylo_static.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_phylo_trained.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_range_static.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_range_trained.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_multi_nolonlat.RData")
+load("../data/r_objects/simple_cv/msdm_results_embedding_multi_lonlat.RData")
 
 sf::sf_use_s2(use_s2 = FALSE)
 ```
@@ -40,7 +42,9 @@ results_embedding_informed = c(
   "msdm_results_embedding_range_static",
   "msdm_results_embedding_range_trained",
   "msdm_results_embedding_traits_static",
-  "msdm_results_embedding_traits_trained"
+  "msdm_results_embedding_traits_trained",
+  "msdm_results_embedding_multi_nolonlat",
+  "msdm_results_embedding_multi_lonlat"
 )
 
 results_embedding_informed_merged = lapply(results_embedding_informed, function(df_name){
@@ -63,7 +67,9 @@ results_final = results_embedding_raw %>%
       "MSDM_embed_informed_traits_static" = "M_TS",
       "MSDM_embed_informed_traits_trained" = "M_TT",
       "MSDM_embed_informed_range_static" = "M_RS",
-      "MSDM_embed_informed_range_trained" = "M_RT"
+      "MSDM_embed_informed_range_trained" = "M_RT",
+      "MSDM_embed_informed_multi_nolonlat" = "M_MT_nolonlat",
+      "MSDM_embed_informed_multi_lonlat" = "M_MT_lonlat"
     ),
     across(all_of(focal_metrics), round, 3)
   )
@@ -274,13 +280,12 @@ for(model_name in unique(df_plot$model)){
 bslib::card(plot, full_screen = T)
 ```
 
-
 ## *Relative Performance*
 
 ```{r delta, echo = FALSE, message=FALSE, warnings=FALSE}
 results_ranked_obs = results_final_long %>% 
   group_by(species,  metric) %>% 
-  mutate(rank = rev(rank(value)))
+  mutate(rank = rank(value))
 
 reglines = results_ranked_obs %>%
   group_by(model, metric) %>%
@@ -426,7 +431,6 @@ bslib::card(plot, full_screen = T)
 ```
 :::
 
-
 ## *Trait space*
 
 ```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE}
diff --git a/R/_publish.yml b/R/_publish.yml
index 02920d16bb1021f602d5a92d12ad9761ba6a0334..c07bacdf4dd08df0543df4ec56e167e482e70c0d 100644
--- a/R/_publish.yml
+++ b/R/_publish.yml
@@ -2,3 +2,7 @@
   quarto-pub:
     - id: 309c45c3-1515-4985-a435-9ffa1888c5e7
       url: 'https://chrkoenig.quarto.pub/ssdm-performance-analysis-0a06'
+- source: 05_01_performance_analysis_carsten.qmd
+  quarto-pub:
+    - id: f9cafa63-bfd4-4c00-b401-09ec137bd3ce
+      url: 'https://chrkoenig.quarto.pub/sdm-performance-carsten'
diff --git a/R/utils.R b/R/utils.R
index b1da26abc38515507c2381fa178c92bab4e50296..1c9dc02ef9d965b3ed9a02f7552aec85de46141f 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -25,7 +25,7 @@ expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) {
   return(bbox)
 }
 
-evaluate_model <- function(model, test_data) {
+evaluate_model <- function(model, data) {
   # Accuracy: The proportion of correctly predicted instances (both true positives and true negatives) out of the total instances.
   # Formula: Accuracy = (TP + TN) / (TP + TN + FP + FN)
   
@@ -40,14 +40,14 @@ evaluate_model <- function(model, test_data) {
   
   # Predict probabilities
   if(class(model) %in% c("citodnn", "citodnnBootstrap")){
-    probs <- predict(model, as.matrix(test_data), type = "response")[,1]
+    probs <- predict(model, as.matrix(data), type = "response")[,1]
     preds <- factor(round(probs), levels = c("0", "1"), labels = c("A", "P"))
   } else {
-    probs <- predict(model, test_data, type = "prob")$P
-    preds <- predict(model, test_data, type = "raw")
+    probs <- predict(model, data, type = "prob")$P
+    preds <- predict(model, data, type = "raw")
   }
   
-  actual <- factor(test_data$present, levels = c("0", "1"), labels = c("A", "P"))
+  actual <- factor(data$present, levels = c("0", "1"), labels = c("A", "P"))
   
   # Calculate AUC
   auc <- pROC::roc(actual, probs, levels = c("P", "A"), direction = ">")$auc
diff --git a/renv.lock b/renv.lock
index ff0fc5aa5dcee66526155c43af29c34119242c22..7f13630b5e9ba0b1d75e0fcf60e2ae056a29d10f 100644
--- a/renv.lock
+++ b/renv.lock
@@ -2,6 +2,26 @@
   "R": {
     "Version": "4.3.0",
     "Repositories": [
+      {
+        "Name": "BioCsoft",
+        "URL": "https://bioconductor.org/packages/3.18/bioc"
+      },
+      {
+        "Name": "BioCann",
+        "URL": "https://bioconductor.org/packages/3.18/data/annotation"
+      },
+      {
+        "Name": "BioCexp",
+        "URL": "https://bioconductor.org/packages/3.18/data/experiment"
+      },
+      {
+        "Name": "BioCworkflows",
+        "URL": "https://bioconductor.org/packages/3.18/workflows"
+      },
+      {
+        "Name": "BioCbooks",
+        "URL": "https://bioconductor.org/packages/3.18/books"
+      },
       {
         "Name": "CRAN",
         "URL": "https://cloud.r-project.org"
@@ -177,6 +197,20 @@
       ],
       "Hash": "f27411eb6d9c3dada5edd444b8416675"
     },
+    "RcppArmadillo": {
+      "Package": "RcppArmadillo",
+      "Version": "14.2.0-1",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "Rcpp",
+        "methods",
+        "stats",
+        "utils"
+      ],
+      "Hash": "ce9c35ab9ea9c6ef4e27a08868cdb32b"
+    },
     "RcppEigen": {
       "Package": "RcppEigen",
       "Version": "0.3.4.0.2",
@@ -190,6 +224,57 @@
       ],
       "Hash": "4ac8e423216b8b70cb9653d1b3f71eb9"
     },
+    "RcppGSL": {
+      "Package": "RcppGSL",
+      "Version": "0.3.13",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "Rcpp",
+        "stats"
+      ],
+      "Hash": "e8fc7310d256a7b6c4de8e57ab76c55d"
+    },
+    "RcppParallel": {
+      "Package": "RcppParallel",
+      "Version": "5.1.9",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R"
+      ],
+      "Hash": "f38a72a419b91faac0ce5d9eee04c120"
+    },
+    "RcppZiggurat": {
+      "Package": "RcppZiggurat",
+      "Version": "0.1.6",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "Rcpp",
+        "RcppGSL",
+        "graphics",
+        "parallel",
+        "stats",
+        "utils"
+      ],
+      "Hash": "75b4a36aeeed440ad03b996081190703"
+    },
+    "Rfast": {
+      "Package": "Rfast",
+      "Version": "2.1.0",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "Rcpp",
+        "RcppArmadillo",
+        "RcppParallel",
+        "RcppZiggurat"
+      ],
+      "Hash": "79e8394e1fa06a4ae954b70ca9b16e8f"
+    },
     "SQUAREM": {
       "Package": "SQUAREM",
       "Version": "2021.1",
@@ -832,6 +917,21 @@
       ],
       "Hash": "33698c4b3127fc9f506654607fb73676"
     },
+    "dismo": {
+      "Package": "dismo",
+      "Version": "1.3-16",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "Rcpp",
+        "methods",
+        "raster",
+        "sp",
+        "terra"
+      ],
+      "Hash": "5c8646b40ba69146afc070aaea9f893c"
+    },
     "doParallel": {
       "Package": "doParallel",
       "Version": "1.0.17",
@@ -1139,6 +1239,17 @@
       ],
       "Hash": "15e9634c0fcd294799e9b2e929ed1b86"
     },
+    "geos": {
+      "Package": "geos",
+      "Version": "0.2.4",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "libgeos",
+        "wk"
+      ],
+      "Hash": "117a3a09b793abf1d2146027a71b7524"
+    },
     "geosphere": {
       "Package": "geosphere",
       "Version": "1.5-18",
@@ -1623,6 +1734,13 @@
       ],
       "Hash": "d908914ae53b04d4c0c0fd72ecc35370"
     },
+    "libgeos": {
+      "Package": "libgeos",
+      "Version": "3.11.1-2",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Hash": "323d0f39c2e5ebcb152b810d1e8ed9bb"
+    },
     "lifecycle": {
       "Package": "lifecycle",
       "Version": "1.0.4",
@@ -2660,6 +2778,18 @@
       ],
       "Hash": "75940133cca2e339afce15a586f85b11"
     },
+    "spatialEco": {
+      "Package": "spatialEco",
+      "Version": "2.0-2",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "sf",
+        "terra"
+      ],
+      "Hash": "d0352030add66f0c73ff5d7473b7aef1"
+    },
     "stringdist": {
       "Package": "stringdist",
       "Version": "0.9.12",
@@ -2847,6 +2977,27 @@
       ],
       "Hash": "829f27b9c4919c16b593794a6344d6c0"
     },
+    "tidyterra": {
+      "Package": "tidyterra",
+      "Version": "0.6.1",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "cli",
+        "data.table",
+        "dplyr",
+        "ggplot2",
+        "magrittr",
+        "rlang",
+        "scales",
+        "sf",
+        "terra",
+        "tibble",
+        "tidyr"
+      ],
+      "Hash": "5d9672bc78297d33c4ad8b51a8aaa48f"
+    },
     "tidyverse": {
       "Package": "tidyverse",
       "Version": "2.0.0",