diff --git a/R/03_presence_absence_preparation.R b/R/03_presence_absence_preparation.R
index 9fd817c71fa460cf27c87aa9d023296999392b67..78d9327dd475664272dfdfd82d8bdc4263b17e5e 100644
--- a/R/03_presence_absence_preparation.R
+++ b/R/03_presence_absence_preparation.R
@@ -126,14 +126,9 @@ model_data = furrr::future_map(occs_split, .progress = TRUE, .options = furrr::f
   pa_spec = occs_spec %>% 
     dplyr::mutate(present = 1) %>% 
     bind_rows(abs_spec) 
-  
-  # Split into train and test datasets
-  train_index = createDataPartition(pa_spec$present, p = 0.7, list = FALSE)
-  pa_spec$train = 0
-  pa_spec$train[train_index] = 1
-  
+
   # Define cross-validation folds
-  folds = tryCatch({
+  folds_eval = tryCatch({
     spatial_folds = suppressMessages(
       blockCV::cv_spatial(
         pa_spec,
@@ -150,11 +145,11 @@ model_data = furrr::future_map(occs_split, .progress = TRUE, .options = furrr::f
     NA
   })
   
-  pa_spec$folds = folds
-  pa_spec$geometry = NULL
+
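+  # Keep the geometry column: 04_01 re-blocks the training data spatially per outer fold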
+  pa_spec$fold_eval = folds_eval
   
   return(pa_spec)
 })
 
 model_data = bind_rows(model_data)
-save(model_data, file = "data/r_objects/model_data.RData")
+save(model_data, file = "data/r_objects/nest_cv/model_data.RData")
diff --git a/R/04_01_ssdm_modeling.R b/R/04_01_ssdm_modeling.R
index 959f77c38589b6ad8594c0a3d465aaa6f9907f7a..ddef6965715f976e9bf90267a213daa1c7ec4585 100644
--- a/R/04_01_ssdm_modeling.R
+++ b/R/04_01_ssdm_modeling.R
@@ -7,147 +7,161 @@ library(pROC)
 
 source("R/utils.R")
 
-load("data/r_objects/model_data.RData")
+load("data/r_objects/full_evaluation/model_data.RData")
 
 data_split = split(model_data, model_data$species)
 
+# Fallback performance result for model fits that error out
+na_performance = list(
+  AUC = NA,
+  Accuracy = NA,
+  Kappa = NA,
+  Precision = NA,
+  Recall = NA,
+  F1 = NA
+)
+
 # ----------------------------------------------------------------------#
 # Train models                                                       ####
 # ----------------------------------------------------------------------#
 future::plan("multisession", workers = 8)
 ssdm_results = furrr::future_map(data_split, .options = furrr::furrr_options(seed = 123), .f = function(data_spec){
-  # Initial check
-  if(nrow(data_spec) < 10 | anyNA(data_spec$folds)){ 
-    return(NULL)
-  }
-  
   data_spec$present_fct = factor(data_spec$present, levels = c("0", "1"), labels = c("A", "P"))
-  train_data = dplyr::filter(data_spec, train == 1)
-  test_data = dplyr::filter(data_spec, train == 0)
-  
-  # Define empty result for performance eval
-  na_performance = list(    
-    AUC = NA,
-    Accuracy = NA,
-    Kappa = NA,
-    Precision = NA,
-    Recall = NA,
-    F1 = NA
-  )
-  
-  # Define predictors
-  predictors = paste0("layer_", 1:19)
-  
-  # Define caret training routine #####
-  index_train = lapply(unique(sort(train_data$fold)), function(x){
-    return(which(train_data$fold != x))
-  })
   
-  train_ctrl = trainControl(
-    search = "grid",
-    classProbs = TRUE, 
-    index = index_train,
-    summaryFunction = twoClassSummary, 
-    savePredictions = "final"
-  )
-  
-  # Random Forest #####
-  rf_performance = tryCatch({
-    rf_grid = expand.grid(
-      mtry = c(3,7,11,15,19)                # Number of randomly selected predictors
-    )
+  # Outer CV loop; per-fold results are collected and bound at the end
+  performance_summary_list = list()
+  for(k in sort(unique(data_spec$fold_eval))){
+    data_test = dplyr::filter(data_spec, fold_eval == k)
+    data_train = dplyr::filter(data_spec, fold_eval != k)
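+    # Fold k is held out for evaluation; all remaining spatial blocks are used for training and tuning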
     
-    rf_fit = caret::train(
-      x = train_data[, predictors],
-      y = train_data$present_fct,
-      method = "rf",
-      metric = "ROC",
-      tuneGrid = rf_grid,
-      trControl = train_ctrl
-    )
-    evaluate_model(rf_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Gradient Boosted Machine ####
-  gbm_performance = tryCatch({
-    gbm_grid <- expand.grid(
-      n.trees = c(100, 500, 1000, 1500),       # number of trees
-      interaction.depth = c(3, 5, 7),          # Maximum depth of each tree
-      shrinkage = c(0.01, 0.005, 0.001),       # Lower learning rates
-      n.minobsinnode = c(10, 20)               # Minimum number of observations in nodes
-    )
+    if(nrow(data_test) == 0 || nrow(data_train) == 0){
+      next  # skip this outer fold if either partition is empty
+    }
     
-    gbm_fit = train(
-      x = train_data[, predictors],
-      y = train_data$present_fct,
-      method = "gbm",
-      metric = "ROC",
-      verbose = F,
-      tuneGrid = gbm_grid,
-      trControl = train_ctrl
+    # Create inner CV folds for model training
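+    # (blocking only the training partition keeps the held-out fold untouched)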
+    cv_train = blockCV::cv_spatial(
+      data_train,
+      column = "present",
+      k = 5,
+      progress = FALSE, plot = FALSE, report = FALSE
     )
-    evaluate_model(gbm_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Generalized additive Model ####
-  glm_performance = tryCatch({
-    glm_fit = train(
-      x = train_data[, predictors],
-      y = train_data$present_fct,
-      method = "glm",
-      family=binomial, 
-      metric = "ROC",
-      preProcess = c("center", "scale"),
-      trControl = train_ctrl
+    data_train$fold_train = cv_train$folds_ids
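+    # folds_ids assigns each training record to one of the 5 inner spatial blocks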
+    
+    # Define caret training routine #####
+    index_train = lapply(unique(sort(data_train$fold_train)), function(x){
+      return(which(data_train$fold_train != x))
+    })
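+    # Each element of 'index' holds the rows caret trains on in one resample;
+    # the omitted rows of that inner fold supply the tuning metric.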
+    
+    train_ctrl = trainControl(
+      search = "grid",
+      classProbs = TRUE, 
+      index = index_train,
+      summaryFunction = twoClassSummary, 
+      savePredictions = "final"
     )
-    evaluate_model(glm_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
   
-  # Neural Network ####
-  nn_performance = tryCatch({
-    nn_fit = dnn(
-      X = train_data[, predictors],
-      Y = train_data$present,
-      hidden = c(200L, 200L, 200L),
-      loss = "binomial",
-      activation = c("sigmoid", "leaky_relu", "leaky_relu"),
-      epochs = 500L, 
-      lr = 0.02,   
-      baseloss=10,
-      batchsize=nrow(train_data)/4,
-      dropout = 0.1,  # Regularization 
-      optimizer = config_optimizer("adam", weight_decay = 0.001),
-      lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
-      early_stopping = 200, # stop training when validation loss does not decrease anymore
-      validation = 0.3, # used for early stopping and lr_scheduler 
-      device = "cuda",
-      bootstrap = 5
-    )
+    # Define predictors
+    predictors = paste0("layer_", 1:19)
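+    # Assumes the environmental predictor columns are named layer_1 ... layer_19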
     
-    evaluate_model(nn_fit, test_data)
-  }, error = function(e){
-    na_performance
-  })
-  
-  # Summarize results
-  performance_summary = tibble(
-    species = data_spec$species[1],
-    obs = nrow(data_spec),
-    model = c("SSDM_RF", "SSDM_GBM", "SSDM_GLM", "SSDM_NN"),
-    auc = c(rf_performance$AUC, gbm_performance$AUC, glm_performance$AUC, nn_performance$AUC),
-    accuracy = c(rf_performance$Accuracy, gbm_performance$Accuracy, glm_performance$Accuracy, nn_performance$Accuracy),
-    kappa = c(rf_performance$Kappa, gbm_performance$Kappa, glm_performance$Kappa, nn_performance$Kappa),
-    precision = c(rf_performance$Precision, gbm_performance$Precision, glm_performance$Precision, nn_performance$Precision),
-    recall = c(rf_performance$Recall, gbm_performance$Recall, glm_performance$Recall, nn_performance$Recall),
-    f1 = c(rf_performance$F1, gbm_performance$F1, glm_performance$F1, nn_performance$F1)
-  )
-  
+    # Random Forest #####
+    rf_performance = tryCatch({
+      rf_grid = expand.grid(
+        mtry = c(3,7,11,15,19)                # Number of randomly selected predictors
+      )
+      
+      rf_fit = caret::train(
+        x = data_train[, predictors],
+        y = data_train$present_fct,
+        method = "rf",
+        metric = "ROC",
+        tuneGrid = rf_grid,
+        trControl = train_ctrl
+      )
+      evaluate_model(rf_fit, data_test)
+    }, error = function(e){
+      na_performance
+    })
+    
+    # Gradient Boosted Machine ####
+    gbm_performance = tryCatch({
+      gbm_grid <- expand.grid(
+        n.trees = c(100, 500, 1000, 1500),       # number of trees
+        interaction.depth = c(3, 5, 7),          # Maximum depth of each tree
+        shrinkage = c(0.01, 0.005, 0.001),       # Lower learning rates
+        n.minobsinnode = c(10, 20)               # Minimum number of observations in nodes
+      )
+      
+      gbm_fit = train(
+        x = data_train[, predictors],
+        y = data_train$present_fct,
+        method = "gbm",
+        metric = "ROC",
+        verbose = FALSE,
+        tuneGrid = gbm_grid,
+        trControl = train_ctrl
+      )
+      evaluate_model(gbm_fit, data_test)
+    }, error = function(e){
+      na_performance
+    })
+    
+    # Generalized Linear Model ####
+    glm_performance = tryCatch({
+      glm_fit = train(
+        x = data_train[, predictors],
+        y = data_train$present_fct,
+        method = "glm",
+        family = binomial,
+        metric = "ROC",
+        preProcess = c("center", "scale"),
+        trControl = train_ctrl
+      )
+      evaluate_model(glm_fit, data_test)
+    }, error = function(e){
+      na_performance
+    })
+    
+    # Neural Network ####
+    nn_performance = tryCatch({
+      formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+')))
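+      # i.e. present ~ layer_1 + layer_2 + ... + layer_19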
+      
+      nn_fit = dnn(
+        formula,
+        data = data_train,
+        hidden = c(500L, 500L, 500L),
+        loss = "binomial",
+        activation = c("sigmoid", "leaky_relu", "leaky_relu"),
+        epochs = 500L, 
+        lr = 0.001,   
+        baseloss = 1,
+        batchsize = nrow(data_train),
+        dropout = 0.1,
+        burnin = 100,
+        optimizer = config_optimizer("adam", weight_decay = 0.001),
+        lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
+        early_stopping = 250,
+        validation = 0.3,
+        device = "cuda"
+      )
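+      # Note: device = "cuda" assumes a CUDA-capable GPU; use "cpu" if none is available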
+      
+      evaluate_model(nn_fit, data_test)
+    }, error = function(e){
+      na_performance
+    })
+    
+    # Summarize results for this outer fold
+    performance_summary_list[[as.character(k)]] = tibble(
+      species = data_spec$species[1],
+      obs = nrow(data_spec),
+      fold_eval = k,
+      model = c("SSDM_RF", "SSDM_GBM", "SSDM_GLM", "SSDM_NN"),
+      auc = c(rf_performance$AUC, gbm_performance$AUC, glm_performance$AUC, nn_performance$AUC),
+      accuracy = c(rf_performance$Accuracy, gbm_performance$Accuracy, glm_performance$Accuracy, nn_performance$Accuracy),
+      kappa = c(rf_performance$Kappa, gbm_performance$Kappa, glm_performance$Kappa, nn_performance$Kappa),
+      precision = c(rf_performance$Precision, gbm_performance$Precision, glm_performance$Precision, nn_performance$Precision),
+      recall = c(rf_performance$Recall, gbm_performance$Recall, glm_performance$Recall, nn_performance$Recall),
+      f1 = c(rf_performance$F1, gbm_performance$F1, glm_performance$F1, nn_performance$F1)
+    )
+  }
-  return(performance_summary)
+  return(dplyr::bind_rows(performance_summary_list))
 })
 
diff --git a/R/04_03_msdm_embed_raw.R b/R/04_03_msdm_embed_raw.R
index 6e7924a172401b7f428a55183f580eafa8ec0bc5..03a8737f138b5ba220b92c244e46a368c5ccc855 100644
--- a/R/04_03_msdm_embed_raw.R
+++ b/R/04_03_msdm_embed_raw.R
@@ -34,7 +34,7 @@ msdm_fit_embedding_raw = dnn(
   lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
   early_stopping = 250,
   validation = 0.3,
-  device = "cuda",
+  device = "cuda"
 )
 
 save(msdm_fit_embedding_raw, file = "data/r_objects/msdm_fit_embedding_raw.RData")
diff --git a/R/utils.R b/R/utils.R
index b1da26abc38515507c2381fa178c92bab4e50296..1c9dc02ef9d965b3ed9a02f7552aec85de46141f 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -25,7 +25,7 @@ expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) {
   return(bbox)
 }
 
-evaluate_model <- function(model, test_data) {
+evaluate_model <- function(model, data) {
   # Accuracy: The proportion of correctly predicted instances (both true positives and true negatives) out of the total instances.
   # Formula: Accuracy = (TP + TN) / (TP + TN + FP + FN)
   
@@ -40,14 +40,14 @@ evaluate_model <- function(model, test_data) {
   
   # Predict probabilities
   if(class(model) %in% c("citodnn", "citodnnBootstrap")){
-    probs <- predict(model, as.matrix(test_data), type = "response")[,1]
+    probs <- predict(model, as.matrix(data), type = "response")[,1]
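+    # cito's predict() returns a probability matrix here; column 1 holds P(presence)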
     preds <- factor(round(probs), levels = c("0", "1"), labels = c("A", "P"))
   } else {
-    probs <- predict(model, test_data, type = "prob")$P
-    preds <- predict(model, test_data, type = "raw")
+    probs <- predict(model, data, type = "prob")$P
+    preds <- predict(model, data, type = "raw")
   }
   
-  actual <- factor(test_data$present, levels = c("0", "1"), labels = c("A", "P"))
+  actual <- factor(data$present, levels = c("0", "1"), labels = c("A", "P"))
   
   # Calculate AUC
   auc <- pROC::roc(actual, probs, levels = c("P", "A"), direction = ">")$auc