diff --git a/R/01_02_raster_preparation.R b/R/01_02_raster_preparation.R
index 1d6951a61ed6b0dde9bc17d3d4a873b196471bf6..e7876e935c8b77377e003bb17490e704abc9246e 100644
--- a/R/01_02_raster_preparation.R
+++ b/R/01_02_raster_preparation.R
@@ -4,21 +4,19 @@ library(rnaturalearth)
 library(furrr)
 
 # Define Study extent ####
-study_extent = rnaturalearth::ne_countries() %>% 
+sa_polygon = rnaturalearth::ne_countries(scale = 10, returnclass = "sf") %>% 
   dplyr::filter(continent == "South America") %>% 
   sf::st_union() %>% 
-  sf::st_bbox() %>% 
-  ext()
+  st_crop(xmin = -82, ymin = -56, xmax = -34, ymax = 13)
 
-# CHELSA bioclim target variables ####
-chelsa_urls = c("https://os.zhdk.cloud.switch.ch/chelsav2/GLOBAL/climatologies/1981-2010/bio/CHELSA_cmi_mean_1981-2010_V.2.1.tif",
-                "https://os.zhdk.cloud.switch.ch/chelsav2/GLOBAL/climatologies/1981-2010/bio/CHELSA_rsds_1981-2010_mean_V.2.1.tif",
-                "https://os.zhdk.cloud.switch.ch/chelsav2/GLOBAL/climatologies/1981-2010/bio/CHELSA_bio6_1981-2010_V.2.1.tif",
-                "https://os.zhdk.cloud.switch.ch/chelsav2/GLOBAL/climatologies/1981-2010/bio/CHELSA_bio17_1981-2010_V.2.1.tif")
+save(sa_polygon, file = "data/r_objects/sa_polygon.RData")
 
+# CHELSA bioclim target variables ####
+chelsa_urls = paste0("https://os.zhdk.cloud.switch.ch/chelsav2/GLOBAL/climatologies/1981-2010/bio/CHELSA_bio", 1:19,"_1981-2010_V.2.1.tif")
+ 
 chelsa = terra::rast(chelsa_urls) %>% 
-  crop(study_extent)
-set.names(chelsa, c("cmi", "rsds", "bio6", "bio17"))
+  crop(sa_polygon)
+set.names(chelsa, paste0("bio", 1:19))
 terra::writeRaster(chelsa, filename = file.path("data", "geospatial", "raster", basename(chelsa_urls)), overwrite = T)
 
 # Tree canopy cover ####
@@ -29,23 +27,6 @@ igfc = list.files("I:/mas_data/00_data/processed/iGFC/", pattern = "canopyDensit
 terra::set.names(igfc, "igfc")
 terra::writeRaster(igfc, filename = file.path("data", "geospatial", "raster", "mean_canopyDensity_1992-2010.tif"), overwrite = T)
 
-# Seasonal inundation ####
-igsw = list.files("I:/mas_data/00_data/processed/iGSW/", pattern = "seasonal_(199[0-9]|200[0-9]|2010)", full.names = TRUE) %>% 
-  terra::rast() %>% 
-  terra::resample(chelsa, threads = 32) %>%   
-  app(mean, na.rm = TRUE) %>% 
-  subst(NA, 0)
-terra::set.names(igsw, "igsw")
-terra::writeRaster(igsw, filename = file.path("data", "geospatial", "raster", "mean_seasonalInundation_1992-2010.tif"), overwrite = T)
-
-# Distance to freshwater ####
-dtfw = list.files("I:/mas_data/00_data/processed/linearDistance/", pattern = "lake|reservoirs|river", full.names = TRUE) %>% 
-  terra::rast() %>% 
-  terra::resample(chelsa) %>% 
-  app(mean, na.rm = TRUE)
-terra::set.names(dtfw, "dtfw")
-terra::writeRaster(dtfw, filename = file.path("data", "geospatial", "raster", "mean_distanceToWater.tif"), overwrite = T)
-
 # Terrain ####
 roughness = list.files("C:/Users/ye87zine/Downloads", pattern = "roughness", full.names = T) %>% 
   terra::rast() %>% 
diff --git a/R/03_01_presence_preparation.R b/R/03_01_presence_preparation.R
index 8d3fce31360d1f0a4e1fc663c8eee01514779c54..7a2697e034900d92634da38b681ab03a7358d7de 100644
--- a/R/03_01_presence_preparation.R
+++ b/R/03_01_presence_preparation.R
@@ -20,9 +20,7 @@ sf::sf_use_s2(use_s2 = FALSE) # Don't use spherical geometry
 # ---------------------------------------------------------------------------#
 # Prepare Geodata                                                         ####
 # ---------------------------------------------------------------------------#
-sa_polygon = rnaturalearth::ne_countries() %>% 
-  dplyr::filter(continent == "South America") %>% 
-  sf::st_union()
+load("data/r_objects/sa_polygon.RData")
 
 raster_filepaths = list.files("~/symobio-modeling/data/geospatial/raster/", full.names = T) %>% 
   stringr::str_sort(numeric = T) 
@@ -46,8 +44,13 @@ occs = tbl(con, "species_occurrences") %>%
 
 # Flag occurrences using Coordinate cleaner
 occs_flagged = occs %>% 
-  dplyr::distinct(species, coordinate_id, longitude, latitude) %>% 
+  dplyr::mutate(
+    longitude = round(longitude, 3),
+    latitude = round(latitude, 3)
+  ) %>% 
+  sf::st_drop_geometry() %>% 
   group_by(species) %>% 
+  dplyr::distinct(longitude, latitude) %>% 
   group_split() %>% 
   purrr::map(                     # Loop over species individually due to bug in CoordinateCleaner
     CoordinateCleaner::clean_coordinates,
@@ -57,14 +60,13 @@ occs_flagged = occs %>%
     outliers_method = "quantile",
     verbose = F
   ) %>% 
-  bind_rows() %>% 
-  dplyr::filter(.summary == T) %>% 
-  dplyr::select(species, coordinate_id, longitude, latitude)
+  bind_rows()
 
 # Subset species occurrences to validated records
-occs_final = occs %>% 
-  inner_join(occs_flagged, by = c("species", "coordinate_id", "longitude", "latitude")) %>% 
-  dplyr::select(-coordinate_id)
+occs_final = occs_flagged %>% 
+  dplyr::filter(.summary == T) %>% 
+  sf::st_as_sf(coords = c("longitude", "latitude"), remove = F, crs = sf::st_crs(4326)) %>% 
+  dplyr::select(species, longitude, latitude)
 
 # Extract environmental data
 proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" 
diff --git a/R/03_02_absence_preparation.R b/R/03_02_absence_preparation.R
index 2726f8cc58de431325a4ede8ffc505981fe65b01..d7ef76775c3791eef97ed8aa6121912927bc1617 100644
--- a/R/03_02_absence_preparation.R
+++ b/R/03_02_absence_preparation.R
@@ -20,182 +20,172 @@ source("R/utils.R")
 
 load("data/r_objects/range_maps.RData")
 load("data/r_objects/occs_final.RData")
+load("data/r_objects/sa_polygon.RData")
 
 # ---------------------------------------------------------------------------#
 # Prepare Geodata                                                         ####
 # ---------------------------------------------------------------------------#
-target_species = unique(occs_final$species)
+proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" 
 
-sa_polygon = rnaturalearth::ne_countries() %>% 
-  dplyr::filter(continent == "South America") %>% 
-  sf::st_union() 
+target_species = unique(occs_final$species)
 
 raster_filepaths = list.files("~/symobio-modeling/data/geospatial/raster/", full.names = T) %>% 
   stringr::str_sort(numeric = T) 
 
-# Project (proj string generated with SA bbox coordinates on https://projectionwizard.org)
-proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" 
-
-# raster_data = terra::project(raster_data, proj_string)
 sa_polygon = st_transform(sa_polygon, proj_string)
 occs_final = st_transform(occs_final, proj_string)
 range_maps = st_transform(range_maps, proj_string)
+raster_data = terra::rast(raster_filepaths) %>% 
+  terra::project(proj_string) 
 
 # ---------------------------------------------------------------------------#
-# Sample absences                                                         ####
+# Sample random absences                                                  ####
 #                                                                            #
-# Absences should be predominantly sampled from regions that are             #
-# geographically close (reproduce sampling biases) but environmentally       #
-# dissimilar (avoid false negatives) to the known occurrences                #
+# Absences are sampled at random                                             #
+# (1) within the defined sampling area (same number as presences)            #
+# (2) outside the defined sampling area (100 records)                        #
 # ---------------------------------------------------------------------------#
-future::plan("multisession", workers = 24)
+species_processed = list.files("data/r_objects/pa_sampling/", pattern = "RData") %>% 
+  stringr::str_remove(".RData")
 
-furrr::future_walk(
-  .x = target_species,
-  .options = furrr::furrr_options(seed = 42, scheduling = FALSE), # make sure workers get constant stream of work
-  .env_globals = c(raster_filepaths, sa_polygon, occs_final, range_maps, proj_string),
-  .f = function(species){
-    # Prepare species occurrence data #####
-    occs_spec = dplyr::filter(occs_final, species == !!species)
-    if(nrow(occs_spec) <= 1){
-      warning("Skipping species ", species, ": Can't process species with a single presence record." )
-      return()
-    }
-    
-    occs_multipoint = occs_spec %>% 
-      summarise(geometry = st_combine(geometry))
-    
-    range_spec = dplyr::filter(range_maps, name_matched == !!species) %>% 
-      rename(species = name_matched)
-    
-    # Define model/sampling region
-    # 1. Union all points from range polygon and occurrences records
-    # 2. Build concave polygon from unionen points
-    # 3. Buffer by 100 km to extend sampling region into new environments
-    # 4. Crop by South America outline
-    sampling_region = st_cast(range_spec, "MULTIPOINT") %>% 
-      st_union(occs_multipoint) %>% 
-      geos::geos_concave_hull(ratio = 0.3) %>% # Concave hull in sf requires geos 3.11
-      st_as_sf() %>% 
-      st_set_crs(proj_string) %>% 
-      st_buffer(dist = 100000) %>% 
-      st_intersection(sa_polygon) 
-    
-    # Load raster data 
-    raster_data = terra::rast(raster_filepaths) %>% 
-      terra::project(proj_string) %>% 
-      terra::crop(sf::st_bbox(sampling_region))
-    
-    # Define spatial KDE
-    ref = st_bbox(sampling_region)[c(1,3,2,4)]
-    
-    # Function to estimate bandwidth 
-    # ---> this function is the default estimation of the sf.kde package but with a minimum of 50000 = 50km)
-    calc_bw <- function(x) {
-      r <- stats::quantile(x, c(0.25, 0.75))
-      h <- (r[2] - r[1])/1.34
-      result = 4 * 1.06 * min(sqrt(stats::var(x)), h) * length(x)^(-1/5)
-      return(max(result, 50000))
-    }
-    
-    bw_x = calc_bw(st_coordinates(occs_spec)[,"X"])
-    bw_y = calc_bw(st_coordinates(occs_spec)[,"Y"])
-    spatial_kde = spatialEco::sf.kde(occs_spec, bw = c(bw_x, bw_y), res = 10000, ref = ref, standardize = T, scale.factor = 1) %>% 
-      crop(sampling_region, mask = T, touches = F)
-    
-    # Define reverse niche
-    env_sampling_region = terra::crop(raster_data, sampling_region, mask = T) %>% 
-      terra::resample(spatial_kde) %>% 
-      raster::stack()
-    
-    maxent_fit <- maxent(env_sampling_region, st_coordinates(occs_spec))
-    env_kde <- (1-predict(env_sampling_region, maxent_fit)) %>% 
-      terra::rast()
-    
-    # Sample pseudo absences
-    abs_prob = spatial_kde * env_kde
-    abs_spec_list = list()
-    
-    samples_required = nrow(occs_spec)
-    
-    while(samples_required != 0){
-      sample_points = spatSample(abs_prob, size = samples_required, method = "weights", xy = T, as.df = T) %>% 
-        dplyr::select(-z) %>% 
-        dplyr::mutate(x = x + runif(nrow(.), -5000, 5000),         # Add jitter (res/2) to cell centroids 
-                      y = y + runif(nrow(.), -5000, 5000)) %>%     # Add jitter (res/2) to cell centroids 
-        st_as_sf(coords = c("x", "y"), crs = proj_string)
-      
-      sample_abs = sample_points %>% 
-        bind_cols(
-          terra::extract(raster_data, terra::vect(sample_points), ID = FALSE)
-        ) %>% 
-        drop_na()
-      
-      abs_spec_list[[length(abs_spec_list)+1]] = sample_abs
-      
-      samples_required = samples_required - nrow(sample_points) # Sometimes there are no env data for sample points, so keep sampling
-    }
-    
-    abs_spec = bind_rows(abs_spec_list)
-    abs_spec = abs_spec %>% 
-      bind_cols(
-        st_coordinates(st_transform(abs_spec, "EPSG:4326")) 
-      ) %>% 
-      dplyr::rename(
-        longitude = X,
-        latitude = Y
-      ) %>% 
+for(species in setdiff(target_species, species_processed)){
+  print(species)
+  # Prepare species occurrence data #####
+  occs_spec = dplyr::filter(occs_final, species == !!species)
+  if(nrow(occs_spec) <= 1){
+    warning("Skipping species ", species, ": Can't process species with a single presence record." )
+  }
+  
+  occs_multipoint = occs_spec %>% 
+    summarise(geometry = st_combine(geometry))
+  
+  range_spec = dplyr::filter(range_maps, name_matched == !!species) %>% 
+    rename(species = name_matched)
+  
+  # Define core sampling region
+  # 1. Union all points from range polygon and occurrences records
+  # 2. Build concave polygon from unioned points
+  # 3. Buffer by 50 km to extend sampling region into new environments
+  # 4. Crop by South America outline
+  # 5. Crop by buffered presence points
+  sampling_region = st_cast(range_spec, "MULTIPOINT") %>% 
+    st_union(occs_multipoint) %>% 
+    geos::geos_concave_hull(ratio = 0.3) %>% # Concave hull in sf requires geos 3.11
+    st_as_sf() %>% 
+    st_set_crs(proj_string) %>% 
+    st_buffer(dist = 50000) %>% 
+    st_intersection(sa_polygon) %>% 
+    st_difference(st_buffer(occs_multipoint, dist = 50000))
+  
+  # Sample balanced number of pseudo absences inside core region
+  abs_spec_list = list()
+  
+  samples_required = nrow(occs_spec)
+  while(samples_required != 0){
+    sample_points = sf::st_sample(sampling_region, size = samples_required)
+    env_sample = terra::extract(raster_data, terra::vect(sample_points), ID = FALSE)
+    
+    sample_abs = st_sf(env_sample, geometry = sample_points) %>% 
+      drop_na() %>% 
       dplyr::mutate(
-        species = !!species,
-        present = 0
-      ) 
-    
-    # Create presence-absence dataframe
-    pa_spec = occs_spec %>% 
-      dplyr::mutate(present = 1) %>% 
-      bind_rows(abs_spec)
-    
-    ggplot() +
-      ggtitle(species) +
-      geom_sf(data = st_crop(sa_polygon, ref), fill = "#756445") +
-      tidyterra::geom_spatraster(data = abs_prob) +
-      scale_fill_gradient(low="darkblue", high="green", na.value=NA, name = "Absence sampling probability") +
-      geom_sf(data = range_spec, alpha = 0, color = "black") +
-      geom_sf(data = pa_spec, aes(color = as.factor(present)), size = 0.7) +
-      scale_color_discrete(name = "Presence") +
-      theme_minimal()
+        record_type = "core"
+      )
     
-    try({
-      ggsave(paste0(species, ".pdf"), path = "plots/pa_sampling", device = "pdf", scale = 2)
-    })
+    abs_spec_list[[length(abs_spec_list)+1]] = sample_abs
     
-    # Define cross-validation folds
-    folds = tryCatch({
-      spatial_folds = suppressMessages(
-        blockCV::cv_spatial(
-          pa_spec,
-          column = "present",
-          k = 5,
-          progress = F, plot = F, report = F
-        )
+    samples_required = samples_required - nrow(sample_abs) # Sometimes there are no env data for sample points, so keep sampling
+  }
+  
+  # Sample 100 pseudo absences outside core region
+  background_region = st_difference(sa_polygon, sampling_region)
+  samples_required = 100
+  while(samples_required != 0){
+    sample_points = sf::st_sample(background_region, size = samples_required)
+    env_sample = terra::extract(raster_data, terra::vect(sample_points), ID = FALSE)
+    
+    sample_abs = st_sf(env_sample, geometry = sample_points) %>% 
+      drop_na() %>% 
+      dplyr::mutate(
+        record_type = "background"
       )
-      
-      spatial_folds$folds_ids
-    }, warning = function(w){
-      NA
-    }, error = function(e){
-      NA
-    })
     
-    pa_spec$fold_eval = folds
+    abs_spec_list[[length(abs_spec_list)+1]] = sample_abs
     
-    save(pa_spec, file = paste0("data/r_objects/pa_sampling/", species, ".RData"))
+    samples_required = samples_required - nrow(sample_abs) # Sometimes there are no env data for sample points, so keep sampling
   }
-)
+  
+  # Consolidate absences
+  abs_spec = bind_rows(abs_spec_list)
+  abs_spec = abs_spec %>% 
+    bind_cols(
+      st_coordinates(st_transform(abs_spec, "EPSG:4326")) 
+    ) %>% 
+    dplyr::rename(
+      longitude = X,
+      latitude = Y
+    ) %>% 
+    dplyr::mutate(
+      species = !!species,
+      present = 0
+    ) 
+  
+  # Create presence-absence dataframe
+  pa_spec = occs_spec %>% 
+    dplyr::mutate(
+      present = 1,
+      record_type = "core"
+    ) %>% 
+    bind_rows(abs_spec)
+  
+  ref = st_bbox(sampling_region)[c(1,3,2,4)]
+  ggplot() +
+   ggtitle(species) +
+   geom_sf(data = sa_polygon) +
+   geom_sf(data = range_spec, alpha = 0, color = "red") +
+   geom_sf(data = pa_spec, aes(color = as.factor(present)), size = 0.7) +
+   scale_color_discrete(name = "Presence") +
+   theme_minimal()
+  
+  try({
+   ggsave(paste0(species, ".pdf"), path = "plots/pa_sampling", device = "pdf", 
+          width = 8, height = 12)
+  })
+  
+  # Define cross-validation folds
+  pa_spec_core = dplyr::filter(pa_spec, record_type != "background")
+  pa_spec_background = dplyr::filter(pa_spec, record_type == "background")
+  
+  folds = tryCatch({
+    spatial_folds = suppressMessages(
+      blockCV::cv_spatial(
+        pa_spec_core,
+        column = "present",
+        k = 5,
+        progress = F, plot = F, report = F
+      )
+    )
+    
+    spatial_folds$folds_ids
+  }, error = function(e){
+    NA
+  })
+  
+  pa_spec = pa_spec_core %>% 
+    dplyr::mutate(fold_eval = folds) %>% 
+    bind_rows(pa_spec_background) %>% 
+    group_by(present) %>%
+    mutate(weight = 1/n()) %>%
+    ungroup()
+  
+  save(pa_spec, file = paste0("data/r_objects/pa_sampling/", species, ".RData"))
+}
 
 # Combine presence-absence data
 files = list.files("data/r_objects/pa_sampling/", full.names = T)
 model_data = lapply(files, function(f){load(f); return(pa_spec)}) %>% 
-  bind_rows()
+  bind_rows() %>% 
+  group_by(species) %>% 
+  dplyr::filter(all(1:5 %in% fold_eval)) %>% 
+  ungroup
 
 save(model_data, file = "data/r_objects/model_data.RData")
diff --git a/R/03_03_absence_preparation_random.R b/R/03_03_absence_preparation_random.R
deleted file mode 100644
index 7986d0bf8e98e534c7160479a7c1f4e8235ea9f4..0000000000000000000000000000000000000000
--- a/R/03_03_absence_preparation_random.R
+++ /dev/null
@@ -1,185 +0,0 @@
-# General packages
-library(dplyr)
-library(tidyr)
-library(furrr)
-
-# Geo packages
-library(terra)
-library(CoordinateCleaner)
-library(sf)
-library(geos)
-
-# Stat packages
-library(MASS)
-library(dismo)
-library(spatialEco)
-library(blockCV)
-library(caret)
-
-source("R/utils.R")
-
-load("data/r_objects/range_maps.RData")
-load("data/r_objects/occs_final.RData")
-
-# ---------------------------------------------------------------------------#
-# Prepare Geodata                                                         ####
-# ---------------------------------------------------------------------------#
-proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" 
-
-target_species = unique(occs_final$species)
-
-sa_polygon = rnaturalearth::ne_countries() %>% 
-  dplyr::filter(continent == "South America") %>% 
-  sf::st_union() 
-
-raster_filepaths = list.files("~/symobio-modeling/data/geospatial/raster/", full.names = T) %>% 
-  stringr::str_sort(numeric = T) 
-
-sa_polygon = st_transform(sa_polygon, proj_string)
-occs_final = st_transform(occs_final, proj_string)
-range_maps = st_transform(range_maps, proj_string)
-raster_data = terra::rast(raster_filepaths) %>% 
-  terra::project(proj_string) 
-
-# ---------------------------------------------------------------------------#
-# Sample random absences                                                  ####
-#                                                                            #
-# Absences are sampled at random                                             #
-# (1) within the defined sampling area (same number as presences)            #
-# (2) outside the defined sampling area (100 records)                        #
-# ---------------------------------------------------------------------------#
-species_processed = list.files("data/r_objects/pa_sampling_random_extended/", pattern = "RData") %>% 
-  stringr::str_remove(".RData")
-
-for(species in setdiff(target_species, species_processed)){
-  print(species)
-  # Prepare species occurrence data #####
-  occs_spec = dplyr::filter(occs_final, species == !!species)
-  if(nrow(occs_spec) <= 1){
-    warning("Skipping species ", species, ": Can't process species with a single presence record." )
-  }
-  
-  occs_multipoint = occs_spec %>% 
-    summarise(geometry = st_combine(geometry))
-  
-  range_spec = dplyr::filter(range_maps, name_matched == !!species) %>% 
-    rename(species = name_matched)
-  
-  # Define core sampling region
-  # 1. Union all points from range polygon and occurrences records
-  # 2. Build concave polygon from unioned points
-  # 3. Buffer by 50 km to extend sampling region into new environments
-  # 4. Crop by South America outline
-  # 5. Crop by buffered presence points
-  sampling_region = st_cast(range_spec, "MULTIPOINT") %>% 
-    st_union(occs_multipoint) %>% 
-    geos::geos_concave_hull(ratio = 0.3) %>% # Concave hull in sf requires geos 3.11
-    st_as_sf() %>% 
-    st_set_crs(proj_string) %>% 
-    st_buffer(dist = 50000) %>% 
-    st_intersection(sa_polygon) %>% 
-    st_difference(st_buffer(occs_multipoint, dist = 50000))
-  
-  # Sample balanced number of pseudo absences inside core region
-  abs_spec_list = list()
-  
-  samples_required = nrow(occs_spec)
-  while(samples_required != 0){
-    sample_points = sf::st_sample(sampling_region, size = samples_required)
-    env_sample = terra::extract(raster_data, terra::vect(sample_points), ID = FALSE)
-    
-    sample_abs = st_sf(env_sample, geometry = sample_points) %>% 
-      drop_na() %>% 
-      dplyr::mutate(
-        record_type = "absence_core"
-      )
-    
-    abs_spec_list[[length(abs_spec_list)+1]] = sample_abs
-    
-    samples_required = samples_required - nrow(sample_abs) # Sometimes there are no env data for sample points, so keep sampling
-  }
-  
-  # Sample 100 pseudo absences outside core region
-  background_region = st_difference(sa_polygon, sampling_region)
-  samples_required = 100
-  while(samples_required != 0){
-    sample_points = sf::st_sample(background_region, size = samples_required)
-    env_sample = terra::extract(raster_data, terra::vect(sample_points), ID = FALSE)
-    
-    sample_abs = st_sf(env_sample, geometry = sample_points) %>% 
-      drop_na() %>% 
-      dplyr::mutate(
-        record_type = "absence_background"
-      )
-    
-    abs_spec_list[[length(abs_spec_list)+1]] = sample_abs
-    
-    samples_required = samples_required - nrow(sample_abs) # Sometimes there are no env data for sample points, so keep sampling
-  }
-  
-  # Consolidate absences
-  abs_spec = bind_rows(abs_spec_list)
-  abs_spec = abs_spec %>% 
-    bind_cols(
-      st_coordinates(st_transform(abs_spec, "EPSG:4326")) 
-    ) %>% 
-    dplyr::rename(
-      longitude = X,
-      latitude = Y
-    ) %>% 
-    dplyr::mutate(
-      species = !!species,
-      present = 0
-    ) 
-  
-  # Create presence-absence dataframe
-  pa_spec = occs_spec %>% 
-    dplyr::mutate(
-      present = 1,
-      record_type = "present"
-    ) %>% 
-    bind_rows(abs_spec)
-  
-  # ref = st_bbox(sampling_region)[c(1,3,2,4)]
-  # ggplot() +
-  #   ggtitle(species) +
-  #   geom_sf(data = st_crop(sa_polygon, ref), fill = "#756445") +
-  #   geom_sf(data = range_spec, alpha = 0, color = "black") +
-  #   geom_sf(data = pa_spec, aes(color = as.factor(present)), size = 0.7) +
-  #   scale_color_discrete(name = "Presence") +
-  #   theme_minimal()
-  # 
-  # try({
-  #   ggsave(paste0(species, ".pdf"), path = "plots/pa_sampling_random", device = "pdf", scale = 2)
-  # })
-  
-  # Define cross-validation folds
-  folds = tryCatch({
-    spatial_folds = suppressMessages(
-      blockCV::cv_spatial(
-        pa_spec,
-        column = "present",
-        k = 5,
-        progress = F, plot = F, report = F
-      )
-    )
-    
-    spatial_folds$folds_ids
-  }, error = function(e){
-    NA
-  })
-  
-  pa_spec$fold_eval = folds
-  
-  save(pa_spec, file = paste0("data/r_objects/pa_sampling_random_extended/", species, ".RData"))
-}
-
-# Combine presence-absence data
-files = list.files("data/r_objects/pa_sampling_random_extended/", full.names = T)
-model_data = lapply(files, function(f){load(f); return(pa_spec)}) %>% 
-  bind_rows() %>% 
-  group_by(species, present) %>%
-  mutate(weight = 1/n()) %>%
-  ungroup()
-
-save(model_data, file = "data/r_objects/model_data_random_abs_extended.RData")
diff --git a/R/04_01_modelling_ssdm.R b/R/04_01_modelling_ssdm.R
index d44330917af7440a34f9421fd4601e810a3f08ce..bc1346c2374091bd6410c052c343a1333ebdeea5 100644
--- a/R/04_01_modelling_ssdm.R
+++ b/R/04_01_modelling_ssdm.R
@@ -23,7 +23,7 @@ data_split = model_data %>%
 for(pa_spec in data_split){
   species = pa_spec$species[1]
   print(species)
-
+  
   # Define empty result for performance eval
   na_performance = list(AUC = NA_real_, Accuracy = NA_real_, Kappa = NA_real_, 
                         Precision = NA_real_, Recall = NA_real_, F1 = NA_real_, 
@@ -38,16 +38,27 @@ for(pa_spec in data_split){
     
     ## Preparations #####
     data_test = dplyr::filter(pa_spec, fold_eval == fold)
-    data_train = dplyr::filter(pa_spec, fold_eval != fold)
+    data_train = dplyr::filter(pa_spec, is.na(fold_eval) | fold_eval != fold) # include background samples
     
     # Create inner CV folds for model training
-    cv_train = blockCV::cv_spatial(
-      data_train,
-      column = "present",
-      k = 5,
-      progress = F, plot = F, report = F
-    )
-    data_train$fold_train = cv_train$folds_ids
+    folds_train = tryCatch({
+      cv_train = blockCV::cv_spatial(
+        data_train,
+        column = "present",
+        k = 5,
+        progress = F, plot = F, report = F
+      )
+      
+      cv_train$folds_ids
+    }, error = function(e){
+      NA
+    })
+    
+    if(is.numeric(folds_train)){
+      data_train$fold_train = folds_train
+    } else {
+      return()
+    }
     
     # Drop geometry
     data_train$geometry = NULL
@@ -67,7 +78,7 @@ for(pa_spec in data_split){
     )
     
     # Define predictors
-    predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness")
+    predictors = paste0("bio", 1:19)
     
     ## Random Forest #####
     rf_performance = tryCatch({
@@ -75,7 +86,8 @@ for(pa_spec in data_split){
       rf_fit = caret::train(
         x = data_train[, predictors],
         y = data_train$present_fct,
-        method = "rf",
+        method = "ranger",
+        metric = "Accuracy",
         trControl = train_ctrl,
         tuneLength = 8,
         verbose = F
@@ -146,17 +158,17 @@ for(pa_spec in data_split){
       species = !!species,
       obs = nrow(data_train),
       fold_eval = fold,
-      model = c("RF", "XGB", "GAM", "NN"),
-      AUC = c(rf_performance$AUC, xgb_performance$AUC, gam_performance$AUC, nn_performance$AUC),
-      Accuracy = c(rf_performance$Accuracy, xgb_performance$Accuracy, gam_performance$Accuracy, nn_performance$Accuracy),
-      Kappa = c(rf_performance$Kappa, xgb_performance$Kappa, gam_performance$Kappa, nn_performance$Kappa),
-      Precision = c(rf_performance$Precision, xgb_performance$Precision, gam_performance$Precision, nn_performance$Precision),
-      Recall = c(rf_performance$Recall, xgb_performance$Recall, gam_performance$Recall, nn_performance$Recall),
-      F1 = c(rf_performance$F1, xgb_performance$F1, gam_performance$F1, nn_performance$F1),
-      TP = c(rf_performance$TP, xgb_performance$TP, gam_performance$TP, nn_performance$TP),
-      FP = c(rf_performance$FP, xgb_performance$FP, gam_performance$FP, nn_performance$FP),
-      TN = c(rf_performance$TN, xgb_performance$TN, gam_performance$TN, nn_performance$TN),
-      FN = c(rf_performance$FN, xgb_performance$FN, gam_performance$FN, nn_performance$FN)
+      model = c("rf", "gbm", "gam", "nn"),
+      auc = c(rf_performance$auc, gbm_performance$auc, gam_performance$auc, nn_performance$auc),
+      accuracy = c(rf_performance$accuracy, gbm_performance$accuracy, gam_performance$accuracy, nn_performance$accuracy),
+      kappa = c(rf_performance$kappa, gbm_performance$kappa, gam_performance$kappa, nn_performance$kappa),
+      precision = c(rf_performance$precision, gbm_performance$precision, gam_performance$precision, nn_performance$precision),
+      recall = c(rf_performance$recall, gbm_performance$recall, gam_performance$recall, nn_performance$recall),
+      f1 = c(rf_performance$f1, gbm_performance$f1, gam_performance$f1, nn_performance$f1),
+      tp = c(rf_performance$tp, gbm_performance$tp, gam_performance$tp, nn_performance$tp),
+      fp = c(rf_performance$fp, gbm_performance$fp, gam_performance$fp, nn_performance$fp),
+      tn = c(rf_performance$tn, gbm_performance$tn, gam_performance$tn, nn_performance$tn),
+      fn = c(rf_performance$fn, gbm_performance$fn, gam_performance$fn, nn_performance$fn)
     ) %>% 
       tidyr::pivot_longer(-any_of(c("species", "obs", "fold_eval", "model")), names_to = "metric", values_to = "value") 
     
@@ -164,18 +176,21 @@ for(pa_spec in data_split){
   })
   
   # Combine and save evaluation results
-  performance_spec = bind_rows(performance_cv) %>% 
-    dplyr::arrange(fold_eval, model, metric)
-  
-  save(performance_spec, file = paste0("data/r_objects/ssdm_results/performance/", species, ".RData"))
+  performance_spec = bind_rows(performance_cv) 
+  if(nrow(performance_spec) != 0){
+    performance_spec = performance_spec %>% 
+      dplyr::arrange(fold_eval, model, metric)
+    
+    save(performance_spec, file = paste0("data/r_objects/ssdm_results/performance/", species, ".RData"))
+  }
 }
 
 # Combine results  
 files = list.files("data/r_objects/ssdm_results/performance/", full.names = T, pattern = ".RData")
-ssdm_results = lapply(files, function(f){load(f); return(performance_spec)}) %>% 
+ssdm_performance = lapply(files, function(f){load(f); return(performance_spec)}) %>% 
   bind_rows() 
 
-save(ssdm_results, file = "data/r_objects/ssdm_performance.RData")
+save(ssdm_performance, file = "data/r_objects/ssdm_performance.RData")
 
 # ----------------------------------------------------------------------#
 # Train full models                                                  ####
@@ -187,6 +202,7 @@ data_split = model_data %>%
   dplyr::group_split()
 
 for(pa_spec in data_split){
+  ## Preparations ####
   species = pa_spec$species[1]
   print(species)
   
@@ -212,53 +228,54 @@ for(pa_spec in data_split){
     classProbs = TRUE, 
     index = index_train,
     summaryFunction = caret::twoClassSummary, 
-    savePredictions = "final",
+    savePredictions = "final"
   )
-
+  
   # Define predictors
-  predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness")
+  predictors = paste0("bio", 1:19)
   
-  # Fit models
+  # Random Forest ####
   try({
-    # Fit model
     rf_fit = caret::train(
       x = pa_spec[, predictors],
       y = pa_spec$present_fct,
-      method = "rf",
-      metric = "Kappa",
+      method = "ranger",
+      metric = "Accuracy",
       trControl = train_ctrl,
       tuneLength = 8,
       verbose = F
     )
-
+    
     save(rf_fit, file = paste0("data/r_objects/ssdm_results/full_models/", species, "_rf_fit.RData"))
   })
   
+  
+  # Gradient Boosted Machine ####
   try({
-    xgb_fit = train(
+    gbm_fit = train(
       x = pa_spec[, predictors],
       y = pa_spec$present_fct,
-      method = "xgbTree",
-      metric = "Kappa",
+      method = "gbm",
       trControl = train_ctrl,
       tuneLength = 8,
       verbose = F
     )
-    save(xgb_fit, file = paste0("data/r_objects/ssdm_results/full_models/", species, "_xgb_fit.RData"))
+    save(gbm_fit, file = paste0("data/r_objects/ssdm_results/full_models/", species, "_gbm_fit.RData"))
   })
   
+  # Generalized Additive Model ####
   try({
     gam_fit = train(
       x = pa_spec[, predictors],
       y = pa_spec$present_fct,
       method = "gamSpline",
-      metric = "Kappa",
       trControl = train_ctrl,
       tuneLength = 8,
     )
     save(gam_fit, file = paste0("data/r_objects/ssdm_results/full_models/", species, "_gam_fit.RData"))
   })
   
+  # Neural Network ####
   try({
     formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+')))
     nn_fit = dnn(
diff --git a/R/04_02_modelling_msdm_embed.R b/R/04_02_modelling_msdm_embed.R
index 584fb57d2c5ea2f42ac6d22b1548951a16598720..48c4e9ca873d7b45376d4fef12129bc47dd36bb9 100644
--- a/R/04_02_modelling_msdm_embed.R
+++ b/R/04_02_modelling_msdm_embed.R
@@ -7,14 +7,13 @@ source("R/utils.R")
 load("data/r_objects/model_data.RData")
 
 model_data = model_data %>% 
-  dplyr::filter(!is.na(fold_eval)) %>% 
   dplyr::mutate(species = as.factor(species)) %>% 
   sf::st_drop_geometry()
 
 # ----------------------------------------------------------------------#
 # Train model                                                        ####
 # ----------------------------------------------------------------------#
-predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness")
+predictors = paste0("bio", 1:19)
 formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species, dim = 10)")) 
 
 # 1. Cross validation
@@ -55,7 +54,7 @@ msdm_embed_fit = dnn(
   data = model_data,
   hidden = c(200L, 200L, 200L),
   loss = "binomial",
-  epochs = 7500, 
+  epochs = 5000, 
   lr = 0.001,   
   baseloss = 1,
   batchsize = 4096,
@@ -85,9 +84,9 @@ msdm_embed_performance = lapply(1:5, function(fold){
     performance = tryCatch({
       evaluate_model(msdm_embed_fit, test_data_spec)
     }, error = function(e){
-      list(AUC = NA_real_, Accuracy = NA_real_, Kappa = NA_real_, 
-           Precision = NA_real_, Recall = NA_real_, F1 = NA_real_, 
-           TP = NA_real_, FP = NA_real_, TN = NA_real_, FN = NA_real_)
+      list(auc = NA_real_, accuracy = NA_real_, kappa = NA_real_, 
+           precision = NA_real_, recall = NA_real_, f1 = NA_real_, 
+           tp = NA_real_, fp = NA_real_, tn = NA_real_, fn = NA_real_)
     })
     
     performance_summary = performance %>% 
@@ -96,7 +95,7 @@ msdm_embed_performance = lapply(1:5, function(fold){
         species = !!species,
         obs = nrow(dplyr::filter(model_data, species == !!species, fold_eval != !!fold)),
         fold_eval = !!fold,
-        model = "MSDM_embed",
+        model = "msdm_embed",
       ) %>% 
       tidyr::pivot_longer(-any_of(c("species", "obs", "fold_eval", "model")), names_to = "metric", values_to = "value")
   }) %>% 
diff --git a/R/04_03_modelling_msdm_onehot.R b/R/04_03_modelling_msdm_onehot.R
index 71f5ac0ee3212f14b176d14933fba7b8713d237d..f5e7ee3ebc61c1c9106157f056a7aff8ec17489e 100644
--- a/R/04_03_modelling_msdm_onehot.R
+++ b/R/04_03_modelling_msdm_onehot.R
@@ -7,14 +7,14 @@ source("R/utils.R")
 load("data/r_objects/model_data.RData")
 
 model_data = model_data %>% 
-  dplyr::filter(!is.na(fold_eval)) %>% 
   dplyr::mutate(species = as.factor(species)) %>% 
   sf::st_drop_geometry()
 
 # ----------------------------------------------------------------------#
 # Train model                                                        ####
 # ----------------------------------------------------------------------#
-formula = present ~ bio6 + bio17 + cmi + rsds + igfc + dtfw + igsw + roughness + species
+predictors = paste0("bio", 1:19)
+formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "species")) 
 
 # 1. Cross validation
 for(fold in 1:5){
@@ -54,8 +54,9 @@ msdm_onehot_fit = dnn(
   data = model_data,
   hidden = c(200L, 200L, 200L),
   loss = "binomial",
-  epochs = 7500, 
+  epochs = 5000, 
   lr = 0.001,   
+  baseloss = 1,
   batchsize = 4096,
   dropout = 0.25,
   burnin = 500,
@@ -83,9 +84,9 @@ msdm_onehot_performance = lapply(1:5, function(fold){
     performance = tryCatch({
       evaluate_model(msdm_onehot_fit, test_data_spec)
     }, error = function(e){
-      list(AUC = NA_real_, Accuracy = NA_real_, Kappa = NA_real_, 
-           Precision = NA_real_, Recall = NA_real_, F1 = NA_real_, 
-           TP = NA_real_, FP = NA_real_, TN = NA_real_, FN = NA_real_)
+      list(auc = NA_real_, accuracy = NA_real_, kappa = NA_real_, 
+           precision = NA_real_, recall = NA_real_, f1 = NA_real_, 
+           tp = NA_real_, fp = NA_real_, tn = NA_real_, fn = NA_real_)
     })
     
     performance_summary = performance %>% 
@@ -94,7 +95,7 @@ msdm_onehot_performance = lapply(1:5, function(fold){
         species = !!species,
         obs = nrow(dplyr::filter(model_data, species == !!species, fold_eval != !!fold)),
         fold_eval = !!fold,
-        model = "MSDM_onehot",
+        model = "msdm_onehot",
       ) %>% 
       tidyr::pivot_longer(-any_of(c("species", "obs", "fold_eval", "model")), names_to = "metric", values_to = "value")
   }) %>% 
diff --git a/R/05_01_performance_report.qmd b/R/05_01_performance_report.qmd
index 1c1b64d5e8fa93cc5641e9f324bb58b5002f053e..bc5a0b715eb87b55b59f30958d962b72e8657da2 100644
--- a/R/05_01_performance_report.qmd
+++ b/R/05_01_performance_report.qmd
@@ -12,11 +12,10 @@ library(plotly)
 library(DT)
 
 load("../data/r_objects/model_data.RData")
+load("../data/r_objects/ssdm_performance.RData")
 load("../data/r_objects/msdm_embed_performance.RData")
+load("../data/r_objects/msdm_onehot_performance.RData")
 load("../data/r_objects/msdm_rf_performance.RData")
-load("../data/r_objects/msdm_rf_random_abs_performance.RData")
-load("../data/r_objects/msdm_rf_no_species_performance.RData")
-load("../data/r_objects/msdm_rf_no_species_random_abs_performance.RData")
 
 load("../data/r_objects/range_maps.RData")
 load("../data/r_objects/range_maps_gridded.RData")
@@ -26,17 +25,9 @@ load("../data/r_objects/functional_groups.RData")
 sf::sf_use_s2(use_s2 = FALSE)
 ```
 
-```{r globals, echo = FALSE, include = FALSE}
-# Count occs per species
-obs_count = model_data %>% 
-  sf::st_drop_geometry() %>% 
-  dplyr::filter(present == 1) %>% 
-  dplyr::group_by(species) %>% 
-  dplyr::summarise(obs = n())
-
-
+```{r globals, echo = FALSE, cache = TRUE, include = FALSE}
 # Regression functions
-asym_regression = function(x, y){
+asym_fit = function(x, y){
   nls_fit = nls(y ~ 1 - (1-b) * exp(-c * log(x)), start = list(b = 0.1, c = 0.1))
   new_x = exp(seq(log(min(x)), log(max(x)), length.out = 100))
   data.frame(
@@ -45,7 +36,7 @@ asym_regression = function(x, y){
   )
 }
 
-lin_regression = function(x, y, family = "binomial"){
+glm_fit = function(x, y, family = "binomial"){
   glm_fit = suppressWarnings(glm(y~x, family = family))
   new_x = seq(min(x), max(x), length.out = 100)
   data.frame(
@@ -54,34 +45,48 @@ lin_regression = function(x, y, family = "binomial"){
   )
 }
 
+loess_fit = function(x, y, span = 0.75){
+  df = data.frame(x = x, y = y)
+  loess_fit = loess(y ~ x, data = df, span = span)
+  new_x = seq(min(x), max(x), length.out = 100)
+  data.frame(
+    x = new_x,
+    fit = predict(loess_fit, newdata = data.frame(x = new_x))
+  )
+}
+
 # Performance table
-performance = msdm_rf_performance %>% 
-  bind_rows(msdm_rf_random_abs_performance) %>% 
-  bind_rows(msdm_rf_no_species_performance) %>% 
-  bind_rows(msdm_rf_no_species_random_abs_performance) %>% 
-  dplyr::filter(fold_eval <= 3) %>% # TODO use all folds
+performance = ssdm_results %>% 
+  bind_rows(msdm_embed_performance) %>% 
+  bind_rows(msdm_onehot_performance) %>% 
+  bind_rows(msdm_rf_performance) %>% 
+  dplyr::mutate(   # TODO: remove when only working with new data
+    metric = stringr::str_to_lower(metric),
+    model = stringr::str_to_lower(model)
+  ) %>% 
   dplyr::group_by(species, model, metric) %>% 
   dplyr::summarise(value = mean(value, na.rm = F)) %>% 
   dplyr::mutate(
-    metric = stringr::str_to_lower(metric),
     value = case_when(
       ((is.na(value) | is.nan(value)) & metric %in% c("auc", "f1", "accuracy", "precision", "recall")) ~ 0.5,
       ((is.na(value) | is.nan(value)) & metric %in% c("kappa")) ~ 0,
       .default = value
     )
-  )
-
-focal_metrics = c("accuracy", "auc", "f1", "kappa", "precision", "recall")
-
-plot_performance = function(df_plot, metric, x_var, x_label, x_log = T, reg_func = lin_regression) {
-  df_plot = dplyr::filter(df_plot, metric == !!metric) %>% 
-    dplyr::rename(x_var = !!x_var)
+  ) %>% 
+  ungroup()
+
+plot_performance = function(df_plot, y_var, y_label, x_var, x_label, x_log = T, reg_func = loess_fit) {
+  df_plot = df_plot %>% 
+    dplyr::rename(
+      x_var = !!x_var,
+      y_var = !!y_var
+    )
   
   # Calculate regression lines for each model and metric combination
   suppressWarnings({
     regression_lines = df_plot %>%
       group_by(model) %>%
-      group_modify( ~ reg_func(.x$x_var, .x$value))
+      group_modify( ~ reg_func(.x[["x_var"]], .x[["y_var"]]))
   })
   
   # Create base plot
@@ -89,7 +94,7 @@ plot_performance = function(df_plot, metric, x_var, x_label, x_log = T, reg_func
     layout(
       title = paste0("Model Performance vs. ", x_label),
       xaxis = list(title = x_label, type = ifelse(x_log, "log", "linear")),
-      yaxis = list(title = metric),
+      yaxis = list(title = y_label),
       legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
       margin = list(r = 150), # Add right margin to accommodate legend
       hovermode = 'closest'
@@ -99,16 +104,16 @@ plot_performance = function(df_plot, metric, x_var, x_label, x_log = T, reg_func
   for (model_name in unique(df_plot$model)) {
     plot = plot %>%
       add_markers(
-        data = filter(df_plot, model == model_name, metric %in% focal_metrics),
+        data = filter(df_plot, model == model_name),
         x = ~ x_var,
-        y = ~ value,
+        y = ~ y_var,
         color = model_name, # Set color to match legendgroup
         legendgroup = model_name,
         opacity = 0.6,
         name = ~ model,
         hoverinfo = 'text',
         text = ~ paste(
-          "Species:", species, "<br>", x_label, ":", x_var, "<br>Value:", round(value, 3)
+          "Species:", species, "<br>", x_label, ":", x_var, "<br>Value:", round(y_var, 3)
         )
       )
   }
@@ -130,11 +135,23 @@ plot_performance = function(df_plot, metric, x_var, x_label, x_log = T, reg_func
   
   return(plot)
 }
+
+add_partial_residuals = function(df, focus_var, control_vars, family = quasibinomial) {
+  model_formula <- as.formula(paste("value ~", paste(c(focus_var, control_vars), collapse = " + ")))
+  model = glm(model_formula, data = df, family = family)
+  
+  if (focus_var %in% names(model$coefficients)) {
+    df[[paste0(focus_var, "_partial")]] = residuals(model, type = "response") + model$coefficients[focus_var] * df[[focus_var]]
+  }
+  
+  return(df)
+}
+
 ```
 
 ## Summary
 
-This document summarizes the performance of different sSDM and mSDM algorithms for `r I(length(unique(performance$species)))` South American mammal species. Model performance is evaluated on `r I(xfun::numbers_to_words(length(focal_metrics)))` metrics (`r I(paste(focal_metrics, collapse = ', '))`) and analyzed along five potential influence factors (number of records, range size, range coverage, range coverage bias). The comparison of sSDM vs mSDM approaches is of particular interest.
+This document summarizes the performance of different sSDM and mSDM algorithms for `r I(length(unique(performance$species)))` South American mammal species. Model performance is evaluated on six metrics (auc, f1, kappa, accuracy, precision, recall) and analyzed along five potential influence factors (number of records, range size, range coverage, range coverage bias). The comparison of sSDM vs mSDM approaches is of particular interest.
 
 Code can be found on [GitLab](https://git.idiv.de/ye87zine/symobio-modeling).
 
@@ -142,22 +159,17 @@ Code can be found on [GitLab](https://git.idiv.de/ye87zine/symobio-modeling).
 
 #### General decisions
 
--   Balanced presences and absences for each species
--   Absence sampling from probability surface that balances geographical sampling bias and environmental preferences per species
-    -   higher probability in areas that have been sampled more intensely
-    -   lower probability in areas with environmental conditions similar to presence locations
--   Eight predictors:
-    -   Min Temperature of Coldest Month (bio6)
-    -   Precipitation of Driest Quarter (bio17)
-    -   Climate Moisture Index (cmi)
-    -   Surface Downwelling Shortwave Flux (rsds)
-    -   Tree canopy cover (igfc)
-    -   Distance to freshwater (dtfw)
-    -   Seasonal inundation(igsw)
-    -   Topographic roughness (roughness)
--   Nested cross validation
-    -   Outer (performance evaluation): Spatially blocked 5-fold cross validation for all models
-    -   Inner (model training): Random split (1/5) in cito, Spatially blocked 5-fold cross validation in other models
+-   Presence/Absence
+    -   Random absence sampling
+    -   Balanced number of presences and absences within core area of distribution *(core)*
+    -   Additionally, 100 absences across South America per species *(background)*
+-   Predictors
+    -   19 CHELSA BioClim variables at 1km resolution
+-   Evaluation
+    -   Six metrics (AUC, F1, Kappa, Accuracy, Precision, Recall)
+    -   Evaluation only on *core* samples (*background* samples excluded)
+    -   Five fold spatially blocked Cross Validation
+    -   Values shown are averaged across all five validation folds
 
 #### sSDM Algorithms
 
@@ -173,120 +185,305 @@ Code can be found on [GitLab](https://git.idiv.de/ye87zine/symobio-modeling).
 
 -   Species identity part of the input data, internal representation then either as onehot vector (MSDM_rf, MSDM_onehot) or via embedding (MSDM_embed)
 
-### Key findings:
+### Key findings
 
 -   MSDM algorithms score much higher across all performance algorithms
 -   Among MSDM algorithms, RF outperforms NNs significantly
 
 ## Analysis
 
+### Quantify drivers of model performance
+
+```{r prepare_model_df, echo = FALSE, include = FALSE}
+# Number of records
+df_join = model_data %>% 
+  sf::st_drop_geometry() %>% 
+  dplyr::filter(present == 1) %>% 
+  dplyr::group_by(species) %>% 
+  dplyr::summarise(n_obs = n())
+
+performance = performance %>% 
+  dplyr::left_join(df_join, by = "species") 
+
+# Range size
+df_join = range_maps %>% 
+  sf::st_transform("+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs") %>% 
+  dplyr::mutate(range_size = as.numeric(sf::st_area(range_maps) / 1000000)) %>%  # range in sqkm
+  sf::st_drop_geometry()
+
+performance = performance %>% 
+  inner_join(df_join, by = c("species" = "name_matched"))
+
+# Range coverage
+range_maps_gridded = range_maps_gridded %>%
+  sf::st_transform("+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs")
+
+df_cells_total = range_maps_gridded %>%
+  dplyr::rename("species" = name_matched) %>% 
+  group_by(species) %>%
+  summarise(cells_total = n()) %>% 
+  st_drop_geometry()
+
+df_cells_occ = range_maps_gridded %>% 
+  st_join(occs_final, join = st_intersects) %>%
+  filter(name_matched == species) %>%         # Filter only intersections of the same species
+  group_by(species) %>%
+  summarise(cells_occupied = n_distinct(geometry)) %>%
+  st_drop_geometry()
+
+df_join = df_cells_total %>% 
+  dplyr::inner_join(df_cells_occ, by = "species") %>% 
+  dplyr::mutate(range_cov = cells_occupied / cells_total) %>% 
+  dplyr::select(species, range_cov)
+
+performance = performance %>% 
+  inner_join(df_join, by = "species")
+
+# Range coverage bias
+df_occs_total = occs_final %>% 
+  st_drop_geometry() %>% 
+  group_by(species) %>% 
+  summarise(occs_total = n())
+
+df_join = df_occs_total %>% 
+  dplyr::inner_join(df_cells_total, by = "species") %>% 
+  dplyr::inner_join(df_cells_occ, by = "species") %>% 
+  dplyr::mutate(range_bias = 1-((cells_occupied / cells_total) / pmin(occs_total / cells_total, 1))) %>%
+  dplyr::select(species, range_bias)
+
+performance = performance %>% 
+  inner_join(df_join, by = "species")
+```
+
 ### Number of records
 
-```{r, echo = FALSE, message=FALSE, warnings=FALSE}
-df_plot = performance %>% 
-  dplyr::left_join(obs_count, by = "species") 
+::: panel-tabset
+#### Marginal Effects
+
+::: panel-tabset
+##### AUC
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "auc")
+plot = plot_performance(df_plot, y_var = "value", y_label = "AUC", x_var = "n_obs", x_label = "Number of records")
+bslib::card(plot, full_screen = T)
+```
+
+##### F1
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "f1")
+plot = plot_performance(df_plot, y_var = "value", y_label = "F1", x_var = "n_obs", x_label = "Number of records")
+bslib::card(plot, full_screen = T)
+```
+
+##### Cohen's kappa
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "kappa")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Kappa", x_var = "n_obs", x_label = "Number of records")
+bslib::card(plot, full_screen = T)
+```
+
+##### Accuracy
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "accuracy")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Accuracy", x_var = "n_obs", x_label = "Number of records")
+bslib::card(plot, full_screen = T)
+```
+
+##### Precision
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "precision")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Precision", x_var = "n_obs", x_label = "Number of records")
+bslib::card(plot, full_screen = T)
 ```
 
+##### Recall
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "recall")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Recall", x_var = "n_obs", x_label = "Number of records")
+bslib::card(plot, full_screen = T)
+```
+:::
+
+#### Partial Effects
+
 ::: panel-tabset
-#### AUC
+##### AUC
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "auc", x_var = "obs", x_label = "Number of records", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "auc") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "AUC (partial residual)", x_var = "n_obs", x_label = "Number of records")
 bslib::card(plot, full_screen = T)
 ```
 
-#### F1
+##### F1
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "f1", x_var = "obs", x_label = "Number of records", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "f1") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "F1 (partial residual)", x_var = "n_obs", x_label = "Number of records")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Cohen's kappa
+##### Cohen's kappa
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "kappa", x_var = "obs", x_label = "Number of records", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "kappa") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"), family = gaussian)
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Kappa (partial residual)", x_var = "n_obs", x_label = "Number of records")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Accuracy
+##### Accuracy
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "accuracy", x_var = "obs", x_label = "Number of records", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "accuracy") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Accuracy (partial residual)", x_var = "n_obs", x_label = "Number of records")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Precision
+##### Precision
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "precision", x_var = "obs", x_label = "Number of records", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "precision") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Precision (partial residual)", x_var = "n_obs", x_label = "Number of records")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Recall
+##### Recall
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "recall", x_var = "obs", x_label = "Number of records", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "recall") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Recall (partial residual)", x_var = "n_obs", x_label = "Number of records")
 bslib::card(plot, full_screen = T)
 ```
 :::
+:::
 
 ### Range size
 
 Range size was calculated based on polygon layers from the IUCN Red List of Threatened Species (2016).
 
-```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE}
-df_join = range_maps %>% 
-  dplyr::mutate(range_size = as.numeric(sf::st_area(range_maps) / 1000000)) %>%  # range in sqkm
-  sf::st_drop_geometry()
+::: panel-tabset
+#### Marginal Effects
 
-df_plot = performance %>% 
-  inner_join(df_join, by = c("species" = "name_matched"))
+::: panel-tabset
+##### AUC
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "auc")
+plot = plot_performance(df_plot, y_var = "value", y_label = "AUC", x_var = "range_size", x_label = "Range size [km]")
+bslib::card(plot, full_screen = T)
 ```
 
+##### F1
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "f1")
+plot = plot_performance(df_plot, y_var = "value", y_label = "F1", x_var = "range_size", x_label = "Range size [km]")
+bslib::card(plot, full_screen = T)
+```
+
+##### Cohen's kappa
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "kappa")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Kappa", x_var = "range_size", x_label = "Range size [km]")
+bslib::card(plot, full_screen = T)
+```
+
+##### Accuracy
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "accuracy")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Accuracy", x_var = "range_size", x_label = "Range size [km]")
+bslib::card(plot, full_screen = T)
+```
+
+##### Precision
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "precision")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Precision", x_var = "range_size", x_label = "Range size [km]")
+bslib::card(plot, full_screen = T)
+```
+
+##### Recall
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "recall")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Recall", x_var = "range_size", x_label = "Range size [km]")
+bslib::card(plot, full_screen = T)
+```
+:::
+
+#### Partial Effects
+
 ::: panel-tabset
-#### AUC
+##### AUC
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "auc", x_var = "range_size", x_label = "Range size")
+df_plot = dplyr::filter(performance, metric == "auc") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "AUC (partial residual)", x_var = "range_size", x_label = "Range size [km]")
 bslib::card(plot, full_screen = T)
 ```
 
-#### F1
+##### F1
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "f1", x_var = "range_size", x_label = "Range size")
+df_plot = dplyr::filter(performance, metric == "f1") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "F1 (partial residual)", x_var = "range_size", x_label = "Range size [km²]")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Cohen's kappa
+##### Cohen's kappa
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "kappa", x_var = "range_size", x_label = "Range size", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "kappa") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"), family = gaussian)
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Kappa (partial residual)", x_var = "range_size", x_label = "Range size [km²]")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Accuracy
+##### Accuracy
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "accuracy", x_var = "range_size", x_label = "Range size")
+df_plot = dplyr::filter(performance, metric == "accuracy") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Accuracy (partial residual)", x_var = "range_size", x_label = "Range size [km²]")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Precision
+##### Precision
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "precision", x_var = "range_size", x_label = "Range size")
+df_plot = dplyr::filter(performance, metric == "precision") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Precision (partial residual)", x_var = "range_size", x_label = "Range size [km²]")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Recall
+##### Recall
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "recall", x_var = "range_size", x_label = "Range size")
+df_plot = dplyr::filter(performance, metric == "recall") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Recall (partial residual)", x_var = "range_size", x_label = "Range size [km²]")
 bslib::card(plot, full_screen = T)
 ```
 :::
+:::
 
 ### Range coverage
 
@@ -296,74 +493,117 @@ $$
 RangeCoverage = \frac{N_{cells\_occ}}{N_{cells\_total}}
 $$
 
-```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE}
-occs_final_unproj = occs_final %>% sf::st_transform("EPSG:4326")
+::: panel-tabset
+#### Marginal Effects
 
-df_cells_total = range_maps_gridded %>%
-  dplyr::rename("species" = name_matched) %>% 
-  group_by(species) %>%
-  summarise(cells_total = n()) %>% 
-  st_drop_geometry()
+::: panel-tabset
+##### AUC
 
-df_cells_occ <- range_maps_gridded %>%
-  st_join(occs_final_unproj, join = st_intersects) %>%
-  filter(name_matched == species) %>%         # Filter only intersections of the same species
-  group_by(species) %>%
-  summarise(cells_occupied = n_distinct(geometry)) %>%
-  st_drop_geometry()
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "auc")
+plot = plot_performance(df_plot, y_var = "value", y_label = "AUC", x_var = "range_cov", x_label = "Range coverage")
+bslib::card(plot, full_screen = T)
+```
 
-df_join = df_cells_total %>% 
-  dplyr::inner_join(df_cells_occ, by = "species") %>% 
-  dplyr::mutate(range_cov = cells_occupied / cells_total) %>% 
-  dplyr::select(species, range_cov)
+##### F1
 
-df_plot = performance %>% 
-  inner_join(df_join, by = "species")
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "f1")
+plot = plot_performance(df_plot, y_var = "value", y_label = "F1", x_var = "range_cov", x_label = "Range coverage")
+bslib::card(plot, full_screen = T)
+```
+
+##### Cohen's kappa
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "kappa")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Kappa", x_var = "range_cov", x_label = "Range coverage")
+bslib::card(plot, full_screen = T)
 ```
 
+##### Accuracy
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "accuracy")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Accuracy", x_var = "range_cov", x_label = "Range coverage")
+bslib::card(plot, full_screen = T)
+```
+
+##### Precision
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "precision")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Precision", x_var = "range_cov", x_label = "Range coverage")
+bslib::card(plot, full_screen = T)
+```
+
+##### Recall
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "recall")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Recall", x_var = "range_cov", x_label = "Range coverage")
+bslib::card(plot, full_screen = T)
+```
+:::
+
+#### Partial Effects
+
 ::: panel-tabset
-#### AUC
+##### AUC
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "auc", x_var = "range_cov", x_label = "Range coverage", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "auc") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "AUC (partial residual)", x_var = "range_cov", x_label = "Range coverage")
 bslib::card(plot, full_screen = T)
 ```
 
-#### F1
+##### F1
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "f1", x_var = "range_cov", x_label = "Range coverage", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "f1") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "F1 (partial residual)", x_var = "range_cov", x_label = "Range coverage")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Cohen's kappa
+##### Cohen's kappa
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "kappa", x_var = "range_cov", x_label = "Range coverage", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "kappa") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"), family = gaussian)
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Kappa (partial residual)", x_var = "range_cov", x_label = "Range coverage")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Accuracy
+##### Accuracy
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "accuracy", x_var = "range_cov", x_label = "Range coverage", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "accuracy") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Accuracy (partial residual)", x_var = "range_cov", x_label = "Range coverage")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Precision
+##### Precision
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "precision", x_var = "range_cov", x_label = "Range coverage", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "precision") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Precision (partial residual)", x_var = "range_cov", x_label = "Range coverage")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Recall
+##### Recall
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "recall", x_var = "range_cov", x_label = "Range coverage", reg_func = asym_regression)
+df_plot = dplyr::filter(performance, metric == "recall") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Recall (partial residual)", x_var = "range_cov", x_label = "Range coverage")
 bslib::card(plot, full_screen = T)
 ```
 :::
+:::
 
 ### Range coverage bias
 
@@ -375,56 +615,114 @@ $$
 
 Higher bias values indicate that occurrence records are spatially more clustered within the range of the species.
 
-```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE}
-occs_final_unproj = occs_final %>% sf::st_transform("EPSG:4326")
+::: panel-tabset
+#### Marginal Effects
 
-df_occs_total = occs_final_unproj %>% 
-  st_drop_geometry() %>% 
-  group_by(species) %>% 
-  summarise(occs_total = n())
+::: panel-tabset
+##### AUC
 
-df_join = df_occs_total %>% 
-  dplyr::inner_join(df_cells_total, by = "species") %>% 
-  dplyr::inner_join(df_cells_occ, by = "species") %>% 
-  dplyr::mutate(range_bias = 1-((cells_occupied / cells_total) / pmin(occs_total / cells_total, 1)))
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "auc")
+plot = plot_performance(df_plot, y_var = "value", y_label = "AUC", x_var = "range_bias", x_label = "Range coverage bias")
+bslib::card(plot, full_screen = T)
+```
 
-df_plot = performance %>% 
-  inner_join(df_join, by = "species")
+##### F1
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "f1")
+plot = plot_performance(df_plot, y_var = "value", y_label = "F1", x_var = "range_bias", x_label = "Range coverage bias")
+bslib::card(plot, full_screen = T)
 ```
 
+##### Cohen's kappa
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "kappa")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Kappa", x_var = "range_bias", x_label = "Range coverage bias")
+bslib::card(plot, full_screen = T)
+```
+
+##### Accuracy
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "accuracy")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Accuracy", x_var = "range_bias", x_label = "Range coverage bias")
+bslib::card(plot, full_screen = T)
+```
+
+##### Precision
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "precision")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Precision", x_var = "range_bias", x_label = "Range coverage bias")
+bslib::card(plot, full_screen = T)
+```
+
+##### Recall
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "recall")
+plot = plot_performance(df_plot, y_var = "value", y_label = "Recall", x_var = "range_bias", x_label = "Range coverage bias")
+bslib::card(plot, full_screen = T)
+```
+:::
+
+#### Partial Effects
+
 ::: panel-tabset
-#### AUC
+##### AUC
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "auc", x_var = "range_bias", x_label = "Range coverage bias", x_log = F)
+df_plot = dplyr::filter(performance, metric == "auc") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "AUC (partial residual)", x_var = "range_bias", x_label = "Range coverage bias")
 bslib::card(plot, full_screen = T)
 ```
 
-#### F1
+##### F1
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "f1", x_var = "range_bias", x_label = "Range coverage bias", x_log = F)
+df_plot = dplyr::filter(performance, metric == "f1") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "F1 (partial residual)", x_var = "range_bias", x_label = "Range coverage bias")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Accuracy
+##### Cohen's kappa
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "accuracy", x_var = "range_bias", x_label = "Range coverage bias", x_log = F)
+df_plot = dplyr::filter(performance, metric == "kappa") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"), family = gaussian)
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Kappa (partial residual)", x_var = "range_bias", x_label = "Range coverage bias")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Precision
+##### Accuracy
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "precision", x_var = "range_bias", x_label = "Range coverage bias", x_log = F)
+df_plot = dplyr::filter(performance, metric == "accuracy") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Accuracy (partial residual)", x_var = "range_bias", x_label = "Range coverage bias")
 bslib::card(plot, full_screen = T)
 ```
 
-#### Recall
+##### Precision
 
 ```{r echo = FALSE}
-plot = plot_performance(df_plot, metric = "recall", x_var = "range_bias", x_label = "Range coverage bias", x_log = F)
+df_plot = dplyr::filter(performance, metric == "precision") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Precision (partial residual)", x_var = "range_bias", x_label = "Range coverage bias")
 bslib::card(plot, full_screen = T)
 ```
+
+##### Recall
+
+```{r echo = FALSE}
+df_plot = dplyr::filter(performance, metric == "recall") %>% 
+  add_partial_residuals(focus_var = "n_obs", control_vars = c("range_size", "range_cov", "range_bias"))
+plot = plot_performance(df_plot, y_var = "n_obs_partial", y_label = "Recall (partial residual)", x_var = "range_bias", x_label = "Range coverage bias")
+bslib::card(plot, full_screen = T)
+```
+:::
 :::
diff --git a/R/utils.R b/R/utils.R
index d00b91a6355e24ee441a8baa1a22f5e55bbde698..464072b436ddbd8bed11a5a5fdbfb0cfe84470a7 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -80,16 +80,16 @@ evaluate_model <- function(model, data) {
   # Return metrics
   return(
     list(
-      AUC = as.numeric(auc),
-      Accuracy = cm$overall["Accuracy"],
-      Kappa = cm$overall["Kappa"],
-      Precision = cm$byClass["Precision"],
-      Recall = cm$byClass["Recall"],
-      F1 = cm$byClass["F1"],
-      TP = cm$table["P", "P"],
-      FP = cm$table["P", "A"],
-      TN = cm$table["A", "A"],
-      FN = cm$table["A", "P"]
+      auc = as.numeric(auc),
+      accuracy = cm$overall["Accuracy"],
+      kappa = cm$overall["Kappa"],
+      precision = cm$byClass["Precision"],
+      recall = cm$byClass["Recall"],
+      f1 = cm$byClass["F1"],
+      tp = cm$table["P", "P"],
+      fp = cm$table["P", "A"],
+      tn = cm$table["A", "A"],
+      fn = cm$table["A", "P"]
     )
   )
 }
diff --git a/env_vars.png b/env_vars.png
deleted file mode 100644
index 273e21027964862a1deec247aafe6ca9eaea63f4..0000000000000000000000000000000000000000
Binary files a/env_vars.png and /dev/null differ
diff --git a/occurrences.png b/occurrences.png
new file mode 100644
index 0000000000000000000000000000000000000000..0a8e43d1943357ed699740edbce1198cfddb6b33
Binary files /dev/null and b/occurrences.png differ