diff --git a/R/03_01_presence_preparation.R b/R/03_01_presence_preparation.R
new file mode 100644
index 0000000000000000000000000000000000000000..c8705b9874b716b87aa552304ea415c625a3b27b
--- /dev/null
+++ b/R/03_01_presence_preparation.R
@@ -0,0 +1,68 @@
+# General packages
+library(dplyr)
+library(tidyr)
+library(furrr)
+
+# Geo packages
+library(terra)
+library(CoordinateCleaner)
+library(sf)
+
+# DB packages
+library(Symobio)
+library(DBI)
+
+source("R/utils.R")
+
+con = db_connect() # Connection to Symobio
+sf::sf_use_s2(use_s2 = FALSE) # Don't use spherical geometry
+
+# ---------------------------------------------------------------------------#
+# Prepare Geodata                                                         ####
+# ---------------------------------------------------------------------------#
+# TODO: finalize predictor choice
+raster_filepaths = list.files("~/share/groups/mas_data/00_data/processed/CHELSA_v2-1_bioclim/", pattern = ".tif$", full.names = T) %>% 
+  stringr::str_sort(numeric = T)
+
+sa_polygon = rnaturalearth::ne_countries() %>% 
+  dplyr::filter(continent == "South America") %>% 
+  sf::st_union()
+
+# ---------------------------------------------------------------------------#
+# Prepare Occurrence Data                                                 ####
+# ---------------------------------------------------------------------------#
+load("data/r_objects/range_maps.RData")
+target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])
+
+# Query species from Symobio
+occs = tbl(con, "species_occurrences") %>% 
+  dplyr::filter(species %in% target_species) %>% 
+  dplyr::select(-year) %>% 
+  dplyr::distinct() %>% 
+  collect() %>% 
+  sf::st_as_sf(coords = c("longitude", "latitude"), remove = F, crs = sf::st_crs(4326)) %>% 
+  sf::st_filter(sa_polygon)
+
+# Flag occurrences using Coordinate cleaner
+occs_flagged = occs %>% 
+  dplyr::distinct(species, coordinate_id, longitude, latitude) %>% 
+  group_by(species) %>% 
+  group_split() %>% 
+  purrr::map(                     # Loop over species individually due to bug in CoordinateCleaner
+    CoordinateCleaner::clean_coordinates,
+    lon = "longitude", 
+    lat = "latitude",
+    tests = c("centroids", "gbif", "equal", "institutions", "outliers"),
+    outliers_method = "quantile",
+    verbose = F
+  ) %>% 
+  bind_rows() %>% 
+  dplyr::filter(.summary == T) %>% 
+  dplyr::select(species, coordinate_id, longitude, latitude)
+
+# Finalize and save occurrence dataset
+occs_final = occs %>% 
+  inner_join(occs_flagged, by = c("species", "coordinate_id", "longitude", "latitude")) %>% 
+  dplyr::select(-coordinate_id)
+
+save(occs_final, file = "data/r_objects/occs_final.RData")
\ No newline at end of file
diff --git a/R/03_02_absence_preparation.R b/R/03_02_absence_preparation.R
new file mode 100644
index 0000000000000000000000000000000000000000..b72d132a3cd8bb01ef3f856fcf55e22d510b9f3f
--- /dev/null
+++ b/R/03_02_absence_preparation.R
@@ -0,0 +1,174 @@
+# General packages
+library(dplyr)
+library(tidyr)
+library(furrr)
+
+# Geo packages
+library(terra)
+library(CoordinateCleaner)
+library(sf)
+library(MASS)
+library(geos)
+
+# DB packages
+library(Symobio)
+library(DBI)
+
+source("R/utils.R")
+
+load("data/r_objects/range_maps.RData")
+load("data/r_objects/occs_final.RData")
+
+# ---------------------------------------------------------------------------#
+# Prepare Geodata                                                         ####
+# ---------------------------------------------------------------------------#
+target_species = unique(occs_final$species)
+
+sa_polygon = rnaturalearth::ne_countries() %>% 
+  dplyr::filter(continent == "South America") %>% 
+  sf::st_union() 
+
+raster_filepaths = list.files("~/share/groups/mas_data/00_data/processed/CHELSA_v2-1_bioclim/", pattern = ".tif$", full.names = T) %>% # TODO: finalize predictor choice
+  stringr::str_sort(numeric = T) 
+
+raster_data = terra::rast(raster_filepaths) %>% 
+  terra::crop(sf::st_bbox(sa_polygon))
+
+# Project (proj string generated with SA bbox coordinates on https://projectionwizard.org)
+proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" 
+
+raster_data = terra::project(raster_data, proj_string)
+sa_polygon = st_transform(sa_polygon, proj_string)
+occs_final = st_transform(occs_final, proj_string)
+range_maps = st_transform(range_maps, proj_string)
+
+# ---------------------------------------------------------------------------#
+# 1. Distance-based sampling                                              ####
+#                                                                            #
+# Absences should be predominantly sampled from regions that are             #
+# geographically close (reproduce sampling biases) but environmentally       #
+# dissimilar (avoid false negatives) to the known occurrences                #
+# ---------------------------------------------------------------------------#
+# Geographic distance
+future::plan("multisession", workers = 16)
+pseudo_absences = furrr::future_map(
+  .x = target_species, 
+  .options = furrr::furrr_options(seed = 42), 
+  .f = function(species){
+    # Prepare occurrences
+    occs_spec = dplyr::filter(occs_final, species == !!species)
+    
+    occs_poly = occs_spec %>% 
+      summarise(geometry = st_combine(geometry)) %>%
+      st_concave_hull(ratio = 0.5)
+    
+    occs_bff = occs_spec %>% 
+      st_buffer(10000) %>% # Buffer by 10 kilometers as exclusion radius for absence sampling
+      st_union()
+    
+    range_spec = dplyr::filter(range_maps, name_matched == !!species) %>% 
+      rename(species = name_matched)
+    
+    # Define model/sampling region
+    sampling_region = st_union(range_spec, occs_poly) %>% # Union occs poly and expert range
+      st_buffer(dist = 100000) %>% # Buffer by 100 km to extend sampling region into new environments
+      st_intersection(sa_polygon) %>% # Crop to South America 
+      st_difference(occs_bff) # Crop out buffered presences
+    
+    # Define KDE
+    ref = st_bbox(sampling_region)[c(1,3,2,4)]
+    min_extent = min(ref[2]-ref[1], ref[4]-ref[3])
+    occs_kde = spatialEco::sf.kde(occs_spec, bw = min_extent/2, res = 10000, ref = ref, standardize = T, scale.factor = 1) %>% 
+      crop(sampling_region, mask = T, touches = F)
+    
+    # Sampling of pseudo absence
+    abs_spec = st_sf(geometry = st_sfc(), crs = proj_string)
+    while(nrow(abs_spec) < nrow(occs_spec)){
+      sample_points_raw = st_sample(sampling_region, nrow(occs_spec))
+      p = extract(occs_kde, vect(sample_points_raw))$z
+      x = runif(length(p))
+      
+      sample_points = sample_points_raw[x<p]
+      samples_required = nrow(occs_spec) - nrow(abs_spec)
+      
+      if(length(sample_points) > samples_required){
+        sample_points = sample(sample_points, samples_required)
+      }
+      
+      abs_spec = bind_rows(abs_spec, st_sf(geometry = sample_points)) 
+    }
+    
+    plot(sa_polygon)
+    plot(occs_kde, add = T, legend = F)
+    plot(range_spec, add = T, col = "#00000000")
+    plot(abs_spec, add = T, col = "black", pch = 3, cex = 0.5)
+    plot(occs_spec, add = T, col = "white", pch = 3, cex = 0.5)
+    
+    
+    sample_points = st_as_sf(st_sample(sample_region, nrow(abs_spec))) %>% 
+      dplyr::mutate(
+        species = occs_spec$species[1],
+        longitude = sf::st_coordinates(.)[,1],
+        latitude = sf::st_coordinates(.)[,2]
+      ) %>% 
+      dplyr::rename(geometry = "x")
+    
+    abs_spec = terra::rast(raster_filepaths) %>% 
+      setNames(paste0("layer_", 1:19)) %>% 
+      terra::extract(sample_points) %>% 
+      dplyr::select(-ID) %>% 
+      dplyr::mutate(
+        present = 0,
+        geometry = sample_points$x
+      ) %>% 
+      tibble() %>% 
+      bind_cols(sample_points)
+    
+    # Create presence-absence dataframe
+    pa_spec = occs_spec %>% 
+      dplyr::mutate(present = 1) %>% 
+      bind_rows(abs_spec) 
+    
+    # Split into train and test datasets
+    train_index = createDataPartition(pa_spec$present, p = 0.7, list = FALSE)
+    pa_spec$train = 0
+    pa_spec$train[train_index] = 1
+    
+    # Define cross-validation folds
+    folds = tryCatch({
+      spatial_folds = suppressMessages(
+        blockCV::cv_spatial(
+          pa_spec,
+          column = "present",
+          k = 5,
+          progress = F, plot = F, report = F
+        )
+      )
+      
+      spatial_folds$folds_ids
+    }, warning = function(w){
+      NA
+    }, error = function(e){
+      NA
+    })
+    
+    pa_spec$folds = folds
+    pa_spec$geometry = NULL
+    
+    return(pa_spec)
+  })
+
+model_data = bind_rows(model_data)
+save(model_data, file = "data/r_objects/model_data.RData")
+
+
+
+
+ggplot() +
+  geom_sf(data = sa_polygon, fill = "red") +
+  geom_sf(data = range_spec, fill = "green") + 
+  geom_sf(data = occs_spec) +
+  #   facet_wrap("species", nrow = 2) + 
+  #   theme_minimal()
+  #   
+  
\ No newline at end of file
diff --git a/R/03_presence_absence_preparation.R b/R/03_presence_absence_preparation.R
deleted file mode 100644
index 9fd817c71fa460cf27c87aa9d023296999392b67..0000000000000000000000000000000000000000
--- a/R/03_presence_absence_preparation.R
+++ /dev/null
@@ -1,160 +0,0 @@
-# General packages
-library(dplyr)
-library(tidyr)
-library(furrr)
-
-# Geo packages
-library(terra)
-library(CoordinateCleaner)
-library(sf)
-
-# DB packages
-library(Symobio)
-library(DBI)
-
-source("R/utils.R")
-
-con = db_connect() # Connection to Symobio
-sf::sf_use_s2(use_s2 = FALSE) # Don't use spherical geometry
-
-# ---------------------------------------------------------------------------#
-# Prepare Geodata                                                         ####
-# ---------------------------------------------------------------------------#
-raster_info = tbl(con, "datasets") %>% 
-  dplyr::filter(stringr::str_detect(name, "CHELSA")) %>% 
-  collect()
-
-# raster_filepaths = list.files(raster_info$raw_data_path, pattern = ".tif$", full.names = T) %>% 
-#   stringr::str_sort(numeric = T)
-
-raster_filepaths = list.files("I:/mas_data/00_data/processed/CHELSA_v2-1_bioclim", pattern = ".tif$", full.names = T) %>% 
-  stringr::str_sort(numeric = T)
-
-sa_polygon = rnaturalearth::ne_countries() %>% 
-  dplyr::filter(continent == "South America") %>% 
-  sf::st_union()
-
-# ---------------------------------------------------------------------------#
-# Prepare Occurrence Data                                                 ####
-# ---------------------------------------------------------------------------#
-load("data/r_objects/range_maps.RData")
-target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])
-
-occs = tbl(con, "species_occurrences") %>% 
-  dplyr::filter(species %in% target_species) %>% 
-  dplyr::select(-year) %>% 
-  dplyr::distinct() %>% 
-  collect() %>% 
-  sf::st_as_sf(coords = c("longitude", "latitude"), remove = F, crs = sf::st_crs(4326)) %>% 
-  sf::st_filter(sa_polygon)
-
-occs_flagged = occs %>% 
-  dplyr::distinct(species, coordinate_id, longitude, latitude) %>% 
-  group_by(species) %>% 
-  group_split() %>% 
-  purrr::map(                     # Loop over species individually due to bug in CoordinateCleaner
-    CoordinateCleaner::clean_coordinates,
-    lon = "longitude", 
-    lat = "latitude",
-    tests = c("centroids", "gbif", "equal", "institutions", "outliers"),
-    outliers_method = "quantile",
-    verbose = F
-  ) %>% 
-  bind_rows() %>% 
-  dplyr::filter(.summary == T) %>% 
-  dplyr::select(species, coordinate_id, longitude, latitude)
-
-env_vars = tbl(con, "raster_extracts_num") %>% 
-  dplyr::filter(
-    coordinate_id %in% occs$coordinate_id,
-    metric == "mean"
-  ) %>% 
-  arrange(raster_layer_id) %>% 
-  tidyr::pivot_wider(id_cols = coordinate_id, names_from = raster_layer_id, names_prefix = "layer_", values_from = value) %>% 
-  collect()
-
-occs_final = occs %>% 
-  inner_join(occs_flagged, by = c("species", "coordinate_id", "longitude", "latitude")) %>% 
-  inner_join(env_vars, by = "coordinate_id") %>% 
-  dplyr::select(-coordinate_id)
-
-save(occs_final, file = "data/r_objects/occs_final.RData")
-
-# ---------------------------------------------------------------------------#
-# Create Modeling dataset                                                     ####
-# ---------------------------------------------------------------------------#
-load("data/r_objects/occs_final.RData")
-load("data/r_objects/sa_polygon.RData")
-sf::sf_use_s2(use_s2 = FALSE)
-
-occs_split = split(occs_final, occs_final$species)
-
-future::plan("multisession", workers = 16)
-model_data = furrr::future_map(occs_split, .progress = TRUE, .options = furrr::furrr_options(seed = 42), .f = function(occs_spec){
-  # Define model/sampling region
-  occs_bbox = occs_spec %>% 
-    sf::st_bbox() %>% 
-    expand_bbox(min_span = 1, expansion = 0.25) %>% 
-    sf::st_as_sfc() %>% 
-    st_set_crs(st_crs(occs_spec)) 
-  
-  sample_region = suppressMessages(
-    st_as_sf(st_intersection(occs_bbox, sa_polygon))
-  )
-  
-  # Sample pseudo absence
-  sample_points = st_as_sf(st_sample(sample_region, nrow(occs_spec))) %>% 
-    dplyr::mutate(
-      species = occs_spec$species[1],
-      longitude = sf::st_coordinates(.)[,1],
-      latitude = sf::st_coordinates(.)[,2]
-    ) %>% 
-    dplyr::rename(geometry = "x")
-  
-  abs_spec = terra::rast(raster_filepaths) %>% 
-    setNames(paste0("layer_", 1:19)) %>% 
-    terra::extract(sample_points) %>% 
-    dplyr::select(-ID) %>% 
-    dplyr::mutate(
-      present = 0,
-      geometry = sample_points$x
-    ) %>% 
-    tibble() %>% 
-    bind_cols(sample_points)
-  
-  # Create presence-absence dataframe
-  pa_spec = occs_spec %>% 
-    dplyr::mutate(present = 1) %>% 
-    bind_rows(abs_spec) 
-  
-  # Split into train and test datasets
-  train_index = createDataPartition(pa_spec$present, p = 0.7, list = FALSE)
-  pa_spec$train = 0
-  pa_spec$train[train_index] = 1
-  
-  # Define cross-validation folds
-  folds = tryCatch({
-    spatial_folds = suppressMessages(
-      blockCV::cv_spatial(
-        pa_spec,
-        column = "present",
-        k = 5,
-        progress = F, plot = F, report = F
-      )
-    )
-    
-    spatial_folds$folds_ids
-  }, warning = function(w){
-    NA
-  }, error = function(e){
-    NA
-  })
-  
-  pa_spec$folds = folds
-  pa_spec$geometry = NULL
-  
-  return(pa_spec)
-})
-
-model_data = bind_rows(model_data)
-save(model_data, file = "data/r_objects/model_data.RData")