diff --git a/R/03_04_modelling_msdm_rf.R b/R/03_04_modelling_msdm_rf.R
index 1e03f70098f0c9086cdd9afc515fbb3578d2704c..f0f187c920323afe75be76eb3e2ea9669f9fca17 100644
--- a/R/03_04_modelling_msdm_rf.R
+++ b/R/03_04_modelling_msdm_rf.R
@@ -46,7 +46,7 @@ for(fold in 1:5){
     num.threads = 48
   )
   
-  save(rf_fit, file = paste0("data/r_objects/msdm_rf/msdm_rf_fit_fold", fold,".RData"))
+  save(rf_fit, file = paste0("data/r_objects/msdm_rf_results/msdm_rf_fit_fold", fold,".RData"))
 }
 
 # Full model
@@ -71,13 +71,13 @@ rf_fit = caret::train(
   num.threads = 48
 )
 
-save(rf_fit, file = "data/r_objects/msdm_rf/msdm_rf_fit_full.RData")
+save(rf_fit, file = "data/r_objects/msdm_rf_results/msdm_rf_fit_full.RData")
 
 # ----------------------------------------------------------------------#
 # Evaluate model                                                     ####
 # ----------------------------------------------------------------------#
 msdm_rf_performance = lapply(1:5, function(fold){
-  load(paste0("data/r_objects/msdm_rf/msdm_rf_fit_fold", fold, ".RData"))
+  load(paste0("data/r_objects/msdm_rf_results/msdm_rf_fit_fold", fold, ".RData"))
   
   test_data = model_data %>% 
     dplyr::filter(fold_global == fold) %>% 
@@ -137,4 +137,4 @@ msdm_rf_performance = lapply(1:5, function(fold){
 }) %>% 
   bind_rows()
 
-save(msdm_rf_performance, file = paste0("data/r_objects/msdm_rf_performance.RData"))
+save(msdm_rf_performance, file = paste0("data/r_objects/msdm_rf_results_performance.RData"))
diff --git a/R/utils.R b/R/utils.R
index 464072b436ddbd8bed11a5a5fdbfb0cfe84470a7..d324645483d6334dd891a10181d112f66dd8e19a 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,7 +1,42 @@
-expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) {
+setup_dirs = function(){
+  checkmate::assert_true(basename(getwd()) == "symobio-modeling")
+  
+  dirs = c(
+    "data", 
+    "data/geospatial", 
+    "data/phylogenies", 
+    "data/r_objects",
+    "data/r_objects/pa_sampling",
+    "data/r_objects/ssdm_results",
+    "data/r_objects/msdm_embed_results",
+    "data/r_objects/msdm_onehot_results",
+    "data/r_objects/msdm_embed_results",
+    "data/r_objects/msdm_rf_results",
+    "plots", 
+    "plots/pa_sampling", 
+    "plots/publication", 
+    "plots/range_predictions"
+  )
+  
+  sapply(dirs, function(dir){
+    tryCatch({
+      if(!dir.exists(dir)){
+        dir.create(dir)
+      } else {
+        message("Skipping directory '", dir, "': already exists")
+      } 
+    }, error = function(e){
+      message("Couldn't create directory '", dir, "': ", e)
+    })
+  })
+  
+  return(invisible(NULL))
+}
+
+expand_bbox = function(bbox, min_span = 1, expansion = 0.25) {
   # Get current bbox dimensions
-  x_range <- bbox["xmax"] - bbox["xmin"]
-  y_range <- bbox["ymax"] - bbox["ymin"]
+  x_range = bbox["xmax"] - bbox["xmin"]
+  y_range = bbox["ymax"] - bbox["ymin"]
   x_expand = expansion
   y_expand = expansion
   
@@ -17,10 +52,10 @@ expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) {
   }
   
   # Expand the limits, adjusting both directions correctly
-  bbox["xmin"] <- bbox["xmin"] - (x_expand * x_range)
-  bbox["xmax"] <- bbox["xmax"] + (x_expand * x_range)
-  bbox["ymin"] <- bbox["ymin"] - (y_expand * y_range)
-  bbox["ymax"] <- bbox["ymax"] + (y_expand * y_range)
+  bbox["xmin"] = bbox["xmin"] - (x_expand * x_range)
+  bbox["xmax"] = bbox["xmax"] + (x_expand * x_range)
+  bbox["ymin"] = bbox["ymin"] - (y_expand * y_range)
+  bbox["ymax"] = bbox["ymax"] + (y_expand * y_range)
   
   return(bbox)
 }
@@ -47,7 +82,7 @@ predict_new = function(model, data, type = "prob"){
   }
 }
 
-evaluate_model <- function(model, data) {
+evaluate_model = function(model, data) {
   # Accuracy: The proportion of correctly predicted instances (both true positives and true negatives) out of the total instances.
   # Formula: Accuracy = (TP + TN) / (TP + TN + FP + FN)
   
@@ -75,7 +110,7 @@ evaluate_model <- function(model, data) {
   auc = pROC::roc(actual, probs, levels = c("P", "A"), direction = ">")$auc
   
   # Calculate confusion matrix
-  cm <- caret::confusionMatrix(preds, actual, positive = "P")
+  cm = caret::confusionMatrix(preds, actual, positive = "P")
   
   # Return metrics
   return(
diff --git a/README.md b/README.md
index e3aa5ea57899c5cef869d495fd302c3e1ed32ea7..f015a0602f874670af787202e7f8767cd8a74771 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,18 @@
-# Codebase Documentation
+# Symobio Modeling
 
-This repository implements a species distribution modeling comparison study for about 600 South American mammal species. Specifically, the study compares different modeling approaches for predicting species distributions.
+Code for a comparative SDM study for about 600 South American mammal species. Specifically, the study compares different modeling approaches for predicting species distributions.
+
+An analysis of model performance can be found here: https://chrkoenig.quarto.pub/sdm-performance-report/
 
 ## Project Structure
 
 - **`R/`**: Contains all the R scripts organized by workflow steps.
-- **`renv/`**: Manages package dependencies for reproducibility.
 - **`Symobio_modeling.Rproj`**: RStudio project file for easy navigation.
 - **`README.md`**: High-level overview of the project.
-- **`occurrences.png`**: Visualization or reference image for occurrences data.
-- **`.Rprofile`**: Custom R environment settings.
+- **`renv/`**: Manages package dependencies for reproducibility.
 - **`renv.lock`**: Lockfile for `renv` to ensure consistent package versions.
+- **`data/`**: Input data (geo, phylo), intermediate data and modeling results
+- **`plots/`**: Plots for visualizing data processing and analysis steps
 
 ## Workflow Overview
 
@@ -21,7 +23,7 @@ Pre-process species-specific and environmental information for model fitting and
 
 - **`01_01_range_preparation.R`**: Process species range maps and calculate range dissimilarity.
 - **`01_02_traits_preparation.R`**: Prepare species trait data and calculate functional distances.
-- **`01_03_phylo_preparation.R`**: Process phylogenetic information and alculate phylogenetic distances.
+- **`01_03_phylo_preparation.R`**: Process phylogenetic information and calculate phylogenetic distances.
 - **`01_04_raster_preparation.R`**: Prepare environmental raster layers for modeling for data extraction.
 
 ### 2. Presence/Absence Data Processing
@@ -56,7 +58,8 @@ Analyse modeling results
    ```r
    renv::restore()
    ```
-3. Run the scripts in the R/ directory sequentially. Some scripts, especially for model fitting, may run a long time and benefit from powerful hardware.
+3. Set up the directory structure using the `setup_dirs()` function in the `utils.R`
+4. Run the scripts in the R/ directory sequentially. Some scripts, especially for model fitting, may run a long time and benefit from powerful hardware.
 
 ## Additional Notes
 - Ensure that all required input data (e.g., range maps, raster files) is available in the expected directories.