Skip to content
Snippets Groups Projects
Commit 60ea080e authored by ye87zine's avatar ye87zine
Browse files

update readme, add function to setup dir structure

parent 5e216f8e
Branches
No related tags found
No related merge requests found
...@@ -46,7 +46,7 @@ for(fold in 1:5){ ...@@ -46,7 +46,7 @@ for(fold in 1:5){
num.threads = 48 num.threads = 48
) )
save(rf_fit, file = paste0("data/r_objects/msdm_rf/msdm_rf_fit_fold", fold,".RData")) save(rf_fit, file = paste0("data/r_objects/msdm_rf_results/msdm_rf_fit_fold", fold,".RData"))
} }
# Full model # Full model
...@@ -71,13 +71,13 @@ rf_fit = caret::train( ...@@ -71,13 +71,13 @@ rf_fit = caret::train(
num.threads = 48 num.threads = 48
) )
save(rf_fit, file = "data/r_objects/msdm_rf/msdm_rf_fit_full.RData") save(rf_fit, file = "data/r_objects/msdm_rf_results/msdm_rf_fit_full.RData")
# ----------------------------------------------------------------------# # ----------------------------------------------------------------------#
# Evaluate model #### # Evaluate model ####
# ----------------------------------------------------------------------# # ----------------------------------------------------------------------#
msdm_rf_performance = lapply(1:5, function(fold){ msdm_rf_performance = lapply(1:5, function(fold){
load(paste0("data/r_objects/msdm_rf/msdm_rf_fit_fold", fold, ".RData")) load(paste0("data/r_objects/msdm_rf_results/msdm_rf_fit_fold", fold, ".RData"))
test_data = model_data %>% test_data = model_data %>%
dplyr::filter(fold_global == fold) %>% dplyr::filter(fold_global == fold) %>%
...@@ -137,4 +137,4 @@ msdm_rf_performance = lapply(1:5, function(fold){ ...@@ -137,4 +137,4 @@ msdm_rf_performance = lapply(1:5, function(fold){
}) %>% }) %>%
bind_rows() bind_rows()
save(msdm_rf_performance, file = paste0("data/r_objects/msdm_rf_performance.RData")) save(msdm_rf_performance, file = paste0("data/r_objects/msdm_rf_results_performance.RData"))
# Create the project's directory skeleton (data, result and plot folders).
#
# Must be called with the working directory set to the project root
# ("symobio-modeling"); the assertion below aborts otherwise.
#
# Directories that already exist are left untouched and reported via
# message(). Directories that cannot be created are also reported via
# message() instead of aborting, so the remaining ones are still attempted.
#
# Returns NULL invisibly; the function is called for its side effects only.
setup_dirs = function(){
  checkmate::assert_true(basename(getwd()) == "symobio-modeling")

  dirs = c(
    "data",
    "data/geospatial",
    "data/phylogenies",
    "data/r_objects",
    "data/r_objects/pa_sampling",
    "data/r_objects/ssdm_results",
    "data/r_objects/msdm_embed_results",
    "data/r_objects/msdm_onehot_results",
    "data/r_objects/msdm_rf_results",
    "plots",
    "plots/pa_sampling",
    "plots/publication",
    "plots/range_predictions"
  )

  for(path in dirs){
    if(dir.exists(path)){
      message("Skipping directory '", path, "': already exists")
      next
    }

    report_failure = function(cond){
      message("Couldn't create directory '", path, "': ", conditionMessage(cond))
    }

    # dir.create() signals failure through a warning and a FALSE return value
    # rather than an error, so both warnings and errors have to be handled.
    # recursive = TRUE keeps a child directory from failing just because its
    # parent could not be created earlier in the loop.
    tryCatch({
      if(!dir.create(path, recursive = TRUE)){
        stop("dir.create() returned FALSE")
      }
    }, warning = report_failure, error = report_failure)
  }

  return(invisible(NULL))
}
expand_bbox = function(bbox, min_span = 1, expansion = 0.25) {
# Get current bbox dimensions # Get current bbox dimensions
x_range <- bbox["xmax"] - bbox["xmin"] x_range = bbox["xmax"] - bbox["xmin"]
y_range <- bbox["ymax"] - bbox["ymin"] y_range = bbox["ymax"] - bbox["ymin"]
x_expand = expansion x_expand = expansion
y_expand = expansion y_expand = expansion
...@@ -17,10 +52,10 @@ expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) { ...@@ -17,10 +52,10 @@ expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) {
} }
# Expand the limits, adjusting both directions correctly # Expand the limits, adjusting both directions correctly
bbox["xmin"] <- bbox["xmin"] - (x_expand * x_range) bbox["xmin"] = bbox["xmin"] - (x_expand * x_range)
bbox["xmax"] <- bbox["xmax"] + (x_expand * x_range) bbox["xmax"] = bbox["xmax"] + (x_expand * x_range)
bbox["ymin"] <- bbox["ymin"] - (y_expand * y_range) bbox["ymin"] = bbox["ymin"] - (y_expand * y_range)
bbox["ymax"] <- bbox["ymax"] + (y_expand * y_range) bbox["ymax"] = bbox["ymax"] + (y_expand * y_range)
return(bbox) return(bbox)
} }
...@@ -47,7 +82,7 @@ predict_new = function(model, data, type = "prob"){ ...@@ -47,7 +82,7 @@ predict_new = function(model, data, type = "prob"){
} }
} }
evaluate_model <- function(model, data) { evaluate_model = function(model, data) {
# Accuracy: The proportion of correctly predicted instances (both true positives and true negatives) out of the total instances. # Accuracy: The proportion of correctly predicted instances (both true positives and true negatives) out of the total instances.
# Formula: Accuracy = (TP + TN) / (TP + TN + FP + FN) # Formula: Accuracy = (TP + TN) / (TP + TN + FP + FN)
...@@ -75,7 +110,7 @@ evaluate_model <- function(model, data) { ...@@ -75,7 +110,7 @@ evaluate_model <- function(model, data) {
auc = pROC::roc(actual, probs, levels = c("P", "A"), direction = ">")$auc auc = pROC::roc(actual, probs, levels = c("P", "A"), direction = ">")$auc
# Calculate confusion matrix # Calculate confusion matrix
cm <- caret::confusionMatrix(preds, actual, positive = "P") cm = caret::confusionMatrix(preds, actual, positive = "P")
# Return metrics # Return metrics
return( return(
......
# Codebase Documentation # Symobio Modeling
This repository implements a species distribution modeling comparison study for about 600 South American mammal species. Specifically, the study compares different modeling approaches for predicting species distributions. Code for a comparative SDM study for about 600 South American mammal species. Specifically, the study compares different modeling approaches for predicting species distributions.
An analysis of model performance can be found here: https://chrkoenig.quarto.pub/sdm-performance-report/
## Project Structure ## Project Structure
- **`R/`**: Contains all the R scripts organized by workflow steps. - **`R/`**: Contains all the R scripts organized by workflow steps.
- **`renv/`**: Manages package dependencies for reproducibility.
- **`Symobio_modeling.Rproj`**: RStudio project file for easy navigation. - **`Symobio_modeling.Rproj`**: RStudio project file for easy navigation.
- **`README.md`**: High-level overview of the project. - **`README.md`**: High-level overview of the project.
- **`occurrences.png`**: Visualization or reference image for occurrences data. - **`renv/`**: Manages package dependencies for reproducibility.
- **`.Rprofile`**: Custom R environment settings.
- **`renv.lock`**: Lockfile for `renv` to ensure consistent package versions. - **`renv.lock`**: Lockfile for `renv` to ensure consistent package versions.
- **`data/`**: Input data (geo, phylo), intermediate data and modeling results
- **`plots/`**: Plots for visualizing data processing and analysis steps
## Workflow Overview ## Workflow Overview
...@@ -21,7 +23,7 @@ Pre-process species-specific and environmental information for model fitting and ...@@ -21,7 +23,7 @@ Pre-process species-specific and environmental information for model fitting and
- **`01_01_range_preparation.R`**: Process species range maps and calculate range dissimilarity. - **`01_01_range_preparation.R`**: Process species range maps and calculate range dissimilarity.
- **`01_02_traits_preparation.R`**: Prepare species trait data and calculate functional distances. - **`01_02_traits_preparation.R`**: Prepare species trait data and calculate functional distances.
- **`01_03_phylo_preparation.R`**: Process phylogenetic information and alculate phylogenetic distances. - **`01_03_phylo_preparation.R`**: Process phylogenetic information and calculate phylogenetic distances.
- **`01_04_raster_preparation.R`**: Prepare environmental raster layers for data extraction and modeling. - **`01_04_raster_preparation.R`**: Prepare environmental raster layers for data extraction and modeling.
### 2. Presence/Absence Data Processing ### 2. Presence/Absence Data Processing
...@@ -56,7 +58,8 @@ Analyse modeling results ...@@ -56,7 +58,8 @@ Analyse modeling results
```r ```r
renv::restore() renv::restore()
``` ```
3. Run the scripts in the R/ directory sequentially. Some scripts, especially for model fitting, may run a long time and benefit from powerful hardware. 3. Set up the directory structure using the `setup_dirs()` function in `utils.R`.
4. Run the scripts in the R/ directory sequentially. Some scripts, especially for model fitting, may run a long time and benefit from powerful hardware.
## Additional Notes ## Additional Notes
- Ensure that all required input data (e.g., range maps, raster files) is available in the expected directories. - Ensure that all required input data (e.g., range maps, raster files) is available in the expected directories.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment