Skip to content
Snippets Groups Projects
Commit 60ea080e authored by ye87zine's avatar ye87zine
Browse files

update readme, add function to setup dir structure

parent 5e216f8e
Branches main
No related tags found
No related merge requests found
......@@ -46,7 +46,7 @@ for(fold in 1:5){
num.threads = 48
)
save(rf_fit, file = paste0("data/r_objects/msdm_rf/msdm_rf_fit_fold", fold,".RData"))
save(rf_fit, file = paste0("data/r_objects/msdm_rf_results/msdm_rf_fit_fold", fold,".RData"))
}
# Full model
......@@ -71,13 +71,13 @@ rf_fit = caret::train(
num.threads = 48
)
save(rf_fit, file = "data/r_objects/msdm_rf/msdm_rf_fit_full.RData")
save(rf_fit, file = "data/r_objects/msdm_rf_results/msdm_rf_fit_full.RData")
# ----------------------------------------------------------------------#
# Evaluate model ####
# ----------------------------------------------------------------------#
msdm_rf_performance = lapply(1:5, function(fold){
load(paste0("data/r_objects/msdm_rf/msdm_rf_fit_fold", fold, ".RData"))
load(paste0("data/r_objects/msdm_rf_results/msdm_rf_fit_fold", fold, ".RData"))
test_data = model_data %>%
dplyr::filter(fold_global == fold) %>%
......@@ -137,4 +137,4 @@ msdm_rf_performance = lapply(1:5, function(fold){
}) %>%
bind_rows()
save(msdm_rf_performance, file = paste0("data/r_objects/msdm_rf_performance.RData"))
save(msdm_rf_performance, file = paste0("data/r_objects/msdm_rf_results_performance.RData"))
expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) {
# Create the project's directory skeleton (data and plot folders) relative
# to the current working directory.
#
# The function refuses to run unless the working directory is named
# "symobio-modeling". Directories that already exist are left untouched and
# reported via message(). Failures to create a directory are reported via
# message() as well and do not abort the remaining directories.
#
# Returns: NULL (invisibly); the function is called for its side effects.
setup_dirs = function(){
  checkmate::assert_true(basename(getwd()) == "symobio-modeling")

  # Parents are listed before their children so that each dir.create() call
  # finds its parent directory in place.
  dirs = c(
    "data",
    "data/geospatial",
    "data/phylogenies",
    "data/r_objects",
    "data/r_objects/pa_sampling",
    "data/r_objects/ssdm_results",
    # NOTE(review): "data/r_objects/msdm_embed_results" was listed twice in
    # the original vector; the duplicate was dropped. Possibly a different
    # results directory was intended for the second entry -- confirm.
    "data/r_objects/msdm_embed_results",
    "data/r_objects/msdm_onehot_results",
    "data/r_objects/msdm_rf_results",
    "plots",
    "plots/pa_sampling",
    "plots/publication",
    "plots/range_predictions"
  )

  for(path in dirs){
    if(dir.exists(path)){
      message("Skipping directory '", path, "': already exists")
      next
    }

    # dir.create() signals failure with a warning plus a FALSE return value
    # rather than an error, so warnings have to be handled here too.
    report_failure = function(cond){
      message("Couldn't create directory '", path, "': ", conditionMessage(cond))
    }

    tryCatch({
      if(!dir.create(path)){
        message("Couldn't create directory '", path, "'")
      }
    }, warning = report_failure, error = report_failure)
  }

  return(invisible(NULL))
}
expand_bbox = function(bbox, min_span = 1, expansion = 0.25) {
# Get current bbox dimensions
x_range <- bbox["xmax"] - bbox["xmin"]
y_range <- bbox["ymax"] - bbox["ymin"]
x_range = bbox["xmax"] - bbox["xmin"]
y_range = bbox["ymax"] - bbox["ymin"]
x_expand = expansion
y_expand = expansion
......@@ -17,10 +52,10 @@ expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) {
}
# Expand the limits, adjusting both directions correctly
bbox["xmin"] <- bbox["xmin"] - (x_expand * x_range)
bbox["xmax"] <- bbox["xmax"] + (x_expand * x_range)
bbox["ymin"] <- bbox["ymin"] - (y_expand * y_range)
bbox["ymax"] <- bbox["ymax"] + (y_expand * y_range)
bbox["xmin"] = bbox["xmin"] - (x_expand * x_range)
bbox["xmax"] = bbox["xmax"] + (x_expand * x_range)
bbox["ymin"] = bbox["ymin"] - (y_expand * y_range)
bbox["ymax"] = bbox["ymax"] + (y_expand * y_range)
return(bbox)
}
......@@ -47,7 +82,7 @@ predict_new = function(model, data, type = "prob"){
}
}
evaluate_model <- function(model, data) {
evaluate_model = function(model, data) {
# Accuracy: The proportion of correctly predicted instances (both true positives and true negatives) out of the total instances.
# Formula: Accuracy = (TP + TN) / (TP + TN + FP + FN)
......@@ -75,7 +110,7 @@ evaluate_model <- function(model, data) {
auc = pROC::roc(actual, probs, levels = c("P", "A"), direction = ">")$auc
# Calculate confusion matrix
cm <- caret::confusionMatrix(preds, actual, positive = "P")
cm = caret::confusionMatrix(preds, actual, positive = "P")
# Return metrics
return(
......
# Codebase Documentation
# Symobio Modeling
This repository implements a species distribution modeling comparison study for about 600 South American mammal species. Specifically, the study compares different modeling approaches for predicting species distributions.
Code for a comparative SDM study for about 600 South American mammal species. Specifically, the study compares different modeling approaches for predicting species distributions.
An analysis of model performance can be found here: https://chrkoenig.quarto.pub/sdm-performance-report/
## Project Structure
- **`R/`**: Contains all the R scripts organized by workflow steps.
- **`renv/`**: Manages package dependencies for reproducibility.
- **`Symobio_modeling.Rproj`**: RStudio project file for easy navigation.
- **`README.md`**: High-level overview of the project.
- **`occurrences.png`**: Visualization or reference image for occurrences data.
- **`.Rprofile`**: Custom R environment settings.
- **`renv/`**: Manages package dependencies for reproducibility.
- **`renv.lock`**: Lockfile for `renv` to ensure consistent package versions.
- **`data/`**: Input data (geo, phylo), intermediate data and modeling results
- **`plots/`**: Plots for visualizing data processing and analysis steps
## Workflow Overview
......@@ -21,7 +23,7 @@ Pre-process species-specific and environmental information for model fitting and
- **`01_01_range_preparation.R`**: Process species range maps and calculate range dissimilarity.
- **`01_02_traits_preparation.R`**: Prepare species trait data and calculate functional distances.
- **`01_03_phylo_preparation.R`**: Process phylogenetic information and alculate phylogenetic distances.
- **`01_03_phylo_preparation.R`**: Process phylogenetic information and calculate phylogenetic distances.
- **`01_04_raster_preparation.R`**: Prepare environmental raster layers for data extraction and modeling.
### 2. Presence/Absence Data Processing
......@@ -56,7 +58,8 @@ Analyse modeling results
```r
renv::restore()
```
3. Run the scripts in the R/ directory sequentially. Some scripts, especially for model fitting, may run a long time and benefit from powerful hardware.
3. Set up the directory structure using the `setup_dirs()` function in `utils.R`.
4. Run the scripts in the `R/` directory sequentially. Some scripts, especially those for model fitting, may run for a long time and benefit from powerful hardware.
## Additional Notes
- Ensure that all required input data (e.g., range maps, raster files) is available in the expected directories.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment