Commit 296d916c authored by ye87zine

Added more species to the modeling pipeline, fixed some bugs, and made some additions to the analysis script

parent 87abe201
...@@ -10,6 +10,6 @@ renv/staging/ ...@@ -10,6 +10,6 @@ renv/staging/
renv/cache/ renv/cache/
# Data files # Data files
r_objects/ data/
R/performance_analysis_files R/performance_analysis_files
R/performance_analysis.html R/performance_analysis.html
\ No newline at end of file
load("data/r_objects/range_maps.RData")
# Get taxonomic information for target species
names_unique = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])
species_matched = lapply(names_unique, function(name){
match_result = Symobio::gbif_match_name(name = name)
if(match_result$status != "ACCEPTED"){
match_result = Symobio::gbif_match_name(usageKey = match_result$acceptedUsageKey)
}
match_result$name_orig = name
return(match_result)
}) %>%
bind_rows()
# Assign functional groups
functional_groups = species_matched %>%
mutate(
functional_group = case_when(
order %in% c("Carnivora", "Artiodactyla", "Cingulata", "Perissodactyla") ~ 1,
order %in% c("Rodentia", "Didelphimorphia", "Soricomorpha", "Paucituberculata", "Lagomorpha") ~ 2,
order %in% c("Primates", "Pilosa") ~ 3,
order %in% c("Chiroptera") ~ 4
),
functional_group = factor(functional_group, labels = c("large ground-dwelling", "small ground-dwelling", "arboreal", "flying"))
) %>%
dplyr::select(
name_orig,
name_matched = species,
functional_group
) %>%
distinct()
save(functional_groups, file = "data/r_objects/functional_groups.RData")
# Installation
options(timeout = 600) # increasing the timeout is recommended since we will be downloading a 2GB file
# For Windows and Linux, "cpu" and "cu117" are the only kinds currently supported
# For macOS, the supported kinds are "cpu-intel" and "cpu-m1"
kind <- "cu117"
version <- available.packages()["torch","Version"]
options(repos = c(
torch = sprintf("https://torch-cdn.mlverse.org/packages/%s/%s/", kind, version),
CRAN = "https://cloud.r-project.org" # or any other from which you want to install the other R dependencies.
))
install.packages("torch", type = "binary")
install.packages("torchvision")
install.packages("luz")
# Load packages
library(torch)
library(torchvision)
library(luz)
library(ggplot2)   # needed for plotting the example digit below
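# Optional sanity check (a sketch added here, not part of the original script): confirm
# that the torch backend downloaded and installed correctly before training below.
stopifnot(torch_is_installed())   # FALSE would indicate a failed binary download
print(torch_randn(2, 3)$shape)    # allocate a small tensor; expected shape [2, 3]
print(cuda_is_available())        # TRUE only with a CUDA build ("cu117") and a visible GPU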
# Get dataset
dir <- "../playground/dataset/mnist"
train_ds <- mnist_dataset(
dir,
download = TRUE,
transform = transform_to_tensor
)
test_ds <- mnist_dataset(
dir,
train = FALSE,
transform = transform_to_tensor
)
train_dl <- dataloader(train_ds, batch_size = 128, shuffle = TRUE)
test_dl <- dataloader(test_ds, batch_size = 128)
# Plot example character
image <- train_ds$data[1,1:28,1:28]
image_df <- data.table::melt(image)
ggplot(image_df, aes(x=Var2, y=Var1, fill=value))+
geom_tile(show.legend = FALSE) +
xlab("") + ylab("") +
scale_fill_gradient(low="white", high="black")
# Build network
net <- nn_module(
"Net",
initialize = function() {
self$conv1 <- nn_conv2d(1, 32, 3, 1)
self$conv2 <- nn_conv2d(32, 64, 3, 1)
self$dropout1 <- nn_dropout2d(0.25)
self$dropout2 <- nn_dropout2d(0.5)
self$fc1 <- nn_linear(9216, 128)
self$fc2 <- nn_linear(128, 10)
},
forward = function(x) {
x %>% # N * 1 * 28 * 28
self$conv1() %>% # N * 32 * 26 * 26
nnf_relu() %>%
self$conv2() %>% # N * 64 * 24 * 24
nnf_relu() %>%
nnf_max_pool2d(2) %>% # N * 64 * 12 * 12
self$dropout1() %>%
torch_flatten(start_dim = 2) %>% # N * 9216
self$fc1() %>% # N * 128
nnf_relu() %>%
self$dropout2() %>%
self$fc2() # N * 10
}
)
# Train
fitted <- net %>%
setup(
loss = nn_cross_entropy_loss(),
optimizer = optim_adam,
metrics = list(
luz_metric_accuracy()
)
) %>%
fit(train_dl, epochs = 10, valid_data = test_dl)
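# Evaluation sketch (an added illustration, assuming the standard luz interface):
# evaluate() reports loss and the accuracy metric on held-out data, and predict()
# returns the raw class scores for the test set.
fitted %>% evaluate(test_dl)                            # held-out loss and accuracy
preds <- fitted %>% predict(test_dl)                    # N x 10 tensor of class scores
pred_classes <- as_array(torch_argmax(preds, dim = 2))  # class index 1..10 corresponds to digits 0..9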
...@@ -19,13 +19,13 @@ library(pROC) ...@@ -19,13 +19,13 @@ library(pROC)
library(cito) library(cito)
source("R/utils.R") source("R/utils.R")
con = db_connect() con = db_connect()
sf::sf_use_s2(use_s2 = FALSE) sf::sf_use_s2(use_s2 = FALSE)
# ---------------------------------------------------------------------------# # ---------------------------------------------------------------------------#
# Prepare Geodata #### # Prepare Geodata ####
# ---------------------------------------------------------------------------# # ---------------------------------------------------------------------------#
raster_info = tbl(con, "datasets") %>% raster_info = tbl(con, "datasets") %>%
dplyr::filter(stringr::str_detect(name, "CHELSA")) %>% dplyr::filter(stringr::str_detect(name, "CHELSA")) %>%
collect() collect()
...@@ -40,11 +40,11 @@ sa_polygon = rnaturalearth::ne_countries() %>% ...@@ -40,11 +40,11 @@ sa_polygon = rnaturalearth::ne_countries() %>%
# ---------------------------------------------------------------------------# # ---------------------------------------------------------------------------#
# Prepare Occurrence Data #### # Prepare Occurrence Data ####
# ---------------------------------------------------------------------------# # ---------------------------------------------------------------------------#
load("r_objects/test_species.RData") load("data/r_objects/range_maps.RData")
test_species = unique(test_species$name_matched) target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])
occs = tbl(con, "species_occurrences") %>% occs = tbl(con, "species_occurrences") %>%
dplyr::filter(species %in% test_species) %>% dplyr::filter(species %in% target_species) %>%
dplyr::select(-year) %>% dplyr::select(-year) %>%
dplyr::distinct() %>% dplyr::distinct() %>%
collect() %>% collect() %>%
...@@ -81,10 +81,13 @@ occs_final = occs %>% ...@@ -81,10 +81,13 @@ occs_final = occs %>%
inner_join(env_vars, by = "coordinate_id") %>% inner_join(env_vars, by = "coordinate_id") %>%
dplyr::select(-coordinate_id) dplyr::select(-coordinate_id)
save(occs_final, file = "r_objects/occs_final.RData") save(occs_final, file = "data/r_objects/occs_final.RData")
# ---------------------------------------------------------------------------# # ---------------------------------------------------------------------------#
# Main loop #### # Main loop ####
# ---------------------------------------------------------------------------# # ---------------------------------------------------------------------------#
load("data/r_objects/occs_final.RData")
load("data/r_objects/sa_polygon.RData")
occs_split = split(occs_final, occs_final$species) occs_split = split(occs_final, occs_final$species)
future::plan("multisession", workers = 16) future::plan("multisession", workers = 16)
...@@ -130,7 +133,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se ...@@ -130,7 +133,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se
terra::extract(sample_points) %>% terra::extract(sample_points) %>%
dplyr::select(-ID) %>% dplyr::select(-ID) %>%
dplyr::mutate( dplyr::mutate(
presence = "absent", presence = 0,
geometry = sample_points$x geometry = sample_points$x
) %>% ) %>%
tibble() tibble()
...@@ -139,9 +142,11 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se ...@@ -139,9 +142,11 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se
# Create modeling dataset #### # Create modeling dataset ####
# ------------------------------- # # ------------------------------- #
model_data = occs_spec %>% model_data = occs_spec %>%
dplyr::mutate(presence = "present") %>% dplyr::mutate(presence = 1) %>%
bind_rows(abs_spec) %>% bind_rows(abs_spec) %>%
dplyr::mutate(presence = as.factor(presence)) dplyr::mutate(
presence_fct = as.factor(ifelse(presence == 0, "absent", "present"))
)
# Define cross-validation folds # Define cross-validation folds
spatial_folds = blockCV::cv_spatial( spatial_folds = blockCV::cv_spatial(
...@@ -162,10 +167,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se ...@@ -162,10 +167,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se
# Define predictor columns # Define predictor columns
predictors = paste0("layer_", 1:19) predictors = paste0("layer_", 1:19)
# ------------------------------- # # Define empty performance summary
# Train models ####
# ------------------------------- #
# Preparation
na_performance = list( na_performance = list(
AUC = NA, AUC = NA,
Accuracy = NA, Accuracy = NA,
...@@ -175,18 +177,42 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se ...@@ -175,18 +177,42 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se
F1 = NA F1 = NA
) )
# Feature selection # ------------------------------- #
# --> Very time consuming! # Train models ####
# ------------------------------- #
# feature_selection = rfe( ## cito ####
# x = dplyr::select(model_data, contains("layer")), # model_data_nn = model_data %>%
# y = model_data$presence, # dplyr::mutate(across(all_of(predictors), scale)) %>%
# rfeControl = rfeControl(functions = caretFuncs, # select(-species, -longitude, -latitude)
# method = "cv", #
# number = 3, # Number of folds # train_data_nn = model_data_nn[train_index, ]
# verbose = T) # test_data_nn = model_data_nn[-train_index, ]
#
# nn_fit = dnn(
# Y = train_data_nn$presence,
# X = as.matrix(train_data_nn[, predictors]),
# hidden = c(500L, 500L),
# loss = "binomial",
# activation = "leaky_relu",
# epochs = 2000L,
# lr = 0.02,
# dropout = 0.2, # Regularization
# burnin = Inf,
# optimizer = config_optimizer("adam", weight_decay = 0.001),
# lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 10, factor = 0.7),
# early_stopping = 500L, # stop training when validation loss does not decrease anymore
# validation = 0.2, # used for early stopping and lr_scheduler
# device = "cpu",
# bootstrap = 50L
# ) # )
#
# nn_fit$successfull
# preds = predict(nn_fit, type = "response") # --> Strange discrete steps of size 1/bootstrap_value
# plot(preds)
# Metrics::auc(train_data_nn$presence, round(preds[,1]))
## caret ####
# Define training routine
index_train = lapply(unique(sort(train_data$fold)), function(x){ index_train = lapply(unique(sort(train_data$fold)), function(x){
return(which(train_data$fold != x)) return(which(train_data$fold != x))
}) })
...@@ -207,7 +233,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se ...@@ -207,7 +233,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se
rf_fit = caret::train( rf_fit = caret::train(
x = train_data[, predictors], x = train_data[, predictors],
y = train_data$presence, y = train_data$presence_fct,
method = "rf", method = "rf",
metric = "ROC", metric = "ROC",
tuneGrid = rf_grid, tuneGrid = rf_grid,
...@@ -229,7 +255,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se ...@@ -229,7 +255,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se
gbm_fit = train( gbm_fit = train(
x = train_data[, predictors], x = train_data[, predictors],
y = train_data$presence, y = train_data$presence_fct,
method = "gbm", method = "gbm",
metric = "ROC", metric = "ROC",
verbose = F, verbose = F,
...@@ -245,7 +271,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se ...@@ -245,7 +271,7 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se
glm_performance = tryCatch({ glm_performance = tryCatch({
glm_fit = train( glm_fit = train(
x = train_data[, predictors], x = train_data[, predictors],
y = train_data$presence, y = train_data$presence_fct,
method = "glm", method = "glm",
family=binomial, family=binomial,
metric = "ROC", metric = "ROC",
...@@ -273,4 +299,4 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se ...@@ -273,4 +299,4 @@ model_results = furrr::future_map(occs_split, .options = furrr::furrr_options(se
return(performance_summary) return(performance_summary)
}) })
save(model_results, file = "r_objects/model_results.RData") save(model_results, file = "data/r_objects/model_results.RData")
...@@ -11,17 +11,17 @@ library(sf) ...@@ -11,17 +11,17 @@ library(sf)
library(plotly) library(plotly)
library(DT) library(DT)
load("../r_objects/model_results.RData") load("../data/r_objects/model_results.RData")
load("../r_objects/test_species.RData") load("../data/r_objects/range_maps.RData")
load("../r_objects/range_maps.RData") load("../data/r_objects/range_maps_gridded.RData")
load("../r_objects/range_maps_gridded.RData") load("../data/r_objects/occs_final.RData")
load("../r_objects/occs_final.RData") load("../data/r_objects/functional_groups.RData")
sf::sf_use_s2(use_s2 = FALSE) sf::sf_use_s2(use_s2 = FALSE)
``` ```
## Summary ## Summary
This document summarizes the performance of three SDM algorithms (Random Forest, Gradient Boosting Machine, Generalized Linear Model) for 96 South American terrestrial mammal species. We use six metrics (AUC, F1, kappa, accuracy, precision, and recall) to evaluate model performance and look at how performance varies with five factors (number of records, range size, range occupancy, spatial dispersion, and functional group). This document summarizes the performance of three SDM algorithms (Random Forest, Gradient Boosting Machine, Generalized Linear Model) for `{r} length(model_results)` South American mammal species. We use six metrics (AUC, F1, kappa, accuracy, precision, and recall) to evaluate model performance and look at how performance varies with five factors (number of records, range size, range coverage, range coverage bias, and functional group).
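The six metrics are computed per species and model from held-out predictions, mirroring the `evaluate_model()` helper in `R/utils.R`. A minimal sketch of that computation (assuming `preds` and `actual` are factors with levels "present"/"absent" and `probs` holds predicted presence probabilities; these are placeholder objects used only for illustration):

```{r metrics_sketch, eval=FALSE}
# Sketch: deriving the six reported metrics for one model and one species
conf_mat  <- caret::confusionMatrix(preds, actual, positive = "present")
auc       <- pROC::roc(actual, probs, levels = c("present", "absent"), direction = ">")$auc
accuracy  <- conf_mat$overall["Accuracy"]
kappa     <- conf_mat$overall["Kappa"]
precision <- conf_mat$byClass["Precision"]
recall    <- conf_mat$byClass["Recall"]
f1        <- conf_mat$byClass["F1"]
```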
Modeling decisions: Modeling decisions:
...@@ -34,12 +34,13 @@ Modeling decisions: ...@@ -34,12 +34,13 @@ Modeling decisions:
Key findings: Key findings:
- RF and GBM models generally performed better than GLM across metrics - RF performed best, GBM slightly worse, GLM worst
- More occurrence records and larger range sizes tended to improve model accuracy - More occurrence records and larger range sizes tended to improve model performance
- Higher range occupancy correlated with better performance. - Higher range coverage correlated with better performance.
- Spatial dispersion and functional group showed some impact but were less consistent - Range coverage bias and functional group showed some impact but were less consistent <!-- TODO: check after rerun -->
## Analysis ## Analysis
The table below shows the analysed modeling results. The table below shows the analysed modeling results.
```{r performance, echo = FALSE, message=FALSE, warnings=FALSE} ```{r performance, echo = FALSE, message=FALSE, warnings=FALSE}
...@@ -50,7 +51,7 @@ performance = model_results %>% ...@@ -50,7 +51,7 @@ performance = model_results %>%
dplyr::filter(!is.na(value)) %>% dplyr::filter(!is.na(value)) %>%
dplyr::mutate( dplyr::mutate(
metric = factor(metric, levels = c("auc", "kappa", "f1", "accuracy", "precision", "recall")), metric = factor(metric, levels = c("auc", "kappa", "f1", "accuracy", "precision", "recall")),
value = pmax(value, 0, na.rm = T) # Fix one weird instance of f1 < 0 value = round(pmax(value, 0, na.rm = T), 3) # Fix one weird instance of f1 < 0
) )
DT::datatable(performance) DT::datatable(performance)
...@@ -59,7 +60,7 @@ DT::datatable(performance) ...@@ -59,7 +60,7 @@ DT::datatable(performance)
### Number of records ### Number of records
- Model performance was generally better for species with more observations - Model performance was generally better for species with more observations
- No major improvements in model performance beyond \~500 observations - Very poor performance below 50-100 observations
```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE} ```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE}
df_plot = performance df_plot = performance
...@@ -89,7 +90,7 @@ plot <- plot_ly( ...@@ -89,7 +90,7 @@ plot <- plot_ly(
plot <- plot %>% plot <- plot %>%
layout( layout(
title = "Model Performance vs. Number of observations", title = "Model Performance vs. Number of observations",
xaxis = list(title = "Number of observations"), xaxis = list(title = "Number of observations", type = "log"),
yaxis = list(title = "Value"), yaxis = list(title = "Value"),
legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
margin = list(r = 150), # Add right margin to accommodate legend margin = list(r = 150), # Add right margin to accommodate legend
...@@ -115,12 +116,12 @@ bslib::card(plot, full_screen = T) ...@@ -115,12 +116,12 @@ bslib::card(plot, full_screen = T)
### Range characteristics ### Range characteristics
The spatial coverage of species occurrences was evaluated using a systematic hexagonal grid system, with each cell spanning 1 degree in both latitudinal and longitudinal dimensions. Two complementary metrics were used to assess range coverage: First, sampling completeness was quantified by determining the number of grid cells containing at least one occurrence record (*occupancy*). Second, we calculated the theoretical maximum number of occupied hexagons possible given the total number of observations (*spatial dispersion*).
#### Range size #### Range size
- Model performance was lower for small-ranged species Range size was calculated based on polygon layers from the IUCN Red List of Threatened Species (2016).
- No major improvements in model performance beyond range size of \~3M km²
- Model performance tended to be slightly higher for species with larger range size
- Only RF shows continuous performance improvements beyond range sizes of \~5M km²
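A minimal sketch of that range-size calculation (assuming `range_maps` is the sf polygon layer loaded above; `sf::st_area()` returns areas in m², converted to km² here):

```{r range_size_sketch, eval=FALSE}
# Sketch: range size (km²) per species from the merged IUCN range polygons
range_sizes = range_maps %>%
  dplyr::mutate(range_size = as.numeric(sf::st_area(geometry)) / 1e6) %>%  # m² -> km²
  sf::st_drop_geometry() %>%
  dplyr::select(name_matched, range_size)
```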
```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE} ```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE}
df_join = range_maps %>% df_join = range_maps %>%
...@@ -179,11 +180,17 @@ plot <- plot %>% ...@@ -179,11 +180,17 @@ plot <- plot %>%
bslib::card(plot, full_screen = T) bslib::card(plot, full_screen = T)
``` ```
#### Occupancy #### Range coverage
Species ranges were split into contiguous hexagonal grid cells of 1 degree diameter. Range coverage was then calculated as the number of grid cells containing at least one occurrence record divided by the total number of grid cells.
$$
RangeCoverage = \frac{N_{cells\_occ}}{N_{cells\_total}}
$$
- Models for species with higher range occupancy showed slightly better performance - Models for species with higher range coverage showed slightly better performance
```{r range_occupancy, echo = FALSE, message=FALSE, warnings=FALSE} ```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE}
df_cells_total = range_maps_gridded %>% df_cells_total = range_maps_gridded %>%
dplyr::rename("species" = name_matched) %>% dplyr::rename("species" = name_matched) %>%
group_by(species) %>% group_by(species) %>%
...@@ -199,8 +206,8 @@ df_cells_occ <- range_maps_gridded %>% ...@@ -199,8 +206,8 @@ df_cells_occ <- range_maps_gridded %>%
df_join = df_cells_total %>% df_join = df_cells_total %>%
dplyr::inner_join(df_cells_occ, by = "species") %>% dplyr::inner_join(df_cells_occ, by = "species") %>%
dplyr::mutate(occupancy = cells_occupied / cells_total) %>% dplyr::mutate(range_cov = cells_occupied / cells_total) %>%
dplyr::select(species, occupancy) dplyr::select(species, range_cov)
df_plot = performance %>% df_plot = performance %>%
inner_join(df_join, by = "species") inner_join(df_join, by = "species")
...@@ -208,14 +215,14 @@ df_plot = performance %>% ...@@ -208,14 +215,14 @@ df_plot = performance %>%
# Create base plot # Create base plot
plot <- plot_ly( plot <- plot_ly(
data = df_plot, data = df_plot,
x = ~occupancy, x = ~range_cov,
y = ~value, y = ~value,
color = ~model, color = ~model,
type = 'scatter', type = 'scatter',
mode = 'markers', mode = 'markers',
name = ~model, name = ~model,
hoverinfo = 'text', hoverinfo = 'text',
text = ~paste("Species:", species, "<br>Range occupancy:", round(occupancy, 3), "<br>Value:", round(value, 3)), text = ~paste("Species:", species, "<br>Range coverage:", round(range_cov, 3), "<br>Value:", round(value, 3)),
transforms = list( transforms = list(
list( list(
type = 'filter', type = 'filter',
...@@ -229,8 +236,8 @@ plot <- plot_ly( ...@@ -229,8 +236,8 @@ plot <- plot_ly(
# Add dropdown for selecting metric # Add dropdown for selecting metric
plot <- plot %>% plot <- plot %>%
layout( layout(
title = "Model Performance vs. Range occupancy", title = "Model Performance vs. Range coverage",
xaxis = list(title = "Range occupancy"), xaxis = list(title = "Range coverage"),
yaxis = list(title = "Value"), yaxis = list(title = "Value"),
legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
margin = list(r = 150), # Add right margin to accommodate legend margin = list(r = 150), # Add right margin to accommodate legend
...@@ -254,13 +261,19 @@ plot <- plot %>% ...@@ -254,13 +261,19 @@ plot <- plot %>%
bslib::card(plot, full_screen = T) bslib::card(plot, full_screen = T)
``` ```
#### Spatial dispersion #### Range coverage bias
- Models for species with higher spatial dispersion tended to exhibit lower performance Range coverage bias was calculated as the minimum of total grid cells and total occurrences divided by the number of occupied cells.
- Large spread, weak relationship $$
RangeCoverageBias = \frac{min(N_{cells\_total}, N_{obs\_total})}{N_{cells\_occupied}}
$$
```{r spatial_dispersion, echo = FALSE, message=FALSE, warnings=FALSE} Higher bias values indicate that occurrence records are spatially more clustered within the range of the species.
- There was no strong relationship between range coverage bias and model performance
```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE}
df_occs_total = occs_final %>% df_occs_total = occs_final %>%
st_drop_geometry() %>% st_drop_geometry() %>%
group_by(species) %>% group_by(species) %>%
...@@ -269,7 +282,7 @@ df_occs_total = occs_final %>% ...@@ -269,7 +282,7 @@ df_occs_total = occs_final %>%
df_join = df_occs_total %>% df_join = df_occs_total %>%
dplyr::inner_join(df_cells_total, by = "species") %>% dplyr::inner_join(df_cells_total, by = "species") %>%
dplyr::inner_join(df_cells_occ, by = "species") %>% dplyr::inner_join(df_cells_occ, by = "species") %>%
dplyr::mutate(dispersion = cells_occupied / pmin(cells_total, occs_total)) dplyr::mutate(range_bias = pmin(cells_total, occs_total) / cells_occupied)
df_plot = performance %>% df_plot = performance %>%
inner_join(df_join, by = "species") inner_join(df_join, by = "species")
...@@ -277,14 +290,14 @@ df_plot = performance %>% ...@@ -277,14 +290,14 @@ df_plot = performance %>%
# Create base plot # Create base plot
plot <- plot_ly( plot <- plot_ly(
data = df_plot, data = df_plot,
x = ~dispersion, x = ~range_bias,
y = ~value, y = ~value,
color = ~model, color = ~model,
type = 'scatter', type = 'scatter',
mode = 'markers', mode = 'markers',
name = ~model, name = ~model,
hoverinfo = 'text', hoverinfo = 'text',
text = ~paste("Species:", species, "<br>Dispersion:", round(dispersion, 3), "<br>Value:", round(value, 3)), text = ~paste("Species:", species, "<br>Range coverage bias:", round(range_bias, 3), "<br>Value:", round(value, 3)),
transforms = list( transforms = list(
list( list(
type = 'filter', type = 'filter',
...@@ -298,8 +311,8 @@ plot <- plot_ly( ...@@ -298,8 +311,8 @@ plot <- plot_ly(
# Add dropdown for selecting metric # Add dropdown for selecting metric
plot <- plot %>% plot <- plot %>%
layout( layout(
title = "Model Performance vs. Dispersion", title = "Model Performance vs. Range coverage bias",
xaxis = list(title = "Dispersion"), xaxis = list(title = "Range coverage bias", type = "log"),
yaxis = list(title = "Value"), yaxis = list(title = "Value"),
legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
margin = list(r = 150), # Add right margin to accommodate legend margin = list(r = 150), # Add right margin to accommodate legend
...@@ -325,15 +338,20 @@ bslib::card(plot, full_screen = T) ...@@ -325,15 +338,20 @@ bslib::card(plot, full_screen = T)
### Functional group ### Functional group
- No major difference in model performance among functional groups Functional groups were assigned based on taxonomic order. The following groupings were used:
```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE} | Functional group | Taxonomic orders |
df_join = test_species %>% |-----------------------|-----------------------------------------------------------------------|
dplyr::distinct(name_matched, functional_group) %>% | large ground-dwelling | Carnivora, Artiodactyla, Cingulata, Perissodactyla |
dplyr::mutate(functional_group = factor(functional_group, labels = c("arboreal", "large ground-dwelling", "small ground-dwelling"))) | small ground-dwelling | Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha |
| arboreal | Primates, Pilosa |
| flying | Chiroptera |
- Models for bats tended to perform slightly worse than for other groups.
```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE}
df_plot = performance %>% df_plot = performance %>%
dplyr::left_join(df_join, by = c("species" = "name_matched")) dplyr::left_join(functional_groups, by = c("species" = "name_matched"))
plot <- plot_ly( plot <- plot_ly(
data = df_plot, data = df_plot,
...@@ -359,7 +377,7 @@ plot <- plot_ly( ...@@ -359,7 +377,7 @@ plot <- plot_ly(
plot <- plot %>% plot <- plot %>%
layout( layout(
title = "Model Performance vs. Functional Group", title = "Model Performance vs. Functional Group",
xaxis = list(title = "Range occupancy"), xaxis = list(title = "Functional group"),
yaxis = list(title = "Value"), yaxis = list(title = "Value"),
legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot
margin = list(r = 150), # Add right margin to accommodate legend margin = list(r = 150), # Add right margin to accommodate legend
......
...@@ -3,8 +3,6 @@ library(Symobio) ...@@ -3,8 +3,6 @@ library(Symobio)
library(sf) library(sf)
library(rnaturalearth) library(rnaturalearth)
load("r_objects/test_species.RData")
sf::sf_use_s2(use_s2 = FALSE) sf::sf_use_s2(use_s2 = FALSE)
# Load range maps # Load range maps
...@@ -15,13 +13,16 @@ range_maps = st_read( ...@@ -15,13 +13,16 @@ range_maps = st_read(
) %>% ) %>%
dplyr::filter(!legend %in% c("Extinct", "Not Mapped")) dplyr::filter(!legend %in% c("Extinct", "Not Mapped"))
# Load South America polygon # Find species that intersect with Amazon
sa_polygon = rnaturalearth::ne_countries() %>% amazon = st_read(
dplyr::filter(continent == "South America") %>% "data/geospatial/amazonia_polygons/amazonia_polygons.shp",
sf::st_union() geometry_column = "geometry",
promote_to_multi = T
)
range_maps = st_filter(range_maps, amazon)
# Match names against GBIF backbone # Match names against GBIF backbone
maps_names_matched = lapply(unique(range_maps$binomial), function(name){ range_maps_names_matched = lapply(unique(range_maps$binomial), function(name){
tryCatch({ tryCatch({
match_result = Symobio::gbif_match_name(name = name) match_result = Symobio::gbif_match_name(name = name)
if(match_result$status != "ACCEPTED"){ if(match_result$status != "ACCEPTED"){
...@@ -34,25 +35,25 @@ maps_names_matched = lapply(unique(range_maps$binomial), function(name){ ...@@ -34,25 +35,25 @@ maps_names_matched = lapply(unique(range_maps$binomial), function(name){
return(NULL) return(NULL)
}) })
}) })
save(range_maps_names_matched, file = "data/r_objects/range_maps_names_matched.RData")
save(range_maps_names_matched, file = "r_objects/range_maps_names_matched.RData")
# Subset range maps to target species and focal region # Subset range maps to target species and focal region
names_subset = Filter(is.data.frame, range_maps_names_matched) %>% names_matched = Filter(is.data.frame, range_maps_names_matched) %>%
bind_rows() %>% bind_rows()
dplyr::filter(name_matched %in% test_species$name_matched)
range_maps = range_maps %>% range_maps = range_maps %>%
inner_join(names_subset, by = c("binomial" = "name_orig")) %>% inner_join(names_matched, by = c("binomial" = "name_orig")) %>%
group_by(name_matched) %>% group_by(name_matched) %>%
summarize(geometry = suppressMessages(st_union(geometry))) %>% summarize(geometry = suppressMessages(st_union(geometry)))
st_intersection(sa_polygon)
save(range_maps, file = "r_objects/range_maps.RData") save(range_maps, file = "data/r_objects/range_maps.RData")
# Gridded range maps # Gridded range maps
range_maps_gridded = st_make_grid(sa_polygon, square = FALSE, cellsize = 1) %>% range_maps_gridded = rnaturalearth::ne_countries() %>%
dplyr::filter(continent == "South America") %>%
sf::st_union() %>%
st_make_grid(square = FALSE, cellsize = 1) %>%
st_sf() %>% st_sf() %>%
st_join(range_maps, st_intersects, left = F) st_join(range_maps, st_intersects, left = F)
save(range_maps_gridded, file = "r_objects/range_maps_gridded.RData") save(range_maps_gridded, file = "data/r_objects/range_maps_gridded.RData")
...@@ -28,7 +28,7 @@ evaluate_model <- function(model, test_data, threshold = 0.5) { ...@@ -28,7 +28,7 @@ evaluate_model <- function(model, test_data, threshold = 0.5) {
# Predict probabilities # Predict probabilities
probs <- predict(model, test_data, type = "prob")$present probs <- predict(model, test_data, type = "prob")$present
preds <- predict(model, test_data, type = "raw") preds <- predict(model, test_data, type = "raw")
actual <- test_data$presence actual <- test_data$presence_fct
# Calculate AUC # Calculate AUC
auc <- pROC::roc(actual, probs, levels = c("present", "absent"), direction = ">")$auc auc <- pROC::roc(actual, probs, levels = c("present", "absent"), direction = ">")$auc
......
{ {
"R": { "R": {
"Version": "4.2.2", "Version": "4.3.0",
"Repositories": [ "Repositories": [
{ {
"Name": "CRAN", "Name": "CRAN",
...@@ -55,7 +55,7 @@ ...@@ -55,7 +55,7 @@
}, },
"MASS": { "MASS": {
"Package": "MASS", "Package": "MASS",
"Version": "7.3-58", "Version": "7.3-58.4",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
...@@ -66,11 +66,11 @@ ...@@ -66,11 +66,11 @@
"stats", "stats",
"utils" "utils"
], ],
"Hash": "4bf2c61e656bbc238634c798c208d386" "Hash": "a3142b2a022b8174ca675bc8b80cdc4e"
}, },
"Matrix": { "Matrix": {
"Package": "Matrix", "Package": "Matrix",
"Version": "1.5-1", "Version": "1.5-4",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
...@@ -82,7 +82,7 @@ ...@@ -82,7 +82,7 @@
"stats", "stats",
"utils" "utils"
], ],
"Hash": "539dc0c0c05636812f1080f473d2c177" "Hash": "e779c7d9f35cc364438578f334cffee2"
}, },
"ModelMetrics": { "ModelMetrics": {
"Package": "ModelMetrics", "Package": "ModelMetrics",
...@@ -242,7 +242,7 @@ ...@@ -242,7 +242,7 @@
}, },
"boot": { "boot": {
"Package": "boot", "Package": "boot",
"Version": "1.3-28", "Version": "1.3-28.1",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
...@@ -250,7 +250,7 @@ ...@@ -250,7 +250,7 @@
"graphics", "graphics",
"stats" "stats"
], ],
"Hash": "0baa960e3b49c6176a4f42addcbacc59" "Hash": "9a052fbcbe97a98ceb18dbfd30ebd96e"
}, },
"broom": { "broom": {
"Package": "broom", "Package": "broom",
...@@ -391,7 +391,7 @@ ...@@ -391,7 +391,7 @@
}, },
"class": { "class": {
"Package": "class", "Package": "class",
"Version": "7.3-20", "Version": "7.3-21",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
...@@ -400,7 +400,7 @@ ...@@ -400,7 +400,7 @@
"stats", "stats",
"utils" "utils"
], ],
"Hash": "da09d82223e669d270e47ed24ac8686e" "Hash": "8ae0d4328e2eb3a582dfd5391a3663b7"
}, },
"classInt": { "classInt": {
"Package": "classInt", "Package": "classInt",
...@@ -457,13 +457,13 @@ ...@@ -457,13 +457,13 @@
}, },
"codetools": { "codetools": {
"Package": "codetools", "Package": "codetools",
"Version": "0.2-18", "Version": "0.2-19",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
"R" "R"
], ],
"Hash": "019388fc48e48b3da0d3a76ff94608a8" "Hash": "c089a619a7fae175d149d89164f8c7d8"
}, },
"colorspace": { "colorspace": {
"Package": "colorspace", "Package": "colorspace",
...@@ -1226,7 +1226,7 @@ ...@@ -1226,7 +1226,7 @@
}, },
"lattice": { "lattice": {
"Package": "lattice", "Package": "lattice",
"Version": "0.20-45", "Version": "0.21-8",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
...@@ -1237,7 +1237,7 @@ ...@@ -1237,7 +1237,7 @@
"stats", "stats",
"utils" "utils"
], ],
"Hash": "b64cdbb2b340437c4ee047a1f4c4377b" "Hash": "0b8a6d63c8770f02a8b5635f3c431e6b"
}, },
"lava": { "lava": {
"Package": "lava", "Package": "lava",
...@@ -1355,7 +1355,7 @@ ...@@ -1355,7 +1355,7 @@
}, },
"mgcv": { "mgcv": {
"Package": "mgcv", "Package": "mgcv",
"Version": "1.8-41", "Version": "1.8-42",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
...@@ -1368,7 +1368,7 @@ ...@@ -1368,7 +1368,7 @@
"stats", "stats",
"utils" "utils"
], ],
"Hash": "6b3904f13346742caa3e82dd0303d4ad" "Hash": "3460beba7ccc8946249ba35327ba902a"
}, },
"mime": { "mime": {
"Package": "mime", "Package": "mime",
...@@ -1421,7 +1421,7 @@ ...@@ -1421,7 +1421,7 @@
}, },
"nlme": { "nlme": {
"Package": "nlme", "Package": "nlme",
"Version": "3.1-160", "Version": "3.1-162",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
...@@ -1431,7 +1431,7 @@ ...@@ -1431,7 +1431,7 @@
"stats", "stats",
"utils" "utils"
], ],
"Hash": "02e3c6e7df163aafa8477225e6827bc5" "Hash": "0984ce8da8da9ead8643c5cbbb60f83e"
}, },
"nloptr": { "nloptr": {
"Package": "nloptr", "Package": "nloptr",
...@@ -1790,13 +1790,13 @@ ...@@ -1790,13 +1790,13 @@
}, },
"renv": { "renv": {
"Package": "renv", "Package": "renv",
"Version": "1.0.5", "Version": "1.0.11",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
"utils" "utils"
], ],
"Hash": "32c3f93e8360f667ca5863272ec8ba6a" "Hash": "47623f66b4e80b3b0587bc5d7b309888"
}, },
"reprex": { "reprex": {
"Package": "reprex", "Package": "reprex",
...@@ -2100,7 +2100,7 @@ ...@@ -2100,7 +2100,7 @@
}, },
"survival": { "survival": {
"Package": "survival", "Package": "survival",
"Version": "3.4-0", "Version": "3.5-5",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Requirements": [ "Requirements": [
...@@ -2112,7 +2112,7 @@ ...@@ -2112,7 +2112,7 @@
"stats", "stats",
"utils" "utils"
], ],
"Hash": "04411ae66ab4659230c067c32966fc20" "Hash": "d683341b1fa2e8d817efde27d6e6d35b"
}, },
"sys": { "sys": {
"Package": "sys", "Package": "sys",
......
...@@ -2,10 +2,12 @@ ...@@ -2,10 +2,12 @@
local({ local({
# the requested version of renv # the requested version of renv
version <- "1.0.5" version <- "1.0.11"
attr(version, "sha") <- NULL attr(version, "sha") <- NULL
# the project directory # the project directory
project <- Sys.getenv("RENV_PROJECT")
if (!nzchar(project))
project <- getwd() project <- getwd()
# use start-up diagnostics if enabled # use start-up diagnostics if enabled
...@@ -96,6 +98,66 @@ local({ ...@@ -96,6 +98,66 @@ local({
unloadNamespace("renv") unloadNamespace("renv")
# load bootstrap tools # load bootstrap tools
ansify <- function(text) {
if (renv_ansify_enabled())
renv_ansify_enhanced(text)
else
renv_ansify_default(text)
}
renv_ansify_enabled <- function() {
override <- Sys.getenv("RENV_ANSIFY_ENABLED", unset = NA)
if (!is.na(override))
return(as.logical(override))
pane <- Sys.getenv("RSTUDIO_CHILD_PROCESS_PANE", unset = NA)
if (identical(pane, "build"))
return(FALSE)
testthat <- Sys.getenv("TESTTHAT", unset = "false")
if (tolower(testthat) %in% "true")
return(FALSE)
iderun <- Sys.getenv("R_CLI_HAS_HYPERLINK_IDE_RUN", unset = "false")
if (tolower(iderun) %in% "false")
return(FALSE)
TRUE
}
renv_ansify_default <- function(text) {
text
}
renv_ansify_enhanced <- function(text) {
# R help links
pattern <- "`\\?(renv::(?:[^`])+)`"
replacement <- "`\033]8;;ide:help:\\1\a?\\1\033]8;;\a`"
text <- gsub(pattern, replacement, text, perl = TRUE)
# runnable code
pattern <- "`(renv::(?:[^`])+)`"
replacement <- "`\033]8;;ide:run:\\1\a\\1\033]8;;\a`"
text <- gsub(pattern, replacement, text, perl = TRUE)
# return ansified text
text
}
renv_ansify_init <- function() {
envir <- renv_envir_self()
if (renv_ansify_enabled())
assign("ansify", renv_ansify_enhanced, envir = envir)
else
assign("ansify", renv_ansify_default, envir = envir)
}
`%||%` <- function(x, y) { `%||%` <- function(x, y) {
if (is.null(x)) y else x if (is.null(x)) y else x
} }
...@@ -129,6 +191,24 @@ local({ ...@@ -129,6 +191,24 @@ local({
} }
heredoc <- function(text, leave = 0) {
# remove leading, trailing whitespace
trimmed <- gsub("^\\s*\\n|\\n\\s*$", "", text)
# split into lines
lines <- strsplit(trimmed, "\n", fixed = TRUE)[[1L]]
# compute common indent
indent <- regexpr("[^[:space:]]", lines)
common <- min(setdiff(indent, -1L)) - leave
text <- paste(substring(lines, common), collapse = "\n")
# substitute in ANSI links for executable renv code
ansify(text)
}
startswith <- function(string, prefix) { startswith <- function(string, prefix) {
substring(string, 1, nchar(prefix)) == prefix substring(string, 1, nchar(prefix)) == prefix
} }
...@@ -288,8 +368,11 @@ local({ ...@@ -288,8 +368,11 @@ local({
quiet = TRUE quiet = TRUE
) )
if ("headers" %in% names(formals(utils::download.file))) if ("headers" %in% names(formals(utils::download.file))) {
args$headers <- renv_bootstrap_download_custom_headers(url) headers <- renv_bootstrap_download_custom_headers(url)
if (length(headers) && is.character(headers))
args$headers <- headers
}
do.call(utils::download.file, args) do.call(utils::download.file, args)
...@@ -368,10 +451,21 @@ local({ ...@@ -368,10 +451,21 @@ local({
for (type in types) { for (type in types) {
for (repos in renv_bootstrap_repos()) { for (repos in renv_bootstrap_repos()) {
# build arguments for utils::available.packages() call
args <- list(type = type, repos = repos)
# add custom headers if available -- note that
# utils::available.packages() will pass this to download.file()
if ("headers" %in% names(formals(utils::download.file))) {
headers <- renv_bootstrap_download_custom_headers(repos)
if (length(headers) && is.character(headers))
args$headers <- headers
}
# retrieve package database # retrieve package database
db <- tryCatch( db <- tryCatch(
as.data.frame( as.data.frame(
utils::available.packages(type = type, repos = repos), do.call(utils::available.packages, args),
stringsAsFactors = FALSE stringsAsFactors = FALSE
), ),
error = identity error = identity
...@@ -453,6 +547,14 @@ local({ ...@@ -453,6 +547,14 @@ local({
} }
renv_bootstrap_github_token <- function() {
for (envvar in c("GITHUB_TOKEN", "GITHUB_PAT", "GH_TOKEN")) {
envval <- Sys.getenv(envvar, unset = NA)
if (!is.na(envval))
return(envval)
}
}
renv_bootstrap_download_github <- function(version) { renv_bootstrap_download_github <- function(version) {
enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE")
...@@ -460,16 +562,16 @@ local({ ...@@ -460,16 +562,16 @@ local({
return(FALSE) return(FALSE)
# prepare download options # prepare download options
pat <- Sys.getenv("GITHUB_PAT") token <- renv_bootstrap_github_token()
if (nzchar(Sys.which("curl")) && nzchar(pat)) { if (nzchar(Sys.which("curl")) && nzchar(token)) {
fmt <- "--location --fail --header \"Authorization: token %s\"" fmt <- "--location --fail --header \"Authorization: token %s\""
extra <- sprintf(fmt, pat) extra <- sprintf(fmt, token)
saved <- options("download.file.method", "download.file.extra") saved <- options("download.file.method", "download.file.extra")
options(download.file.method = "curl", download.file.extra = extra) options(download.file.method = "curl", download.file.extra = extra)
on.exit(do.call(base::options, saved), add = TRUE) on.exit(do.call(base::options, saved), add = TRUE)
} else if (nzchar(Sys.which("wget")) && nzchar(pat)) { } else if (nzchar(Sys.which("wget")) && nzchar(token)) {
fmt <- "--header=\"Authorization: token %s\"" fmt <- "--header=\"Authorization: token %s\""
extra <- sprintf(fmt, pat) extra <- sprintf(fmt, token)
saved <- options("download.file.method", "download.file.extra") saved <- options("download.file.method", "download.file.extra")
options(download.file.method = "wget", download.file.extra = extra) options(download.file.method = "wget", download.file.extra = extra)
on.exit(do.call(base::options, saved), add = TRUE) on.exit(do.call(base::options, saved), add = TRUE)
...@@ -631,6 +733,9 @@ local({ ...@@ -631,6 +733,9 @@ local({
# if the user has requested an automatic prefix, generate it # if the user has requested an automatic prefix, generate it
auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA)
if (is.na(auto) && getRversion() >= "4.4.0")
auto <- "TRUE"
if (auto %in% c("TRUE", "True", "true", "1")) if (auto %in% c("TRUE", "True", "true", "1"))
return(renv_bootstrap_platform_prefix_auto()) return(renv_bootstrap_platform_prefix_auto())
...@@ -822,24 +927,23 @@ local({ ...@@ -822,24 +927,23 @@ local({
# the loaded version of renv doesn't match the requested version; # the loaded version of renv doesn't match the requested version;
# give the user instructions on how to proceed # give the user instructions on how to proceed
remote <- if (!is.null(description[["RemoteSha"]])) { dev <- identical(description[["RemoteType"]], "github")
remote <- if (dev)
paste("rstudio/renv", description[["RemoteSha"]], sep = "@") paste("rstudio/renv", description[["RemoteSha"]], sep = "@")
} else { else
paste("renv", description[["Version"]], sep = "@") paste("renv", description[["Version"]], sep = "@")
}
# display both loaded version + sha if available # display both loaded version + sha if available
friendly <- renv_bootstrap_version_friendly( friendly <- renv_bootstrap_version_friendly(
version = description[["Version"]], version = description[["Version"]],
sha = description[["RemoteSha"]] sha = if (dev) description[["RemoteSha"]]
) )
fmt <- paste( fmt <- heredoc("
"renv %1$s was loaded from project library, but this project is configured to use renv %2$s.", renv %1$s was loaded from project library, but this project is configured to use renv %2$s.
"- Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.", - Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.
"- Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.", - Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.
sep = "\n" ")
)
catf(fmt, friendly, renv_bootstrap_version_friendly(version), remote) catf(fmt, friendly, renv_bootstrap_version_friendly(version), remote)
FALSE FALSE
......