diff --git a/R/01_01_range_map_preparation.R b/R/01_01_range_map_preparation.R index 0a0717690f5370dd4fa90ba24f8c60dde544688a..eb33fdc1b726ab35bf7fa5bd97ec638740f30600 100644 --- a/R/01_01_range_map_preparation.R +++ b/R/01_01_range_map_preparation.R @@ -79,8 +79,8 @@ geometries_unique = range_maps_gridded_id %>% group_by(geom_id) %>% slice_head(n = 1) -geom_dist = sf::st_distance(geometries_unique, geometries_unique) # Takes ~ 10 mins - %>% as.matrix() +geom_dist = sf::st_distance(geometries_unique, geometries_unique) %>% # Takes ~ 10 mins + as.matrix() range_maps_split = range_maps_gridded_id %>% group_by(name_matched) %>% diff --git a/R/03_02_absence_preparation.R b/R/03_02_absence_preparation.R index 914ddb726898e633499a027f9b82159925b5b2f3..b148c644cd3c799537e98bd6365449038a2db921 100644 --- a/R/03_02_absence_preparation.R +++ b/R/03_02_absence_preparation.R @@ -49,9 +49,9 @@ range_maps = st_transform(range_maps, proj_string) # geographically close (reproduce sampling biases) but environmentally # # dissimilar (avoid false negatives) to the known occurrences # # ---------------------------------------------------------------------------# -future::plan("multisession", workers = 16) +future::plan("multisession", workers = 24) -model_data = furrr::future_walk( +furrr::future_walk( .x = target_species, .options = furrr::furrr_options(seed = 42, scheduling = FALSE), # make sure workers get constant stream of work .env_globals = c(raster_filepaths, sa_polygon, occs_final, range_maps, proj_string), @@ -126,13 +126,13 @@ model_data = furrr::future_walk( y = y + runif(nrow(.), -5000, 5000)) %>% # Add jitter (res/2) to cell centroids st_as_sf(coords = c("x", "y"), crs = proj_string) - sample_abs = bind_cols( - terra::extract(raster_data, terra::vect(sample_points), ID = FALSE) - ) %>% - bind_cols(sample_points) %>% + sample_abs = sample_points %>% + bind_cols( + terra::extract(raster_data, terra::vect(sample_points), ID = FALSE) + ) %>% drop_na() - abs_spec_list[[length(abs_spec_list)+1]] = sample_points + abs_spec_list[[length(abs_spec_list)+1]] = sample_abs samples_required = samples_required - nrow(sample_points) # Sometimes there are no env data for sample points, so keep sampling } @@ -154,7 +154,7 @@ model_data = furrr::future_walk( # Create presence-absence dataframe pa_spec = occs_spec %>% dplyr::mutate(present = 1) %>% - bind_rows(abs_spec) + bind_rows(abs_spec) ggplot() + ggtitle(species) + @@ -189,7 +189,6 @@ model_data = furrr::future_walk( }) pa_spec$fold_eval = folds - pa_spec$geometry = NULL save(pa_spec, file = paste0("data/r_objects/pa_sampling/", species, ".RData")) } diff --git a/R/04_01_ssdm_modeling.R b/R/04_01_ssdm_modeling.R index 959f77c38589b6ad8594c0a3d465aaa6f9907f7a..914c1aa2990cacea42993a6cf5775d6954dc51b3 100644 --- a/R/04_01_ssdm_modeling.R +++ b/R/04_01_ssdm_modeling.R @@ -1,156 +1,208 @@ -library(dplyr) -library(tidyr) library(furrr) -library(caret) +library(progressr) +library(dplyr) library(cito) -library(pROC) source("R/utils.R") -load("data/r_objects/model_data.RData") - -data_split = split(model_data, model_data$species) - # ----------------------------------------------------------------------# -# Train models #### +# Define Training function #### # ----------------------------------------------------------------------# -future::plan("multisession", workers = 8) -ssdm_results = furrr::future_map(data_split, .options = furrr::furrr_options(seed = 123), .f = function(data_spec){ - # Initial check - if(nrow(data_spec) < 10 | anyNA(data_spec$folds)){ - return(NULL) - } - - data_spec$present_fct = factor(data_spec$present, levels = c("0", "1"), labels = c("A", "P")) - train_data = dplyr::filter(data_spec, train == 1) - test_data = dplyr::filter(data_spec, train == 0) - - # Define empty result for performance eval - na_performance = list( - AUC = NA, - Accuracy = NA, - Kappa = NA, - Precision = NA, - Recall = NA, - F1 = NA - ) - - # Define predictors - predictors = paste0("layer_", 1:19) - - # Define caret training routine ##### - index_train = lapply(unique(sort(train_data$fold)), function(x){ - return(which(train_data$fold != x)) - }) +train_models = function(pa_files){ + future::plan("multisession", workers = 4) + p = progressor(along = pa_files) - train_ctrl = trainControl( - search = "grid", - classProbs = TRUE, - index = index_train, - summaryFunction = twoClassSummary, - savePredictions = "final" + furrr::future_walk( + .x = pa_files, + .options = furrr::furrr_options( + seed = 123, + packages = c("dplyr", "tidyr", "sf", "caret", "cito", "pROC"), + scheduling = FALSE # make sure workers get constant stream of work + ), + .f = function(pa_file){ + load(pa_file) + species = pa_spec$species[1] + p(sprintf("species=%s", species)) + + if(all(is.na(pa_spec$fold_eval))){ + warning("Too few samples") + return() + } + + # Define empty result for performance eval + na_performance = list( + AUC = NA, + Accuracy = NA, + Kappa = NA, + Precision = NA, + Recall = NA, + F1 = NA + ) + + # Create factor presence column + pa_spec$present_fct = factor(pa_spec$present, levels = c("0", "1"), labels = c("A", "P")) + + # Outer CV loop (for averaging performance metrics) + performance_cv = lapply(sort(unique(pa_spec$fold_eval)), function(k){ + data_test = dplyr::filter(pa_spec, fold_eval == k) + data_train = dplyr::filter(pa_spec, fold_eval != k) + + if(nrow(data_test) == 0 || nrow(data_train) == 0){ + warning("Too few samples") + } + + # Create inner CV folds for model training + cv_train = blockCV::cv_spatial( + data_train, + column = "present", + k = 5, + progress = F, plot = F, report = F + ) + data_train$fold_train = cv_train$folds_ids + + # Drop geometry + data_train$geometry = NULL + data_test$geometry = NULL + + # Define caret training routine ##### + index_train = lapply(unique(sort(data_train$fold_train)), function(x){ + return(which(data_train$fold_train != x)) + }) + + train_ctrl = trainControl( + search = "random", + classProbs = TRUE, + index = index_train, + summaryFunction = twoClassSummary, + savePredictions = "final", + ) + + # Define predictors + predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness") + + # Random Forest ##### + rf_performance = tryCatch({ + # Fit model + rf_fit = caret::train( + x = data_train[, predictors], + y = data_train$present_fct, + method = "rf", + trControl = train_ctrl, + tuneLength = 4, + verbose = F + ) + + evaluate_model(rf_fit, data_test) + }, error = function(e){ + na_performance + }) + + # Gradient Boosted Machine #### + gbm_performance = tryCatch({ + gbm_fit = train( + x = data_train[, predictors], + y = data_train$present_fct, + method = "gbm", + trControl = train_ctrl, + tuneLength = 4, + verbose = F + ) + evaluate_model(gbm_fit, data_test) + }, error = function(e){ + na_performance + }) + + # Generalized Additive Model #### + gam_performance = tryCatch({ + gam_fit = train( + x = data_train[, predictors], + y = data_train$present_fct, + method = "gamSpline", + tuneLength = 4, + trControl = train_ctrl + ) + evaluate_model(gam_fit, data_test) + }, error = function(e){ + na_performance + }) + + # Neural Network #### + nn_performance = tryCatch({ + formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'))) + + nn_fit = dnn( + formula, + data = data_train, + hidden = c(100L, 100L, 100L), + loss = "binomial", + activation = c("leaky_relu", "leaky_relu", "leaky_relu"), + epochs = 500L, + burnin = 100L, + lr = 0.001, + batchsize = max(nrow(data_train)/10, 64), + dropout = 0.1, + optimizer = config_optimizer("adam", weight_decay = 0.001), + lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 50, factor = 0.7), + early_stopping = 100, + validation = 0.2, + device = "cuda", + verbose = F, + plot = F + ) + + if(nn_fit$successfull == 1){ + evaluate_model(nn_fit, data_test) + } else { + na_performance + } + }, error = function(e){ + na_performance + }) + + # Summarize results + performance_summary = tibble( + species = !!species, + obs = nrow(data_train), + fold_eval = k, + model = c("RF", "GBM", "GAM", "NN"), + auc = c(rf_performance$AUC, gbm_performance$AUC, gam_performance$AUC, nn_performance$AUC), + accuracy = c(rf_performance$Accuracy, gbm_performance$Accuracy, gam_performance$Accuracy, nn_performance$Accuracy), + kappa = c(rf_performance$Kappa, gbm_performance$Kappa, gam_performance$Kappa, nn_performance$Kappa), + precision = c(rf_performance$Precision, gbm_performance$Precision, gam_performance$Precision, nn_performance$Precision), + recall = c(rf_performance$Recall, gbm_performance$Recall, gam_performance$Recall, nn_performance$Recall), + f1 = c(rf_performance$F1, gbm_performance$F1, gam_performance$F1, nn_performance$F1) + ) %>% + pivot_longer(all_of(c("auc", "accuracy", "kappa", "precision", "recall", "f1")), names_to = "metric", values_to = "value") + + return(performance_summary) + }) + + # Combine and save evaluation results + performance_spec = bind_rows(performance_cv) + save(performance_spec, file = paste0("data/r_objects/model_results/", species, ".RData")) + } ) - - # Random Forest ##### - rf_performance = tryCatch({ - rf_grid = expand.grid( - mtry = c(3,7,11,15,19) # Number of randomly selected predictors - ) - - rf_fit = caret::train( - x = train_data[, predictors], - y = train_data$present_fct, - method = "rf", - metric = "ROC", - tuneGrid = rf_grid, - trControl = train_ctrl - ) - evaluate_model(rf_fit, test_data) - }, error = function(e){ - na_performance - }) - - # Gradient Boosted Machine #### - gbm_performance = tryCatch({ - gbm_grid <- expand.grid( - n.trees = c(100, 500, 1000, 1500), # number of trees - interaction.depth = c(3, 5, 7), # Maximum depth of each tree - shrinkage = c(0.01, 0.005, 0.001), # Lower learning rates - n.minobsinnode = c(10, 20) # Minimum number of observations in nodes - ) - - gbm_fit = train( - x = train_data[, predictors], - y = train_data$present_fct, - method = "gbm", - metric = "ROC", - verbose = F, - tuneGrid = gbm_grid, - trControl = train_ctrl - ) - evaluate_model(gbm_fit, test_data) - }, error = function(e){ - na_performance - }) - - # Generalized additive Model #### - glm_performance = tryCatch({ - glm_fit = train( - x = train_data[, predictors], - y = train_data$present_fct, - method = "glm", - family=binomial, - metric = "ROC", - preProcess = c("center", "scale"), - trControl = train_ctrl - ) - evaluate_model(glm_fit, test_data) - }, error = function(e){ - na_performance - }) - - # Neural Network #### - nn_performance = tryCatch({ - nn_fit = dnn( - X = train_data[, predictors], - Y = train_data$present, - hidden = c(200L, 200L, 200L), - loss = "binomial", - activation = c("sigmoid", "leaky_relu", "leaky_relu"), - epochs = 500L, - lr = 0.02, - baseloss=10, - batchsize=nrow(train_data)/4, - dropout = 0.1, # Regularization - optimizer = config_optimizer("adam", weight_decay = 0.001), - lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7), - early_stopping = 200, # stop training when validation loss does not decrease anymore - validation = 0.3, # used for early stopping and lr_scheduler - device = "cuda", - bootstrap = 5 - ) - - evaluate_model(nn_fit, test_data) - }, error = function(e){ - na_performance - }) - - # Summarize results - performance_summary = tibble( - species = data_spec$species[1], - obs = nrow(data_spec), - model = c("SSDM_RF", "SSDM_GBM", "SSDM_GLM", "SSDM_NN"), - auc = c(rf_performance$AUC, gbm_performance$AUC, glm_performance$AUC, nn_performance$AUC), - accuracy = c(rf_performance$Accuracy, gbm_performance$Accuracy, glm_performance$Accuracy, nn_performance$Accuracy), - kappa = c(rf_performance$Kappa, gbm_performance$Kappa, glm_performance$Kappa, nn_performance$Kappa), - precision = c(rf_performance$Precision, gbm_performance$Precision, glm_performance$Precision, nn_performance$Precision), - recall = c(rf_performance$Recall, gbm_performance$Recall, glm_performance$Recall, nn_performance$Recall), - f1 = c(rf_performance$F1, gbm_performance$F1, glm_performance$F1, nn_performance$F1) - ) - - return(performance_summary) -}) +} -ssdm_results = bind_rows(ssdm_results) +# ----------------------------------------------------------------------# +# Run training #### +# ----------------------------------------------------------------------# +handlers(global = TRUE) +handlers("progress") + +pa_files = list.files("data/r_objects/pa_sampling/", full.names = T) +species_processed = stringr::str_remove_all(list.files("data/r_objects/model_results/"), ".RData") +pa_files_run = pa_files[!sapply(pa_files, function(x){any(stringr::str_detect(x, species_processed))})] + +train_models(pa_files_run) + +# ----------------------------------------------------------------------# +# Combine results #### +# ----------------------------------------------------------------------# +files = list.files("data/r_objects/model_results/", full.names = T) +ssdm_results = lapply(files, function(f){load(f); return(performance_spec)}) %>% + bind_rows() + #dplyr::group_by(species, model, metric) %>% + #dplyr::summarize(value = mean(value, na.rm =T)) save(ssdm_results, file = "data/r_objects/ssdm_results.RData") diff --git a/R/04_03_msdm_embed_raw.R b/R/04_03_msdm_embed_raw.R index 6e7924a172401b7f428a55183f580eafa8ec0bc5..6b2df020f31d9ad7f1a666bebd1e6675ff230a9f 100644 --- a/R/04_03_msdm_embed_raw.R +++ b/R/04_03_msdm_embed_raw.R @@ -4,37 +4,41 @@ library(cito) source("R/utils.R") -load("data/r_objects/model_data.RData") +load("data/r_objects/model_data_pa_sampling.RData") -model_data$species_int = as.integer(as.factor(model_data$species)) - -train_data = dplyr::filter(model_data, train == 1) -test_data = dplyr::filter(model_data, train == 0) +model_data = model_data %>% + dplyr::mutate(species_int = as.integer(as.factor(model_data$species))) %>% + sf::st_drop_geometry() + +test_data = dplyr::filter(model_data, fold_eval == 1) %>% + dplyr::select(-fold_eval) +train_data = dplyr::filter(model_data, fold_eval != 1) %>% + dplyr::select(-fold_eval) # ----------------------------------------------------------------------# # Train model #### # ----------------------------------------------------------------------# -predictors = paste0("layer_", 1:19) -formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species_int, dim = 50, lambda = 0.000001)")) +predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness") +formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species_int, dim = 50, train = T, lambda = 0.0001)")) -plot(1, type="n", xlab="", ylab="", xlim=c(0, 15000), ylim=c(0, 0.7)) # empty plot with better limits, draw points in there +plot(1, type="n", xlab="", ylab="", xlim=c(0, 25000), ylim=c(0, 0.7)) # empty plot with better limits, draw points in there msdm_fit_embedding_raw = dnn( formula, data = train_data, hidden = c(500L, 500L, 500L), loss = "binomial", activation = c("sigmoid", "leaky_relu", "leaky_relu"), - epochs = 15000L, + epochs = 10L, lr = 0.01, baseloss = 1, batchsize = nrow(train_data), dropout = 0.1, - burnin = 100, + burnin = 500, optimizer = config_optimizer("adam", weight_decay = 0.001), - lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7), - early_stopping = 250, - validation = 0.3, - device = "cuda", + lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 150, factor = 0.7), + early_stopping = 400, + validation = 0.2, + device = "cuda" ) save(msdm_fit_embedding_raw, file = "data/r_objects/msdm_fit_embedding_raw.RData") @@ -44,20 +48,22 @@ save(msdm_fit_embedding_raw, file = "data/r_objects/msdm_fit_embedding_raw.RData # ----------------------------------------------------------------------# load("data/r_objects/msdm_fit_embedding_raw.RData") -data_split = split(model_data, model_data$species) +data_split = test_data %>% + split(test_data$species) msdm_results_embedding_raw = lapply(data_split, function(data_spec){ - test_data = dplyr::filter(data_spec, train == 0) %>% + species = data_spec$species[1] + data_spec = data_spec %>% dplyr::select(-species) msdm_performance = tryCatch({ - evaluate_model(msdm_fit_embedding_raw, test_data) + evaluate_model(msdm_fit_embedding_raw, data_spec) }, error = function(e){ list(AUC = NA, Accuracy = NA, Kappa = NA, Precision = NA, Recall = NA, F1 = NA) }) performance_summary = tibble( - species = data_spec$species[1], + species = !!species, obs = nrow(data_spec), model = "MSDM_embed", auc = msdm_performance$AUC, diff --git a/R/04_07_msdm_embed_multi_nolonlat.R b/R/04_07_msdm_embed_multi_nolonlat.R index 2044e0309227be71dd7a3dccac1f153d27ced97b..15d37e9828461494f0458566989968d75530ee64 100644 --- a/R/04_07_msdm_embed_multi_nolonlat.R +++ b/R/04_07_msdm_embed_multi_nolonlat.R @@ -1,10 +1,11 @@ library(dplyr) library(tidyr) library(cito) +library(sf) source("R/utils.R") -load("data/r_objects/model_data.RData") +load("data/r_objects/model_data_pa_sampling.RData") load("data/r_objects/func_dist.RData") load("data/r_objects/phylo_dist.RData") load("data/r_objects/range_dist.RData") @@ -18,11 +19,13 @@ model_species = Reduce( ) model_data_final = model_data %>% + sf::st_drop_geometry() %>% dplyr::filter(species %in% !!model_species) %>% dplyr::mutate(species_int = as.integer(as.factor(species))) + -train_data = dplyr::filter(model_data_final, train == 1) -test_data = dplyr::filter(model_data_final, train == 0) +test_data = dplyr::filter(model_data_final, fold_eval == 1) +train_data = dplyr::filter(model_data_final, fold_eval != 1) # Create embeddings func_ind = match(model_species, colnames(func_dist)) @@ -41,14 +44,15 @@ range_embeddings = eigen(range_dist)$vectors[,1:20] # ----------------------------------------------------------------------# # Train model #### # ----------------------------------------------------------------------# -predictors = paste0("layer_", 1:19) +# Define predictors +predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness") formula = as.formula( paste0("present ~ ", paste(predictors, collapse = '+'), - " + e(species_int, weights = func_embeddings, lambda = 0.00001, train = F)", - " + e(species_int, weights = phylo_embeddings, lambda = 0.00001, train = F)", - " + e(species_int, weights = range_embeddings, lambda = 0.00001, train = F)" + " + e(species_int, weights = func_embeddings, lambda = 0.00001, train = T)", + " + e(species_int, weights = phylo_embeddings, lambda = 0.00001, train = T)", + " + e(species_int, weights = range_embeddings, lambda = 0.00001, train = T)" ) ) @@ -56,7 +60,7 @@ plot(1, type="n", xlab="", ylab="", xlim=c(0, 25000), ylim=c(0, 0.7)) # empty pl msdm_fit_embedding_multi_nolonlat = dnn( formula, data = train_data, - hidden = c(500L, 500L, 500L), + hidden = c(200L, 200L, 200L), loss = "binomial", activation = c("sigmoid", "leaky_relu", "leaky_relu"), epochs = 30000L, diff --git a/R/04_07_msdm_oneoff.R b/R/04_07_msdm_oneoff.R new file mode 100644 index 0000000000000000000000000000000000000000..4ef2a031a860a28d7f7f6596a94db2d617f85e17 --- /dev/null +++ b/R/04_07_msdm_oneoff.R @@ -0,0 +1,111 @@ +library(dplyr) +library(tidyr) +library(cito) +library(sf) + +source("R/utils.R") + +load("data/r_objects/model_data_pa_sampling.RData") +load("data/r_objects/func_dist.RData") +load("data/r_objects/phylo_dist.RData") +load("data/r_objects/range_dist.RData") + +# ----------------------------------------------------------------------# +# Prepare data #### +# ----------------------------------------------------------------------# +model_species = Reduce( + intersect, + list(unique(model_data$species), colnames(range_dist), colnames(phylo_dist), colnames(func_dist)) +) + +model_data_final = model_data %>% + sf::st_drop_geometry() %>% + dplyr::filter(species %in% !!model_species) %>% + dplyr::mutate(species_int = as.integer(as.factor(species))) + + +test_data = dplyr::filter(model_data_final, fold_eval == 1) +train_data = dplyr::filter(model_data_final, fold_eval != 1) + +# Create embeddings +func_ind = match(model_species, colnames(func_dist)) +func_dist = func_dist[func_ind, func_ind] +func_embeddings = eigen(func_dist)$vectors[,1:20] + +phylo_ind = match(model_species, colnames(phylo_dist)) +phylo_dist = phylo_dist[phylo_ind, phylo_ind] +phylo_embeddings = eigen(phylo_dist)$vectors[,1:20] + +range_ind = match(model_species, colnames(range_dist)) +range_dist = range_dist[range_ind, range_ind] +range_embeddings = eigen(range_dist)$vectors[,1:20] + + +# ----------------------------------------------------------------------# +# Train model #### +# ----------------------------------------------------------------------# +# Define predictors +predictors = c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness") + +formula = as.formula( + paste0("present ~ ", + paste(predictors, collapse = '+'), + " + e(species_int, weights = func_embeddings, lambda = 0.00001, train = T)", + " + e(species_int, weights = phylo_embeddings, lambda = 0.00001, train = T)", + " + e(species_int, weights = range_embeddings, lambda = 0.00001, train = T)" + ) +) + +plot(1, type="n", xlab="", ylab="", xlim=c(0, 25000), ylim=c(0, 0.7)) # empty plot with better limits, draw points in there +msdm_fit_embedding_multi_nolonlat = dnn( + formula, + data = train_data, + hidden = c(400L, 400L, 400L), + loss = "binomial", + activation = c("sigmoid", "leaky_relu", "leaky_relu"), + epochs = 30000L, + lr = 0.01, + baseloss = 1, + batchsize = nrow(train_data), + dropout = 0.1, + burnin = 100, + optimizer = config_optimizer("adam", weight_decay = 0.001), + lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 150, factor = 0.7), + early_stopping = 250, + validation = 0.3, + device = "cuda", +) +save(msdm_fit_embedding_multi_nolonlat, file = "data/r_objects/msdm_fit_embedding_multi_nolonlat.RData") + +# ----------------------------------------------------------------------# +# Evaluate results #### +# ----------------------------------------------------------------------# +load("data/r_objects/msdm_fit_embedding_multi_nolonlat.RData") +data_split = test_data %>% + group_by(species_int) %>% + group_split() + +msdm_results_embedding_multi_nolonlat = lapply(data_split, function(data_spec){ + target_species = data_spec$species[1] + data_spec = dplyr::select(data_spec, -species) + + msdm_performance = tryCatch({ + evaluate_model(msdm_fit_embedding_multi_nolonlat, data_spec) + }, error = function(e){ + list(AUC = NA, Accuracy = NA, Kappa = NA, Precision = NA, Recall = NA, F1 = NA) + }) + + performance_summary = tibble( + species = !!target_species, + obs = length(which(model_data$species == target_species)), + model = "MSDM_embed_informed_multi_nolonlat", + auc = msdm_performance$AUC, + accuracy = msdm_performance$Accuracy, + kappa = msdm_performance$Kappa, + precision = msdm_performance$Precision, + recall = msdm_performance$Recall, + f1 = msdm_performance$F1 + ) +}) %>% bind_rows() + +save(msdm_results_embedding_multi_nolonlat, file = "data/r_objects/msdm_results_embedding_multi_nolonlat.RData") diff --git a/R/05_01_performance_analysis.qmd b/R/05_01_performance_analysis.qmd index e2c2f330a395e7f3bdabb992b950e2284d648033..17be731c8ac03a0e4ae1cec80991f6b322a97be0 100644 --- a/R/05_01_performance_analysis.qmd +++ b/R/05_01_performance_analysis.qmd @@ -11,12 +11,10 @@ library(sf) library(plotly) library(DT) - +load("../data/r_objects/model_data_pa_sampling.RData") load("../data/r_objects/ssdm_results.RData") -load("../data/r_objects/msdm_fit.RData") -load("../data/r_objects/msdm_results.RData") -load("../data/r_objects/msdm_results_embedding_trained.RData") -load("../data/r_objects/msdm_results_multiclass.RData.") +load("../data/r_objects/msdm_results_embedding_raw.RData") + load("../data/r_objects/range_maps.RData") load("../data/r_objects/range_maps_gridded.RData") load("../data/r_objects/occs_final.RData") @@ -25,14 +23,15 @@ sf::sf_use_s2(use_s2 = FALSE) ``` ```{r globals, echo = FALSE, include = FALSE} -# Select metrics -focal_metrics = c("auc", "f1", "accuracy") # There's a weird bug in plotly that scrambles up lines when using more than three groups -# Dropdown options -plotly_buttons = list() -for(metric in focal_metrics){ - plotly_buttons[[length(plotly_buttons) + 1]] = list(method = "restyle", args = list("transforms[0].value", metric), label = metric) -} + +# Count occs per species +obs_count = model_data %>% + sf::st_drop_geometry() %>% + dplyr::filter(present == 1) %>% + dplyr::group_by(species) %>% + dplyr::summarise(obs = n()) + # Regression functions asym_regression = function(x, y){ @@ -44,8 +43,8 @@ asym_regression = function(x, y){ ) } -lin_regression = function(x, y){ - glm_fit = suppressWarnings(glm(y~x, family = "binomial")) +lin_regression = function(x, y, family = "binomial"){ + glm_fit = suppressWarnings(glm(y~x, family = family)) new_x = seq(min(x), max(x), length.out = 100) data.frame( x = new_x, @@ -53,15 +52,29 @@ lin_regression = function(x, y){ ) } -# Performance table -performance = bind_rows(ssdm_results, msdm_results, msdm_results_embedding_trained, msdm_results_multiclass) %>% - pivot_longer(c(auc, accuracy, kappa, precision, recall, f1), names_to = "metric") %>% - dplyr::filter(!is.na(value)) %>% +msdm_results = msdm_results_embedding_raw %>% + pivot_longer(all_of(c("auc", "accuracy", "kappa", "precision", "recall", "f1")), names_to = "metric", values_to = "value") %>% + dplyr::select(-obs) %>% dplyr::mutate( - metric = factor(metric, levels = c("auc", "kappa", "f1", "accuracy", "precision", "recall")), - value = round(pmax(value, 0, na.rm = T), 3) # Fix one weird instance of f1 < 0 + fold_eval = 1 ) %>% - dplyr::filter(metric %in% focal_metrics) + drop_na() + +# Performance table +performance = ssdm_results %>% + dplyr::select(-obs) %>% + dplyr::filter(fold_eval == 1, species %in% msdm_results$species) %>% # Only look at first fold + bind_rows(msdm_results) %>% + ungroup() %>% + dplyr::mutate( + value = case_when( + ((is.na(value) | is.nan(value)) & metric %in% c("auc", "f1", "accurracy", "precision", "recall")) ~ 0.5, + ((is.na(value) | is.nan(value)) & metric %in% c("kappa")) ~ 0, + .default = value + ) + ) + +focal_metrics = unique(performance$metric) ``` ## Summary @@ -139,7 +152,8 @@ Multi-Class Neural Network (**MSDM_multiclass**) The table below shows the analysed modeling results. ```{r performance, echo = FALSE, message=FALSE, warnings=FALSE} -DT::datatable(performance) +DT::datatable(performance) %>% + formatRound(columns="value", digits=3) ``` ### Number of records @@ -148,83 +162,112 @@ DT::datatable(performance) - Very poor performance below 50-100 observations ```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE} -df_plot = performance - -# Calculate regression lines for each model and metric combination -suppressWarnings({ - regression_lines = df_plot %>% - group_by(model, metric) %>% - group_modify(~asym_regression(.x$obs, .x$value)) -}) - -# Create base plot -plot <- plot_ly() %>% - layout( - title = "Model Performance vs. Number of observations", - xaxis = list(title = "Number of observations", type = "log"), - yaxis = list(title = "Value"), - legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot - margin = list(r = 150), # Add right margin to accommodate legend - hovermode = 'closest', - updatemenus = list( - list( - type = "dropdown", - active = 0, - buttons = plotly_buttons - ) +plot_performance_over_frequency = function(df_plot, metric) { + + df_plot = dplyr::filter(df_plot, metric == !!metric) + + # Calculate regression lines for each model and metric combination + suppressWarnings({ + regression_lines = df_plot %>% + group_by(model) %>% + group_modify( ~ asym_regression(.x$obs, .x$value)) + }) + + # Create base plot + plot <- plot_ly() %>% + layout( + title = "Model Performance vs. Number of observations", + xaxis = list(title = "Number of observations", type = "log"), + yaxis = list(title = metric), + legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot + margin = list(r = 150), # Add right margin to accommodate legend + hovermode = 'closest' ) - ) - -# Points -for (model_name in unique(df_plot$model)) { - plot = plot %>% - add_markers( - data = filter(df_plot, model == model_name), - x = ~obs, - y = ~value, - color = model_name, # Set color to match legendgroup - legendgroup = model_name, - opacity = 0.6, - name = ~model, - hoverinfo = 'text', - text = ~paste("Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3)), - transforms = list( - list( - type = 'filter', - target = ~metric, - operation = '=', - value = focal_metrics[1] + + # Points + for (model_name in unique(df_plot$model)) { + plot = plot %>% + add_markers( + data = filter(df_plot, model == model_name, metric %in% focal_metrics), + x = ~ obs, + y = ~ value, + color = model_name, # Set color to match legendgroup + legendgroup = model_name, + opacity = 0.6, + name = ~ model, + hoverinfo = 'text', + text = ~ paste( + "Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3) ) ) - ) -} - -# Add regression lines -for(model_name in unique(df_plot$model)){ - reg_data = dplyr::filter(regression_lines, model == model_name) - plot = plot %>% - add_lines( - data = reg_data, - x = ~x, - y = ~fit, - color = model_name, # Set color to match legendgroup - legendgroup = model_name, - name = paste(model_name, '(fit)'), - showlegend = FALSE, - transforms = list( - list( - type = 'filter', - target = ~metric, - operation = '=', - value = focal_metrics[1] - ) + } + + # Add regression lines + for (model_name in unique(df_plot$model)) { + reg_data = dplyr::filter(regression_lines, model == model_name) + plot = plot %>% + add_lines( + data = reg_data, + x = ~ x, + y = ~ fit, + color = model_name, # Set color to match legendgroup + legendgroup = model_name, + name = paste(model_name, '(fit)'), + showlegend = FALSE ) - ) + } + + return(plot) } +df_plot = performance %>% dplyr::left_join(obs_count, by = "species") + +``` + +::: panel-tabset +#### AUC + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "auc") +bslib::card(plot, full_screen = T) +``` + +#### F1 + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "f1") bslib::card(plot, full_screen = T) ``` +#### Cohen's kappa + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "kappa") +bslib::card(plot, full_screen = T) +``` + +#### Accurracy + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "accuracy") +bslib::card(plot, full_screen = T) +``` + +#### Precision + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "precision") +bslib::card(plot, full_screen = T) +``` + +#### Recall + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "recall") +bslib::card(plot, full_screen = T) +``` +::: + ### Range characteristics #### Range size @@ -234,7 +277,7 @@ Range size was calculated based on polygon layers from the IUCN Red List of Thre - Model performance tended to be slightly higher for species with larger range size - Only RF shows continuous performance improvements beyond range sizes of \~5M km² -```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE} +```{r range_size, echo = FALSE, message=FALSE, warnings=FALSE, eval=F} df_join = range_maps %>% dplyr::mutate(range_size = as.numeric(st_area(range_maps) / 1000000)) %>% # range in sqkm @@ -327,7 +370,7 @@ $$ - Models for species with higher range coverage showed slightly better performance -```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE} +```{r range_coverage, echo = FALSE, message=FALSE, warnings=FALSE, eval=F} df_cells_total = range_maps_gridded %>% dplyr::rename("species" = name_matched) %>% group_by(species) %>% @@ -435,7 +478,7 @@ Higher bias values indicate that occurrence records are spatially more clustered - There was no strong relationship between range coverage bias and model performance -```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE} +```{r range_coverage_bias, echo = FALSE, message=FALSE, warnings=FALSE, eval=F} df_occs_total = occs_final %>% st_drop_geometry() %>% group_by(species) %>% @@ -529,7 +572,7 @@ bslib::card(plot, full_screen = T) Functional groups were assigned based on taxonomic order. The following groupings were used: | Functional group | Taxomic orders | -|------------------|-----------------------------------------------------| +|--------------------|----------------------------------------------------| | large ground-dwelling | Carnivora, Artiodactyla, Cingulata, Perissodactyla | | small ground-dwelling | Rodentia, Didelphimorphia, Soricomorpha, Paucituberculata, Lagomorpha | | arboreal | Primates, Pilosa | @@ -537,7 +580,7 @@ Functional groups were assigned based on taxonomic order. The following grouping - Models for bats tended to perform slightly worse than for other groups. -```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE} +```{r functional_groups, echo = FALSE, message=FALSE, warnings=FALSE, eval=F} df_plot = performance %>% dplyr::left_join(functional_groups, by = c("species" = "name_matched")) @@ -582,4 +625,3 @@ plot <- plot %>% bslib::card(plot, full_screen = T) ``` - diff --git a/R/05_01_performance_analysis_carsten.qmd b/R/05_01_performance_analysis_carsten.qmd new file mode 100644 index 0000000000000000000000000000000000000000..7dee2c4fabee65f3b26905f8d549661df50c107b --- /dev/null +++ b/R/05_01_performance_analysis_carsten.qmd @@ -0,0 +1,190 @@ +--- +title: "SDM Performance analysis" +format: html +editor: visual +engine: knitr +--- + +```{r init, echo = FALSE, include = FALSE} +library(tidyverse) +library(sf) +library(plotly) +library(DT) + +load("../data/r_objects/model_data_pa_sampling.RData") +load("../data/r_objects/ssdm_results.RData") +load("../data/r_objects/msdm_results_embedding_raw.RData") + +load("../data/r_objects/range_maps.RData") +load("../data/r_objects/range_maps_gridded.RData") +load("../data/r_objects/occs_final.RData") +load("../data/r_objects/functional_groups.RData") +sf::sf_use_s2(use_s2 = FALSE) +``` + +```{r globals, echo = FALSE, include = FALSE} + + +# Count occs per species +obs_count = model_data %>% + sf::st_drop_geometry() %>% + dplyr::filter(present == 1) %>% + dplyr::group_by(species) %>% + dplyr::summarise(obs = n()) + + +# Regression functions +asym_regression = function(x, y){ + nls_fit = nls(y ~ 1 - (1-b) * exp(-c * log(x)), start = list(b = 0.1, c = 0.1)) + new_x = exp(seq(log(min(x)), log(max(x)), length.out = 100)) + data.frame( + x = new_x, + fit = predict(nls_fit, newdata = data.frame(x = new_x)) + ) +} + +lin_regression = function(x, y, family = "binomial"){ + glm_fit = suppressWarnings(glm(y~x, family = family)) + new_x = seq(min(x), max(x), length.out = 100) + data.frame( + x = new_x, + fit = predict(glm_fit, newdata = data.frame(x = new_x), type = "response") + ) +} + +msdm_results = msdm_results_embedding_raw %>% + pivot_longer(all_of(c("auc", "accuracy", "kappa", "precision", "recall", "f1")), names_to = "metric", values_to = "value") %>% + dplyr::select(-obs) %>% + dplyr::mutate( + fold_eval = 1 + ) %>% + drop_na() + +# Performance table +performance = ssdm_results %>% + dplyr::select(-obs) %>% + dplyr::filter(fold_eval == 1, species %in% msdm_results$species) %>% # Only look at first fold + bind_rows(msdm_results) %>% + ungroup() %>% + dplyr::mutate( + value = case_when( + ((is.na(value) | is.nan(value)) & metric %in% c("auc", "f1", "accurracy", "precision", "recall")) ~ 0.5, + ((is.na(value) | is.nan(value)) & metric %in% c("kappa")) ~ 0, + .default = value + ) + ) + +focal_metrics = unique(performance$metric) +``` + + +## Analysis + +### Number of records + +```{r number_of_records, echo = FALSE, message=FALSE, warnings=FALSE} +plot_performance_over_frequency = function(df_plot, metric) { + + df_plot = dplyr::filter(df_plot, metric == !!metric) + + # Calculate regression lines for each model and metric combination + suppressWarnings({ + regression_lines = df_plot %>% + group_by(model) %>% + group_modify( ~ asym_regression(.x$obs, .x$value)) + }) + + # Create base plot + plot <- plot_ly() %>% + layout( + title = "Model Performance vs. Number of observations", + xaxis = list(title = "Number of observations", type = "log"), + yaxis = list(title = metric), + legend = list(x = 1.1, y = 0.5), # Move legend to the right of the plot + margin = list(r = 150), # Add right margin to accommodate legend + hovermode = 'closest' + ) + + # Points + for (model_name in unique(df_plot$model)) { + plot = plot %>% + add_markers( + data = filter(df_plot, model == model_name, metric %in% focal_metrics), + x = ~ obs, + y = ~ value, + color = model_name, # Set color to match legendgroup + legendgroup = model_name, + opacity = 0.6, + name = ~ model, + hoverinfo = 'text', + text = ~ paste( + "Species:", species, "<br>Observations:", obs, "<br>Value:", round(value, 3) + ) + ) + } + + # Add regression lines + for (model_name in unique(df_plot$model)) { + reg_data = dplyr::filter(regression_lines, model == model_name) + plot = plot %>% + add_lines( + data = reg_data, + x = ~ x, + y = ~ fit, + color = model_name, # Set color to match legendgroup + legendgroup = model_name, + name = paste(model_name, '(fit)'), + showlegend = FALSE + ) + } + + return(plot) +} + +df_plot = performance %>% dplyr::left_join(obs_count, by = "species") + +``` + +::: panel-tabset +#### AUC + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "auc") +bslib::card(plot, full_screen = T) +``` + +#### F1 + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "f1") +bslib::card(plot, full_screen = T) +``` + +#### Cohen's kappa + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "kappa") +bslib::card(plot, full_screen = T) +``` + +#### Accurracy + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "accuracy") +bslib::card(plot, full_screen = T) +``` + +#### Precision + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "precision") +bslib::card(plot, full_screen = T) +``` + +#### Recall + +```{r echo = FALSE} +plot = plot_performance_over_frequency(df_plot, metric = "recall") +bslib::card(plot, full_screen = T) +``` +::: diff --git a/R/05_02_MSDM_comparison.qmd b/R/05_02_MSDM_comparison.qmd index 5b14c8a7d1241af5315de31327fbd91dc067e89b..b70af796777e659e9da4f3b62acda73a5ad89c2c 100644 --- a/R/05_02_MSDM_comparison.qmd +++ b/R/05_02_MSDM_comparison.qmd @@ -12,13 +12,15 @@ library(plotly) library(DT) library(shiny) -load("../data/r_objects/msdm_results_embedding_raw.RData") -load("../data/r_objects/msdm_results_embedding_traits_static.RData") -load("../data/r_objects/msdm_results_embedding_traits_trained.RData") -load("../data/r_objects/msdm_results_embedding_phylo_static.RData") -load("../data/r_objects/msdm_results_embedding_phylo_trained.RData") -load("../data/r_objects/msdm_results_embedding_range_static.RData") -load("../data/r_objects/msdm_results_embedding_range_trained.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_raw.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_traits_static.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_traits_trained.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_phylo_static.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_phylo_trained.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_range_static.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_range_trained.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_multi_nolonlat.RData") +load("../data/r_objects/simple_cv/msdm_results_embedding_multi_lonlat.RData") sf::sf_use_s2(use_s2 = FALSE) ``` @@ -40,7 +42,9 @@ results_embedding_informed = c( "msdm_results_embedding_range_static", "msdm_results_embedding_range_trained", "msdm_results_embedding_traits_static", - "msdm_results_embedding_traits_trained" + "msdm_results_embedding_traits_trained", + "msdm_results_embedding_multi_nolonlat", + "msdm_results_embedding_multi_lonlat" ) results_embedding_informed_merged = lapply(results_embedding_informed, function(df_name){ @@ -63,7 +67,9 @@ results_final = results_embedding_raw %>% "MSDM_embed_informed_traits_static" = "M_TS", "MSDM_embed_informed_traits_trained" = "M_TT", "MSDM_embed_informed_range_static" = "M_RS", - "MSDM_embed_informed_range_trained" = "M_RT" + "MSDM_embed_informed_range_trained" = "M_RT", + "MSDM_embed_informed_multi_nolonlat" = "M_MT_nolonlat", + "MSDM_embed_informed_multi_lonlat" = "M_MT_lonlat" ), across(all_of(focal_metrics), round, 3) ) @@ -274,13 +280,12 @@ for(model_name in unique(df_plot$model)){ bslib::card(plot, full_screen = T) ``` - ## *Relative Performance* ```{r delta, echo = FALSE, message=FALSE, warnings=FALSE} results_ranked_obs = results_final_long %>% group_by(species, metric) %>% - mutate(rank = rev(rank(value))) + mutate(rank = rank(value)) reglines = results_ranked_obs %>% group_by(model, metric) %>% @@ -426,7 +431,6 @@ bslib::card(plot, full_screen = T) ``` ::: - ## *Trait space* ```{r trait_pca, echo = FALSE, message=FALSE, warnings=FALSE} diff --git a/R/_publish.yml b/R/_publish.yml index 02920d16bb1021f602d5a92d12ad9761ba6a0334..c07bacdf4dd08df0543df4ec56e167e482e70c0d 100644 --- a/R/_publish.yml +++ b/R/_publish.yml @@ -2,3 +2,7 @@ quarto-pub: - id: 309c45c3-1515-4985-a435-9ffa1888c5e7 url: 'https://chrkoenig.quarto.pub/ssdm-performance-analysis-0a06' +- source: 05_01_performance_analysis_carsten.qmd + quarto-pub: + - id: f9cafa63-bfd4-4c00-b401-09ec137bd3ce + url: 'https://chrkoenig.quarto.pub/sdm-performance-carsten' diff --git a/R/utils.R b/R/utils.R index b1da26abc38515507c2381fa178c92bab4e50296..1c9dc02ef9d965b3ed9a02f7552aec85de46141f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,7 +25,7 @@ expand_bbox <- function(bbox, min_span = 1, expansion = 0.25) { return(bbox) } -evaluate_model <- function(model, test_data) { +evaluate_model <- function(model, data) { # Accuracy: The proportion of correctly predicted instances (both true positives and true negatives) out of the total instances. # Formula: Accuracy = (TP + TN) / (TP + TN + FP + FN) @@ -40,14 +40,14 @@ evaluate_model <- function(model, test_data) { # Predict probabilities if(class(model) %in% c("citodnn", "citodnnBootstrap")){ - probs <- predict(model, as.matrix(test_data), type = "response")[,1] + probs <- predict(model, as.matrix(data), type = "response")[,1] preds <- factor(round(probs), levels = c("0", "1"), labels = c("A", "P")) } else { - probs <- predict(model, test_data, type = "prob")$P - preds <- predict(model, test_data, type = "raw") + probs <- predict(model, data, type = "prob")$P + preds <- predict(model, data, type = "raw") } - actual <- factor(test_data$present, levels = c("0", "1"), labels = c("A", "P")) + actual <- factor(data$present, levels = c("0", "1"), labels = c("A", "P")) # Calculate AUC auc <- pROC::roc(actual, probs, levels = c("P", "A"), direction = ">")$auc diff --git a/renv.lock b/renv.lock index ff0fc5aa5dcee66526155c43af29c34119242c22..7f13630b5e9ba0b1d75e0fcf60e2ae056a29d10f 100644 --- a/renv.lock +++ b/renv.lock @@ -2,6 +2,26 @@ "R": { "Version": "4.3.0", "Repositories": [ + { + "Name": "BioCsoft", + "URL": "https://bioconductor.org/packages/3.18/bioc" + }, + { + "Name": "BioCann", + "URL": "https://bioconductor.org/packages/3.18/data/annotation" + }, + { + "Name": "BioCexp", + "URL": "https://bioconductor.org/packages/3.18/data/experiment" + }, + { + "Name": "BioCworkflows", + "URL": "https://bioconductor.org/packages/3.18/workflows" + }, + { + "Name": "BioCbooks", + "URL": "https://bioconductor.org/packages/3.18/books" + }, { "Name": "CRAN", "URL": "https://cloud.r-project.org" @@ -177,6 +197,20 @@ ], "Hash": "f27411eb6d9c3dada5edd444b8416675" }, + "RcppArmadillo": { + "Package": "RcppArmadillo", + "Version": "14.2.0-1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "Rcpp", + "methods", + "stats", + "utils" + ], + "Hash": "ce9c35ab9ea9c6ef4e27a08868cdb32b" + }, "RcppEigen": { "Package": "RcppEigen", "Version": "0.3.4.0.2", @@ -190,6 +224,57 @@ ], "Hash": "4ac8e423216b8b70cb9653d1b3f71eb9" }, + "RcppGSL": { + "Package": "RcppGSL", + "Version": "0.3.13", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "Rcpp", + "stats" + ], + "Hash": "e8fc7310d256a7b6c4de8e57ab76c55d" + }, + "RcppParallel": { + "Package": "RcppParallel", + "Version": "5.1.9", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "f38a72a419b91faac0ce5d9eee04c120" + }, + "RcppZiggurat": { + "Package": "RcppZiggurat", + "Version": "0.1.6", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "Rcpp", + "RcppGSL", + "graphics", + "parallel", + "stats", + "utils" + ], + "Hash": "75b4a36aeeed440ad03b996081190703" + }, + "Rfast": { + "Package": "Rfast", + "Version": "2.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "Rcpp", + "RcppArmadillo", + "RcppParallel", + "RcppZiggurat" + ], + "Hash": "79e8394e1fa06a4ae954b70ca9b16e8f" + }, "SQUAREM": { "Package": "SQUAREM", "Version": "2021.1", @@ -832,6 +917,21 @@ ], "Hash": "33698c4b3127fc9f506654607fb73676" }, + "dismo": { + "Package": "dismo", + "Version": "1.3-16", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "Rcpp", + "methods", + "raster", + "sp", + "terra" + ], + "Hash": "5c8646b40ba69146afc070aaea9f893c" + }, "doParallel": { "Package": "doParallel", "Version": "1.0.17", @@ -1139,6 +1239,17 @@ ], "Hash": "15e9634c0fcd294799e9b2e929ed1b86" }, + "geos": { + "Package": "geos", + "Version": "0.2.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "libgeos", + "wk" + ], + "Hash": "117a3a09b793abf1d2146027a71b7524" + }, "geosphere": { "Package": "geosphere", "Version": "1.5-18", @@ -1623,6 +1734,13 @@ ], "Hash": "d908914ae53b04d4c0c0fd72ecc35370" }, + "libgeos": { + "Package": "libgeos", + "Version": "3.11.1-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "323d0f39c2e5ebcb152b810d1e8ed9bb" + }, "lifecycle": { "Package": "lifecycle", "Version": "1.0.4", @@ -2660,6 +2778,18 @@ ], "Hash": "75940133cca2e339afce15a586f85b11" }, + "spatialEco": { + "Package": "spatialEco", + "Version": "2.0-2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "sf", + "terra" + ], + "Hash": "d0352030add66f0c73ff5d7473b7aef1" + }, "stringdist": { "Package": "stringdist", "Version": "0.9.12", @@ -2847,6 +2977,27 @@ ], "Hash": "829f27b9c4919c16b593794a6344d6c0" }, + "tidyterra": { + "Package": "tidyterra", + "Version": "0.6.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "data.table", + "dplyr", + "ggplot2", + "magrittr", + "rlang", + "scales", + "sf", + "terra", + "tibble", + "tidyr" + ], + "Hash": "5d9672bc78297d33c4ad8b51a8aaa48f" + }, "tidyverse": { "Package": "tidyverse", "Version": "2.0.0",