From 9e3afb8f285b5d4317f4d58dd8a3d7dcb3ccaf43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <ye87zine@usr.idiv.de> Date: Thu, 14 Nov 2024 16:08:09 +0100 Subject: [PATCH] added trait distance calculation --- ....R => 02_01_functional_group_assignment.R} | 0 R/02_02_functional_traits_preparation.R | 59 +++++++++++++ R/04_03_msdm_modeling_with_traits.R | 84 +++++++++++++++++++ 3 files changed, 143 insertions(+) rename R/{02_functional_group_preparation.R => 02_01_functional_group_assignment.R} (100%) create mode 100644 R/02_02_functional_traits_preparation.R create mode 100644 R/04_03_msdm_modeling_with_traits.R diff --git a/R/02_functional_group_preparation.R b/R/02_01_functional_group_assignment.R similarity index 100% rename from R/02_functional_group_preparation.R rename to R/02_01_functional_group_assignment.R diff --git a/R/02_02_functional_traits_preparation.R b/R/02_02_functional_traits_preparation.R new file mode 100644 index 0000000..80f5887 --- /dev/null +++ b/R/02_02_functional_traits_preparation.R @@ -0,0 +1,59 @@ +library("tidyverse") +library("traitdata") + +# Match names +traits = traitdata::elton_mammals +names_unique = unique(traits$scientificNameStd) + +pb = txtProgressBar(min = 0, max = length(names_unique), style = 3) +trait_names_matched = lapply(seq_along(names_unique), function(i){ + tryCatch({ + name = names_unique[i] + setTxtProgressBar(pb, i) + + match_result = Symobio::gbif_match_name(name = name) + if(match_result$status != "ACCEPTED"){ + match_result = Symobio::gbif_match_name(usageKey = match_result$acceptedUsageKey) + } + match_result$name_orig = name + return(match_result) + }, error = function(e){ + return(NULL) + }) +}) %>% bind_rows() +close(pb) + +traits_matched = trait_names_matched %>% + dplyr::select("name_orig", "species", "scientificName") %>% + dplyr::distinct() %>% + dplyr::inner_join(traits, by = c("name_orig" = "scientificNameStd")) + +save(traits_matched, file = "data/r_objects/traits_matched.RData") + +# Calculate Distances +library("vegan") + +load("data/r_objects/range_maps.RData") +load("data/r_objects/traits_matched.RData") +target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)]) +traits_target = dplyr::filter(traits_matched, species %in% target_species) + +# some pre-processing +traits_target$`ForStrat.Value.Proc` = as.numeric(factor(traits_target$`ForStrat.Value`, levels = c("G", "S", "Ar", "A"))) +traits_target$`BodyMass.Value.Proc` = scale(log(traits_target$`BodyMass.Value`)) + +# Define columns +diet_cols = c("Diet.Inv", "Diet.Vend", "Diet.Vect", "Diet.Vfish", "Diet.Vunk", "Diet.Scav", "Diet.Fruit", "Diet.Nect", "Diet.Seed", "Diet.PlantO") +strata_cols = c("ForStrat.Value.Proc") +activity_cols = c("Activity.Nocturnal", "Activity.Crepuscular", "Activity.Diurnal") +bodymass_cols = c("BodyMass.Value.Proc") + +diet_dist = vegan::vegdist(traits_target[,diet_cols], "bray") +foraging_dist = dist(traits_target[,strata_cols]) +activity_dist = vegan::vegdist(traits_target[,activity_cols], "bray") +bodymass_dist = dist(traits_target[,bodymass_cols]) + +func_dist = (diet_dist + foraging_dist/max(foraging_dist) + activity_dist + bodymass_dist/max(bodymass_dist)) / 4 +names(func_dist) = stringr::str_replace_all(traits_target$species, pattern = " ", "_") + +save(func_dist, file = "data/r_objects/func_dist.RData") diff --git a/R/04_03_msdm_modeling_with_traits.R b/R/04_03_msdm_modeling_with_traits.R new file mode 100644 index 0000000..3495629 --- /dev/null +++ b/R/04_03_msdm_modeling_with_traits.R @@ -0,0 +1,84 @@ +library(dplyr) +library(tidyr) +library(cito) + +source("R/utils.R") + +load("data/r_objects/model_data.RData") +load("data/r_objects/func_dist.RData") + +# ----------------------------------------------------------------------# +# Prepare embeddings #### +# ----------------------------------------------------------------------# +model_data$species_int = as.integer(as.factor(model_data$species)) + +train_data = dplyr::filter(model_data, train == 1) +test_data = dplyr::filter(model_data, train == 0) + +# ----------------------------------------------------------------------# +# Train model #### +# ----------------------------------------------------------------------# +predictors = paste0("layer_", 1:19) +formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species_int, dim = 20, lambda = 0.000001)")) + +msdm_fit = dnn( + formula, + data = train_data, + hidden = c(500L, 1000L, 1000L), + loss = "binomial", + activation = c("sigmoid", "leaky_relu", "leaky_relu"), + epochs = 10000L, + lr = 0.01, + baseloss = 1, + batchsize = nrow(train_data)/5, + dropout = 0.25, + burnin = 100, + optimizer = config_optimizer("adam", weight_decay = 0.001), + lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7), + early_stopping = 250, + validation = 0.3, + device = "cuda" +) + +save(msdm_fit, file = "data/r_objects/msdm_fit.RData") + +# ----------------------------------------------------------------------# +# Evaluate model #### +# ----------------------------------------------------------------------# +load("data/r_objects/msdm_fit.RData") + +# Overall +preds_train = predict(msdm_fit, newdata = as.matrix(train_data), type = "response") +preds_test = predict(msdm_fit, newdata = as.matrix(test_data), type = "response") + +hist(preds_train) +hist(preds_test) + +eval_overall = evaluate_model(msdm_fit, test_data) + +# Per species +data_split = split(model_data, model_data$species) + +msdm_results = lapply(data_split, function(data_spec){ + test_data = dplyr::filter(data_spec, train == 0) + + msdm_performance = tryCatch({ + evaluate_model(msdm_fit, test_data) + }, error = function(e){ + list(AUC = NA, Accuracy = NA, Kappa = NA, Precision = NA, Recall = NA, F1 = NA) + }) + + performance_summary = tibble( + species = data_spec$species[1], + obs = nrow(data_spec), + model = "NN_MSDM", + auc = msdm_performance$AUC, + accuracy = msdm_performance$Accuracy, + kappa = msdm_performance$Kappa, + precision = msdm_performance$Precision, + recall = msdm_performance$Recall, + f1 = msdm_performance$F1 + ) +}) %>% bind_rows() + +save(msdm_results, file = "data/r_objects/msdm_results.RData") -- GitLab