Skip to content
Snippets Groups Projects
Commit 9e3afb8f authored by ye87zine's avatar ye87zine
Browse files

added trait distance calculation

parent cce46e6d
No related branches found
No related tags found
No related merge requests found
library("tidyverse")
library("traitdata")
# Match names
traits = traitdata::elton_mammals
names_unique = unique(traits$scientificNameStd)
pb = txtProgressBar(min = 0, max = length(names_unique), style = 3)
trait_names_matched = lapply(seq_along(names_unique), function(i){
tryCatch({
name = names_unique[i]
setTxtProgressBar(pb, i)
match_result = Symobio::gbif_match_name(name = name)
if(match_result$status != "ACCEPTED"){
match_result = Symobio::gbif_match_name(usageKey = match_result$acceptedUsageKey)
}
match_result$name_orig = name
return(match_result)
}, error = function(e){
return(NULL)
})
}) %>% bind_rows()
close(pb)
traits_matched = trait_names_matched %>%
dplyr::select("name_orig", "species", "scientificName") %>%
dplyr::distinct() %>%
dplyr::inner_join(traits, by = c("name_orig" = "scientificNameStd"))
save(traits_matched, file = "data/r_objects/traits_matched.RData")
# Calculate Distances
library("vegan")
load("data/r_objects/range_maps.RData")
load("data/r_objects/traits_matched.RData")
target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])
traits_target = dplyr::filter(traits_matched, species %in% target_species)
# some pre-processing
traits_target$`ForStrat.Value.Proc` = as.numeric(factor(traits_target$`ForStrat.Value`, levels = c("G", "S", "Ar", "A")))
traits_target$`BodyMass.Value.Proc` = scale(log(traits_target$`BodyMass.Value`))
# Define columns
diet_cols = c("Diet.Inv", "Diet.Vend", "Diet.Vect", "Diet.Vfish", "Diet.Vunk", "Diet.Scav", "Diet.Fruit", "Diet.Nect", "Diet.Seed", "Diet.PlantO")
strata_cols = c("ForStrat.Value.Proc")
activity_cols = c("Activity.Nocturnal", "Activity.Crepuscular", "Activity.Diurnal")
bodymass_cols = c("BodyMass.Value.Proc")
diet_dist = vegan::vegdist(traits_target[,diet_cols], "bray")
foraging_dist = dist(traits_target[,strata_cols])
activity_dist = vegan::vegdist(traits_target[,activity_cols], "bray")
bodymass_dist = dist(traits_target[,bodymass_cols])
func_dist = (diet_dist + foraging_dist/max(foraging_dist) + activity_dist + bodymass_dist/max(bodymass_dist)) / 4
names(func_dist) = stringr::str_replace_all(traits_target$species, pattern = " ", "_")
save(func_dist, file = "data/r_objects/func_dist.RData")
library(dplyr)
library(tidyr)
library(cito)
source("R/utils.R")
load("data/r_objects/model_data.RData")
load("data/r_objects/func_dist.RData")
# ----------------------------------------------------------------------#
# Prepare embeddings ####
# ----------------------------------------------------------------------#
model_data$species_int = as.integer(as.factor(model_data$species))
train_data = dplyr::filter(model_data, train == 1)
test_data = dplyr::filter(model_data, train == 0)
# ----------------------------------------------------------------------#
# Train model ####
# ----------------------------------------------------------------------#
predictors = paste0("layer_", 1:19)
formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species_int, dim = 20, lambda = 0.000001)"))
msdm_fit = dnn(
formula,
data = train_data,
hidden = c(500L, 1000L, 1000L),
loss = "binomial",
activation = c("sigmoid", "leaky_relu", "leaky_relu"),
epochs = 10000L,
lr = 0.01,
baseloss = 1,
batchsize = nrow(train_data)/5,
dropout = 0.25,
burnin = 100,
optimizer = config_optimizer("adam", weight_decay = 0.001),
lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
early_stopping = 250,
validation = 0.3,
device = "cuda"
)
save(msdm_fit, file = "data/r_objects/msdm_fit.RData")
# ----------------------------------------------------------------------#
# Evaluate model ####
# ----------------------------------------------------------------------#
load("data/r_objects/msdm_fit.RData")
# Overall
preds_train = predict(msdm_fit, newdata = as.matrix(train_data), type = "response")
preds_test = predict(msdm_fit, newdata = as.matrix(test_data), type = "response")
hist(preds_train)
hist(preds_test)
eval_overall = evaluate_model(msdm_fit, test_data)
# Per species
data_split = split(model_data, model_data$species)
msdm_results = lapply(data_split, function(data_spec){
test_data = dplyr::filter(data_spec, train == 0)
msdm_performance = tryCatch({
evaluate_model(msdm_fit, test_data)
}, error = function(e){
list(AUC = NA, Accuracy = NA, Kappa = NA, Precision = NA, Recall = NA, F1 = NA)
})
performance_summary = tibble(
species = data_spec$species[1],
obs = nrow(data_spec),
model = "NN_MSDM",
auc = msdm_performance$AUC,
accuracy = msdm_performance$Accuracy,
kappa = msdm_performance$Kappa,
precision = msdm_performance$Precision,
recall = msdm_performance$Recall,
f1 = msdm_performance$F1
)
}) %>% bind_rows()
save(msdm_results, file = "data/r_objects/msdm_results.RData")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment