Skip to content
Snippets Groups Projects
Commit 1b612f78 authored by ye87zine's avatar ye87zine
Browse files
parents 925b1eb1 9e3afb8f
No related branches found
No related tags found
No related merge requests found
library("tidyverse")
library("traitdata")
# Match names
traits = traitdata::elton_mammals
names_unique = unique(traits$scientificNameStd)
pb = txtProgressBar(min = 0, max = length(names_unique), style = 3)
trait_names_matched = lapply(seq_along(names_unique), function(i){
tryCatch({
name = names_unique[i]
setTxtProgressBar(pb, i)
match_result = Symobio::gbif_match_name(name = name)
if(match_result$status != "ACCEPTED"){
match_result = Symobio::gbif_match_name(usageKey = match_result$acceptedUsageKey)
}
match_result$name_orig = name
return(match_result)
}, error = function(e){
return(NULL)
})
}) %>% bind_rows()
close(pb)
traits_matched = trait_names_matched %>%
dplyr::select("name_orig", "species", "scientificName") %>%
dplyr::distinct() %>%
dplyr::inner_join(traits, by = c("name_orig" = "scientificNameStd"))
save(traits_matched, file = "data/r_objects/traits_matched.RData")
# Calculate Distances
library("vegan")
load("data/r_objects/range_maps.RData")
load("data/r_objects/traits_matched.RData")
target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])
traits_target = dplyr::filter(traits_matched, species %in% target_species)
# some pre-processing
traits_target$`ForStrat.Value.Proc` = as.numeric(factor(traits_target$`ForStrat.Value`, levels = c("G", "S", "Ar", "A")))
traits_target$`BodyMass.Value.Proc` = scale(log(traits_target$`BodyMass.Value`))
# Define columns
diet_cols = c("Diet.Inv", "Diet.Vend", "Diet.Vect", "Diet.Vfish", "Diet.Vunk", "Diet.Scav", "Diet.Fruit", "Diet.Nect", "Diet.Seed", "Diet.PlantO")
strata_cols = c("ForStrat.Value.Proc")
activity_cols = c("Activity.Nocturnal", "Activity.Crepuscular", "Activity.Diurnal")
bodymass_cols = c("BodyMass.Value.Proc")
diet_dist = vegan::vegdist(traits_target[,diet_cols], "bray")
foraging_dist = dist(traits_target[,strata_cols])
activity_dist = vegan::vegdist(traits_target[,activity_cols], "bray")
bodymass_dist = dist(traits_target[,bodymass_cols])
func_dist = (diet_dist + foraging_dist/max(foraging_dist) + activity_dist + bodymass_dist/max(bodymass_dist)) / 4
names(func_dist) = stringr::str_replace_all(traits_target$species, pattern = " ", "_")
save(func_dist, file = "data/r_objects/func_dist.RData")
library(dplyr)
library(tidyr)
library(cito)
source("R/utils.R")
load("data/r_objects/model_data.RData")
load("data/r_objects/func_dist.RData")
# ----------------------------------------------------------------------#
# Prepare embeddings ####
# ----------------------------------------------------------------------#
model_data$species_int = as.integer(as.factor(model_data$species))
train_data = dplyr::filter(model_data, train == 1)
test_data = dplyr::filter(model_data, train == 0)
# ----------------------------------------------------------------------#
# Train model ####
# ----------------------------------------------------------------------#
predictors = paste0("layer_", 1:19)
formula = as.formula(paste0("present ~ ", paste(predictors, collapse = '+'), " + ", "e(species_int, dim = 20, lambda = 0.000001)"))
msdm_fit = dnn(
formula,
data = train_data,
hidden = c(500L, 1000L, 1000L),
loss = "binomial",
activation = c("sigmoid", "leaky_relu", "leaky_relu"),
epochs = 10000L,
lr = 0.01,
baseloss = 1,
batchsize = nrow(train_data)/5,
dropout = 0.25,
burnin = 100,
optimizer = config_optimizer("adam", weight_decay = 0.001),
lr_scheduler = config_lr_scheduler("reduce_on_plateau", patience = 100, factor = 0.7),
early_stopping = 250,
validation = 0.3,
device = "cuda"
)
save(msdm_fit, file = "data/r_objects/msdm_fit.RData")
# ----------------------------------------------------------------------#
# Evaluate model ####
# ----------------------------------------------------------------------#
load("data/r_objects/msdm_fit.RData")
# Overall
preds_train = predict(msdm_fit, newdata = as.matrix(train_data), type = "response")
preds_test = predict(msdm_fit, newdata = as.matrix(test_data), type = "response")
hist(preds_train)
hist(preds_test)
eval_overall = evaluate_model(msdm_fit, test_data)
# Per species
data_split = split(model_data, model_data$species)
msdm_results = lapply(data_split, function(data_spec){
test_data = dplyr::filter(data_spec, train == 0)
msdm_performance = tryCatch({
evaluate_model(msdm_fit, test_data)
}, error = function(e){
list(AUC = NA, Accuracy = NA, Kappa = NA, Precision = NA, Recall = NA, F1 = NA)
})
performance_summary = tibble(
species = data_spec$species[1],
obs = nrow(data_spec),
model = "NN_MSDM",
auc = msdm_performance$AUC,
accuracy = msdm_performance$Accuracy,
kappa = msdm_performance$Kappa,
precision = msdm_performance$Precision,
recall = msdm_performance$Recall,
f1 = msdm_performance$F1
)
}) %>% bind_rows()
save(msdm_results, file = "data/r_objects/msdm_results.RData")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment