library(tidyverse)
library(phytools)
library(ape)
library(Symobio)

forest = read.nexus("data/phylogenies/Complete_phylogeny.nex")
load("data/r_objects/range_maps_names_matched.RData")

# ---------------------------------------------------#
# Match taxonomic names in phylogeny              ####
# ---------------------------------------------------#
phylo_names_unique = unique(forest$UNTITLED$tip.label)

phylo_names_matched  = lapply(phylo_names_unique, function(name){
  tryCatch({
    name_query = stringr::str_replace_all(name, "_", " ")
    match_result = Symobio::gbif_match_name(name = name_query)
    if(match_result$status != "ACCEPTED"){
      match_result = Symobio::gbif_match_name(usageKey = match_result$acceptedUsageKey)
    }
    
    name_matched = if("species" %in% names(match_result)) match_result$species else NA
    data.frame(name_orig = name, name_matched = name_matched)
  }, error = function(e){
    return(NULL)
  })
}) %>% 
  bind_rows() %>% 
  drop_na()

save(phylo_names_matched, file = "data/r_objects/phylo_names_matched.RData")

# ---------------------------------------------------#
# Calculate phylo distances                      ####
# ---------------------------------------------------#
# Trim forest to target species
phylo_names_target = dplyr::filter(phylo_names_matched, name_matched %in% range_maps_names_matched$name_matched)

forest_pruned = lapply(forest, function(x) {
  labels_drop = setdiff(x$tip.label, phylo_names_target$name_orig)
  x_pruned = drop.tip(x, labels_drop)
  labels_new = phylo_names_target$name_matched[match(phylo_names_target$name_orig, x_pruned$tip.label)]
  x_pruned$tip.label = labels_new
  return(x_pruned)
})

# Calculate pairwise phylogenetic distances across a random sample 50 of forests
class(forest_pruned) <- "multiPhylo"
indices = sample(length(forest_pruned), 50)
dists = lapply(indices, function(i){
  cophenetic.phylo(forest_pruned[[i]])
})

# Save result
phylo_dist = Reduce("+", dists) / length(dists)
phylo_dist = phylo_dist / max(phylo_dist)
save(phylo_dist, file = "data/r_objects/phylo_dist.RData")