Skip to content
Snippets Groups Projects
Commit 3ed7e5fd authored by ye87zine's avatar ye87zine
Browse files

Finished file restructuring + improved documentation

parent 9f7c6efb
No related branches found
No related tags found
No related merge requests found
...@@ -7,7 +7,7 @@ library(Rfast) ...@@ -7,7 +7,7 @@ library(Rfast)
sf::sf_use_s2(use_s2 = FALSE) sf::sf_use_s2(use_s2 = FALSE)
# --------------------------------------# # --------------------------------------#
# Process range maps #### # Process range maps ####
# --------------------------------------# # --------------------------------------#
# Load IUCN range maps # Load IUCN range maps
range_maps = st_read( range_maps = st_read(
...@@ -52,7 +52,7 @@ range_maps = range_maps %>% ...@@ -52,7 +52,7 @@ range_maps = range_maps %>%
save(range_maps, file = "data/r_objects/range_maps.RData") save(range_maps, file = "data/r_objects/range_maps.RData")
# -------------------------------------------# # -------------------------------------------#
# Process gridded range maps #### # Process gridded range maps ####
# -------------------------------------------# # -------------------------------------------#
range_maps_gridded = rnaturalearth::ne_countries() %>% range_maps_gridded = rnaturalearth::ne_countries() %>%
dplyr::filter(continent == "South America") %>% dplyr::filter(continent == "South America") %>%
...@@ -65,10 +65,11 @@ range_maps_gridded = rnaturalearth::ne_countries() %>% ...@@ -65,10 +65,11 @@ range_maps_gridded = rnaturalearth::ne_countries() %>%
save(range_maps_gridded, file = "data/r_objects/range_maps_gridded.RData") save(range_maps_gridded, file = "data/r_objects/range_maps_gridded.RData")
# ----------------------------------------------# # ----------------------------------------------#
# Calculate range dissimilarity #### # Calculate range dissimilarity ####
# ----------------------------------------------# # ----------------------------------------------#
load("data/r_objects/range_maps_gridded.RData") load("data/r_objects/range_maps_gridded.RData")
# Get grid ids
range_maps_gridded_id = range_maps_gridded %>% range_maps_gridded_id = range_maps_gridded %>%
dplyr::group_by(geometry) %>% dplyr::group_by(geometry) %>%
dplyr::mutate(geom_id = cur_group_id()) %>% dplyr::mutate(geom_id = cur_group_id()) %>%
...@@ -79,6 +80,7 @@ geometries_unique = range_maps_gridded_id %>% ...@@ -79,6 +80,7 @@ geometries_unique = range_maps_gridded_id %>%
group_by(geom_id) %>% group_by(geom_id) %>%
slice_head(n = 1) slice_head(n = 1)
# Calculate pairwise distances between grid cells
geom_dist = sf::st_distance(geometries_unique, geometries_unique) %>% # Takes ~ 10 mins geom_dist = sf::st_distance(geometries_unique, geometries_unique) %>% # Takes ~ 10 mins
as.matrix() as.matrix()
...@@ -86,6 +88,7 @@ range_maps_split = range_maps_gridded_id %>% ...@@ -86,6 +88,7 @@ range_maps_split = range_maps_gridded_id %>%
group_by(name_matched) %>% group_by(name_matched) %>%
group_split() group_split()
# Calculate mean minimum distance between grid cells of species A and B
mean_minimum_distances = lapply(range_maps_split, function(df1){ # Takes ~ 30 mins mean_minimum_distances = lapply(range_maps_split, function(df1){ # Takes ~ 30 mins
df_dist = lapply(range_maps_split, function(df2){ df_dist = lapply(range_maps_split, function(df2){
dists_subset = geom_dist[df1$geom_id, df2$geom_id, drop = F] dists_subset = geom_dist[df1$geom_id, df2$geom_id, drop = F]
...@@ -103,11 +106,13 @@ mean_minimum_distances = lapply(range_maps_split, function(df1){ # Takes ~ 30 m ...@@ -103,11 +106,13 @@ mean_minimum_distances = lapply(range_maps_split, function(df1){ # Takes ~ 30 m
return(df_dist) return(df_dist)
}) %>% bind_rows() }) %>% bind_rows()
# Reshape into distance matrix
range_dist = mean_minimum_distances %>% range_dist = mean_minimum_distances %>%
pivot_wider(names_from = species2, values_from = dist) %>% pivot_wider(names_from = species2, values_from = dist) %>%
column_to_rownames("species1") %>% column_to_rownames("species1") %>%
as.matrix() as.matrix()
range_dist = range_dist / max(range_dist) # Scale to (0,1) # Scale to (0,1)
range_dist = range_dist / max(range_dist)
save(range_dist, file = "data/r_objects/range_dist.RData") save(range_dist, file = "data/r_objects/range_dist.RData")
...@@ -44,7 +44,7 @@ save(functional_groups, file = "data/r_objects/functional_groups.RData") ...@@ -44,7 +44,7 @@ save(functional_groups, file = "data/r_objects/functional_groups.RData")
# ---------------------------------------------------# # ---------------------------------------------------#
# Process and merge functional traits #### # Process and merge functional traits ####
# ---------------------------------------------------# # ---------------------------------------------------#
# Match names # Match taxonomic names
traits = traitdata::elton_mammals traits = traitdata::elton_mammals
names_unique = unique(traits$scientificNameStd) names_unique = unique(traits$scientificNameStd)
...@@ -84,21 +84,26 @@ traits_proc = traits_matched %>% ...@@ -84,21 +84,26 @@ traits_proc = traits_matched %>%
match_epithet = stringr::str_detect(species, Species)) %>% match_epithet = stringr::str_detect(species, Species)) %>%
dplyr::group_by(species) %>% dplyr::group_by(species) %>%
dplyr::group_modify(~ { dplyr::group_modify(~ {
# Only one record with accepted species name
if (nrow(.x) == 1) { if (nrow(.x) == 1) {
return (.x) return (.x)
} }
# If multiple records for accepted species name
x_mod = .x x_mod = .x
while (nrow(x_mod) > 1){ while (nrow(x_mod) > 1){
# 1. Remove records with mismatched genus name, retry
if(any(isFALSE(x_mod$match_genus))){ if(any(isFALSE(x_mod$match_genus))){
x_mod = dplyr::filter(x_mod, isTRUE(match_genus)) x_mod = dplyr::filter(x_mod, isTRUE(match_genus))
next next
} }
# 1. Remove records with mismatched epithet name, retry
if(any(isFALSE(x_mod$match_epithet))){ if(any(isFALSE(x_mod$match_epithet))){
x_mod = dplyr::filter(x_mod, isTRUE(match_epithet)) x_mod = dplyr::filter(x_mod, isTRUE(match_epithet))
next next
} }
# If there are still multiple records with matching genus and epithet, just take the first record
x_mod = x_mod[1,] x_mod = x_mod[1,]
} }
...@@ -108,7 +113,7 @@ traits_proc = traits_matched %>% ...@@ -108,7 +113,7 @@ traits_proc = traits_matched %>%
return(.x[1,]) return(.x[1,])
} }
}) %>% }) %>%
dplyr::select(c("species", diet_cols, strata_cols, activity_cols, bodymass_cols)) dplyr::select(all_of(c("species", diet_cols, strata_cols, activity_cols, bodymass_cols)))
save(traits_proc, file = "data/r_objects/traits_proc.RData") save(traits_proc, file = "data/r_objects/traits_proc.RData")
...@@ -122,10 +127,11 @@ traits_target = dplyr::filter(traits_proc, species %in% target_species) ...@@ -122,10 +127,11 @@ traits_target = dplyr::filter(traits_proc, species %in% target_species)
traits_target$`ForStrat.Value` = as.numeric(factor(traits_target$`ForStrat.Value`, levels = c("G", "S", "Ar", "A"))) traits_target$`ForStrat.Value` = as.numeric(factor(traits_target$`ForStrat.Value`, levels = c("G", "S", "Ar", "A")))
traits_target$`BodyMass.Value` = scale(log(traits_target$`BodyMass.Value`)) traits_target$`BodyMass.Value` = scale(log(traits_target$`BodyMass.Value`))
diet_dist = vegan::vegdist(traits_target[,diet_cols], "bray") # Calculate distances for trait categories separately
foraging_dist = dist(traits_target[,strata_cols]) diet_dist = vegan::vegdist(traits_target[,diet_cols], "bray") # Bray-Curtis
activity_dist = vegan::vegdist(traits_target[,activity_cols], "bray") foraging_dist = dist(traits_target[,strata_cols]) # Euclidean
bodymass_dist = dist(traits_target[,bodymass_cols]) activity_dist = vegan::vegdist(traits_target[,activity_cols], "bray") # Bray-Curtis
bodymass_dist = dist(traits_target[,bodymass_cols]) # Euclidean
func_dist = (diet_dist + foraging_dist/max(foraging_dist) + activity_dist + bodymass_dist/max(bodymass_dist)) / 4 func_dist = (diet_dist + foraging_dist/max(foraging_dist) + activity_dist + bodymass_dist/max(bodymass_dist)) / 4
names(func_dist) = traits_target$species names(func_dist) = traits_target$species
......
...@@ -7,7 +7,7 @@ forest = read.nexus("data/phylogenies/Complete_phylogeny.nex") ...@@ -7,7 +7,7 @@ forest = read.nexus("data/phylogenies/Complete_phylogeny.nex")
load("data/r_objects/range_maps_names_matched.RData") load("data/r_objects/range_maps_names_matched.RData")
# ---------------------------------------------------# # ---------------------------------------------------#
# Match taxonomic names in phylogeny #### # Match taxonomic names in phylogeny ####
# ---------------------------------------------------# # ---------------------------------------------------#
phylo_names_unique = unique(forest$UNTITLED$tip.label) phylo_names_unique = unique(forest$UNTITLED$tip.label)
...@@ -31,9 +31,8 @@ phylo_names_matched = lapply(phylo_names_unique, function(name){ ...@@ -31,9 +31,8 @@ phylo_names_matched = lapply(phylo_names_unique, function(name){
save(phylo_names_matched, file = "data/r_objects/phylo_names_matched.RData") save(phylo_names_matched, file = "data/r_objects/phylo_names_matched.RData")
# ---------------------------------------------------# # ---------------------------------------------------#
# Calculate phylo distances #### # Calculate phylo distances ####
# ---------------------------------------------------# # ---------------------------------------------------#
# Calculate pairwise phylogenetic distances across a random sample 50 of forests
# Trim forest to target species # Trim forest to target species
phylo_names_target = dplyr::filter(phylo_names_matched, name_matched %in% range_maps_names_matched$name_matched) phylo_names_target = dplyr::filter(phylo_names_matched, name_matched %in% range_maps_names_matched$name_matched)
...@@ -45,6 +44,7 @@ forest_pruned = lapply(forest, function(x) { ...@@ -45,6 +44,7 @@ forest_pruned = lapply(forest, function(x) {
return(x_pruned) return(x_pruned)
}) })
# Calculate pairwise phylogenetic distances across a random sample 50 of forests
class(forest_pruned) <- "multiPhylo" class(forest_pruned) <- "multiPhylo"
indices = sample(length(forest_pruned), 50) indices = sample(length(forest_pruned), 50)
dists = lapply(indices, function(i){ dists = lapply(indices, function(i){
......
library(tidyverse) library(tidyverse)
library(terra) library(terra)
library(rnaturalearth) library(rnaturalearth)
library(furrr) library(sf)
# Define Study extent #### # Define Study extent ####
sa_polygon = rnaturalearth::ne_countries(scale = 10, returnclass = "sf") %>% sa_polygon = rnaturalearth::ne_countries(scale = 10, returnclass = "sf") %>%
dplyr::filter(continent == "South America") %>% dplyr::filter(continent == "South America") %>%
sf::st_union() %>% sf::st_union() %>%
st_crop(xmin = -82, ymin = -56, xmax = -34, ymax = 13) sf::st_crop(xmin = -82, ymin = -56, xmax = -34, ymax = 13)
save(sa_polygon, file = "data/r_objects/sa_polygon.RData") save(sa_polygon, file = "data/r_objects/sa_polygon.RData")
......
...@@ -14,11 +14,11 @@ library(DBI) ...@@ -14,11 +14,11 @@ library(DBI)
source("R/utils.R") source("R/utils.R")
con = db_connect() # Connection to Symobio con = db_connect() # Connection to Symobio DB
sf::sf_use_s2(use_s2 = FALSE) # Don't use spherical geometry # sf::sf_use_s2(use_s2 = FALSE) # Don't use spherical geometry
# ---------------------------------------------------------------------------# # ---------------------------------------------------------------------------#
# Prepare Geodata #### # Load Geodata ####
# ---------------------------------------------------------------------------# # ---------------------------------------------------------------------------#
load("data/r_objects/sa_polygon.RData") load("data/r_objects/sa_polygon.RData")
...@@ -34,7 +34,7 @@ load("data/r_objects/range_maps.RData") ...@@ -34,7 +34,7 @@ load("data/r_objects/range_maps.RData")
target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)]) target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])
# Query species from Symobio # Query species from Symobio
occs = tbl(con, "species_occurrences") %>% occs = tbl(con, "species_occurrences") %>% # This line connects to the "species_occurrences" table in SymobioDB
dplyr::filter(species %in% target_species) %>% dplyr::filter(species %in% target_species) %>%
dplyr::select(-year) %>% dplyr::select(-year) %>%
dplyr::distinct() %>% dplyr::distinct() %>%
...@@ -69,7 +69,7 @@ occs_final = occs_flagged %>% ...@@ -69,7 +69,7 @@ occs_final = occs_flagged %>%
dplyr::select(species, longitude, latitude) dplyr::select(species, longitude, latitude)
# Extract environmental data # Extract environmental data
proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" # Equal Area projection for South America, created with https://projectionwizard.org/
raster_data = terra::project(raster_data, proj_string) raster_data = terra::project(raster_data, proj_string)
occs_final = st_transform(occs_final, proj_string) occs_final = st_transform(occs_final, proj_string)
......
...@@ -12,6 +12,7 @@ sa_polygon = sf::st_transform(sa_polygon, crs(model_data)) ...@@ -12,6 +12,7 @@ sa_polygon = sf::st_transform(sa_polygon, crs(model_data))
# ---------------------------------------# # ---------------------------------------#
# Plot all presences in the dataset #### # Plot all presences in the dataset ####
# ---------------------------------------# # ---------------------------------------#
# Prepare data
data_extent = ext(model_data) data_extent = ext(model_data)
presences = model_data %>% presences = model_data %>%
...@@ -29,8 +30,10 @@ point_counts <- terra::rasterize( ...@@ -29,8 +30,10 @@ point_counts <- terra::rasterize(
) %>% ) %>%
terra::mask(vect(sa_polygon)) terra::mask(vect(sa_polygon))
length(which(values(point_counts) > 0)) # only 36858 raster cells contain at least 1 presence record length(which(values(point_counts) > 0))
# --> only 36858 raster cells contain at least 1 presence record
# Create plot
ggplot() + ggplot() +
tidyterra::geom_spatraster(data = point_counts, maxcell = 5e7) + tidyterra::geom_spatraster(data = point_counts, maxcell = 5e7) +
scale_fill_gradientn( scale_fill_gradientn(
...@@ -49,6 +52,7 @@ ggsave("coordinate_density.pdf", path = "plots/", device = "pdf", scale = 4) ...@@ -49,6 +52,7 @@ ggsave("coordinate_density.pdf", path = "plots/", device = "pdf", scale = 4)
# ---------------------------------------# # ---------------------------------------#
# Explore folds assignment #### # Explore folds assignment ####
# ---------------------------------------# # ---------------------------------------#
# Prepare data
cv_plot(spatial_folds) cv_plot(spatial_folds)
ggsave(filename = "plots/publication/global_folds_map.pdf", device = "pdf", scale = 2.5) ggsave(filename = "plots/publication/global_folds_map.pdf", device = "pdf", scale = 2.5)
...@@ -74,6 +78,7 @@ folds_uniform = folds_summary %>% ...@@ -74,6 +78,7 @@ folds_uniform = folds_summary %>%
) %>% ) %>%
dplyr::ungroup() dplyr::ungroup()
# Create plot
plot_1 = ggplot(folds_uniform, aes(x=folds_total)) + plot_1 = ggplot(folds_uniform, aes(x=folds_total)) +
geom_histogram(binwidth=0.5, fill = "orange") + geom_histogram(binwidth=0.5, fill = "orange") +
labs(title="Number of folds per species", x="Count", y = "Number of species") + labs(title="Number of folds per species", x="Count", y = "Number of species") +
......
...@@ -24,7 +24,7 @@ data_split = model_data %>% ...@@ -24,7 +24,7 @@ data_split = model_data %>%
# ----------------------------------------------------------------------# # ----------------------------------------------------------------------#
# Set up parallel processing #### # Set up parallel processing ####
# ----------------------------------------------------------------------# # ----------------------------------------------------------------------#
num_cores <- 2 num_cores <- 2 # Some packages use internal parallelization, so be careful with increasing this param
cl <- makeCluster(num_cores) cl <- makeCluster(num_cores)
registerDoParallel(cl) registerDoParallel(cl)
......
File moved
File moved
File moved
File moved
File moved
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment