diff --git a/R/01_01_range_preparation.R b/R/01_01_range_preparation.R index 4f6a412243451e6b38f5ecc8358aa95083cc4164..487d756b6fbb74f42134912c000ba3bfa8f9be66 100644 --- a/R/01_01_range_preparation.R +++ b/R/01_01_range_preparation.R @@ -7,7 +7,7 @@ library(Rfast) sf::sf_use_s2(use_s2 = FALSE) # --------------------------------------# -# Process range maps #### +# Process range maps #### # --------------------------------------# # Load IUCN range maps range_maps = st_read( @@ -52,7 +52,7 @@ range_maps = range_maps %>% save(range_maps, file = "data/r_objects/range_maps.RData") # -------------------------------------------# -# Process gridded range maps #### +# Process gridded range maps #### # -------------------------------------------# range_maps_gridded = rnaturalearth::ne_countries() %>% dplyr::filter(continent == "South America") %>% @@ -65,10 +65,11 @@ range_maps_gridded = rnaturalearth::ne_countries() %>% save(range_maps_gridded, file = "data/r_objects/range_maps_gridded.RData") # ----------------------------------------------# -# Calculate range dissimilarity #### +# Calculate range dissimilarity #### # ----------------------------------------------# load("data/r_objects/range_maps_gridded.RData") +# Get grid ids range_maps_gridded_id = range_maps_gridded %>% dplyr::group_by(geometry) %>% dplyr::mutate(geom_id = cur_group_id()) %>% @@ -79,6 +80,7 @@ geometries_unique = range_maps_gridded_id %>% group_by(geom_id) %>% slice_head(n = 1) +# Calculate pairwise distances between grid cells geom_dist = sf::st_distance(geometries_unique, geometries_unique) %>% # Takes ~ 10 mins as.matrix() @@ -86,6 +88,7 @@ range_maps_split = range_maps_gridded_id %>% group_by(name_matched) %>% group_split() +# Calculate mean minimum distance between grid cells of species A and B mean_minimum_distances = lapply(range_maps_split, function(df1){ # Takes ~ 30 mins df_dist = lapply(range_maps_split, function(df2){ dists_subset = geom_dist[df1$geom_id, df2$geom_id, drop = F] @@ -103,11 +106,13 @@ mean_minimum_distances = lapply(range_maps_split, function(df1){ # Takes ~ 30 m return(df_dist) }) %>% bind_rows() +# Reshape into distance matrix range_dist = mean_minimum_distances %>% pivot_wider(names_from = species2, values_from = dist) %>% column_to_rownames("species1") %>% as.matrix() -range_dist = range_dist / max(range_dist) # Scale to (0,1) +# Scale to (0,1) +range_dist = range_dist / max(range_dist) save(range_dist, file = "data/r_objects/range_dist.RData") diff --git a/R/01_02_traits_preparation.R b/R/01_02_traits_preparation.R index 0e480830dfd26d97657bcc3926131fece0d5616b..c594b42e84bb4320b8735ca87134cd8dd5d1bedd 100644 --- a/R/01_02_traits_preparation.R +++ b/R/01_02_traits_preparation.R @@ -44,7 +44,7 @@ save(functional_groups, file = "data/r_objects/functional_groups.RData") # ---------------------------------------------------# # Process and merge functional traits #### # ---------------------------------------------------# -# Match names +# Match taxonomic names traits = traitdata::elton_mammals names_unique = unique(traits$scientificNameStd) @@ -84,21 +84,26 @@ traits_proc = traits_matched %>% match_epithet = stringr::str_detect(species, Species)) %>% dplyr::group_by(species) %>% dplyr::group_modify(~ { + # Only one record with accepted species name if (nrow(.x) == 1) { return (.x) } + # If multiple records for accepted species name x_mod = .x while (nrow(x_mod) > 1){ + # 1. Remove records with mismatched genus name, retry if(any(isFALSE(x_mod$match_genus))){ x_mod = dplyr::filter(x_mod, isTRUE(match_genus)) next } + # 1. Remove records with mismatched epithet name, retry if(any(isFALSE(x_mod$match_epithet))){ x_mod = dplyr::filter(x_mod, isTRUE(match_epithet)) next } + # If there are still multiple records with matching genus and epithet, just take the first record x_mod = x_mod[1,] } @@ -108,7 +113,7 @@ traits_proc = traits_matched %>% return(.x[1,]) } }) %>% - dplyr::select(c("species", diet_cols, strata_cols, activity_cols, bodymass_cols)) + dplyr::select(all_of(c("species", diet_cols, strata_cols, activity_cols, bodymass_cols))) save(traits_proc, file = "data/r_objects/traits_proc.RData") @@ -122,10 +127,11 @@ traits_target = dplyr::filter(traits_proc, species %in% target_species) traits_target$`ForStrat.Value` = as.numeric(factor(traits_target$`ForStrat.Value`, levels = c("G", "S", "Ar", "A"))) traits_target$`BodyMass.Value` = scale(log(traits_target$`BodyMass.Value`)) -diet_dist = vegan::vegdist(traits_target[,diet_cols], "bray") -foraging_dist = dist(traits_target[,strata_cols]) -activity_dist = vegan::vegdist(traits_target[,activity_cols], "bray") -bodymass_dist = dist(traits_target[,bodymass_cols]) +# Calculate distances for trait categories separately +diet_dist = vegan::vegdist(traits_target[,diet_cols], "bray") # Bray-Curtis +foraging_dist = dist(traits_target[,strata_cols]) # Euclidean +activity_dist = vegan::vegdist(traits_target[,activity_cols], "bray") # Bray-Curtis +bodymass_dist = dist(traits_target[,bodymass_cols]) # Euclidean func_dist = (diet_dist + foraging_dist/max(foraging_dist) + activity_dist + bodymass_dist/max(bodymass_dist)) / 4 names(func_dist) = traits_target$species diff --git a/R/01_03_phylo_preparation.R b/R/01_03_phylo_preparation.R index 4ba9a85f34c5d325b4383234363b0bc06b339804..806e4f63687c12d557bf99547282f2e4f20c5c0d 100644 --- a/R/01_03_phylo_preparation.R +++ b/R/01_03_phylo_preparation.R @@ -7,7 +7,7 @@ forest = read.nexus("data/phylogenies/Complete_phylogeny.nex") load("data/r_objects/range_maps_names_matched.RData") # ---------------------------------------------------# -# Match taxonomic names in phylogeny #### +# Match taxonomic names in phylogeny #### # ---------------------------------------------------# phylo_names_unique = unique(forest$UNTITLED$tip.label) @@ -31,9 +31,8 @@ phylo_names_matched = lapply(phylo_names_unique, function(name){ save(phylo_names_matched, file = "data/r_objects/phylo_names_matched.RData") # ---------------------------------------------------# -# Calculate phylo distances #### +# Calculate phylo distances #### # ---------------------------------------------------# -# Calculate pairwise phylogenetic distances across a random sample 50 of forests # Trim forest to target species phylo_names_target = dplyr::filter(phylo_names_matched, name_matched %in% range_maps_names_matched$name_matched) @@ -45,6 +44,7 @@ forest_pruned = lapply(forest, function(x) { return(x_pruned) }) +# Calculate pairwise phylogenetic distances across a random sample 50 of forests class(forest_pruned) <- "multiPhylo" indices = sample(length(forest_pruned), 50) dists = lapply(indices, function(i){ diff --git a/R/01_04_raster_preparation.R b/R/01_04_raster_preparation.R index aa9e21078be7373c2b25a678aecb1ae298f19c3b..fdee82514b24160f791071984e675aa46394409b 100644 --- a/R/01_04_raster_preparation.R +++ b/R/01_04_raster_preparation.R @@ -1,13 +1,13 @@ library(tidyverse) library(terra) library(rnaturalearth) -library(furrr) +library(sf) # Define Study extent #### sa_polygon = rnaturalearth::ne_countries(scale = 10, returnclass = "sf") %>% dplyr::filter(continent == "South America") %>% sf::st_union() %>% - st_crop(xmin = -82, ymin = -56, xmax = -34, ymax = 13) + sf::st_crop(xmin = -82, ymin = -56, xmax = -34, ymax = 13) save(sa_polygon, file = "data/r_objects/sa_polygon.RData") diff --git a/R/02_01_presence_data_preparation.R b/R/02_01_presence_data_preparation.R index 7a2697e034900d92634da38b681ab03a7358d7de..48237cd672c0bbe419af1d6f8c60e3355fbe163a 100644 --- a/R/02_01_presence_data_preparation.R +++ b/R/02_01_presence_data_preparation.R @@ -14,11 +14,11 @@ library(DBI) source("R/utils.R") -con = db_connect() # Connection to Symobio -sf::sf_use_s2(use_s2 = FALSE) # Don't use spherical geometry +con = db_connect() # Connection to Symobio DB +# sf::sf_use_s2(use_s2 = FALSE) # Don't use spherical geometry # ---------------------------------------------------------------------------# -# Prepare Geodata #### +# Load Geodata #### # ---------------------------------------------------------------------------# load("data/r_objects/sa_polygon.RData") @@ -34,7 +34,7 @@ load("data/r_objects/range_maps.RData") target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)]) # Query species from Symobio -occs = tbl(con, "species_occurrences") %>% +occs = tbl(con, "species_occurrences") %>% # This line connects to the "species_occurrences" table in SymobioDB dplyr::filter(species %in% target_species) %>% dplyr::select(-year) %>% dplyr::distinct() %>% @@ -69,7 +69,7 @@ occs_final = occs_flagged %>% dplyr::select(species, longitude, latitude) # Extract environmental data -proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" +proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" # Equal Area projection for South America, created with https://projectionwizard.org/ raster_data = terra::project(raster_data, proj_string) occs_final = st_transform(occs_final, proj_string) diff --git a/R/03_03_model_data_finalization.R b/R/02_03_model_data_finalization.R similarity index 100% rename from R/03_03_model_data_finalization.R rename to R/02_03_model_data_finalization.R diff --git a/R/03_04_dataset_exploration.R b/R/02_04_dataset_exploration.R similarity index 95% rename from R/03_04_dataset_exploration.R rename to R/02_04_dataset_exploration.R index 53a627ac1929a6f054f614ac05707123e7aea6c8..01d3632a84ec560a532207a5fba7b7b7b6b60003 100644 --- a/R/03_04_dataset_exploration.R +++ b/R/02_04_dataset_exploration.R @@ -12,6 +12,7 @@ sa_polygon = sf::st_transform(sa_polygon, crs(model_data)) # ---------------------------------------# # Plot all presences in the dataset #### # ---------------------------------------# +# Prepare data data_extent = ext(model_data) presences = model_data %>% @@ -29,8 +30,10 @@ point_counts <- terra::rasterize( ) %>% terra::mask(vect(sa_polygon)) -length(which(values(point_counts) > 0)) # only 36858 raster cells contain at least 1 presence record +length(which(values(point_counts) > 0)) +# --> only 36858 raster cells contain at least 1 presence record +# Create plot ggplot() + tidyterra::geom_spatraster(data = point_counts, maxcell = 5e7) + scale_fill_gradientn( @@ -49,6 +52,7 @@ ggsave("coordinate_density.pdf", path = "plots/", device = "pdf", scale = 4) # ---------------------------------------# # Explore folds assignment #### # ---------------------------------------# +# Prepare data cv_plot(spatial_folds) ggsave(filename = "plots/publication/global_folds_map.pdf", device = "pdf", scale = 2.5) @@ -74,6 +78,7 @@ folds_uniform = folds_summary %>% ) %>% dplyr::ungroup() +# Create plot plot_1 = ggplot(folds_uniform, aes(x=folds_total)) + geom_histogram(binwidth=0.5, fill = "orange") + labs(title="Number of folds per species", x="Count", y = "Number of species") + diff --git a/R/04_01_modelling_ssdm.R b/R/03_01_modelling_ssdm.R similarity index 98% rename from R/04_01_modelling_ssdm.R rename to R/03_01_modelling_ssdm.R index a62cd3e0efecb194d26f89ceaab31c58caf033b4..d21855db04b10566a9a1e9f1ee4fdbb820327b67 100644 --- a/R/04_01_modelling_ssdm.R +++ b/R/03_01_modelling_ssdm.R @@ -24,7 +24,7 @@ data_split = model_data %>% # ----------------------------------------------------------------------# # Set up parallel processing #### # ----------------------------------------------------------------------# -num_cores <- 2 +num_cores <- 2 # Some packages use internal parallelization, so be careful with increasing this param cl <- makeCluster(num_cores) registerDoParallel(cl) diff --git a/R/04_02_modelling_msdm_embed.R b/R/03_02_modelling_msdm_embed.R similarity index 100% rename from R/04_02_modelling_msdm_embed.R rename to R/03_02_modelling_msdm_embed.R diff --git a/R/04_03_modelling_msdm_onehot.R b/R/03_03_modelling_msdm_onehot.R similarity index 100% rename from R/04_03_modelling_msdm_onehot.R rename to R/03_03_modelling_msdm_onehot.R diff --git a/R/04_04_modelling_msdm_rf.R b/R/03_04_modelling_msdm_rf.R similarity index 100% rename from R/04_04_modelling_msdm_rf.R rename to R/03_04_modelling_msdm_rf.R diff --git a/R/05_01_performance_report.qmd b/R/04_01_performance_report.qmd similarity index 100% rename from R/05_01_performance_report.qmd rename to R/04_01_performance_report.qmd diff --git a/R/05_02_publication_analysis.R b/R/04_02_publication_analysis.R similarity index 100% rename from R/05_02_publication_analysis.R rename to R/04_02_publication_analysis.R