# Builds the cleaned occurrence table used for modeling:
#   1. load the study-area polygon and the environmental raster stack,
#   2. query occurrences of the target species from the Symobio database,
#   3. flag suspicious coordinates with CoordinateCleaner,
#   4. extract raster values at the validated points,
#   5. save the result to data/r_objects/occs_final.RData.

# General packages
library(dplyr)
library(tidyr)
library(furrr)

# Geo packages
library(terra)
library(CoordinateCleaner)
library(sf)

# DB packages
library(Symobio)
library(DBI)

source("R/utils.R")

con = db_connect() # Connection to Symobio
sf::sf_use_s2(use_s2 = FALSE) # Don't use spherical geometry

# ---------------------------------------------------------------------------#
# Prepare Geodata                                                         ####
# ---------------------------------------------------------------------------#
# Expected to provide `sa_polygon` (used below as the spatial filter).
load("data/r_objects/sa_polygon.RData")

# Natural (numeric-aware) sort so that e.g. "layer_2" comes before "layer_10".
raster_filepaths = list.files(
  "~/symobio-modeling/data/geospatial/raster/",
  full.names = TRUE
) %>%
  stringr::str_sort(numeric = TRUE)

raster_data = terra::rast(raster_filepaths)

# ---------------------------------------------------------------------------#
# Prepare Occurrence Data                                                 ####
# ---------------------------------------------------------------------------#
# Expected to provide `range_maps` with a `name_matched` column.
load("data/r_objects/range_maps.RData")

target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])

# Query species from Symobio
# Filtering, column removal and de-duplication happen before collect(), i.e.
# on the remote table; only the reduced result is pulled into memory.
occs = tbl(con, "species_occurrences") %>%
  dplyr::filter(species %in% target_species) %>%
  dplyr::select(-year) %>%
  dplyr::distinct() %>%
  collect() %>%
  sf::st_as_sf(
    coords = c("longitude", "latitude"),
    remove = FALSE,
    crs = sf::st_crs(4326)
  ) %>%
  sf::st_filter(sa_polygon)

# The connection is not used after this point, so release it instead of
# leaving it open until the R session ends. Guarded because db_connect() is
# defined outside this file and its return type cannot be verified here.
if (methods::is(con, "DBIConnection")) {
  DBI::dbDisconnect(con)
}

# Flag occurrences using Coordinate cleaner
occs_flagged = occs %>%
  dplyr::mutate(
    # Round first so near-identical coordinates collapse in distinct() below
    longitude = round(longitude, 3),
    latitude = round(latitude, 3)
  ) %>%
  sf::st_drop_geometry() %>%
  group_by(species) %>%
  dplyr::distinct(longitude, latitude) %>% # grouping column `species` is kept
  group_split() %>%
  purrr::map( # Loop over species individually due to bug in CoordinateCleaner
    CoordinateCleaner::clean_coordinates,
    lon = "longitude",
    lat = "latitude",
    tests = c("centroids", "gbif", "equal", "institutions", "outliers"),
    outliers_method = "quantile",
    verbose = FALSE
  ) %>%
  bind_rows()

# Subset species occurrences to validated records
# `.summary` is the overall flag added by clean_coordinates(); per the
# CoordinateCleaner documentation it is TRUE for records passing all tests.
occs_final = occs_flagged %>%
  dplyr::filter(.summary) %>%
  sf::st_as_sf(
    coords = c("longitude", "latitude"),
    remove = FALSE,
    crs = sf::st_crs(4326)
  ) %>%
  dplyr::select(species, longitude, latitude)

# Extract environmental data
# Rasters and points are both moved to the same projected CRS before
# extraction; `longitude`/`latitude` columns keep the original WGS84 values.
proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs"
raster_data = terra::project(raster_data, proj_string)
occs_final = sf::st_transform(occs_final, proj_string)

# Namespaced explicitly: tidyr also exports extract(), so the unqualified
# call only worked because terra happens to be attached after tidyr.
env_data = terra::extract(raster_data, terra::vect(occs_final), ID = FALSE)

# Merge data + final processing
# NOTE(review): the saved object remains grouped by `species`, exactly as in
# the original script. Confirm whether downstream code relies on this before
# adding an ungroup().
occs_final = bind_cols(occs_final, env_data) %>%
  drop_na() %>% # Remove records with NA env vars
  group_by(species) %>%
  distinct() # Remove duplicate records

# Save occurrences
save(occs_final, file = "data/r_objects/occs_final.RData")