# General packages
library(dplyr)
library(tidyr)
library(furrr)

# Geo packages
library(terra)
library(CoordinateCleaner)
library(sf)

# DB packages
library(Symobio)
library(DBI)

source("R/utils.R")

# Open the connection to the Symobio database (db_connect() is not defined in
# this script; it comes from one of the attached packages or R/utils.R)
con = db_connect()

# Turn off s2 so that sf does not use spherical geometry
sf::sf_use_s2(FALSE)

# ---------------------------------------------------------------------------#
# Prepare Geodata                                                         ####
# ---------------------------------------------------------------------------#
# Load the study-area polygon; the .RData file is expected to provide
# `sa_polygon`, which is used further down to spatially filter occurrences.
load("data/r_objects/sa_polygon.RData")

# List the raster files and order them with a numeric-aware sort, so that
# numbered file names are ordered by value rather than character by character.
raster_filepaths = list.files(
  "~/symobio-modeling/data/geospatial/raster/",
  full.names = TRUE
) %>% 
  stringr::str_sort(numeric = TRUE)

# Fail early with a clear message if the directory is missing or empty
if (length(raster_filepaths) == 0) {
  stop(
    "No raster files found in ~/symobio-modeling/data/geospatial/raster/",
    call. = FALSE
  )
}

# Read all files into one multi-layer SpatRaster
raster_data = terra::rast(raster_filepaths)

# ---------------------------------------------------------------------------#
# Prepare Occurrence Data                                                 ####
# ---------------------------------------------------------------------------#
# Load the range maps; the .RData file is expected to provide `range_maps`
load("data/r_objects/range_maps.RData")

# Species of interest: every distinct, non-missing matched name
target_species = unique(range_maps$name_matched[!is.na(range_maps$name_matched)])

# Query species from Symobio
# Filtering, dropping `year` and de-duplication happen before collect(), i.e.
# on the lazy table, so only the needed rows are pulled into memory.
# NOTE(review): `con` is not closed anywhere in the visible script - consider
# disconnecting once the data is collected, if nothing later needs it.
occs = tbl(con, "species_occurrences") %>% 
  dplyr::filter(
    species %in% target_species,
    # st_as_sf() aborts on missing coordinates, so drop those rows up front
    !is.na(longitude),
    !is.na(latitude)
  ) %>% 
  dplyr::select(-year) %>% 
  dplyr::distinct() %>% 
  collect() %>% 
  # Keep the coordinate columns (remove = FALSE); they are reused for cleaning
  sf::st_as_sf(coords = c("longitude", "latitude"), remove = FALSE, crs = sf::st_crs(4326)) %>% 
  # Restrict to points that fall within the study-area polygon
  sf::st_filter(sa_polygon)

# Flag occurrences using Coordinate cleaner
# Coordinates are rounded to 3 decimals and de-duplicated per species before
# the tests run. The result has one row per unique (species, longitude,
# latitude) combination plus the flag columns added by clean_coordinates().
occs_flagged = occs %>% 
  dplyr::mutate(
    longitude = round(longitude, 3),
    latitude = round(latitude, 3)
  ) %>% 
  sf::st_drop_geometry() %>% 
  dplyr::group_by(species) %>% 
  dplyr::distinct(longitude, latitude) %>% 
  dplyr::group_split() %>% 
  # Loop over species individually due to bug in CoordinateCleaner
  purrr::map(function(species_occs) {
    CoordinateCleaner::clean_coordinates(
      species_occs,
      lon = "longitude",
      lat = "latitude",
      tests = c("centroids", "gbif", "equal", "institutions", "outliers"),
      outliers_method = "quantile",
      verbose = FALSE
    )
  }) %>% 
  dplyr::bind_rows()

# Subset species occurrences to validated records
# `.summary` is the overall flag column produced by clean_coordinates(); only
# rows where it is TRUE are kept (rows with NA are dropped by filter()).
occs_final = occs_flagged %>% 
  dplyr::filter(.summary) %>% 
  # Rebuild point geometries from the rounded coordinates (WGS84, EPSG:4326)
  sf::st_as_sf(coords = c("longitude", "latitude"), remove = FALSE, crs = sf::st_crs(4326)) %>% 
  # Drop the flag columns; the sf geometry column is retained automatically
  dplyr::select(species, longitude, latitude)

# Extract environmental data
# Rasters and occurrence points are both brought into the same projected CRS
# before the raster values are sampled at the point locations.
proj_string = "+proj=tcea +lon_0=-58.0704167 +datum=WGS84 +units=m +no_defs" 

raster_data = terra::project(raster_data, proj_string)
occs_final = sf::st_transform(occs_final, proj_string)

# Namespace-qualified on purpose: tidyr also exports extract(), so a bare call
# would depend on the order in which the packages were attached.
# ID = FALSE omits the ID column, leaving one column per raster layer.
env_data = terra::extract(raster_data, terra::vect(occs_final), ID = FALSE)

# Merge data + final processing
# env_data has one row per occurrence in the same order as occs_final, so the
# two are combined column-wise by position. The result stays grouped by
# species, since no ungroup() is applied.
occs_final = occs_final %>% 
  dplyr::bind_cols(env_data) %>% 
  tidyr::drop_na() %>%           # Drop rows with an NA in any column (e.g. no raster value)
  dplyr::group_by(species) %>% 
  dplyr::distinct()              # Remove duplicate records

# Save occurrences
save(list = "occs_final", file = "data/r_objects/occs_final.RData")