From e3da5ed40a04a46b91552915633f8ce3c67a26ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=B6nig?= <ye87zine@usr.idiv.de> Date: Fri, 10 Jan 2025 14:34:00 +0100 Subject: [PATCH] improved implementation of absence sampling algorithnm --- R/03_02_absence_preparation.R | 86 ++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/R/03_02_absence_preparation.R b/R/03_02_absence_preparation.R index 2390512..914ddb7 100644 --- a/R/03_02_absence_preparation.R +++ b/R/03_02_absence_preparation.R @@ -49,15 +49,19 @@ range_maps = st_transform(range_maps, proj_string) # geographically close (reproduce sampling biases) but environmentally # # dissimilar (avoid false negatives) to the known occurrences # # ---------------------------------------------------------------------------# -future::plan("multisession", workers = 24) +future::plan("multisession", workers = 16) model_data = furrr::future_walk( .x = target_species, - .options = furrr::furrr_options(seed = 42), + .options = furrr::furrr_options(seed = 42, scheduling = FALSE), # make sure workers get constant stream of work .env_globals = c(raster_filepaths, sa_polygon, occs_final, range_maps, proj_string), .f = function(species){ # Prepare species occurrence data ##### occs_spec = dplyr::filter(occs_final, species == !!species) + if(nrow(occs_spec) <= 1){ + warning("Skipping species ", species, ": Can't process species with a single presence record." ) + return() + } occs_multipoint = occs_spec %>% summarise(geometry = st_combine(geometry)) @@ -85,7 +89,19 @@ model_data = furrr::future_walk( # Define spatial KDE ref = st_bbox(sampling_region)[c(1,3,2,4)] - spatial_kde = spatialEco::sf.kde(occs_spec, res = 10000, ref = ref, standardize = T, scale.factor = 1) %>% + + # Function to estimate bandwidth + # ---> this function is the default estimation of the sf.kde package but with a minimum of 50000 = 50km) + calc_bw <- function(x) { + r <- stats::quantile(x, c(0.25, 0.75)) + h <- (r[2] - r[1])/1.34 + result = 4 * 1.06 * min(sqrt(stats::var(x)), h) * length(x)^(-1/5) + return(max(result, 50000)) + } + + bw_x = calc_bw(st_coordinates(occs_spec)[,"X"]) + bw_y = calc_bw(st_coordinates(occs_spec)[,"Y"]) + spatial_kde = spatialEco::sf.kde(occs_spec, bw = c(bw_x, bw_y), res = 10000, ref = ref, standardize = T, scale.factor = 1) %>% crop(sampling_region, mask = T, touches = F) # Define reverse niche @@ -97,38 +113,43 @@ model_data = furrr::future_walk( env_kde <- (1-predict(env_sampling_region, maxent_fit)) %>% terra::rast() - # Rejection sampling of pseudo absences - abs_spec = st_sf(geometry = st_sfc(), crs = proj_string) + # Sample pseudo absences abs_prob = spatial_kde * env_kde - while(nrow(abs_spec) < nrow(occs_spec)){ - sample_points_raw = st_sample(sampling_region, nrow(occs_spec)) - p = na.omit(extract(abs_prob, vect(sample_points_raw))$z) - x = runif(length(p)) - - sample_points = sample_points_raw[x<p] - if(length(sample_points) == 0){ - next - } - - samples_required = nrow(occs_spec) - nrow(abs_spec) - if(length(sample_points) > samples_required){ - sample_points = sample(sample_points, samples_required) - } + abs_spec_list = list() + + samples_required = nrow(occs_spec) + + while(samples_required != 0){ + sample_points = spatSample(abs_prob, size = samples_required, method = "weights", xy = T, as.df = T) %>% + dplyr::select(-z) %>% + dplyr::mutate(x = x + runif(nrow(.), -5000, 5000), # Add jitter (res/2) to cell centroids + y = y + runif(nrow(.), -5000, 5000)) %>% # Add jitter (res/2) to cell centroids + st_as_sf(coords = c("x", "y"), crs = proj_string) - abs_env = terra::extract(raster_data, terra::vect(sample_points), ID = FALSE) %>% - dplyr::mutate( - present = 0, - geometry = st_geometry(sample_points) - ) %>% + sample_abs = bind_cols( + terra::extract(raster_data, terra::vect(sample_points), ID = FALSE) + ) %>% + bind_cols(sample_points) %>% drop_na() - abs_spec = bind_rows(abs_spec, abs_env) + abs_spec_list[[length(abs_spec_list)+1]] = sample_points + + samples_required = samples_required - nrow(sample_points) # Sometimes there are no env data for sample points, so keep sampling } - abs_coords = st_coordinates(st_transform(abs_spec, "EPSG:4326")) - abs_spec = bind_cols(abs_spec, abs_coords) %>% - dplyr::rename(longitude = X, latitude = Y) %>% - dplyr::mutate(species = !!species) + abs_spec = bind_rows(abs_spec_list) + abs_spec = abs_spec %>% + bind_cols( + st_coordinates(st_transform(abs_spec, "EPSG:4326")) + ) %>% + dplyr::rename( + longitude = X, + latitude = Y + ) %>% + dplyr::mutate( + species = !!species, + present = 0 + ) # Create presence-absence dataframe pa_spec = occs_spec %>% @@ -167,15 +188,16 @@ model_data = furrr::future_walk( NA }) - pa_spec$fold = folds + pa_spec$fold_eval = folds pa_spec$geometry = NULL save(pa_spec, file = paste0("data/r_objects/pa_sampling/", species, ".RData")) - }) + } +) # Combine presence-absence data files = list.files("data/r_objects/pa_sampling/", full.names = T) model_data = lapply(files, function(f){load(f); return(pa_spec)}) %>% bind_rows() -save(model_data, file = "data/r_objects/pa_sampling/model_data.RData") +save(model_data, file = "data/r_objects/model_data_pa_sampling.RData") -- GitLab