Skip to content
Snippets Groups Projects
Commit e3da5ed4 authored by ye87zine's avatar ye87zine
Browse files

improved implementation of absence sampling algorithnm

parent a72dfbf8
No related branches found
No related tags found
1 merge request!1Presence absence sampling
......@@ -49,15 +49,19 @@ range_maps = st_transform(range_maps, proj_string)
# geographically close (reproduce sampling biases) but environmentally #
# dissimilar (avoid false negatives) to the known occurrences #
# ---------------------------------------------------------------------------#
future::plan("multisession", workers = 24)
future::plan("multisession", workers = 16)
model_data = furrr::future_walk(
.x = target_species,
.options = furrr::furrr_options(seed = 42),
.options = furrr::furrr_options(seed = 42, scheduling = FALSE), # make sure workers get constant stream of work
.env_globals = c(raster_filepaths, sa_polygon, occs_final, range_maps, proj_string),
.f = function(species){
# Prepare species occurrence data #####
occs_spec = dplyr::filter(occs_final, species == !!species)
if(nrow(occs_spec) <= 1){
warning("Skipping species ", species, ": Can't process species with a single presence record." )
return()
}
occs_multipoint = occs_spec %>%
summarise(geometry = st_combine(geometry))
......@@ -85,7 +89,19 @@ model_data = furrr::future_walk(
# Define spatial KDE
ref = st_bbox(sampling_region)[c(1,3,2,4)]
spatial_kde = spatialEco::sf.kde(occs_spec, res = 10000, ref = ref, standardize = T, scale.factor = 1) %>%
# Function to estimate bandwidth
# ---> this function is the default estimation of the sf.kde package but with a minimum of 50000 = 50km)
calc_bw <- function(x) {
r <- stats::quantile(x, c(0.25, 0.75))
h <- (r[2] - r[1])/1.34
result = 4 * 1.06 * min(sqrt(stats::var(x)), h) * length(x)^(-1/5)
return(max(result, 50000))
}
bw_x = calc_bw(st_coordinates(occs_spec)[,"X"])
bw_y = calc_bw(st_coordinates(occs_spec)[,"Y"])
spatial_kde = spatialEco::sf.kde(occs_spec, bw = c(bw_x, bw_y), res = 10000, ref = ref, standardize = T, scale.factor = 1) %>%
crop(sampling_region, mask = T, touches = F)
# Define reverse niche
......@@ -97,38 +113,43 @@ model_data = furrr::future_walk(
env_kde <- (1-predict(env_sampling_region, maxent_fit)) %>%
terra::rast()
# Rejection sampling of pseudo absences
abs_spec = st_sf(geometry = st_sfc(), crs = proj_string)
# Sample pseudo absences
abs_prob = spatial_kde * env_kde
while(nrow(abs_spec) < nrow(occs_spec)){
sample_points_raw = st_sample(sampling_region, nrow(occs_spec))
p = na.omit(extract(abs_prob, vect(sample_points_raw))$z)
x = runif(length(p))
sample_points = sample_points_raw[x<p]
if(length(sample_points) == 0){
next
}
samples_required = nrow(occs_spec) - nrow(abs_spec)
if(length(sample_points) > samples_required){
sample_points = sample(sample_points, samples_required)
}
abs_spec_list = list()
samples_required = nrow(occs_spec)
while(samples_required != 0){
sample_points = spatSample(abs_prob, size = samples_required, method = "weights", xy = T, as.df = T) %>%
dplyr::select(-z) %>%
dplyr::mutate(x = x + runif(nrow(.), -5000, 5000), # Add jitter (res/2) to cell centroids
y = y + runif(nrow(.), -5000, 5000)) %>% # Add jitter (res/2) to cell centroids
st_as_sf(coords = c("x", "y"), crs = proj_string)
abs_env = terra::extract(raster_data, terra::vect(sample_points), ID = FALSE) %>%
dplyr::mutate(
present = 0,
geometry = st_geometry(sample_points)
) %>%
sample_abs = bind_cols(
terra::extract(raster_data, terra::vect(sample_points), ID = FALSE)
) %>%
bind_cols(sample_points) %>%
drop_na()
abs_spec = bind_rows(abs_spec, abs_env)
abs_spec_list[[length(abs_spec_list)+1]] = sample_points
samples_required = samples_required - nrow(sample_points) # Sometimes there are no env data for sample points, so keep sampling
}
abs_coords = st_coordinates(st_transform(abs_spec, "EPSG:4326"))
abs_spec = bind_cols(abs_spec, abs_coords) %>%
dplyr::rename(longitude = X, latitude = Y) %>%
dplyr::mutate(species = !!species)
abs_spec = bind_rows(abs_spec_list)
abs_spec = abs_spec %>%
bind_cols(
st_coordinates(st_transform(abs_spec, "EPSG:4326"))
) %>%
dplyr::rename(
longitude = X,
latitude = Y
) %>%
dplyr::mutate(
species = !!species,
present = 0
)
# Create presence-absence dataframe
pa_spec = occs_spec %>%
......@@ -167,15 +188,16 @@ model_data = furrr::future_walk(
NA
})
pa_spec$fold = folds
pa_spec$fold_eval = folds
pa_spec$geometry = NULL
save(pa_spec, file = paste0("data/r_objects/pa_sampling/", species, ".RData"))
})
}
)
# Combine presence-absence data
files = list.files("data/r_objects/pa_sampling/", full.names = T)
model_data = lapply(files, function(f){load(f); return(pa_spec)}) %>%
bind_rows()
save(model_data, file = "data/r_objects/pa_sampling/model_data.RData")
save(model_data, file = "data/r_objects/model_data_pa_sampling.RData")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment