From 136cdf3a0635a4c0f280a9d50fef1cf535969eb5 Mon Sep 17 00:00:00 2001 From: Francesco Sabatini <francesco.sabatini@idiv.de> Date: Mon, 2 Mar 2020 17:56:38 +0100 Subject: [PATCH] Cleaned up 04_buildHeader --- code/04_buildHeader.Rmd | 656 ++++++++++++++++++++++++---------------- 1 file changed, 393 insertions(+), 263 deletions(-) diff --git a/code/04_buildHeader.Rmd b/code/04_buildHeader.Rmd index f65feaf..9e7e932 100644 --- a/code/04_buildHeader.Rmd +++ b/code/04_buildHeader.Rmd @@ -26,8 +26,11 @@ This report documents the construction of the header file for sPlot 3.0. It is b ```{r results="hide", message=F, warning=F} knitr::opts_chunk$set(echo = TRUE) library(tidyverse) +library(viridis) library(readr) library(xlsx) +library(knitr) +library(kableExtra) ## Spatial packages library(rgdal) @@ -35,6 +38,10 @@ library(sp) library(rgeos) library(raster) library(rworldmap) +library(elevatr) +library(sf) +library(rnaturalearth) +library(dggridR) #save temporary files write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron')) @@ -42,6 +49,7 @@ write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_ rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp") ``` + ```{bash, eval=F} # escape all double quotation marks. Run in Linux terminal #sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv @@ -53,7 +61,7 @@ rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp") ``` - +## 1 Import data Import header data ```{r import} header0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_header_test.csv", locale = locale(encoding = 'UTF-8'), @@ -111,7 +119,8 @@ header0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_header_test.csv", )) %>% rename(Sparse.vegetation=`Sparse vegetation`, ESY=EUNIS) %>% - dplyr::select(-COMMUNITY, -ALLIAN_REV, -REV_AUTHOR, -SUBSTRATE) #too sparse information to be useful + dplyr::select(-COMMUNITY, -ALLIAN_REV, -REV_AUTHOR, -SUBSTRATE) %>% #too sparse information to be useful + dplyr::select(-PlotID) #identical to PlotObservationID ``` The following column names occurred in the header of sPlot v2.1 and are currently missing from the header of v3.0 @@ -130,7 +139,7 @@ The following column names occurred in the header of sPlot v2.1 and are currentl Columns #1, #2, #3, #10, #11 will be dropped. The others will be derived below. -## 1 Solve spatial problems +### 1.1 Solve spatial problems There are 2020 plots in the Nile dataset without spatial coordinates. Assign manually with wide (90km) location uncertainty. ```{r} header <- header0 %>% @@ -151,19 +160,19 @@ toswap <- c(which(header$Dataset=="Japan" & header$Latitude>90), header[toswap, c("Latitude", "Longitude")] <- header[toswap, c("Longitude", "Latitude")] ``` -There are `r nrow(header %>% filter(is.na(`Location uncertainty (m)`)))`. As a first approximation, we assign the median of the respective dataset, as a negative value to indicate this is an estimation, rather than a measure. +There are `r nrow(header %>% filter(is.na("Location uncertainty (m)")))`. As a first approximation, we assign the median of the respective dataset, as a negative value to indicate this is an estimation, rather than a measure. ```{r} header <- header %>% left_join(header %>% group_by(Dataset) %>% summarize(loc.uncer.median=median(`Location uncertainty (m)`, na.rm=T)), by="Dataset") %>% - mutate(`Location uncertainty (m)`=ifelse( is.na(`Location uncertainty (m)`), + mutate(`Location uncertainty (m)`=ifelse( is.na(`Location uncertainty (m)` & !is.na(Latitude)), -abs(loc.uncer.median), `Location uncertainty (m)`)) %>% dplyr::select(-loc.uncer.median) ``` -There are still `r nrow(header %>% filter(is.na(`Location uncertainty (m)`)))` plots with no estimation of location uncertainty. +There are still `r nrow(header %>% filter(is.na("Location uncertainty (m)")))` plots with no estimation of location uncertainty. \newline Assign plot size to plots in the Patagonia dataset (input of Ana Cingolani) ```{r} @@ -173,12 +182,11 @@ header <- header %>% ``` ## 2 Formations -Fill out the columns `Forest:Sparse.vegetation` with 0\\NAs, where necessary. Create columns `is.forest` and `is.non.forest` using script developed for sPlot 2.1 +Fill out the columns `Forest:Sparse.vegetation` with NAs, where necessary. Create columns `is.forest` and `is.non.forest` using script developed for sPlot 2.1 ~~ I am not assigning plots to Faber-Langedon formation at this stage, as this is only possible for European plots having an ESY classification. - ```{r} eunis.key <- read.xlsx("../_input/EUNIS_WFT.xlsx", sheetIndex = "Sheet1", endRow = 246) %>% dplyr::select(EUNIS_code, NATURALNESS:SPARSE_VEG) %>% @@ -193,8 +201,6 @@ eunis.key <- read.xlsx("../_input/EUNIS_WFT.xlsx", sheetIndex = "Sheet1", endRow header.backup <- header -#test <- header.backup %>% slice(100000:110000) %>% dplyr::select(PlotObservationID, Dataset, ESY, Forest:Shrubland) - header <- header %>% # header.backup %>% mutate(ESY=as.character(ESY)) %>% #mutate(ESY=ifelse(ESY=="?", NA, ESY)) %>% @@ -249,7 +255,7 @@ header <- header %>% mutate(`Plants recorded`=factor(`Plants recorded`, exclude = "#N/A")) ``` Align labels consortium -```{r} +```{r message=F} databases <- read_csv("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv") header <- header %>% @@ -338,7 +344,7 @@ continent.out$continent[which(is.na(continent.out$continent))] <- as.character(c save(continent.out, file = "../_derived/continent.out") stopCluster(cl) ``` -Reload and manipulate continent. Correct header.sRoot +Reload, manipulate continent and attach to header ```{r} load("../_derived/continent.out") header <- header %>% @@ -354,97 +360,52 @@ header <- header %>% ``` ## 4.2 Assign to sBiomes -```{r, eval=F} +Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks. +```{r eval=F} sBiomes <- readOGR("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp") crs(sBiomes) <- crs(header.shp) -library(parallel) -library(doParallel) -ncores=10 -cl <- makeForkCluster(ncores, outfile="" ) -registerDoParallel(cl) - -#divide in chunks #should take around 40' -indices <- 1:length(header.shp) -chunks <- split(indices, sort(indices%%99)) -chunkn <- length(chunks) -sBiomes.out <- foreach(i=1:chunkn, .packages=c('raster'), .combine=rbind) %dopar% { - sBiomes.tmp <- sp::over(x=header.shp[chunks[[i]],], y=sBiomes) -} -sum(is.na(sBiomes.out$Name)) -sBiomes.out.backup <- sBiomes.out -stopCluster(cl) -``` -There are `r sum(is.na(sBiomes.out$Name))` still to assign. Using the `dist2Line` is not feasible due to long computing times (3' for each plot!). Fall back on raster version of sBiomes (res 0.1°). - -```{r} -load("../_derived/sBiome.out.RData") -sBiomes01 <- raster("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiome_raster01/sBiomes_raster01.tif") -## Fix NAs -toassign <- header.shp[which(is.na(sBiomes.out$Name)),] #116878 not assigned (!) -crs(sBiomes01) <- crs(toassign) - -system.time(sBiomes.out2 <- extract(sBiomes01, toassign)) - - -toassign3 <- toassign[which(sBiomes.out2==0),] #91793 not assigned (!) -robust.mode <- function(x){if(any(x!=0)) { - a <- x[which(x!=0)] #exclude zero (i.e. NAs) - return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else - return(NA) - } - -ncores=10 -cl <- makeForkCluster(ncores, outfile="" ) -registerDoParallel(cl) -sBiomes.out3 <- foreach(i=1:length(toassign3), .packages=c('raster'), .combine=rbind) %dopar% { - sBiomes.out.tmp <- extract(sBiomes01, toassign3[i,], buffer=10000, fun=robust.mode) -} - -stopCluster(cl) -save(sBiomes.out, sBiomes.out2, sBiomes.out3, file = "../_derived/sBiome.out.RData") - - -toassign4 <- toassign3[which(is.na(sBiomes.out3)),] ##37887 still to classify -ncores=10 -cl <- makeForkCluster(ncores, outfile="" ) +cl <- makeForkCluster(5, outfile="") registerDoParallel(cl) -sBiomes.out4 <- foreach(i=1:length(toassign4), .packages=c('raster'), .combine=rbind) %dopar% { - sBiomes.out.tmp <- extract(sBiomes01, toassign4[i,], buffer=50000, fun=robust.mode) -} - -stopCluster(cl) -save(sBiomes.out, sBiomes.out2, sBiomes.out3, sBiomes.out4, file = "../_derived/sBiome.out.RData") +clusterEvalQ(cl, { + library(rgdal) + library(raster) + library(sp) + library(elevatr) + library(dplyr) + }) -toassign5 <- toassign4[which(is.na(sBiomes.out4)),] #4158 still to assign -nearestBiome <- foreach(i=1:1000, .packages=c('raster'), .combine=rbind) %dopar% { - nearestBiome.tmp <- geosphere::dist2Line(toassign[i,], sBiomes) +sBiomes.out <- foreach(i=1:99, .combine=rbind) %dopar% { + source("A98_PredictorsExtract.R") + PredExtr(header.shp, myfunction=NA, output="../_derived/sBiomes/", + toextract=sBiomes, typp="shp", ncores=1, chunkn=99, chunk.i=i) } -sBiomes.out[which(is.na(sBiomes.out$Name))] <- as.character(sBiomes@data[nearestBiome[,"ID"],]) -sum(is.na(sBiomes.out$Name)) -save(sBiomes.out, file = "../_derived/sBiome.out.RData") - stopCluster(cl) - ``` - Reimport output and join to header -```{r} - +```{r, message=F, warning=F} +sBiome.files <- list.files("../_derived/sBiomes", pattern="sBiomes-[0-9]+.csv", full.names=T) +sBiomes.out <- do.call(rbind, lapply(sBiome.files, function(x) {read_csv(x)})) header <- header %>% - bind_rows(sBiomes.out %>% - dplyr::select(Name, BiomeID) %>% - dplyr::rename(sBioem=Name, s)) - + left_join(sBiomes.out %>% + rename(PlotObservationID=X1) %>% + dplyr::select(PlotObservationID, Name, BiomeID) %>% + dplyr::rename(sBiome=Name, sBiomeID=BiomeID), + by="PlotObservationID") ``` +There are `r sum(is.na(sBiomes.out$Name))` unassigned plots. + ## 4.3 Extract elevation -Split data into tiles of 1 x 1 degrees, and create sp::SpatialPointsDataFrame files. Only for plots having a location uncertainty < 50 km. (Include also plots without location uncertainty, arbitrarily set to 100 m) + +Extract elevation for each plot. Loops over tiles of 1 x 1°, projects to mercator, and extract elevation for plot coordinates, as well as 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package [elevatr](https://cran.r-project.org/web/packages/elevatr/vignettes/introduction_to_elevatr.html#get_raster_elevation_data), which uses the [Terrain Tiles on Amazon Web Services](https://registry.opendata.aws/terrain-tiles/). Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described [here](https://github.com/tilezen/joerd/blob/master/docs/data-sources.md). +\newline +Split data into tiles of 1 x 1 degrees, and create `sp::SpatialPointsDataFrame` files. Only for plots having a location uncertainty < 50 km. ```{r create tiles} header.tiles <- header %>% dplyr::select(PlotObservationID, Dataset, Longitude, Latitude, `Location uncertainty (m)`) %>% @@ -456,20 +417,8 @@ header.tiles <- header %>% mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile))) ``` -There are `r nrow(header.tiles)` plots out of `r nrow(header)` plots with Location uncertainty <= 50km (or absent). - - - -Extract elevation for each plot. Loops over tiles of 1 x 1°, projects to mercator, and extract elevation for plot coordinates, as well as 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package [elevatr](https://cran.r-project.org/web/packages/elevatr/vignettes/introduction_to_elevatr.html#get_raster_elevation_data), which uses the [Terrain Tiles on Amazon Web Services](https://registry.opendata.aws/terrain-tiles/). Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described [here](https://github.com/tilezen/joerd/blob/master/docs/data-sources.md) - -```{r message==F} -library(elevatr) - -continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m -+nadgrids=@null +no_defs")) - -require(parallel) -require(doParallel) +There are `r nrow(header.tiles)` plots out of `r nrow(header)` plots with Location uncertainty <= 50km (or absent). The total number of tiles is `r nlevels(header.tiles$tilenam)`. +```{r eval=F} cl <- makeForkCluster(5, outfile="") registerDoParallel(cl) @@ -481,75 +430,15 @@ clusterEvalQ(cl, { library(dplyr) }) -#create list of tiles for which dem could not be extracted -myfiles <- list.files("../_derived/elevatr/", pattern = "[A-Za-z]*_[0-9]+\\.RData$") -done <- NULL -done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles)))) -todo <- 1:nlevels(header.tiles$tilenam) -todo <- todo[-which(todo %in% done)] -#todo <- sample(todo, replace=F) -#foreach(i = 1:nlevels(header.sp$tilenam)) %do% { -#foreach(i = todo) %dopar% { -for(i in todo){ - #create sp and project data - if(nrow(header.tiles %>% - filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next() - sp.tile <- SpatialPointsDataFrame(coords=header.tiles %>% - filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% - dplyr::select(Longitude, Latitude), - data=header.tiles %>% - filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% - dplyr::select(-Longitude, -Latitude), - proj4string = CRS("+init=epsg:4326")) - sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 - +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null - +no_defs ")) #project to mercator - - #retrieve dem raster - tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$`Location uncertainty (m)`), clip="bbox"), - error = function(e){e} - ) - if(!exists("raster.tile")) { - raster.tile <- NA - save(raster.tile, file = paste("../_derived/elevatr/elevation_tile_", i, "failed.RData", sep="")) - message(paste("tile", i, "doesn't work!, skip to next")) - rm(raster.tile) - next - } - # clip dem tile with continent shape - raster.tile <- mask(raster.tile, continent.high.merc) - - #extract and summarize elevation data - elev.tile <- raster::extract(raster.tile, sp.tile, small=T) - elev.tile.buffer <- raster::extract(raster.tile, sp.tile, - buffer=sp.tile@data$`Location uncertainty (m)`, - small=T) - tmp <- round(mapply( quantile, - x=elev.tile.buffer, - #center=elev.tile, - probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), - #loc.uncert=sp.tile$`Location uncertainty (m)`, - na.rm=T)) - elev.q95 <- setNames(data.frame(matrix(tmp, ncol = 3, nrow = length(elev.tile.buffer))), - c("Elevation_q2.5", "Elevation_median", "Elevation_q97.5")) - output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID, - elevation=round(elev.tile), - elev.q95, - DEM.res=res(raster.tile)[1]) - - #save output - save(output.tile, file = paste("../_derived/elevatr/elevation_tile_", i, ".RData", sep="")) - print(i) -} +# Divided in 99 chunks +elevation.out <- foreach(i=1:99, .combine=rbind) %dopar% { + source("A97_ElevationExtract.R") + ElevationExtract(header.shp, output, ncores=1, chunk.i=1)} stopCluster(cl) - ``` - -###### TO CHECK BELOW HERE - For those tiles that failed, extract elevation of remaining plots one by one -```{r} +```{r, eval=F} #create list of tiles for which dem could not be extracted myfiles <- list.files("../_derived/elevatr/") failed <- list.files("../_derived/elevatr/", pattern = "[A-Za-z]*_[0-9]+failed\\.RData$") @@ -567,20 +456,36 @@ sp.tile0 <- spTransform(sp.tile0, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=63 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +no_defs ")) #project to mercator output.tile <- data.frame(NULL) + +cl <- makeForkCluster(5, outfile="") +registerDoParallel(cl) + +clusterEvalQ(cl, { + library(rgdal) + library(raster) + library(sp) + library(elevatr) + library(dplyr) + }) + #Loop over all plots -for(i in 1:10){#nrow(sp.tile0)){ +#for(i in nrow(sp.tile0)){ +elevation.failed <- foreach(i=1:nrow(sp.tile0), .packages=c('raster'), .combine=rbind) %dopar% { sp.tile <- sp.tile0[i,] tryCatch(raster.tile <- get_elev_raster(sp.tile, z=10, expand=max(sp.tile$`Location uncertainty (m)`)), - error = function(e){#bind_rows(output.tile, - # data.frame(PlotObservationID=sp.tile$PlotObservationID, - # elevation=NA, - # Elevation_q2.5=NA, - # Elevation_median=NA, - # Elevation_q97.5=NA, - # DEM.res=NA)) + error = function(e){ print(paste("could not retrieve DEM for", sp.tile$PlotObservationID))} ) + if(!exists("raster.tile")) { + output.tile <- data.frame(PlotObservationID==sp.tile$PlotObservationID, + elevation=NA, + Elevation_q2.5=NA, + Elevation_median=NA, + Elevation_q97.5=NA, + DEM.res=NA) + return(output.tile) + } else { # clip dem tile with continent shape raster.tile <- mask(raster.tile, continent.high.merc) @@ -595,63 +500,100 @@ for(i in 1:10){#nrow(sp.tile0)){ probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), #loc.uncert=sp.tile$`Location uncertainty (m)`, na.rm=T))) - output.tile <- bind_rows(output.tile, - data.frame(PlotObservationID=sp.tile$PlotObservationID, + output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID, elevation=round(elev.tile), elev.q95, DEM.res=res(raster.tile)[1]) %>% - rename(Elevation_q2.5=X2.5., Elevation_median=X50., Elevation_q97.5=X97.5.)) + rename(Elevation_q2.5=X2.5., Elevation_median=X50., Elevation_q97.5=X97.5.) + return(output.tile) + } } -save(output.tile, file = paste("../_derived/elevatr/elevation_tile_", 0, ".RData", sep="")) +stopCluster(cl) +save(elevation.failed, file = "../_derived/elevatr/elevation_missing.RData") ``` - Compose tiles into a single output, and export -```{r } -myfiles <- list.files(path) -myfiles <- myfiles[grep(pattern="*.RData$", myfiles)] +```{r eval=F} +myfiles <- list.files("../_derived/elevatr/", pattern = "elevation_tile_[0-9]+\\.RData$", full.names = T) + #create empty data.frame -elevation.out <- data.frame(PlotObservationID=header$PlotObservationID, - elevation=NA, - Elevation_q2.5=NA, - Elevation_median=NA, - Elevation_q97.5=NA, - DEM.res=NA) +elevation.out <- matrix(NA, nrow=nrow(header.tiles), ncol=6) +elevation.out <- as.data.frame(elevation.out) +colnames(elevation.out) <- c("PlotObservationID", "elevation", "Elevation_q2.5", "Elevation_median", "Elevation_q97.5","DEM.res") +elevation.out$PlotObservationID <- header.tiles$PlotObservationID + +tmp <- NULL for(i in 1:length(myfiles)){ - load(paste(path, myfiles[i], sep="")) + load(myfiles[i]) #attach results to empty data.frame - mymatch <- match(output.tile$PlotObservationID, elevation.out$PlotObservationID) - elevation.out[mymatch,] <- output.tile - if(i %in% seq(1,length(myfiles), by=25)){print(i)} + tmp <- bind_rows(tmp, output.tile) + if(i %in% seq(5000, length(myfiles), by=5000)){ + mymatch <- base::match(x=tmp$PlotObservationID, table=elevation.out$PlotObservationID) + mymatch <- mymatch[!is.na(mymatch)] + elevation.out[mymatch,] <- tmp + tmp <- NULL + print(paste("Attached first", i, "files")) + } + if(i %in% seq(1,length(myfiles), by=250)){print(i)} } -write_csv(elevation.out, path ="../_derived/elevatr/Elevation_out_v301.csv") +load(file = "../_derived/elevatr/elevation_missing.RData") + +mymatch <- base::match(x=elevation.failed$PlotObservationID, table=elevation.out$PlotObservationID) +mymatch <- mymatch[!is.na(mymatch)] +elevation.out[mymatch,] <- elevation.failed +write_csv(elevation.out, path ="../_derived/elevatr/elevation.out.csv") ``` -Reimport output and check + + +Reimport output, attach to header and check ```{r message=F} -elevation.out <- read_csv("../_derived/elevatr/Elevation_out.csv") +elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv") knitr::kable(head(elevation.out,10), - caption="Example of elevation output") %>% + caption="Example of elevation output (only first 10 shown)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") - summary(elevation.out) ``` -There are `r sum(elevation.out$elevation < -100, na.rm=T)` plots with elevation < -100 ! +There are `r sum(is.na(elevation.out$elevation))` plots without elevation info, corresponding to `r round(sum(is.na(elevation.out$elevation))/nrow(header)*100,1)`% of total. +There are `r sum(elevation.out$elevation < -1, na.rm=T)` plots with elevation below sea level. + +```{r message=F} +elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv") +knitr::kable(header %>% + dplyr::select(PlotObservationID, Dataset, `GIVD ID`) %>% + left_join(elevation.out, + by="PlotObservationID") %>% + filter(elevation< -1) %>% + group_by( `GIVD ID`, Dataset) %>% + summarize(num.plot=n()) %>% + ungroup() %>% + arrange(desc(num.plot)) %>% + slice(1:10), + caption="Dataset with highest number of plots below sea level") %>% + kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = F, position = "center") + +``` + +```{r} +summary(elevation.out %>% + dplyr::select(-PlotObservationID)) + +``` Create Scatterplot between measured elevation in the field, and elevation derived from DEM ```{r scatterplot, cache=T} #join measured and derived elevation -mydata <- header %>% - dplyr::select(PlotObservationID, `Altitude (m)`) %>% - filter(!is.na(`Altitude (m)`)) %>% - rename(elevation_measured=`Altitude (m)`) %>% +mydata <- header %>% left_join(elevation.out %>% - dplyr::select(PlotObservationID, elevation) %>% - rename(elevation_dem=elevation), - by="PlotObservationID") + rename(elevation_dem=Elevation_median), by="PlotObservationID") %>% + dplyr::select(PlotObservationID, `Altitude (m)`, elevation_dem) %>% + filter(!is.na(`Altitude (m)`) & !is.na(elevation_dem)) %>% + rename(elevation_measured=`Altitude (m)`) + ggplot(data=mydata) + geom_point(aes(x=elevation_measured, y=elevation_dem), alpha=1/10, cex=0.8) + theme_bw() + @@ -661,22 +603,9 @@ ggplot(data=mydata) + ``` - - - - - - - # 5 Map of plots -```{r} -library(sf) -library(rnaturalearth) -library(viridis) -library(dggridR) -#### BRT predicting -#### alternative plotting +```{r, message=F, warning=F} countries <- ne_countries(returnclass = "sf") %>% st_transform(crs = "+proj=eck4") %>% st_geometry() @@ -689,10 +618,6 @@ bb <- ne_download(type = "wgs84_bounding_box", category = "physical", st_transform(crs = "+proj=eck4") %>% st_geometry() -label_parse <- function(breaks) { - parse(text = breaks) -} - ## basic graph of the world in Eckert projection w3a <- ggplot() + geom_sf(data = bb, col = "grey20", fill = "white") + @@ -706,65 +631,270 @@ w3a <- ggplot() + legend.background = element_rect(size=0.1, linetype="solid", colour = 1), legend.key.height = unit(1.1, "cm"), legend.key.width = unit(1.1, "cm")) + - scale_fill_viridis(label=label_parse) + scale_fill_viridis() +``` -## Plotting of plot density in hexagons +Graph of plot density (hexagons) +```{r, fig.align="center", fig.width=8, fig.height=4, cache=T} header2 <- header %>% filter(!is.na(Longitude) | !is.na(Latitude)) %>% dplyr::select(PlotObservationID, Latitude, Longitude, `GIVD ID`) %>% filter(!(abs(Longitude) >171 & abs(Latitude>70))) -dggs <- dgconstruct(spacing=300, metric=T, resround='down') - #Get the corresponding grid cells for each earthquake epicenter (lat-long pair) - header2$cell <- dgGEO_to_SEQNUM(dggs, header2$Longitude, header2$Latitude)$seqnum - - #Calculate number of plots for each cell - header.out <- header2 %>% - group_by(cell) %>% - summarise(value.out=log(n(), 10)) - #Get the grid cell boundaries for cells - grid <- dgcellstogrid(dggs, header.out$cell, frame=F) %>% - st_as_sf() %>% - mutate(cell = header.out$cell) %>% - mutate(value.out=header.out$value.out) %>% - st_transform("+proj=eck4") %>% - st_wrap_dateline(options = c("WRAPDATELINE=YES")) - - ## plotting - legpos <- c(0.160, .24) - - (w3 <- w3a + - geom_sf(data=grid, aes(fill=value.out),lwd=0, alpha=0.9) + - geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + - scale_fill_viridis( - name="# plots", breaks=0:5, labels = c("1", "10", "100", - "1,000", "10,000", "100,000"), option="viridis" ) + - #labs(fill="# plots") + - theme(legend.position = legpos +c(-0.06, 0.25)) - ) +dggs <- dgconstruct(spacing=300, metric=T, resround='down') +#Get the corresponding grid cells for each earthquake epicenter (lat-long pair) +header2$cell <- dgGEO_to_SEQNUM(dggs, header2$Longitude, header2$Latitude)$seqnum - ggsave(filename="../_pics/PlotDensityLog10_vir.png", width = 15, height = 7, units="in", dpi=300, plot=w3) +#Calculate number of plots for each cell +header.out <- header2 %>% + group_by(cell) %>% + summarise(value.out=log(n(), 10)) - -## Graph of plot location by Dataset +#Get the grid cell boundaries for cells +grid <- dgcellstogrid(dggs, header.out$cell, frame=F) %>% + st_as_sf() %>% + mutate(cell = header.out$cell) %>% + mutate(value.out=header.out$value.out) %>% + st_transform("+proj=eck4") %>% + st_wrap_dateline(options = c("WRAPDATELINE=YES")) + +## plotting +legpos <- c(0.160, .24) +(w3 <- w3a + + geom_sf(data=grid, aes(fill=value.out),lwd=0, alpha=0.9) + + geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + + scale_fill_viridis( + name="# plots", breaks=0:5, labels = c("1", "10", "100", + "1,000", "10,000", "100,000"), option="viridis" ) + + #labs(fill="# plots") + + theme(legend.position = legpos +c(-0.06, 0.25)) +) +ggsave(filename="../_pics/PlotDensityLog10_vir.png", width = 15, height = 7, units="in", dpi=300, plot=w3) +``` + +Graph of plot location by Dataset +```{r, fig.align="center", fig.width=8, fig.height=4, cache=T} header.sf <- header.shp %>% st_as_sf() %>% - #sample_frac(0.01) %>% - st_transform(crs = "+proj=eck4") #%>% - #st_geometry() + st_transform(crs = "+proj=eck4") -set.seed(1984) -w3 <- w3a + +(w4 <- w3a + geom_sf(data=header.sf %>% mutate(GIVD.ID=fct_shuffle(GIVD.ID)), aes(col=factor(GIVD.ID)), pch=16, size=0.8, alpha=0.6) + geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + - #scale_color_brewer(palette = "Dark2") + - #labs(fill="# plots") + - theme(legend.position = "none") + theme(legend.position = "none")) + +ggsave(filename="../_pics/PlotDistrib_Dark2_shuffle1984.png", width = 15, height = 7, units="in", dpi=300, plot=w4) ## takes ~40' to render +``` +## 6 Fix output and export +```{r} +header.out <- header %>% + dplyr::select( + #Metadata + PlotObservationID, Dataset, "GIVD ID", "TV2 relevé number", "ORIG_NUM", "GUID", + #Geographic info + Longitude:"Location uncertainty (m)", Country, CONTINENT, sBiome, sBiomeID, Locality, + #Methodological info + "Relevé area (m²)", "Cover abundance scale", "Date of recording", "Plants recorded", + "Herbs identified (y/n)","Mosses identified (y/n)","Lichens identified (y/n)", + #Topographical + elevation_dem, "Altitude (m)", "Aspect (°)", "Slope (°)", + #Vegetation type + Forest:Naturalness, ESY, + #Vegetation structure + "Cover total (%)":"Maximum height cryptogams (mm)") +save(header.out, file = "../_output/header_splot3.0") + +``` -ggsave(filename="../_pics/PlotDistrib_Dark2_shuffle1984.png", width = 15, height = 7, units="in", dpi=300, plot=w3) ## takes ~40' to render +## Supplementary Material +### ANNEX 1 - Ancillary function - `PredExtr` +```{r} +PredExtr <- function(x.shp, myfunction=NA, output, + toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){ + print("Load Packages") + require(foreach) + require(parallel) + require(doParallel) + require(raster) + require(rgeos) + require(rgdal) + + print(paste("Loading", typp, "data :", toextract)) + print(paste("output will be:", output)) + if(typp=="raster"){ mypredictor <- raster(toextract)} else + mypredictor <- readOGR(toextract) + header.shp <-readOGR(x.shp) + crs(mypredictor) <- crs(header.shp) #should be verified beforehand! + + ## Divide in chunks if requested + if(chunkn>1 & !is.na(chunk.i)){ + print(paste("divide in chunks and run on chunk n.", chunk.i)) + indices <- 1:length(header.shp) + chunks <- split(indices, sort(indices%%chunkn)) + header.shp <- header.shp[chunks[[chunk.i]],] + } + + # define ancillary functions + robust.mean <- function(x){mean(x, rm.na=T)} + robust.mode <- function(x){if(any(x!=0)) { + a <- x[which(x!=0)] #exclude zero (i.e. NAs) + return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else + return(NA) } + + print("go parallel") + cl <- makeForkCluster(ncores, outfile="" ) + registerDoParallel(cl) + + print("start main foreach loop") + if(typp=="raster"){ + out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { + tmp <- raster::extract(mypredictor, header.shp[i,], + buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=myfunction) + } + write.csv(out, file = output) + } else { + out <- sp::over(x=header.shp, y=mypredictor) + toassign <- header.shp[which(is.na(out[,1])),] + print(paste(length(toassign), "plots not matched directely - seek for NN")) + if(length(toassign)>0){ + + nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { + print(i) + nearest.tmp <- tryCatch(geosphere::dist2Line(header.shp[i,], mypredictor), + error = function(e){data.frame(distance=NA, lon=NA, lat=NA,ID=NA)} + ) + #nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor) + return(nearest.tmp) + } + out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],] + } + write.csv(out, file=output) + } + stopCluster(cl) + } +``` +### ANNEX 2 - Ancillary function - `ElevationExtract` +```{r} + +ElevationExtract <- function(header, output, ncores, chunk.i){ + print("load packages") + require(tidyverse) + + require(rgdal) + require(sp) + require(rgeos) + require(raster) + require(rworldmap) + require(elevatr) + + require(parallel) + require(doParallel) + + print("Import header and divide in tiles") + header.shp <- readOGR(header) + header.tiles <- header.shp@data %>% + bind_cols(as.data.frame(header.shp@coords)) %>% + rename(PlotObservationID=PltObID, Longitude=coords.x1, Latitude=coords.x2) %>% + mutate(lc_ncrt=abs(lc_ncrt)) %>% + filter(lc_ncrt <= 50000) %>% + mutate_at(.vars=vars(Longitude, Latitude), + .funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>% + mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile))) + + + print("Get continent") + sPDF <- rworldmap::getMap(resolution="high") + continent.high <- sPDF[,"continent"] + crs(continent.high) <- CRS("+init=epsg:4326") + continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean") + continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m ++nadgrids=@null +no_defs")) + + print("go parallel") + cl <- makeForkCluster(ncores, outfile="") + registerDoParallel(ncores) + + clusterEvalQ(cl, { + library(rgdal) + library(raster) + library(sp) + library(elevatr) + library(dplyr) + }) + + print("create list of tiles still to do") + myfiles <- list.files(path = output, pattern = "[A-Za-z]*_[0-9]+\\.RData$") + done <- NULL + done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles)))) + todo <- 1:nlevels(header.tiles$tilenam) + if(length(done)>0) {todo <- todo[-which(todo %in% done)]} + todo <- sample(todo, replace=F) + print(paste(length(todo), "tiles to do")) + + print("divide in chunks") + #divide in chunks #should take around 40' + indices <- 1:length(todo) + todo.chunks <- split(indices, sort(indices%%99)) + + print(paste("start main foreach loop on chunk n=", chunk.i)) + foreach(i = todo.cunks[[chunk.i]]) %dopar% { + print(paste("doing", i)) + #create sp and project data + if(nrow(header.tiles %>% + filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next() + sp.tile <- SpatialPointsDataFrame(coords=header.tiles %>% + filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% + dplyr::select(Longitude, Latitude), + data=header.tiles %>% + filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% + dplyr::select(-Longitude, -Latitude), + proj4string = CRS("+init=epsg:4326")) + sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 + +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null + +no_defs ")) #project to mercator + + #retrieve dem raster + tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$lc_ncrt), clip="bbox"), + error = function(e){e} + ) + if(!exists("raster.tile")) { + raster.tile <- NA + save(raster.tile, file = paste(output, "elevation_tile_", i, "failed.RData", sep="")) + message(paste("tile", i, "doesn't work!, skip to next")) + rm(raster.tile) + next + } + # clip dem tile with continent shape + raster.tile <- mask(raster.tile, continent.high.merc) + + #extract and summarize elevation data + elev.tile <- raster::extract(raster.tile, sp.tile, small=T) + elev.tile.buffer <- raster::extract(raster.tile, sp.tile, + buffer=sp.tile@data$lc_ncrt, + small=T) + tmp <- round(mapply( quantile, + x=elev.tile.buffer, + #center=elev.tile, + probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), + #lc_ncrt=sp.tile$lc_ncrt, + na.rm=T)) + elev.q95 <- setNames(data.frame(matrix(tmp, ncol = 3, nrow = length(elev.tile.buffer))), + c("Elevation_q2.5", "Elevation_median", "Elevation_q97.5")) + output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID, + elevation=round(elev.tile), + elev.q95, + DEM.res=res(raster.tile)[1]) + + #save output + save(output.tile, file = paste(output, "elevation_tile_", i, ".RData", sep="")) + print(paste(i, "done")) + } + stopCluster(cl) + } + ``` -- GitLab