From 9539a520ce2578c8b1db61b71e55483cea8b540f Mon Sep 17 00:00:00 2001 From: Francesco Sabatini <francesco.sabatini@idiv.de> Date: Fri, 14 Feb 2020 11:49:47 +0100 Subject: [PATCH] Extract Continent\sBiome in server - incomplete --- code/04_buildHeader.Rmd | 232 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 214 insertions(+), 18 deletions(-) diff --git a/code/04_buildHeader.Rmd b/code/04_buildHeader.Rmd index 8fe8f86..6bfca88 100644 --- a/code/04_buildHeader.Rmd +++ b/code/04_buildHeader.Rmd @@ -24,6 +24,9 @@ This report documents the construction of the header file for sPlot 3.0. It is b ```{r results="hide", message=F, warning=F} +write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron')) +write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron')) + knitr::opts_chunk$set(echo = TRUE) library(tidyverse) library(readr) @@ -40,7 +43,7 @@ library(rworldmap) ```{bash, eval=F} # escape all double quotation marks. Run in Linux terminal -sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv +#sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv #more general alternative in case some " are already escaped ##first removing \s before all "s, and then adding \ before all ": @@ -140,7 +143,7 @@ header <- header0 %>% list=(is.na(`Location uncertainty (m)`) & Dataset=="Egypt Nile delta"), values=-90000)) ``` -There are two plots in the `Romanina Grassland Databse` and ~4442 plots in the `Japan` database, whose lat\\long are inverted. Correct. +There are two plots in the `Romania Grassland Databse` and ~4442 plots in the `Japan` database, whose lat\\long are inverted. Correct. ```{r} toswap <- c(which(header$Dataset=="Japan" & header$Latitude>90), which(header$Dataset=="Romania Grassland Database" & header$Longitude>40)) @@ -265,10 +268,20 @@ header <- header %>% ``` +# 4 Assign plots to spatial descriptors +Create spatial point dataframe for sPlot data to intersect with spatial layers +```{r} +header.shp <- header %>% + filter(!is.na(Longitude) | !is.na(Latitude)) +header.shp <- SpatialPointsDataFrame(coords= header.shp %>% + dplyr::select(Longitude, Latitude), + proj4string = CRS("+init=epsg:4326"), + data=data.frame(PlotObservationID= header.shp$PlotObservationID, + loc.uncert=header.shp$`Location uncertainty (m)`, + `GIVD ID`=header.shp$`GIVD ID`)) +``` - - -## 4 Assign to Continents +## 4.1 Assign to Continents Download and manipulate map of continents ```{r} sPDF <- rworldmap::getMap(resolution="coarse") @@ -288,19 +301,9 @@ continent.high <- sPDF[,"continent"] crs(continent.high) <- CRS("+init=epsg:4326") continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean") ``` -Create spatial point dataframe for sPlot data to intersect with spatial layers -```{r} -header.shp <- header %>% - filter(!is.na(Longitude) | !is.na(Latitude)) -header.shp <- SpatialPointsDataFrame(coords= header.shp %>% - dplyr::select(Longitude, Latitude), - proj4string = CRS("+init=epsg:4326"), - data=data.frame(PlotObservationID= header.shp$PlotObservationID, - loc.uncert=header.shp$`Location uncertainty (m)`)) -``` Assign plots to continent -```{r, eval=T} +```{r, eval=F} continent.out <- sp::over(x=header.shp, y=continent) #overlay unassigned points to the high resolution layer of continent toassign <- header.shp[which(is.na(continent.out$continent)),] #154782 remain to assign @@ -315,7 +318,7 @@ crs(toassign) <- crs(continent) #go parallel -ncores=6 +ncores=8 library(parallel) library(doParallel) cl <- makeForkCluster(ncores, outfile="" ) @@ -326,6 +329,7 @@ nearestContinent <- foreach(i=1:length(toassign), .packages=c('raster'), .combin } continent.out$continent[which(is.na(continent.out$continent))] <- as.character(continent[-137,]@data[nearestContinent[,"ID"],]) save(continent.out, file = "../_derived/continent.out") +stopCluster(cl) ``` Reload and manipulate continent. Correct header.sRoot ```{r} @@ -335,14 +339,206 @@ header <- header %>% filter(!is.na(Longitude) | !is.na(Latitude)) %>% dplyr::select(PlotObservationID) %>% bind_cols(continent.out), - by="PlotObservationID") + by="PlotObservationID") %>% mutate(CONTINENT=factor(continent, levels=c("Africa", "Antarctica", "Australia", "Eurasia", "North America", "South America"), labels=c("AF", "AN", "AU", "EU", "NA", "SA"))) %>% dplyr::select(-continent) ``` +## 4.2 Assign to sBiomes +```{r, eval=F} +sBiomes <- readOGR("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp") +crs(sBiomes) <- crs(header.shp) +library(parallel) +library(doParallel) +ncores=10 +cl <- makeForkCluster(ncores, outfile="" ) +registerDoParallel(cl) + +#divide in chunks #should take around 40' +indices <- 1:length(header.shp) +chunks <- split(indices, sort(indices%%99)) +chunkn <- length(chunks) +sBiomes.out <- foreach(i=1:chunkn, .packages=c('raster'), .combine=rbind) %dopar% { + sBiomes.tmp <- sp::over(x=header.shp[chunks[[i]],], y=sBiomes) +} +sum(is.na(sBiomes.out$Name)) +sBiomes.out.backup <- sBiomes.out +stopCluster(cl) +``` +There are `r sum(is.na(sBiomes.out$Name))` still to assign. Using the `dist2Line` is not feasible due to long computing times (3' for each plot!). Fall back on raster version of sBiomes (res 0.1°). + +```{r} +load("../_derived/sBiome.out.RData") +sBiomes01 <- raster("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiome_raster01/sBiomes_raster01.tif") +## Fix NAs +toassign <- header.shp[which(is.na(sBiomes.out$Name)),] #116878 not assigned (!) +crs(sBiomes01) <- crs(toassign) + +system.time(sBiomes.out2 <- extract(sBiomes01, toassign)) +toassign3 <- toassign[which(sBiomes.out2==0),] #91793 not assigned (!) +robust.mode <- function(x){if(any(x!=0)) { + a <- x[which(x!=0)] #exclude zero (i.e. NAs) + return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else + return(NA) + } + +ncores=10 +cl <- makeForkCluster(ncores, outfile="" ) +registerDoParallel(cl) +sBiomes.out3 <- foreach(i=1:length(toassign3), .packages=c('raster'), .combine=rbind) %dopar% { + sBiomes.out.tmp <- extract(sBiomes01, toassign3[i,], buffer=10000, fun=robust.mode) +} + +stopCluster(cl) +save(sBiomes.out, sBiomes.out2, sBiomes.out3, file = "../_derived/sBiome.out.RData") + + +toassign4 <- toassign3[which(is.na(sBiomes.out3)),] ##37887 still to classify +ncores=10 +cl <- makeForkCluster(ncores, outfile="" ) +registerDoParallel(cl) +sBiomes.out4 <- foreach(i=1:length(toassign4), .packages=c('raster'), .combine=rbind) %dopar% { + sBiomes.out.tmp <- extract(sBiomes01, toassign4[i,], buffer=50000, fun=robust.mode) +} + +stopCluster(cl) +save(sBiomes.out, sBiomes.out2, sBiomes.out3, sBiomes.out4, file = "../_derived/sBiome.out.RData") + + +toassign5 <- toassign4[which(is.na(sBiomes.out4)),] #4158 still to assign +nearestBiome <- foreach(i=1:1000, .packages=c('raster'), .combine=rbind) %dopar% { + nearestBiome.tmp <- geosphere::dist2Line(toassign[i,], sBiomes) +} +sBiomes.out[which(is.na(sBiomes.out$Name))] <- as.character(sBiomes@data[nearestBiome[,"ID"],]) +sum(is.na(sBiomes.out$Name)) +save(sBiomes.out, file = "../_derived/sBiome.out.RData") + +stopCluster(cl) + + +``` + + +Reimport output and join to header +```{r} + +header <- header %>% + bind_rows(sBiomes.out %>% + dplyr::select(Name, BiomeID) %>% + dplyr::rename(sBioem=Name, s)) + + +``` + + + + + + + +# 5 Map of plots + +```{r} +library(sf) +library(rnaturalearth) +library(viridis) +library(dggridR) +#### BRT predicting +#### alternative plotting +countries <- ne_countries(returnclass = "sf") %>% + st_transform(crs = "+proj=eck4") %>% + st_geometry() +graticules <- ne_download(type = "graticules_15", category = "physical", + returnclass = "sf") %>% + st_transform(crs = "+proj=eck4") %>% + st_geometry() +bb <- ne_download(type = "wgs84_bounding_box", category = "physical", + returnclass = "sf") %>% + st_transform(crs = "+proj=eck4") %>% + st_geometry() + +label_parse <- function(breaks) { + parse(text = breaks) +} + +## basic graph of the world in Eckert projection +w3a <- ggplot() + + geom_sf(data = bb, col = "grey20", fill = "white") + + geom_sf(data = graticules, col = "grey20", lwd = 0.1) + + geom_sf(data = countries, fill = "grey90", col = NA, lwd = 0.3) + + coord_sf(crs = "+proj=eck4") + + theme_minimal() + + theme(axis.text = element_blank(), + legend.title=element_text(size=12), + legend.text=element_text(size=12), + legend.background = element_rect(size=0.1, linetype="solid", colour = 1), + legend.key.height = unit(1.1, "cm"), + legend.key.width = unit(1.1, "cm")) + + scale_fill_viridis(label=label_parse) + +## Plotting of plot density in hexagons +header2 <- header %>% + filter(!is.na(Longitude) | !is.na(Latitude)) %>% + dplyr::select(PlotObservationID, Latitude, Longitude, `GIVD ID`) %>% + filter(!(abs(Longitude) >171 & abs(Latitude>70))) +dggs <- dgconstruct(spacing=300, metric=T, resround='down') + #Get the corresponding grid cells for each earthquake epicenter (lat-long pair) + header2$cell <- dgGEO_to_SEQNUM(dggs, header2$Longitude, header2$Latitude)$seqnum + + #Calculate number of plots for each cell + header.out <- header2 %>% + group_by(cell) %>% + summarise(value.out=log(n(), 10)) + #Get the grid cell boundaries for cells + grid <- dgcellstogrid(dggs, header.out$cell, frame=F) %>% + st_as_sf() %>% + mutate(cell = header.out$cell) %>% + mutate(value.out=header.out$value.out) %>% + st_transform("+proj=eck4") %>% + st_wrap_dateline(options = c("WRAPDATELINE=YES")) + + ## plotting + legpos <- c(0.160, .24) + + (w3 <- w3a + + geom_sf(data=grid, aes(fill=value.out),lwd=0, alpha=0.9) + + geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + + scale_fill_viridis( + name="# plots", breaks=0:5, labels = c("1", "10", "100", + "1,000", "10,000", "100,000"), option="viridis" ) + + #labs(fill="# plots") + + theme(legend.position = legpos +c(-0.06, 0.25)) + ) + + ggsave(filename="../_pics/PlotDensityLog10_vir.png", width = 15, height = 7, units="in", dpi=300, plot=w3) + + +## Graph of plot location by Dataset +header.sf <- header.shp %>% + st_as_sf() %>% + #sample_frac(0.01) %>% + st_transform(crs = "+proj=eck4") #%>% + #st_geometry() + +set.seed(1984) +w3 <- w3a + + geom_sf(data=header.sf %>% + mutate(GIVD.ID=fct_shuffle(GIVD.ID)), aes(col=factor(GIVD.ID)), pch=16, size=0.8, alpha=0.6) + + geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + + #scale_color_brewer(palette = "Dark2") + + #labs(fill="# plots") + + theme(legend.position = "none") + + + +ggsave(filename="../_pics/PlotDistrib_Dark2_shuffle1984.png", width = 15, height = 7, units="in", dpi=300, plot=w3) ## takes ~40' to render + + +``` + -- GitLab