From 9539a520ce2578c8b1db61b71e55483cea8b540f Mon Sep 17 00:00:00 2001
From: Francesco Sabatini <francesco.sabatini@idiv.de>
Date: Fri, 14 Feb 2020 11:49:47 +0100
Subject: [PATCH] Extract Continent\sBiome in server - incomplete

---
 code/04_buildHeader.Rmd | 232 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 214 insertions(+), 18 deletions(-)

diff --git a/code/04_buildHeader.Rmd b/code/04_buildHeader.Rmd
index 8fe8f86..6bfca88 100644
--- a/code/04_buildHeader.Rmd
+++ b/code/04_buildHeader.Rmd
@@ -24,6 +24,9 @@ This report documents the construction of the header file for sPlot 3.0. It is b
 
 
 ```{r results="hide", message=F, warning=F}
+write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
+write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
+
 knitr::opts_chunk$set(echo = TRUE)
 library(tidyverse)
 library(readr)
@@ -40,7 +43,7 @@ library(rworldmap)
 
 ```{bash, eval=F}
 # escape all double quotation marks. Run in Linux terminal
-sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv
+#sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv
 
 #more general alternative in case some " are already escaped
 ##first removing \s before all "s, and then adding \ before all ":
@@ -140,7 +143,7 @@ header <- header0 %>%
                           list=(is.na(`Location uncertainty (m)`) & Dataset=="Egypt Nile delta"), 
                           values=-90000))
 ```
-There are two plots in the `Romanina Grassland Databse` and ~4442 plots in the `Japan` database, whose lat\\long are inverted. Correct.
+There are two plots in the `Romania Grassland Databse` and ~4442 plots in the `Japan` database, whose lat\\long are inverted. Correct.
 ```{r}
 toswap <- c(which(header$Dataset=="Japan" & header$Latitude>90), 
             which(header$Dataset=="Romania Grassland Database" & header$Longitude>40))
@@ -265,10 +268,20 @@ header <- header %>%
 ```
 
 
+# 4 Assign plots to spatial descriptors
+Create spatial point dataframe for sPlot data to intersect with spatial layers
+```{r}
+header.shp <- header %>%
+  filter(!is.na(Longitude) | !is.na(Latitude))
+header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
+                                        dplyr::select(Longitude, Latitude),
+                               proj4string = CRS("+init=epsg:4326"), 
+                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
+                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
+                                               `GIVD ID`=header.shp$`GIVD ID`))
+```
 
-
-
-## 4 Assign to Continents
+## 4.1 Assign to Continents
 Download and manipulate map of continents
 ```{r}
 sPDF <- rworldmap::getMap(resolution="coarse")
@@ -288,19 +301,9 @@ continent.high <- sPDF[,"continent"]
 crs(continent.high) <- CRS("+init=epsg:4326")
 continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean")
 ```
-Create spatial point dataframe for sPlot data to intersect with spatial layers
-```{r}
-header.shp <- header %>%
-  filter(!is.na(Longitude) | !is.na(Latitude))
-header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
-                                        dplyr::select(Longitude, Latitude),
-                               proj4string = CRS("+init=epsg:4326"), 
-                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
-                                               loc.uncert=header.shp$`Location uncertainty (m)`))
-```
 
 Assign plots to continent
-```{r, eval=T}
+```{r, eval=F}
 continent.out <- sp::over(x=header.shp, y=continent)
 #overlay unassigned points to the high resolution layer of continent
 toassign <- header.shp[which(is.na(continent.out$continent)),] #154782 remain to assign
@@ -315,7 +318,7 @@ crs(toassign) <- crs(continent)
 
 
 #go parallel
-ncores=6
+ncores=8
 library(parallel)
 library(doParallel)
 cl <- makeForkCluster(ncores, outfile="" )
@@ -326,6 +329,7 @@ nearestContinent <- foreach(i=1:length(toassign), .packages=c('raster'), .combin
 }
 continent.out$continent[which(is.na(continent.out$continent))] <- as.character(continent[-137,]@data[nearestContinent[,"ID"],])
 save(continent.out, file = "../_derived/continent.out")
+stopCluster(cl)
 ```
 Reload and manipulate continent. Correct header.sRoot
 ```{r}
@@ -335,14 +339,206 @@ header <- header %>%
               filter(!is.na(Longitude) | !is.na(Latitude)) %>% 
               dplyr::select(PlotObservationID) %>% 
               bind_cols(continent.out), 
-            by="PlotObservationID")
+            by="PlotObservationID") %>% 
   mutate(CONTINENT=factor(continent, 
                             levels=c("Africa", "Antarctica", "Australia", "Eurasia", "North America", "South America"),
                             labels=c("AF", "AN", "AU", "EU", "NA", "SA"))) %>% 
   dplyr::select(-continent)
 ```
 
+## 4.2 Assign to sBiomes
+```{r, eval=F}
+sBiomes <- readOGR("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp")
+crs(sBiomes) <- crs(header.shp)
 
+library(parallel)
+library(doParallel)
+ncores=10
+cl <- makeForkCluster(ncores, outfile="" )
+registerDoParallel(cl)
+
+#divide in chunks #should take around 40'
+indices <- 1:length(header.shp)
+chunks <- split(indices, sort(indices%%99))
+chunkn <- length(chunks)
+sBiomes.out <- foreach(i=1:chunkn, .packages=c('raster'), .combine=rbind) %dopar% { 
+  sBiomes.tmp <- sp::over(x=header.shp[chunks[[i]],], y=sBiomes)
+}
+sum(is.na(sBiomes.out$Name))
+sBiomes.out.backup <- sBiomes.out
+stopCluster(cl)
+```
+There are `r sum(is.na(sBiomes.out$Name))` still to assign. Using the `dist2Line` is not feasible due to long computing times (3' for each plot!). Fall back on raster version of sBiomes (res 0.1°).
+
+```{r}
+load("../_derived/sBiome.out.RData")
+sBiomes01 <- raster("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiome_raster01/sBiomes_raster01.tif")
+##  Fix NAs
+toassign <- header.shp[which(is.na(sBiomes.out$Name)),] #116878 not assigned (!)
+crs(sBiomes01) <- crs(toassign)
+
+system.time(sBiomes.out2 <- extract(sBiomes01, toassign))
 
 
+toassign3 <- toassign[which(sBiomes.out2==0),] #91793 not assigned (!)
+robust.mode <- function(x){if(any(x!=0)) {
+                              a <- x[which(x!=0)] #exclude zero (i.e. NAs)
+                              return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
+                                return(NA)        
+  }
+
+ncores=10
+cl <- makeForkCluster(ncores, outfile="" )
+registerDoParallel(cl)
+sBiomes.out3 <- foreach(i=1:length(toassign3), .packages=c('raster'), .combine=rbind) %dopar% { 
+  sBiomes.out.tmp <- extract(sBiomes01, toassign3[i,], buffer=10000, fun=robust.mode)
+}
+
+stopCluster(cl) 
+save(sBiomes.out, sBiomes.out2, sBiomes.out3, file = "../_derived/sBiome.out.RData")
+
+
+toassign4 <- toassign3[which(is.na(sBiomes.out3)),] ##37887 still to classify
+ncores=10
+cl <- makeForkCluster(ncores, outfile="" )
+registerDoParallel(cl)
+sBiomes.out4 <- foreach(i=1:length(toassign4), .packages=c('raster'), .combine=rbind) %dopar% { 
+  sBiomes.out.tmp <- extract(sBiomes01, toassign4[i,], buffer=50000, fun=robust.mode)
+}
+
+stopCluster(cl) 
+save(sBiomes.out, sBiomes.out2, sBiomes.out3, sBiomes.out4, file = "../_derived/sBiome.out.RData")
+
+
+toassign5 <- toassign4[which(is.na(sBiomes.out4)),] #4158 still to assign
+nearestBiome <- foreach(i=1:1000, .packages=c('raster'), .combine=rbind) %dopar% { 
+  nearestBiome.tmp <- geosphere::dist2Line(toassign[i,], sBiomes)
+}
+sBiomes.out[which(is.na(sBiomes.out$Name))] <- as.character(sBiomes@data[nearestBiome[,"ID"],])
+sum(is.na(sBiomes.out$Name))
+save(sBiomes.out, file = "../_derived/sBiome.out.RData")
+
+stopCluster(cl)
+
+
+```
+
+
+Reimport output and join to header
+```{r}
+
+header <- header %>% 
+  bind_rows(sBiomes.out %>% 
+              dplyr::select(Name, BiomeID) %>% 
+              dplyr::rename(sBioem=Name, s))
+
+
+```
+
+
+
+
+
+
+
+# 5 Map of plots
+
+```{r}
+library(sf)
+library(rnaturalearth)
+library(viridis)
+library(dggridR)
+#### BRT predicting
+#### alternative plotting
+countries <- ne_countries(returnclass = "sf") %>% 
+  st_transform(crs = "+proj=eck4") %>% 
+  st_geometry()
+graticules <- ne_download(type = "graticules_15", category = "physical",
+                          returnclass = "sf") %>% 
+  st_transform(crs = "+proj=eck4") %>% 
+  st_geometry()
+bb <- ne_download(type = "wgs84_bounding_box", category = "physical",
+                  returnclass = "sf") %>% 
+  st_transform(crs = "+proj=eck4") %>% 
+  st_geometry()
+
+label_parse <- function(breaks) {
+  parse(text = breaks)
+}
+
+## basic graph of the world in Eckert projection
+w3a <- ggplot() +
+  geom_sf(data = bb, col = "grey20", fill = "white") +
+  geom_sf(data = graticules, col = "grey20", lwd = 0.1) +
+  geom_sf(data = countries, fill = "grey90", col = NA, lwd = 0.3) +
+  coord_sf(crs = "+proj=eck4") +
+  theme_minimal() +
+  theme(axis.text = element_blank(), 
+        legend.title=element_text(size=12), 
+        legend.text=element_text(size=12),
+        legend.background = element_rect(size=0.1, linetype="solid", colour = 1), 
+        legend.key.height = unit(1.1, "cm"), 
+        legend.key.width = unit(1.1, "cm")) +
+  scale_fill_viridis(label=label_parse)
+
+## Plotting of plot density in hexagons
+header2 <- header %>% 
+  filter(!is.na(Longitude) | !is.na(Latitude)) %>% 
+  dplyr::select(PlotObservationID, Latitude, Longitude, `GIVD ID`) %>% 
+  filter(!(abs(Longitude) >171 & abs(Latitude>70)))
+dggs          <- dgconstruct(spacing=300, metric=T, resround='down')
+  #Get the corresponding grid cells for each earthquake epicenter (lat-long pair)
+  header2$cell <- dgGEO_to_SEQNUM(dggs, header2$Longitude, header2$Latitude)$seqnum
+  
+  #Calculate number of plots for each cell
+  header.out   <- header2 %>% 
+    group_by(cell) %>% 
+    summarise(value.out=log(n(), 10))
+  #Get the grid cell boundaries for cells 
+  grid   <- dgcellstogrid(dggs, header.out$cell, frame=F) %>%
+    st_as_sf() %>% 
+    mutate(cell = header.out$cell) %>% 
+    mutate(value.out=header.out$value.out) %>% 
+    st_transform("+proj=eck4") %>% 
+    st_wrap_dateline(options = c("WRAPDATELINE=YES"))
+  
+  ## plotting
+  legpos <- c(0.160, .24)
+  
+  (w3 <- w3a + 
+         geom_sf(data=grid, aes(fill=value.out),lwd=0, alpha=0.9)    +
+         geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + 
+         scale_fill_viridis(
+           name="# plots", breaks=0:5, labels = c("1", "10", "100",
+                                                  "1,000", "10,000", "100,000"), option="viridis" ) + 
+      #labs(fill="# plots") + 
+      theme(legend.position = legpos +c(-0.06, 0.25))
+  )
+
+  ggsave(filename="../_pics/PlotDensityLog10_vir.png", width = 15, height = 7, units="in", dpi=300, plot=w3)
+
+  
+## Graph of plot location by Dataset
+header.sf <- header.shp %>% 
+  st_as_sf() %>% 
+  #sample_frac(0.01) %>%
+  st_transform(crs = "+proj=eck4") #%>% 
+  #st_geometry()
+
+set.seed(1984)
+w3 <- w3a + 
+         geom_sf(data=header.sf %>% 
+                   mutate(GIVD.ID=fct_shuffle(GIVD.ID)), aes(col=factor(GIVD.ID)), pch=16, size=0.8, alpha=0.6) +
+         geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + 
+         #scale_color_brewer(palette = "Dark2") + 
+         #labs(fill="# plots") + 
+         theme(legend.position = "none")
+
+
+
+ggsave(filename="../_pics/PlotDistrib_Dark2_shuffle1984.png", width = 15, height = 7, units="in", dpi=300, plot=w3) ## takes ~40' to render
+
+
+```
+
 
-- 
GitLab