Continued editing 05_ExtractEnv. Rerunning all data extraction in cluster

943ef428 · Francesco Sabatini · 8f8ab7f1 · 943ef428
Commit 943ef428 authored 5 years ago by Francesco Sabatini
--- a/code/05_ExtractEnvironment.Rmd
+++ b/code/05_ExtractEnvironment.Rmd
@@ -15,7 +15,7 @@ output: html_document
 **Revised:**  
 **version:** 1.0

-This report documents the construction of the header file for sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.    
+This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.    


 ```{r results="hide", message=F, warning=F}
@@ -38,15 +38,173 @@ library(rworldmap)
 #library(rnaturalearth)
 #library(dggridR)

+source("A98_PredictorsExtract.R")
+
 #save temporary files
 write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
 write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
 rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
 ```

-## Import header data
+## 1.1 Import header data

 ```{r}
 load("../_output/header_splot3.0.RData")
+header <- header.out
+```
+
+Create spatial point dataframe for sPlot data to intersect with spatial layers
+```{r}
+header.shp <- header %>%
+  filter(!is.na(Longitude) | !is.na(Latitude))
+header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
+                                        dplyr::select(Longitude, Latitude),
+                               proj4string = CRS("+init=epsg:4326"), 
+                               data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
+                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
+                                               `GIVD ID`=header.shp$`GIVD ID`))
+```
+
+## 1.2 Vector predictors
+### 1.2.1 Ecoregions
+```{r, message=F, eval=F}
+##Import spatial data on Ecoregions
+ecoreg <- readOGR("../sPlot/_Ancillary/official", layer="wwf_terr_ecos")
+
+ecoreg@data <- ecoreg@data %>% 
+  dplyr::select(OBJECTID, ECO_NAME, REALM, BIOME, ECO_NUM, ECO_ID, eco_code)
+
+```
+
+## 1.3 Raster data
+Download, import and reproject all raster data to same extent, CRS and resolution. Stack them and save derived files for easier future work  
+### 1.3.1 Climate
+Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/)
+
+Codes:
+
+Bio1 = Annual Mean Temperature  
+Bio2 = Mean Diurnal Range  
+Bio3 = Isothermality  
+Bio4 = Temperature Seasonality  
+Bio5 = Max Temperature of Warmest Month  
+Bio6 = Min Temperature of Coldest Month  
+Bio7 = Temperature Annual Range  
+Bio8 = Mean Temperature of Wettest Quarter  
+Bio9 = Mean Temperature of Driest Quarter  
+Bio10 = Mean Temperature of Warmest Quarter  
+Bio11 = Mean Temperature of Coldest Quarter  
+Bio12 = Annual Precipitation  
+Bio13 = Precipitation of Wettest Month  
+Bio14 = Precipitation of Driest Month  
+Bio15 = Precipitation Seasonality  
+Bio16 = Precipitation of Wettest Quarter  
+Bio17 = Precipitation of Driest Quarter  
+Bio18 = Precipitation of Warmest Quarter  
+Bio19 = Precipitation of Coldest Quarter  
+  
+\newline \newline
+Download raster files:
+```{r, message=F, warning=F, eval=F}
+library(downloader)
+url.chelsa <- list()
+for(i in 1:19){
+  ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
+  url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
+  download(url.chelsa[[i]],
+           paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""), 
+           mode = "wb")
+}
+```
+Intersect `header.shp` with each raster of the CHELSA collection.  
+Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks.
+```{r, warning=F, message=F, eval=F}
+for(i in 1:19){
+  ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", 
+              stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
+  chelsa.i <- raster(ff)
+  #define output paths
+  output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
+              stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
+  output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
+              stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
+  # Run PredExtr function
+  chelsa.out <- PredExtr(x.shp = header.shp, toextract = chelsa.i, myfunction = "robust.mean", 
+                         ncores = 4, typp = "raster", output.ff1)
+  chelsa.sd <- PredExtr(x.shp = header.shp, toextract = chelsa.i, myfunction = "sd", 
+                         ncores = 4, typp = "raster", output).ff2
+}
+```
+Reimport and assemble output, and join it to header.
+```{r, message=F, warning=F}
+chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
+chelsa.out <- do.call(cbind, lapply(chelsa.files, function(x) {read_csv(x) %>% 
+                                                                  column_to_rownames("X1")}))
+
+```
+
+
+
+
+### 1.3.2 Soil 
+Soil variables (5 cm depth) derive from the [ISRIC dataset](https://www.isric.org/), downloaded at 250-m resolution 
+  
+CECSOL                     Cation Exchange capacity of soil  
+CLYPPT                     Clay mass fraction in %  
+CRFVOL                     Coarse fragments volumetric in %  
+ORCDRC                     Soil Organic Carbon Content in g/kg  
+PHIHOX                     Soil pH x 10 in H20  
+SLTPPT                     Silt mass fraction in %  
+SNDPTT                     Sand mass fraction in %  
+BLDFIE                     Bulk Density (fine earth) 	in kg/m3  
+P.ret.cat                  Phosphorous Retention - Categorical value, see [ISRIC 2011-06](https://www.isric.org/documents/document-type/isric-report-201106-global-distribution-soil-phosphorus-retention-potential)  
+\newline \newline
+Download ISRIC soil data
+```{r, eval=F}
+url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
+url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
+url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
+url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
+url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
+url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
+url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
+url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"
+
+download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
+download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
+download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
+download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
+download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
+download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
+download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
+download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")
+```
+
+Intersect `header.shp` with each raster of the ISRIC collection.  
+Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks.
+```{r, message=F, eval=F}
+for(i in 1:8){
+  ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
+                                pattern = "^[^(Gene)]", full.names = T)[i]
+  isric.i <- raster(ff)
+  #define output paths
+  filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
+           pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
+  output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
+  output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
+  # Run PredExtr function
+  chelsa.out <- PredExtr(x.shp = header.shp, toextract = isric.i, myfunction = "robust.mean", 
+                         ncores = 4, typp = "raster", output.ff1)
+  chelsa.sd <- PredExtr(x.shp = header.shp, toextract = isric.i, myfunction = "sd", 
+                         ncores = 4, typp = "raster", output).ff2
+}
+```
+
+
+
+
+
+## ANNEX 1 -
+```{r, code = readLines("A98_PredictorsExtract.R")}
 ```