From dfb3ce893ab7358b21e8fdc22ff98cc69681602c Mon Sep 17 00:00:00 2001
From: Francesco Sabatini <francesco.sabatini@idiv.de>
Date: Tue, 10 Mar 2020 15:42:08 +0100
Subject: [PATCH] Finished version 0 of 05 ExtractEnv

---
 code/05_ExtractEnvironment.Rmd | 144 +++++++++++++++++++++++++++------
 1 file changed, 119 insertions(+), 25 deletions(-)

diff --git a/code/05_ExtractEnvironment.Rmd b/code/05_ExtractEnvironment.Rmd
index 6f23e09..466af83 100644
--- a/code/05_ExtractEnvironment.Rmd
+++ b/code/05_ExtractEnvironment.Rmd
@@ -44,17 +44,34 @@ source("A98_PredictorsExtract.R")
 write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
 write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
 rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
+
+#Ancillary variables
+get.summary <- function(x){x %>% 
+    summarize_all(.funs=list(no.NAs=~sum(is.na(.)), 
+                             min=~min(., na.rm=T),
+                             q025=~quantile(., 0.25, na.rm=T), 
+                             q50=~quantile(., 0.50, na.rm=T), 
+                             q75=~quantile(., .75, na.rm=T), 
+                             max=~max(., na.rm=T), 
+                             mean=~mean(., na.rm=T), 
+                             sd=~sd(., na.rm=T))) %>% 
+    gather(variable, value) %>% 
+    separate(variable, sep="_", into=c("variable", "stat")) %>% 
+    mutate(stat=factor(stat, levels=c("no.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
+    spread(key=stat, value=value)
+}
+
+
 ```
 
-## 1.1 Import header data
+## 1 Import header data
 
 ```{r}
 load("../_output/header_splot3.0.RData")
-header <- header.out
 ```
 
-Create spatial point dataframe for sPlot data to intersect with spatial layers
-```{r}
+Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels `Location uncertainty` to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.
+```{r, eval=F}
 header.shp <- header %>%
   filter(!is.na(Longitude) | !is.na(Latitude))
 header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
@@ -64,24 +81,16 @@ header.shp <- SpatialPointsDataFrame(coords= header.shp %>%
                                                loc.uncert=header.shp$`Location uncertainty (m)`, 
                                                `GIVD ID`=header.shp$`GIVD ID`))
 ```
-
-## 1.2 Vector predictors
-### 1.2.1 Ecoregions
-```{r, message=F, eval=F}
-##Import spatial data on Ecoregions
-ecoreg <- readOGR("../sPlot/_Ancillary/official", layer="wwf_terr_ecos")
-
-ecoreg@data <- ecoreg@data %>% 
-  dplyr::select(OBJECTID, ECO_NAME, REALM, BIOME, ECO_NUM, ECO_ID, eco_code)
-
+```{r, echo=F}
+header.shp <- readOGR(dsn="../_derived", layer="header.shp")
+colnames(header.shp@data) <- c("PlotObservationID", "loc.uncert", "GIVD ID")
 ```
 
-## 1.3 Raster data
-Download, import and reproject all raster data to same extent, CRS and resolution. Stack them and save derived files for easier future work  
-### 1.3.1 Climate
-Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/)
 
-Codes:
+## 2 Climate
+Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/)  
+
+Codes:  
 
 Bio1 = Annual Mean Temperature  
 Bio2 = Mean Diurnal Range  
@@ -92,7 +101,7 @@ Bio6 = Min Temperature of Coldest Month
 Bio7 = Temperature Annual Range  
 Bio8 = Mean Temperature of Wettest Quarter  
 Bio9 = Mean Temperature of Driest Quarter  
-Bio10 = Mean Temperature of Warmest Quarter  
+Bio10 = Mean Temperature of Warmest Quarter   
 Bio11 = Mean Temperature of Coldest Quarter  
 Bio12 = Annual Precipitation  
 Bio13 = Precipitation of Wettest Month  
@@ -135,18 +144,43 @@ for(i in 1:19){
                          ncores = 4, typp = "raster", output).ff2
 }
 ```
-Reimport and assemble output, and join it to header.
+Reimport and assemble output.
 ```{r, message=F, warning=F}
 chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
-chelsa.out <- do.call(cbind, lapply(chelsa.files, function(x) {read_csv(x) %>% 
-                                                                  column_to_rownames("X1")}))
+chelsa.out <- do.call(cbind, lapply(chelsa.files, 
+                                    function(x) {read_csv(x,col_types = cols(X1 = col_character(),
+                                                                             V1 = col_double())) %>% 
+                                        column_to_rownames("X1")}))
+colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
+#same for sd values
+chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
+chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
+                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
+                                                                                 V1 = col_double())) %>% 
+                                           column_to_rownames("X1")}))
+colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "_sd")
+```
+Show summaries
+```{r, echo=F}
+tmp.sum <- get.summary(chelsa.out)
 
+knitr::kable(tmp.sum, 
+  caption="Summary statistics for chelsa mean statistics") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
 ```
 
+```{r, echo=F}
+tmp.sum <- get.summary(chelsa.sd.out)
 
+knitr::kable(tmp.sum, 
+  caption="Summary statistics for chelsa s.d. statistics") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
 
 
-### 1.3.2 Soil 
+## 3 Soil 
 Soil variables (5 cm depth) derive from the [ISRIC dataset](https://www.isric.org/), downloaded at 250-m resolution 
   
 CECSOL                     Cation Exchange capacity of soil  
@@ -155,7 +189,7 @@ CRFVOL                     Coarse fragments volumetric in %
 ORCDRC                     Soil Organic Carbon Content in g/kg  
 PHIHOX                     Soil pH x 10 in H20  
 SLTPPT                     Silt mass fraction in %  
-SNDPTT                     Sand mass fraction in %  
+SNDPPT                     Sand mass fraction in %  
 BLDFIE                     Bulk Density (fine earth) 	in kg/m3  
 P.ret.cat                  Phosphorous Retention - Categorical value, see [ISRIC 2011-06](https://www.isric.org/documents/document-type/isric-report-201106-global-distribution-soil-phosphorus-retention-potential)  
 \newline \newline
@@ -200,6 +234,66 @@ for(i in 1:8){
 }
 ```
 
+Reimport and assemble output.
+```{r, message=F, warning=F}
+ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
+ISRIC.layer.names <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
+isric.files <- list.files(path="../_derived/output_pred/", 
+                          pattern=paste0(paste0(ISRIC.layer.names, ".csv$"), collapse="|"), 
+                          full.names=T)
+
+isric.out <- do.call(cbind, lapply(isric.files,  
+                                   function(x) {read_csv(x, col_types = cols(X1 = col_character(),
+                                                                             V1 = col_double())) %>% 
+                                       column_to_rownames("X1")}))
+colnames(isric.out) <- ISRIC.layer.names
+#same for sd values
+isric.sd.files <- list.files(path="../_derived/output_pred/", 
+                          pattern=paste0(paste0(ISRIC.layer.names, "_sd.csv$"), collapse="|"), 
+                          full.names=T)
+isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
+                                      function(x) {read_csv(x, col_types = cols(X1 = col_character(),
+                                                                                 V1 = col_double())) %>% 
+                                           column_to_rownames("X1")}))
+colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "_sd")
+```
+Show summaries
+```{r, echo=F}
+tmp.sum <- get.summary(isric.out)
+
+knitr::kable(tmp.sum, 
+  caption="Summary statistics for chelsa mean statistics") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
+
+```{r, echo=F}
+tmp.sum <- get.summary(chelsa.sd.out)
+
+knitr::kable(tmp.sum, 
+  caption="Summary statistics for chelsa s.d. statistics") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
+
+
+## 4 Create output and export
+
+```{r}
+soilclim <- header %>% 
+  dplyr::select(PlotObservationID) %>% 
+  left_join(header.shp@data %>% 
+              dplyr::select(PlotObservationID) %>% 
+              bind_cols(chelsa.out) %>%
+              bind_cols(chelsa.sd.out) %>% 
+              bind_cols(isric.out) %>% 
+              bind_cols(isric.sd.out) %>% 
+              distinct(), 
+            by="PlotObservationID")
+
+save(soilclim, file = "../_output/SoilClim_sPlot3.RData")
+```
+
 
 
 
-- 
GitLab