Finished version 0 of 05 ExtractEnv

dfb3ce89 · Francesco Sabatini · c12440ae · dfb3ce89
Commit dfb3ce89 authored 5 years ago by Francesco Sabatini
--- a/code/05_ExtractEnvironment.Rmd
+++ b/code/05_ExtractEnvironment.Rmd
@@ -44,17 +44,34 @@ source("A98_PredictorsExtract.R")
 write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
 write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
 rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
+#Ancillary variables
+get.summary <- function(x){x %>% 
+    summarize_all(.funs=list(no.NAs=~sum(is.na(.)), 
+                             min=~min(., na.rm=T),
+                             q025=~quantile(., 0.25, na.rm=T), 
+                             q50=~quantile(., 0.50, na.rm=T), 
+                             q75=~quantile(., .75, na.rm=T), 
+                             max=~max(., na.rm=T), 
+                             mean=~mean(., na.rm=T), 
+                             sd=~sd(., na.rm=T))) %>% 
+    gather(variable, value) %>% 
+    separate(variable, sep="_", into=c("variable", "stat")) %>% 
+    mutate(stat=factor(stat, levels=c("no.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
+    spread(key=stat, value=value)
+}
 ```
-## 1.1 Import header data
+## 1 Import header data
 ```{r}
 load("../_output/header_splot3.0.RData")
-header <- header.out
 ```
-Create spatial point dataframe for sPlot data to intersect with spatial layers
+Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels `Location uncertainty` to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.
-```{r}
+```{r, eval=F}
 header.shp <- header %>%
  filter(!is.na(Longitude) | !is.na(Latitude))
 header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
@@ -64,24 +81,16 @@ header.shp <- SpatialPointsDataFrame(coords= header.shp %>%
                                               loc.uncert=header.shp$`Location uncertainty (m)`, 
                                               `GIVD ID`=header.shp$`GIVD ID`))
 ```
+```{r, echo=F}
-## 1.2 Vector predictors
+header.shp <- readOGR(dsn="../_derived", layer="header.shp")
-### 1.2.1 Ecoregions
+colnames(header.shp@data) <- c("PlotObservationID", "loc.uncert", "GIVD ID")
-```{r, message=F, eval=F}
-##Import spatial data on Ecoregions
-ecoreg <- readOGR("../sPlot/_Ancillary/official", layer="wwf_terr_ecos")
-ecoreg@data <- ecoreg@data %>% 
-  dplyr::select(OBJECTID, ECO_NAME, REALM, BIOME, ECO_NUM, ECO_ID, eco_code)
 ```
-## 1.3 Raster data
-Download, import and reproject all raster data to same extent, CRS and resolution. Stack them and save derived files for easier future work  
-### 1.3.1 Climate
-Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/)
-Codes:
+## 2 Climate
+Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/)  
+Codes:  
 Bio1 = Annual Mean Temperature  
 Bio2 = Mean Diurnal Range  
@@ -92,7 +101,7 @@ Bio6 = Min Temperature of Coldest Month
 Bio7 = Temperature Annual Range  
 Bio8 = Mean Temperature of Wettest Quarter  
 Bio9 = Mean Temperature of Driest Quarter  
-Bio10 = Mean Temperature of Warmest Quarter  
+Bio10 = Mean Temperature of Warmest Quarter   
 Bio11 = Mean Temperature of Coldest Quarter  
 Bio12 = Annual Precipitation  
 Bio13 = Precipitation of Wettest Month  
@@ -135,18 +144,43 @@ for(i in 1:19){
                         ncores = 4, typp = "raster", output).ff2
 }
 ```
-Reimport and assemble output, and join it to header.
+Reimport and assemble output.
 ```{r, message=F, warning=F}
 chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
-chelsa.out <- do.call(cbind, lapply(chelsa.files, function(x) {read_csv(x) %>% 
+chelsa.out <- do.call(cbind, lapply(chelsa.files, 
-                                                                  column_to_rownames("X1")}))
+                                    function(x) {read_csv(x,col_types = cols(X1 = col_character(),
+                                                                             V1 = col_double())) %>% 
+                                        column_to_rownames("X1")}))
+colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
+#same for sd values
+chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
+chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
+                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
+                                                                                 V1 = col_double())) %>% 
+                                           column_to_rownames("X1")}))
+colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "_sd")
+```
+Show summaries
+```{r, echo=F}
+tmp.sum <- get.summary(chelsa.out)
+knitr::kable(tmp.sum, 
+  caption="Summary statistics for chelsa mean statistics") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
 ```
+```{r, echo=F}
+tmp.sum <- get.summary(chelsa.sd.out)
+knitr::kable(tmp.sum, 
+  caption="Summary statistics for chelsa s.d. statistics") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
-### 1.3.2 Soil 
+## 3 Soil 
 Soil variables (5 cm depth) derive from the [ISRIC dataset](https://www.isric.org/), downloaded at 250-m resolution 
 CECSOL                     Cation Exchange capacity of soil  
@@ -155,7 +189,7 @@ CRFVOL                     Coarse fragments volumetric in %
 ORCDRC                     Soil Organic Carbon Content in g/kg  
 PHIHOX                     Soil pH x 10 in H20  
 SLTPPT                     Silt mass fraction in %  
-SNDPTT                     Sand mass fraction in %  
+SNDPPT                     Sand mass fraction in %  
 BLDFIE                     Bulk Density (fine earth) 	in kg/m3  
 P.ret.cat                  Phosphorous Retention - Categorical value, see [ISRIC 2011-06](https://www.isric.org/documents/document-type/isric-report-201106-global-distribution-soil-phosphorus-retention-potential)  
 \newline \newline
@@ -200,6 +234,66 @@ for(i in 1:8){
 }
 ```
+Reimport and assemble output.
+```{r, message=F, warning=F}
+ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
+ISRIC.layer.names <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
+isric.files <- list.files(path="../_derived/output_pred/", 
+                          pattern=paste0(paste0(ISRIC.layer.names, ".csv$"), collapse="|"), 
+                          full.names=T)
+isric.out <- do.call(cbind, lapply(isric.files,  
+                                   function(x) {read_csv(x, col_types = cols(X1 = col_character(),
+                                                                             V1 = col_double())) %>% 
+                                       column_to_rownames("X1")}))
+colnames(isric.out) <- ISRIC.layer.names
+#same for sd values
+isric.sd.files <- list.files(path="../_derived/output_pred/", 
+                          pattern=paste0(paste0(ISRIC.layer.names, "_sd.csv$"), collapse="|"), 
+                          full.names=T)
+isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
+                                      function(x) {read_csv(x, col_types = cols(X1 = col_character(),
+                                                                                 V1 = col_double())) %>% 
+                                           column_to_rownames("X1")}))
+colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "_sd")
+```
+Show summaries
+```{r, echo=F}
+tmp.sum <- get.summary(isric.out)
+knitr::kable(tmp.sum, 
+  caption="Summary statistics for chelsa mean statistics") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
+```{r, echo=F}
+tmp.sum <- get.summary(chelsa.sd.out)
+knitr::kable(tmp.sum, 
+  caption="Summary statistics for chelsa s.d. statistics") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
+## 4 Create output and export
+```{r}
+soilclim <- header %>% 
+  dplyr::select(PlotObservationID) %>% 
+  left_join(header.shp@data %>% 
+              dplyr::select(PlotObservationID) %>% 
+              bind_cols(chelsa.out) %>%
+              bind_cols(chelsa.sd.out) %>% 
+              bind_cols(isric.out) %>% 
+              bind_cols(isric.sd.out) %>% 
+              distinct(), 
+            by="PlotObservationID")
+save(soilclim, file = "../_output/SoilClim_sPlot3.RData")
+```