Imported template data extraction

ef6de585 · Francesco Sabatini · ef6de585
Commit ef6de585 authored 4 years ago by Francesco Sabatini
--- a/01_Extract_data_Project_31.Rmd
+++ b/01_Extract_data_Project_31.Rmd
+---
+title: 'Project #31 - Data Extraction'
+output:
+html_document: default
+word_document: default
+always_allow_html: yes
+---
+
+
+<center>
+  ![](https://www.idiv.de/fileadmin/content/Files_sDiv/sDiv_Workshops_Photos_Docs/sDiv_WS_Documents_sPlot/splot-long-rgb.png "sPlot Logo")
+</center>
+  
+    
+      
+        
+**Timestamp:** `r date()`  
+**Drafted:** Francesco Maria Sabatini  
+**Version:** 1.0 
+  
+
+This report documents the data extraction for **sPlot project proposal #31** - *Global variation in fine root traits along climate and soil gradients* as requested by Daniel Laughlin and Alexandra Weigelt.   
+
+
+This R script:  
+1) Extracts plots from sPlot based on species with root traits,  
+2) Calculates CWM for plots with >80% trait coverage,  
+3) Extracts climate and soils data,  
+4) Classifies plots into broad vegetation types based on species' affinities,  
+5) Shows the distribution of these root traits across vegetation types (forest, grassland, etc)  
+6) Returns scatterplots of root traits along climate and soil variables.  
+~~7) Performs a PCA of CWMs~~  
+  
+```{r results="hide", message=F}
+library(reshape2)
+library(tidyverse)
+library(dplyr)
+library(data.table)
+library(knitr)
+library(kableExtra)
+library(viridis)
+library(grid)
+library(gridExtra)
+library(vegan)
+library(xlsx)
+library(caret)
+library(foreign)
+library(raster)
+library(downloader)
+library(sp)
+library(sf)
+library(rgdal)
+library(rgeos)
+
+#save temporary files
+write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
+write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
+rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
+```
+
+
+
+```{r, cache=T}
+#Import sPlot data
+load("/data/sPlot/releases/sPlot2.1/DT2_20161025.RData")
+load("/data/sPlot/releases/sPlot2.1/sPlot_header_20161124.RData")
+```
+
+```{r, echo=F, cache=T}
+source("/data/sPlot/users/Francesco/_sPlot_Management/Fix.header.R")
+header <- fix.header(header, exclude.sophy=F)
+#clean unnecessary columns 
+DT2 <- DT2 %>% 
+  dplyr::select(-Taxon.group, -Matched.concept)
+header <- header %>%
+  dplyr::select(-Longitude, -Latitude)
+# merge vegetation layers, where necessary
+###combine cover values across layers
+combine.cover <- function(x){
+    while (length(x)>1){
+      x[2] <- x[1]+(100-x[1])*x[2]/100
+      x <- x[-1]
+    }
+    return(x)
+}
+
+DT2 <- DT2 %>%
+  dplyr::select(PlotObservationID, species, Relative.cover) %>%
+  group_by(PlotObservationID, species) %>%
+  summarize(Relative.cover=combine.cover(Relative.cover)) %>%
+  ungroup()
+```
+
+
+
+
+# 1 Extract plots from sPlot based on species with root traits  
+Import data on root traits provided by the sRoot consortium (v 30 sept. 2019). Extract all plots containing at least one species in sRoot list. ~~(Matching could be marginally improved if checking names in sRoot's list with TPL).~~
+```{r, message=F, results=F, warning=F, cache=T}
+root_traits <- read_csv("_input/Mean_values_for_sPlot.csv")
+root_traits <- root_traits %>% #rename column to adapt input data in v1.8
+  rename(RD=Root_diameter, 
+         RootN=Root_N_concentration,
+         RTD=Root_tissue_density,
+         SRL=Specific_root_length) %>% 
+  dplyr::select(full_species:Taxonomic_information, vegtype_version1:vegtype_expert, Mycorrhizal_type_Kuyper,
+                RD, RootN, RTD, SRL) %>% 
+  ### replace all RTD values >.8 with NAs
+  mutate(RTD=replace(RTD, list=RTD>1, values=NA))
+  
+  
+species_list <- root_traits$full_species
+
+plot.sel <- (DT2 %>%
+  filter(DT2$species %in% species_list) %>%
+    dplyr::select(PlotObservationID) %>%
+    distinct())$PlotObservationID
+
+DT.sRoot <- DT2 %>% 
+  filter(DT2$PlotObservationID %in% plot.sel)
+```
+
+Out of the `r length(species_list)` species in the sRoot list, `r sum(unique(DT2$species) %in% species_list)` species are present in sPlot, for a total of `r length(unique(DT.sRoot$PlotObservationID))` records.
+
+# 2 Calculate CWMs 
+Calculate CWM and trait coverage for each trait and each plot. Select plots having more than 80% coverage for at least one trait.
+```{r, cache=T,  cache.lazy=F}
+# Merge species data table with traits
+CWM.sRoot0 <- DT.sRoot %>%
+  as.tbl() %>%
+  dplyr::select(PlotObservationID, species, Relative.cover) %>%
+  left_join(root_traits %>%
+              dplyr::rename(species=full_species) %>%
+              dplyr::select(species, RD:SRL), 
+            by="species")
+
+# Calculate CWM for each trait in each plot
+CWM.sRoot1 <- CWM.sRoot0 %>%
+  group_by(PlotObservationID) %>%
+  summarize_at(.vars= vars(RD:SRL),
+               .funs = list(~weighted.mean(., Relative.cover, na.rm=T))) %>%
+  dplyr::select(PlotObservationID, order(colnames(.))) %>%
+  reshape2::melt(id.vars="PlotObservationID", value.name="trait.value")
+
+# Calculate coverage for each trait in each plot
+CWM.sRoot2 <- CWM.sRoot0 %>%
+  mutate_at(.funs = list(~if_else(is.na(.),0,1) * Relative.cover), 
+            .vars = vars(RD:SRL)) %>%
+  group_by(PlotObservationID) %>%
+  summarize_at(.vars= vars(RD:SRL),
+               .funs = list(~sum(., na.rm=T))) %>%
+  dplyr::select(PlotObservationID, order(colnames(.))) %>%
+  reshape2::melt(id.vars="PlotObservationID", value.name="trait.coverage")
+
+# Calculate CWV
+variance2.fun <- function(trait, abu){
+  res <- as.double(NA)
+  #nam <- nam[!is.na(trait)]
+  abu <- abu[!is.na(trait)]
+  trait <- trait[!is.na(trait)]
+  abu <- abu/sum(abu)
+  if (length(trait)>1){
+    # you need more than 1 observation to calculate
+    # skewness and kurtosis
+    # for calculation see 
+    # http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
+    m.trait <- weighted.mean(trait,abu)
+    res <- sum(abu*(trait-m.trait)^2)
+  }
+  res
+}
+
+CWM.sRoot3 <- CWM.sRoot0 %>%
+  group_by(PlotObservationID) %>%
+  summarize_at(.vars= vars(RD:SRL),
+               .funs = list(~variance2.fun(., Relative.cover))) %>%
+  dplyr::select(PlotObservationID, order(colnames(.))) %>%
+  reshape2::melt(id.vars="PlotObservationID", value.name="trait.variance")
+
+## Calculate proportion of species having traits
+CWM.sRoot4 <- CWM.sRoot0 %>%
+  group_by(PlotObservationID) %>%
+  summarize_at(.vars= vars(RD:SRL),
+               .funs = list(~sum(!is.na(.)))) %>%
+  dplyr::select(PlotObservationID, order(colnames(.))) %>%
+  reshape2::melt(id.vars="PlotObservationID", value.name="trait.nspecies")
+
+# Join together
+CWM.sRoot <- CWM.sRoot1 %>%
+  left_join(CWM.sRoot2, by=c("PlotObservationID", "variable")) %>%
+  left_join(CWM.sRoot3, by=c("PlotObservationID", "variable")) %>%
+  left_join(CWM.sRoot4, by=c("PlotObservationID", "variable")) %>%
+  left_join(CWM.sRoot0 %>% 
+              group_by(PlotObservationID) %>%
+              summarize(sp.richness=n()), by=c("PlotObservationID")) %>%
+  mutate(trait.coverage.nspecies=trait.nspecies/sp.richness) %>%
+  #filter(trait.coverage>=0.8) %>%
+  arrange(PlotObservationID)
+```
+Scatterplot comparing coverage of traits values across plots, when based on relative cover and when based on proportion of species richness
+```{r, eval=T, fig.align="center", fig.width=8, fig.height=4, cache=T, warning=F}
+ggplot(data=CWM.sRoot, aes(x=trait.coverage, y=trait.coverage.nspecies, col=log(sp.richness))) + 
+  geom_point(pch="+", alpha=1/4, cex=.7) + 
+  geom_vline(xintercept = .8, lwd=.7, lty=2) + 
+  geom_abline(intercept = 0, slope=1, col=2, lty=2, lwd=.7) + 
+  xlim(c(0,1)) + 
+  ylim(c(0,1)) + 
+  facet_wrap(.~variable, ncol=4) + 
+  scale_color_viridis() + 
+  xlab("Trait coverage (Relative  cover)") + 
+  ylab("Trait coverage (Proportion of species)")
+```
+
+```{r}
+CWM.sRoot <- CWM.sRoot %>%
+  filter(trait.coverage>=0.8)
+```
+
+```{r, echo=F}
+knitr::kable(CWM.sRoot[1:20,], caption="Example of CWM data file") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+
+```{r, echo=F}
+knitr::kable(CWM.sRoot %>%
+               group_by(variable) %>%
+               summarize("num.plots"=n()), caption="Number of plots with >=.8 coverage per trait") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+
+
+
+Create list of plots having at least one trait with >=.8 coverage and extract header data
+```{r}
+plot80perc <- (CWM.sRoot %>%
+  dplyr::select(PlotObservationID) %>%
+  distinct())$PlotObservationID
+
+header.sRoot <- header %>%
+  filter(PlotObservationID %in% plot80perc) %>% 
+  filter(!is.na(POINT_X))
+
+DT.sRoot08 <- DT.sRoot %>%
+  filter(PlotObservationID %in% header.sRoot$PlotID)
+
+CWM.sRoot <- CWM.sRoot %>%
+  filter(PlotObservationID %in% header.sRoot$PlotID)
+```
+
+Completeness of header data
+```{r, echo=F}
+knitr::kable(data.frame(Completeness_perc=colSums(!is.na(header.sRoot))/
+                          nrow(header.sRoot)*100)[-c(1,2),,drop=F], 
+             caption="Header file - Columns present and % completeness") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+The process results in `r nrow(header.sRoot)` plots selected, for a total of `r nrow(CWM.sRoot)` trait * plot combinations. 
+  
+
+Geographical distribution of plots
+```{r, fig.align="center", fig.height=4, fig.width=6, message=F, warning=F}
+countries <- map_data("world")
+ggworld <- ggplot(countries, aes(x=long, y=lat, group = group)) +
+  geom_polygon(col=NA, lwd=3, fill = gray(0.9)) +
+  geom_point(data=header.sRoot, aes(x=POINT_X, y=POINT_Y, group=1), col="red", alpha=0.5, cex=0.7, shape="+") + 
+  theme_bw()
+ggworld
+```
+
+
+Summarize data across data sets in sPlot, and create list of data custodians
+```{r, warning=F, message=F}
+db.out <- fread("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv") %>%
+  dplyr::select(`GIVD ID`, Custodian)
+data.origin <- header.sRoot %>% 
+   group_by(`GIVD ID`) %>% 
+   summarize(Num.plot=n()) %>%
+   left_join(db.out)
+```
+
+```{r, echo=F}
+knitr::kable(data.origin[1:10,], caption="Data Origin [only first 10 rows shown]") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+The data derive from `r nrow(data.origin)` datasets.  
+
+# 3 Extract climate and soils data  
+## ISRIC soil data  
+
+Download ISRIC soil data at 5 cm depth (250 m horizontal resolution)  
+```{r, eval=F}
+# Added in v1.8
+url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
+url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
+url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
+url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
+url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
+url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
+url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
+url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"
+
+download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
+download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
+download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
+download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
+download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
+download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
+download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
+download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")
+```
+
+Load ISRIC rasters
+```{r, eval=F}
+mycrs <- CRS("+init=epsg:4326")
+BLDFIE <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif")
+CECSOL <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif")
+CLYPPT <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif")
+CRFVOL <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif")
+ORCDRC <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif")
+PHIHOX <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif")
+SLTPPT <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif")
+SNDPPT <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif")
+
+P.ret <- raster("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/Generalized P-retention potential (dominant class)1.tif")
+
+# defined functions to calculate mean and mode, which exclude NAs and 0s (if needed)
+robust.mean <- function(x){mean(x, rm.na=T)}
+robust.mode <- function(x){if(any(x!=0)) {
+                              a <- x[which(x!=0)] #exclude zero (i.e. NAs)
+                              return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
+                                return(NA)
+                          }
+```
+
+Create spatial point dataframe for sPlot data to intersect with ISRIC raster layers
+```{r}
+header.shp <- SpatialPointsDataFrame(coords=header.sRoot %>%
+                                        dplyr::select(POINT_X, POINT_Y),
+                               proj4string = CRS("+init=epsg:4326"), 
+                               data=data.frame(PlotID=header.sRoot$PlotID, 
+                                               loc.uncert=header.sRoot$`Location uncertainty (m)`))
+writeOGR(header.shp, dsn = "_derived", layer = "header.shp", driver = "ESRI Shapefile", overwrite_layer = T)
+```
+
+
+Intersect sPlot SpatialPoints DataFrame with ISRIC raster layers (in Parallel computing)
+```{r, eval=F, warning=F, message=F}
+##go parallel
+library(parallel)
+library(doParallel)
+cl <- makeForkCluster(8, outfile="" )
+#cl <- makeCluster(8)
+registerDoParallel(cl)
+ 
+ BLDFIE.out <- foreach (i=1:length(header.shp), .packages=c('raster'),  .combine=rbind) %dopar% { #
+   BLDFIE.tmp <- raster::extract(BLDFIE, header.shp[i,], buffer=min(10000,  max(250, header.shp@data$loc.uncert[i])), fun=robust.mean)
+ }
+ save(BLDFIE.out, file="_derived/BLDFIE.out.RData")
+ 
+ CECSOL.out <- foreach (i=1:length(header.shp), .packages=c('raster'),  .combine=rbind) %dopar% { #
+   CECSOL.tmp <- raster::extract(CECSOL, header.shp[i,], buffer=min(10000,  max(250, header.shp@data$loc.uncert[i])), fun=robust.mean)
+ }
+ save(CECSOL.out, file="_derived/CECSOL.out.RData")  
+ 
+ 
+ CLYPPT.out <- foreach (i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { #
+   CLYPPT.tmp <- raster::extract(CLYPPT, header.shp[i,], buffer=min(10000,  max(250, header.shp@data$loc.uncert[i])), fun=robust.mean)
+ }
+ save(CLYPPT.out, file="_derived/CLYPPT.out.RData")  
+ 
+ CRFVOL.out <- foreach (i=1:length(header.shp), .packages=c('raster'),  .combine=rbind) %dopar% { #
+   CRFVOL.tmp <- raster::extract(CRFVOL, header.shp[i,], buffer=min(10000,  max(250, header.shp@data$loc.uncert[i])), fun=robust.mean)
+ }
+save(CRFVOL.out, file="_derived/CRFVOL.out.RData")  
+
+ORCDRC.out <- foreach (i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { #
+  ORCDRC.tmp <- raster::extract(ORCDRC, header.shp[i,], buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=robust.mean)
+}
+save(ORCDRC.out, file="_derived/ORCDRC.out.RData")  
+
+PHIHOX.out <- foreach (i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { #
+  PHIHOX.tmp <- raster::extract(PHIHOX, header.shp[i,], buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=robust.mean)
+}
+save(PHIHOX.out, file="_derived/PHIHOX.out.RData")  
+
+SLTPPT.out <- foreach (i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { #
+  SLTPPT.tmp <- raster::extract(SLTPPT, header.shp[i,], buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=robust.mean)
+}
+save(SLTPPT.out, file="_derived/SLTPPT.out.RData") 
+
+SNDPPT.out <- foreach (i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { #
+  SNDPPT.tmp <- raster::extract(SNDPPT, header.shp[i,], buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=robust.mean)
+}
+save(SNDPPT.out, file="_derived/SNDPPT.out.RData") 
+
+P.ret.out <- foreach (i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { #
+  P.ret.tmp <- raster::extract(P.ret, header.shp[i,], buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=robust.mode)
+}
+save(P.ret.out, file="_derived/P.ret.out.RData")
+
+stopCluster(cl)  
+```
+
+Reload and recompile output
+```{r, eval=T, warning=F, message=F}
+load( file="_derived/BLDFIE.out.RData")
+load( file="_derived/CECSOL.out.RData")
+load( file="_derived/CLYPPT.out.RData")
+load( file="_derived/CRFVOL.out.RData")
+load( file="_derived/ORCDRC.out.RData")  
+load( file="_derived/PHIHOX.out.RData") 
+load( file="_derived/SLTPPT.out.RData") 
+load( file="_derived/SNDPPT.out.RData")
+load(file="_derived/P.ret.out.RData")
+
+ISRIC.out <- data.frame(PlotID=header.shp@data$PlotID, 
+             BLDFIE=BLDFIE.out, 
+             CECSOL=CECSOL.out,
+             CLYPPT=CLYPPT.out,
+             CRFVOL=CRFVOL.out,
+             ORCDRC=ORCDRC.out,
+             PHIHOX=PHIHOX.out,
+             SLTPPT=SLTPPT.out,
+             SNDPPT=SNDPPT.out, 
+             P.ret=P.ret.out)
+
+P.ret.key <- read.dbf("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/Generalized P-retention potential (dominant class)1.tif.vat.dbf")
+
+ISRIC.out <- ISRIC.out %>% 
+  left_join(P.ret.key %>% 
+              dplyr::select(VALUE, MainCLASS) %>% 
+              rename(P.ret=VALUE), by="P.ret") %>% 
+  rename(P.ret.cat=MainCLASS)
+```
+
+## Climate data 
+Join soil and climate data into the `env.sRoot` data.frame
+
+```{r, cache=T}
+
+load("/data/sPlot/releases/sPlot2.1/sPlot_header_chelsa_20161124.RData")
+#load("/data/sPlot/releases/sPlot2.1/sPlot_header_soilgrids_20161124.RData") 
+
+env.sRoot <- header.sRoot %>% 
+  dplyr::select(PlotID) %>% 
+  left_join(climate %>%
+              dplyr::select(-POINT_X, -POINT_Y), 
+            by="PlotID") %>%
+  left_join(ISRIC.out, by="PlotID") %>% 
+  dplyr::select(-P.ret)
+
+```
+
+Download and unzip rasters from [Global Aridity Index and Potential Evapotranspiration (ET0) Climate Database v2](https://figshare.com/articles/Global_Aridity_Index_and_Potential_Evapotranspiration_ET0_Climate_Database_v2/7504448/3)  
+
+```{r, eval=F}
+require(rfigshare)
+##Authentication using key from online tutorial
+client_key = '123456'
+client_secret = '65xyAzi1'
+token_key = 'n4oTQ22l4FxsuQlYZlhCFwYrrSgrlPn1lhIx32uzwzAwn4oTQ22l4FxsuQlYZlhC1F'
+token_secret = '0MNOqkQncNHuKTi6fQ8MuA'
+fs_auth(client_key, client_secret, token_key, token_secret)
+
+## Import map of aridity
+a <- fs_details(7504448, mine=F)
+url.ai <- a$files[[5]]$download_url
+url.et <- a$files[[3]]$download_url
+download(url.ai, "/data/sPlot/users/Francesco/Ancillary_Data/Aridity_Index/Aridity.zip", mode = "wb")
+download(url.et, "/data/sPlot/users/Francesco/Ancillary_Data/Aridity_Index/ET.zip", mode = "wb")
+
+unzip ("/data/sPlot/users/Francesco/Ancillary_Data/Aridity_Index/ET.zip", 
+       exdir ="/data/sPlot/users/Francesco/Ancillary_Data/Aridity_Index/")
+unzip ("/data/sPlot/users/Francesco/Ancillary_Data/Aridity_Index/Aridity.zip", 
+       exdir ="/data/sPlot/users/Francesco/Ancillary_Data/Aridity_Index/")
+
+```
+
+Extract Aridity and PET values for all selected plots
+```{r, cache=T, warning=F, message=F}
+ai <- raster("/data/sPlot/users/Francesco/Ancillary_Data/Aridity_Index/ai_et0/ai_et0.tif")
+et <- raster("/data/sPlot/users/Francesco/Ancillary_Data/Aridity_Index/et0_yr/et0_yr.tif")
+
+ai.header <- raster::extract(ai, header.shp)
+et.header <- raster::extract(et, header.shp)
+
+env.sRoot <- env.sRoot %>%
+  left_join(data.frame(PlotID=header.shp@data$PlotID,
+                       Aridity_ind=ai.header,
+                       Pot_Evapot=et.header),
+            by="PlotID")
+```
+
+## Estimate soil N based on T and pH
+**as suggested by Thom Kuyper**  
+  
+The basic idea for the plots (true for forests, not fully true for grasslands, admittedly) is that N supply is determined by the decomposition of the organic matter and the nitrogen mineralisation during the decomposition.   
+That means that we discount (1) nitrogen deposition; (2) nitrogen fixation (assuming that very few plots are so dominated by legumes that they overwhelm the signal of nitrogen through mineralisation), (3) application of nitrogen fertiliser.  
+We also assume that most plots will be nitrogen-limited, so losses of nitrogen (when mineralisation is in excess of plant nitrogen demand) is equally discounted in the estimate of nitrogen supply. Under that assumption we can estimate nitrogen supply from soil organic matter. 
+In order to do that we need a brief recap which factors affect nitrogen mineralisation, factors that can be easily included in the model.  
+*Factor 1 - Temperature*  
+For simplicity we assume that the average annual soil temperature in the uppermost 10-20 cm is equal to the average annual temperature.  From models on decomposition we chose a model developed by Yang (a PhD from Wageningen). His temperature function is:
+(1)	At average temperatures below 0 °C there is no decomposition and N mineralisation (as there is no water which is crucial for both enzyme activity and accessibility of soil organic matter in the soil solution). The few arctic and alpine plots will then possibly have estimates of zero N supply.  
+(2)	The model for temperature works for an average temperature of 9° C, so that has the standard value of 1. For other temperatures we then apply a correction factor.  
+(3)	For temperatures between 0 °C and 9 °C, we use: F = 0.1 (T+1); so the actual model yields 0 at T = -1 °C and f = 1 at 9 °C  
+(4)	For temperatures between 10 and 27 °C we use F = 2^(T-9)/9. So at 9 ° C the formula against yields f = 1; for 18 °C the formula yields f = 2 (which suggests the formula has a Q10, aka temperature sensitivity or slightly over 2).  
+
+*Factor 2 - pH and Soil C org*  
+The second correction factor has to do with what is called protection of organic matter by mineral association. Mineral association protects negatively charged organic matter, if the minerals have positive charge. (There is some protection of positively charged organic matter, e.g. basic amino acids, on negatively charged mineral particles such as clay). However, the database might not be very accurate for clay content, and would have not data for different  kinds of clay. So we might overcome this by again simplifying assumptions. I assume that protection is mainly be metal oxides and these have a more positive charge at lower pH (and hence organic matter is better protected against decomposition at lower pH). So I propose to include a second protection factor, based on pH. This simplification is also in QUEFTS and a paper by Sattari et al. (2014) that is based on QUEFTS, a model to estimate soil nutrient supply for agricultural soils, so as to calculate which part of plant nutrient demand can be covered by soil supply rather than fertilizer supply.: 
+
+SN = αN * FN * Corg  
+with  
+αN: an empirical parameter, set at 6.8  
+FN: pH-dependent coefficient to take into account the availability of organic matter in relation to mineral association  
+Corg : the fraction soil organic matter in mg kg-1. C is 50% of SOM content.  
+  
+The factor N is defined as:  
+FN = 0.4 for pH < 4.7  
+FN = 0.25*(pH-3) for 4.7 < pH < 7  
+FN = 1 for pH > 7  
+  
+The combination of Factor 1 and Factor 2 would give a reasonable estimate of N supply  
+
+```{r}
+# Added in v1.8
+env.sRoot <- env.sRoot %>% 
+  mutate(N.f1=ifelse(bio01<0, 0, 
+                     ifelse(bio01<=9,0.1*(bio01+1), 2^((bio01-9)/9)))) %>% 
+  mutate(FN=ifelse((PHIHOX/10)<4.7, 0.4, 
+                   ifelse((PHIHOX/10)<7, 0.25*((PHIHOX/10)-3), 1))) %>% 
+  mutate(N.f2=6.8*FN*(ORCDRC/1000)) %>% 
+  mutate(N.est=N.f1*N.f2) %>% 
+  dplyr::select(-FN)
+# Check range of factors
+summary(env.sRoot$N.f1)
+summary(env.sRoot$N.f2)
+summary(env.sRoot$N.est)
+```
+
+## Continents
+```{r}
+sPDF <- rworldmap::getMap(resolution="coarse")
+continent <- sPDF[,"continent"]
+crs(continent) <- crs(header.shp)
+continent@data[243,"continent"] <- "South America" ## Manually correct missing data
+# create clipped version of continent to avoid going beyond 180 lON
+coords <- data.frame(x=c(-180,180,180,-180),
+                     y=c(-90,-90,90,90))
+bboxc = Polygon(coords)
+bboxc = SpatialPolygons(list(Polygons(list(bboxc), ID = "a")), proj4string=crs(continent))
+continent_clipped <- gIntersection(continent[-137,], bboxc, byid=T) # polygon 137 gives problems... workaround
+```
+
+Assign plots to continent (as correction to header file)  
+```{r, eval=F}
+continent.out <- sp::over(x=header.shp, y=continent)
+#correct unassigned points to closest continent
+toassign <- header.shp[which(is.na(continent.out$continent)),]
+crs(toassign) <- crs(continent)
+#go parallel
+cl <- makeForkCluster(8, outfile="" )
+#cl <- makeCluster(8)
+registerDoParallel(cl)
+nearestContinent <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
+  nearestContinent.tmp <- geosphere::dist2Line(toassign[i,], continent_clipped)
+} #~2 hrs with 8 cpus
+continent.out$continent[which(is.na(continent.out$continent))] <- as.character(continent[-137,]@data[nearestContinent[,"ID"],])
+save(continent.out, file = "_derived/continent.out")
+stopCluster(cl)
+```
+Reload and manipulate continent. Correct header.sRoot
+```{r}
+load("_derived/continent.out")
+header.sRoot$CONTINENT <- continent.out %>% 
+  mutate(CONTINENT=factor(continent, 
+                            levels=c("Africa", "Antarctica", "Australia", "Eurasia", "North America", "South America"),
+                            labels=c("AF", "AN", "AU", "EU", "NA", "SA"))) %>% 
+  pull(CONTINENT)
+```
+
+
+## Ecoregions
+Extract data on ecoregion and join into the `env.sRoot` data.frame. Ecoregion derive from [Olson et al. (2001)](https://doi.org/10.1641/0006-3568(2001)051[0933:TEOTWA]2.0.CO;2).
+
+
+```{r, message=F}
+##Import spatial data on Ecoregions
+ecoreg <- readOGR("/data/sPlot/users/Francesco/Ancillary_Data/Ecoregions_WWF/", layer="wwf_terr_ecos")
+crs(ecoreg) <- CRS("+init=epsg:4326")
+ecoreg@data <- ecoreg@data %>% 
+  dplyr::select(ECO_NAME, eco_code)
+## Intersect plots with ecoregions and bind column
+ecoreg.out <- sp::over(x=header.shp, y=ecoreg) # ~5 mins
+
+```
+Match missing values to nearest ecoregion
+```{r, warning=F, message=F, eval=F}
+library(doParallel)
+library(parallel)
+cl <- makeForkCluster(10, outfile="" )
+registerDoParallel(cl)
+
+toassign <- header.shp[which(is.na(ecoreg.out$ECO_NAME)),]
+nearestEcoreg <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { 
+  nearestEcoreg.tmp <- tryCatch(data.frame(toassign[i,"PlotID"],
+                                                  geosphere::dist2Line(toassign[i,], ecoreg)),
+               error = function(e){data.frame(toassign[i,"PlotID"], distance=NA, lon=NA, lat=NA,ID=NA)}
+      )
+  return(nearestEcoreg.tmp)
+} #~20 hrs with 10 cpus
+save(nearestEcoreg, file = "_derived/EcoregionExtract.RData")
+stopCluster(cl)
+```
+Recompile and check
+```{r}
+load(file = "_derived/EcoregionExtract.RData")
+ecoreg.out[which(is.na(ecoreg.out$ECO_NAME)),] <- ecoreg@data[nearestEcoreg[,"ID"],]
+save(ecoreg.out, file = "_derived/Ecoregion.out.RData")
+env.sRoot <- env.sRoot %>% bind_cols(ecoreg.out)
+
+##double check
+header.sRoot %>%
+  left_join(env.sRoot %>%
+              dplyr::select(PlotID, ECO_NAME), 
+            by="PlotID") %>%
+  distinct(ECO_NAME, CONTINENT, .keep_all = FALSE) %>% 
+  arrange(ECO_NAME)
+
+header.shp@data$CONTINENT <- header.sRoot$CONTINENT
+header.shp@data <- header.shp@data %>% 
+  left_join(env.sRoot %>%
+              dplyr::select(PlotID, ECO_NAME), 
+            by="PlotID")
+writeOGR(header.shp, dsn="_derived/", layer="EcoregionContinent", driver = "ESRI Shapefile", overwrite_layer = T)
+```
+
+```{r, echo=F}
+knitr::kable(env.sRoot %>% 
+    group_by(ECO_NAME) %>% 
+    summarize(n.plot=n()) %>% 
+    filter(n.plot>50), caption="Number of plots for each ecoregion (only those with more than 50 plots shown)") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+
+
+## Summary of environmental data available per plot
+
+```{r, echo=F}
+knitr::kable(env.sRoot[1:10,], caption="Environmental data [only first 10 rows]") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+Bioclimatic variables (bio01-bio19) derive from [CHELSA](http://chelsa-climate.org/bioclim/)
+
+Codes:
+
+Bio1 = Annual Mean Temperature  
+Bio2 = Mean Diurnal Range  
+Bio3 = Isothermality  
+Bio4 = Temperature Seasonality  
+Bio5 = Max Temperature of Warmest Month  
+Bio6 = Min Temperature of Coldest Month  
+Bio7 = Temperature Annual Range  
+Bio8 = Mean Temperature of Wettest Quarter  
+Bio9 = Mean Temperature of Driest Quarter  
+Bio10 = Mean Temperature of Warmest Quarter  
+Bio11 = Mean Temperature of Coldest Quarter  
+Bio12 = Annual Precipitation  
+Bio13 = Precipitation of Wettest Month  
+Bio14 = Precipitation of Driest Month  
+Bio15 = Precipitation Seasonality  
+Bio16 = Precipitation of Wettest Quarter  
+Bio17 = Precipitation of Driest Quarter  
+Bio18 = Precipitation of Warmest Quarter  
+Bio19 = Precipitation of Coldest Quarter  
+  
+  
+Soil variables derive from the [ISRIC dataset](https://www.isric.org/), downloaded at 250-m resolution 
+  
+CECSOL                     Cation Exchange capacity of soil  
+CLYPPT                     Clay mass fraction in %  
+CRFVOL                     Coarse fragments volumetric in %  
+ORCDRC                     Soil Organic Carbon Content in g/kg  
+PHIHOX                     Soil pH x 10 in H20  
+SLTPPT                     Silt mass fraction in %  
+SNDPTT                     Sand mass fraction in %  
+BLDFIE                     Bulk Density (fine earth) 	in kg/m3  
+P.ret.cat                  Phosphorous Retention - Categorical value, see [ISRIC 2011-06](https://www.isric.org/documents/document-type/isric-report-201106-global-distribution-soil-phosphorus-retention-potential)  
+
+  
+N.f1 - Nitrogen factor 1  
+N.f2 - Nitrogen factor 2  
+N.est - N.f1 * N.f2 (See description above)  
+  
+Aridity Index and Potential Evapotranspiration are downloaded from [Global Aridity Index and Potential Evapotranspiration (ET0) Climate Database v2](https://figshare.com/articles/Global_Aridity_Index_and_Potential_Evapotranspiration_ET0_Climate_Database_v2/7504448/3) at 30 arc-seconds. 
+Trabucco, Antonio; Zomer, Robert (2019): Global Aridity Index and Potential Evapotranspiration (ET0) Climate Database v2. figshare. 
+
+# 4 Classifiy plots into broad vegetation types based on species' affinities
+
+sPlot has two independent systems for classifying plots to vegetation types. The first, classifies plots into forest and non-forest, based on the share of phanerophytes. The second system classifies plots into broad habitat types and relies on the expert opinion of data contributors. This is, unfortunately, not consistently available across all plots, being the large majority of classified plots only available for Europe. These broad habitat types are coded using 5, non-mutually exclusive dummy variables:  
+1) Forest - F  
+2) Grassland - G  
+3) Shrubland - S  
+4) Sparse vegetation - B (Bare)  
+5) Wetland - W  
+A plot may belong to more than one formation, e.g. a Savannah is categorized as Forest + Grassland (FG).
+
+
+```{r, cache=T, cache.lazy=T, echo=F}
+## create synthetic codes for habitats
+
+habitat_convert <- header.sRoot %>%
+                dplyr::select(Forest:Wetland) %>%
+                mutate_all(.funs= list( ~replace_na(.,0))) %>%
+                plyr::adply(., 1, function(x) ifelse(sum(x)!=0,
+                                         paste0(c("F", "G", "S", "B", "W")[x==1], collapse=""),
+                                         NA)) %>%
+                dplyr::rename(habitat.code=V1) %>%
+                dplyr::select(habitat.code) 
+```
+
+On top of the native (but incomplete) classification of plots into vegetation types, we ran ~~two~~ *one* classifications based on ~~a) Cluster analysis, or b)~~ species' affinities  
+*from v1.8 on, classification based on cluster analysis is discontinued*  
+Species affinities were assigned based on expert-opinion. Based on the relative cover of each species, we summarized species based on their habitat affinities, and summarized their relative cover. We then assigned each plot to an habitat based on the following ifelse conditions:  
+  
+wetland>0.5 -> "Wetland"  
+forest>0.3 -> "Forest"  
+grassland+heathland+steppe>0.7 -> "Grassland"  
+"Other", otherwise.
+
+```{r }
+# Added in v1.4 - Add results of cluster analysis
+# based on beal's smoothing as performed by Helge Bruelheide
+# Grasslands belong to cluster #5
+#load("_derived/cluster_5.RData")
+#header.sRoot <- header.sRoot %>%
+#  left_join(data.frame(PlotObservationID=as.integer(names(cluster.5$cluster)),
+#                       Cluster_n=cluster.5$cluster), 
+#            by="PlotObservationID")
+
+# Added in v1.7 - Classify plots into forest\grasslands based on 
+# species' habitat preference
+
+#calculate proportion of relative in cover belonging to species with different habitat affinities
+# and assign most likely habitat
+
+
+
+TopHabitat <- DT.sRoot08 %>%
+  filter(PlotObservationID %in% plot80perc) %>%
+  as.tbl() %>%
+  dplyr::select(PlotObservationID, species, Relative.cover) %>%
+  left_join(root_traits %>%
+              dplyr::rename(species=full_species, habitat=vegtype_expert_homogenized) %>%
+              filter(is.in.sPlot=="WAHR") %>%
+              dplyr::select(species, habitat) %>%
+              mutate(value=1) %>%
+              reshape2::dcast(species ~ habitat, value.var="value", fill=0), 
+            by="species") %>%
+  group_by(PlotObservationID) %>%
+  summarize_at(.vars= vars(forest:wetland),
+               .funs = list(~weighted.mean(., Relative.cover, na.rm=T))) %>%
+  mutate(TopHabitat=ifelse(wetland>0.5, "Wetland", 
+                           ifelse(forest>0.3, "Forest", ifelse(
+                             grassland+heathland+steppe>0.7, "Grassland",
+                             "Other"))))
+
+habitats.sRoot <- CWM.sRoot %>%
+  left_join(header.sRoot %>% 
+              mutate(fornonfor=ifelse(is.forest==T, "for", 
+                                      ifelse(is.non.forest==T, "nonfor", NA))) %>%
+              dplyr::select(PlotObservationID, fornonfor) %>%
+              bind_cols(habitat_convert), 
+    by="PlotObservationID") %>%
+  left_join(TopHabitat %>% 
+              dplyr::select(PlotObservationID, TopHabitat),
+              by="PlotObservationID")
+
+
+#Attach to header
+header.sRoot <- header.sRoot %>%
+  left_join(TopHabitat %>%
+              dplyr::select(PlotObservationID, TopHabitat) %>%
+              distinct(), 
+            by="PlotObservationID")
+```
+
+The match between sPlot's native classification system and the one based on species affinities is evaluated using a confusion matrix
+```{r message=F}
+Habitat.compare <- header.sRoot %>%
+    as.tbl() %>%
+    dplyr::select(PlotObservationID, Forest:Wetland) %>%
+    mutate(Other=ifelse((Sparse.vegetation==1 | Shrubland==1), 1, 0)) %>%
+    dplyr::select(-Sparse.vegetation, - Shrubland) %>%
+    reshape2::melt(id.vars="PlotObservationID") %>%
+    rename(HabitatObs=variable) %>%
+    filter(value==1) %>%
+    left_join(TopHabitat %>% 
+                dplyr::select(PlotObservationID, TopHabitat), 
+              by="PlotObservationID")
+
+#Build a confusion matrix to evaluate the comparison  
+u <- union(Habitat.compare$TopHabitat, Habitat.compare$HabitatObs)
+t <- table( factor(Habitat.compare$TopHabitat, u), factor(Habitat.compare$HabitatObs, u))
+confm <- caret::confusionMatrix(t)
+
+```
+
+```{r echo=F}
+knitr::kable(confm$table, caption="Confusion matrix between sPlot's native classification of habitats (columns), and classification based on species' habitat affinity (rows)") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+Formulas of associated statistics are available on the help page of the [caret package](https://www.rdocumentation.org/packages/caret/versions/6.0-84/topics/confusionMatrix) and associated references.
+The overall accuracy of the classification based on species' habitat affinity, when tested against sPlot's native habitat classification is `r round(confm$overall[1],2)`, the Kappa statistics is `r round(confm$overall[2],2)`.
+```{r echo=F}
+knitr::kable(t(confm$byClass), caption="Associated statistics of confusion matrix by class") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+Summary of plots with trait coverage >=.8 across habitat types (based on species affinities)
+```{r, echo=F}
+knitr::kable(as.matrix(table((CWM.sRoot %>%
+                               left_join(header.sRoot %>%
+                                           dplyr::select(PlotObservationID, TopHabitat),
+                                         by="PlotObservationID"))[,c("variable", "TopHabitat")],
+                                  exclude=NULL)), caption="Number of plots across habitats") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+```{r, echo=F, eval=F}
+#Summary of plots with trait coverage >=.8 across habitat types (based on native sPlot's classification)
+knitr::kable(as.matrix(table(habitats.sRoot[,c("variable", "habitat.code" )],
+                                  exclude=NULL)), caption="Plots per habitat*trait combination") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+
+
+Summary of plots with trait coverage >=.8 across country/habitat types. 
+```{r, echo=F}
+continent_habitat <- header.sRoot %>%
+  dplyr::select(CONTINENT, TopHabitat)
+
+knitr::kable(as.matrix(table(continent_habitat[,2:1],
+                                  exclude=NULL)), caption="Plots per habitat*continent combination") %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center")
+```
+
+
+# 5 Distribution of CWMs across vegetation types
+
+```{r, fig.width=8, fig.height=6, fig.align="center", eval=F, echo=F}
+#CWMs of different traits across forest and non forest
+ggplot(data=habitats.sRoot, aes(x=fornonfor, y=trait.value, fill=fornonfor)) + 
+    geom_boxplot(outlier.alpha = 1/3, outlier.size = 1.5, outlier.shape = "+") + 
+    #geom_smooth(method = "loess") + 
+    scale_color_viridis(guide=F, discrete = T) + 
+    facet_wrap(. ~ variable, nrow = 2) + 
+    theme_classic() + 
+    theme(axis.title.x=element_blank(),
+        axis.text.x=element_blank(),
+        axis.ticks.x=element_blank(), 
+        axis.title.y=element_blank(),
+        axis.text.y=element_blank(),
+        axis.ticks.y=element_blank())
+```
+
+
+
+```{r, fig.width=8, fig.height=11, fig.align="center"}
+ggplot(data=habitats.sRoot, aes(x=TopHabitat, y=trait.value, fill=TopHabitat)) + 
+    geom_boxplot(outlier.alpha = 1/3, outlier.size = 1.5, outlier.shape = "+") + 
+    #geom_smooth(method = "loess") + 
+    scale_color_viridis(guide=F, discrete = T) + 
+    facet_wrap(. ~ variable, nrow = 4, scales = "free_y") + 
+    theme_classic() + 
+    theme(axis.title.x=element_blank(),
+        axis.text.x=element_blank(),
+        axis.ticks.x=element_blank(), 
+        axis.title.y=element_blank(),
+        #axis.text.y=element_blank(),
+        #axis.ticks.y=element_blank()
+        )
+```
+
+
+
+# 6 Scatterplots of root traits along climate and soil variables
+
+Show geographic distribution and create scatterplots between CWMs and environmental variables.  
+```{r, fig.align="center", fig.height=4, fig.width=6, message=F, warning=F}
+gg.global <- ggplot(countries, aes(x=long, y=lat, group = group)) +
+  geom_polygon(col=NA, lwd=3, fill = gray(0.9)) +
+  geom_point(data=header.sRoot %>%
+               filter(!is.na(TopHabitat)), 
+             aes(x=POINT_X, y=POINT_Y, group=1, col=TopHabitat),
+             alpha=0.5, cex=0.7, shape="+") + 
+  facet_wrap(.~ TopHabitat, ncol=2) + 
+  theme_bw() + 
+  scale_color_brewer(palette="Dark2", guide=F)
+gg.global
+```
+
+
+```{r, fig.align="center", fig.width=2, fig.height=2, warning=F}
+CWM.env <- CWM.sRoot %>%
+  left_join(env.sRoot %>%
+              dplyr::rename(PlotObservationID=PlotID), by="PlotObservationID") %>%
+  left_join(header.sRoot %>%
+              dplyr::select(PlotObservationID, TopHabitat), 
+            by="PlotObservationID") %>%
+  mutate(TopHabitat=factor(TopHabitat))
+
+#Scatterplots
+ggenv <- list()
+for(i in 1:27) {
+  CWM.env$out <- CWM.env[,8+i]
+  labpos.x <- quantile(CWM.env$out, 0.05, na.rm=T)
+  ggenv[[i]] <- ggplot(data=CWM.env, aes(x=out, y=trait.value, col=TopHabitat)) + 
+    geom_point(alpha=0.1, pch=16, cex=.6) + 
+    scale_color_brewer(palette="Dark2", guide=F) + 
+    facet_grid(variable ~ ., scale="free_y") + 
+    theme_classic() + 
+    ggtitle(colnames(CWM.env)[9+i]) + 
+    theme(axis.title.x=element_blank(),
+        axis.title.y=element_blank()) 
+  if(!i %in% c(4,8,12,16,20,24,27)) ggenv[[i]] <- ggenv[[i]] + theme(strip.text.y = element_blank())
+  }
+
+ggenv[[28]] <- cowplot::get_legend(ggplot(data=CWM.env %>%
+                                            filter(!is.na(TopHabitat)), 
+                                          aes(x=out, y=trait.value, col=TopHabitat)) + 
+                             geom_point(pch=16, cex=4) + 
+                             scale_color_brewer(palette="Dark2", name="Habitat"))
+plot(ggenv[[28]])
+```
+
+```{r, fig.align="center", fig.width=8, fig.height=11, cache=T, cache.lazy=F, warning=F}
+gridExtra::grid.arrange(ggenv[[1]], ggenv[[2]], ggenv[[3]], ggenv[[4]], ncol=4, widths=c(1,1,1,1.2))
+gridExtra::grid.arrange(ggenv[[5]], ggenv[[6]], ggenv[[7]], ggenv[[8]], ncol=4, widths=c(1,1,1,1.2))
+gridExtra::grid.arrange(ggenv[[9]], ggenv[[10]], ggenv[[11]], ggenv[[12]], ncol=4, widths=c(1,1,1,1.2))
+gridExtra::grid.arrange(ggenv[[13]], ggenv[[14]], ggenv[[15]], ggenv[[16]], ncol=4, widths=c(1,1,1,1.2))
+gridExtra::grid.arrange(ggenv[[17]], ggenv[[18]], ggenv[[19]], ggenv[[20]], ncol=4, widths=c(1,1,1,1.2))
+gridExtra::grid.arrange(ggenv[[21]], ggenv[[22]], ggenv[[23]], ggenv[[24]], ncol=4, widths=c(1,1,1,1.2))
+gridExtra::grid.arrange(ggenv[[25]], ggenv[[26]], ggenv[[27]], ncol=4, widths=c(1,1,1.2,1))
+```
+
+
+  
+```{r, warning=F, message=F, fig.aligh="center", fig.height=6, fig.width=6, eval=F, echo=F}
+# 6 PCA of CWMs
+## PCA on all traits 
+#CWM.wide <- reshape2::dcast(CWM.sRoot %>% 
+#                              dplyr::select(PlotObservationID:trait.value) %>%
+#                              filter(PlotObservationID %in% 
+#                                       names(cluster.5$cluster[cluster.5$cluster==cl])), 
+#                            PlotObservationID~variable, value.var = "trait.value", fill = NA) %>%
+#  filter(!is.na(rowSums(.)))
+#(CWM.pca <- rda(CWM.wide[,-1], scale=T))
+#ordiplot(CWM.pca)
+#plot(envfit(CWM.pca, CWM.wide[,-1]))
+
+
+## PCA on the most complete 4 traits only
+
+##selected env variables
+env.sRoot.sel <- env.sRoot %>%
+                  filter(env.sRoot$PlotID %in% (header.sRoot %>%
+                                    filter(TopHabitat=="Grassland"))$PlotID) %>%
+                  dplyr::select(PlotID, bio01, bio12, PHIHOX, ORCDRC, CLYPPT, CECSOL, N.est) %>% 
+                  rename(MAT=bio01, MAP=bio12, pH=PHIHOX, OM=ORCDRC, Clay=CLYPPT, CEC=CECSOL, N=N.est) %>%
+                  filter(pH != 0) %>%
+                  filter(CEC != 0) %>%
+                  drop_na %>%
+                  mutate_at(vars(MAT:N),
+                            .funs=~scale(.))
+CWM.wide2 <- reshape2::dcast(CWM.sRoot %>%
+                               filter(variable %in% c("Root_diameter", "SRL", "rootN", "RTD")), 
+                             PlotObservationID~variable, value.var = "trait.value", fill = NA) %>%
+                    filter(PlotObservationID %in% env.sRoot.sel$PlotID) %>%
+  filter(!is.na(rowSums(.))) %>%
+  arrange(PlotObservationID)
+
+env.sRoot.sel <- env.sRoot.sel %>%
+  filter(PlotID %in% CWM.wide2$PlotObservationID) %>%
+  arrange(PlotID)
+
+(CWM.pca2 <- rda(CWM.wide2[,-1], scale=T))
+
+envfit.grasslands <- envfit(CWM.pca2, env.sRoot.sel[,-1])
+biplot (CWM.pca2, display = 'species', scaling = 'species', choices=c(1,2))
+plot(envfit.grasslands)
+```
+
+# 7 Export
+
+```{r, eval=T}
+save( root_traits, DT.sRoot08, CWM.sRoot, header.sRoot, data.origin, env.sRoot,  #CWM.pca2, #habitats.sRoot,
+      file="_derived/sRoot_sPlot.RData" )
+sessionInfo()
+```