Skip to content
Snippets Groups Projects
Select Git revision
  • 5b6b37bdaec30dcbbfb1e2f71b5f2a0f57f264a4
  • master default protected
2 results

05_ExtractEnvironment.Rmd

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    title: "sPlot 3.0 - Environmental Data"
    author: "Francesco Maria Sabatini"
    date: "2/14/2020"
    output: html_document
    ![](/data/sPlot/users/Francesco/_sPlot_Management/splot-long-rgb.png "sPlot Logo")

    Timestamp: r date()
    Drafted: Francesco Maria Sabatini
    Revised:
    version: 1.0

    This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.

    knitr::opts_chunk$set(echo = TRUE)
    library(tidyverse)
    library(viridis)
    library(readr)
    library(xlsx)
    library(knitr)
    library(kableExtra)
    
    ## Spatial packages
    library(rgdal)
    library(sp)
    library(sf)
    library(rgeos)
    library(raster)
    library(rworldmap)
    #library(elevatr)
    #library(rnaturalearth)
    #library(dggridR)
    
    source("A98_PredictorsExtract.R")
    
    #save temporary files
    write("TMPDIR = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('TMPDIR'), '.Renviron'))
    write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_USER'), '.Renviron'))
    rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
    
    #Ancillary variables
    get.summary <- function(x){x %>% 
        summarize_all(.funs=list(num.NAs=~sum(is.na(.)), 
                                 min=~min(., na.rm=T),
                                 q025=~quantile(., 0.25, na.rm=T), 
                                 q50=~quantile(., 0.50, na.rm=T), 
                                 q75=~quantile(., .75, na.rm=T), 
                                 max=~max(., na.rm=T), 
                                 mean=~mean(., na.rm=T), 
                                 sd=~sd(., na.rm=T))) %>% 
        gather(variable, value) %>% 
        separate(variable, sep="_", into=c("variable", "stat")) %>% 
        mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
        spread(key=stat, value=value)
    }

    1 Import header data

    load("../_output/header_sPlot3.0.RData")

    Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels Location uncertainty to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.

    header.shp <- header %>%
      filter(!is.na(Longitude) | !is.na(Latitude))
    header.shp <- SpatialPointsDataFrame(coords= header.shp %>% 
                                            dplyr::select(Longitude, Latitude),
                                   proj4string = CRS("+init=epsg:4326"), 
                                   data=data.frame(PlotObservationID= header.shp$PlotObservationID, 
                                                   loc.uncert=header.shp$`Location uncertainty (m)`, 
                                                   `GIVD ID`=header.shp$`GIVD ID`))
    header.shp <- readOGR(dsn="../_derived", layer="header.shp")
    colnames(header.shp@data) <- c("PlotObservationID", "loc.uncert", "GIVD ID")

    2 Climate

    Bioclimatic variables (bio01-bio19) derive from CHELSA

    Codes:

    Bio1 = Annual Mean Temperature
    Bio2 = Mean Diurnal Range
    Bio3 = Isothermality
    Bio4 = Temperature Seasonality
    Bio5 = Max Temperature of Warmest Month
    Bio6 = Min Temperature of Coldest Month
    Bio7 = Temperature Annual Range
    Bio8 = Mean Temperature of Wettest Quarter
    Bio9 = Mean Temperature of Driest Quarter
    Bio10 = Mean Temperature of Warmest Quarter
    Bio11 = Mean Temperature of Coldest Quarter
    Bio12 = Annual Precipitation
    Bio13 = Precipitation of Wettest Month
    Bio14 = Precipitation of Driest Month
    Bio15 = Precipitation Seasonality
    Bio16 = Precipitation of Wettest Quarter
    Bio17 = Precipitation of Driest Quarter
    Bio18 = Precipitation of Warmest Quarter
    Bio19 = Precipitation of Coldest Quarter

    \newline \newline Download raster files:

    library(downloader)
    url.chelsa <- list()
    for(i in 1:19){
      ii <- stringr::str_pad(1:19, width=2, side="left", pad="0")[i]
      url.chelsa[[i]] <- paste("https://www.wsl.ch/lud/chelsa/data/bioclim/integer/CHELSA_bio10_", ii, ".tif", sep="")
      download(url.chelsa[[i]],
               paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", ii, ".tif", sep=""), 
               mode = "wb")
    }

    Intersect header.shp with each raster of the CHELSA collection.
    Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

    header.shp.path <- "../_derived/header.shp.shp"
    
    for(i in 1:19){
      ff <- paste("/data/sPlot/users/Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_", 
                  stringr::str_pad(i, width=2, side="left", pad="0"), ".tif", sep="")
      #define output paths
      output.ff1 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10_", 
                  stringr::str_pad(i, width=2, side="left", pad="0"), ".csv", sep="")
      output.ff2 <- paste("/data/sPlot/users/Francesco/sPlot3/_derived/output_pred/CHELSA_bio10", 
                  stringr::str_pad(i, width=2, side="left", pad="0"), "_sd.csv", sep="")
      # Run PredExtr function - sink to file
      PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.mean, 
                             ncores = 4, typp = "raster", output.ff1)
      PredExtr(x.shp = header.shp.path, toextract = ff, myfunction = robust.sd, 
                             ncores = 4, typp = "raster", output.ff2)
    }

    Reimport and assemble output.

    chelsa.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+.csv$", full.names=T)
    chelsa.out <- do.call(cbind, lapply(chelsa.files, 
                                        function(x) {read_csv(x,col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                            column_to_rownames("X1")}))
    colnames(chelsa.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"))
    #same for sd values
    chelsa.sd.files <- list.files(path="../_derived/output_pred/", pattern="CHELSA_bio10_[0-9]+_sd.csv$", full.names=T)
    chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, 
                                           function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                     V1 = col_double())) %>% 
                                               column_to_rownames("X1")}))
    colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")

    Show summaries

    tmp.sum <- get.summary(chelsa.out)
    
    knitr::kable(tmp.sum, 
      caption="Summary statistics for chelsa mean statistics", digits = 3) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")
    tmp.sd.sum <- get.summary(chelsa.sd.out)
    
    knitr::kable(tmp.sd.sum, 
      caption="Summary statistics for chelsa s.d. statistics") %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")

    3 Soil

    Soil variables (5 cm depth) derive from the ISRIC dataset, downloaded at 250-m resolution

    CECSOL Cation Exchange capacity of soil
    CLYPPT Clay mass fraction in %
    CRFVOL Coarse fragments volumetric in %
    ORCDRC Soil Organic Carbon Content in g/kg
    PHIHOX Soil pH x 10 in H20
    SLTPPT Silt mass fraction in %
    SNDPPT Sand mass fraction in %
    BLDFIE Bulk Density (fine earth) in kg/m3
    P.ret.cat Phosphorous Retention - Categorical value, see ISRIC 2011-06
    \newline \newline Download ISRIC soil data

    url.bulk <- "https://files.isric.org/soilgrids/data/recent/BLDFIE_M_sl2_250m_ll.tif"
    url.cec <- "https://files.isric.org/soilgrids/data/recent/CECSOL_M_sl2_250m_ll.tif"
    url.cly <- "https://files.isric.org/soilgrids/data/recent/CLYPPT_M_sl2_250m_ll.tif"
    url.crf <- "https://files.isric.org/soilgrids/data/recent/CRFVOL_M_sl2_250m_ll.tif"
    url.orc <- "https://files.isric.org/soilgrids/data/recent/ORCDRC_M_sl2_250m_ll.tif"
    url.pH <- "https://files.isric.org/soilgrids/data/recent/PHIHOX_M_sl2_250m_ll.tif"
    url.slt <- "https://files.isric.org/soilgrids/data/recent/SLTPPT_M_sl2_250m_ll.tif"
    url.snd <- "https://files.isric.org/soilgrids/data/recent/SNDPPT_M_sl2_250m_ll.tif"
    
    download(url.bulk, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/BLDFIE_M_sl2_250m_ll.tif", mode = "wb")
    download(url.cec, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CECSOL_M_sl2_250m_ll.tif", mode = "wb")
    download(url.cly, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CLYPPT_M_sl2_250m_ll.tif", mode = "wb")
    download(url.crf, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/CRFVOL_M_sl2_250m_ll.tif", mode = "wb")
    download(url.orc, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif", mode = "wb")
    download(url.pH, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif", mode = "wb")
    download(url.slt, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SLTPPT_M_sl2_250m_ll.tif", mode = "wb")
    download(url.snd, "/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/SNDPPT_M_sl2_250m_ll.tif", mode = "wb")

    Intersect header.shp with each raster of the ISRIC collection.
    Performed in EVE HPC cluster using function A98_PredictorsExtract.R. Divided in 99 chunks.

    for(i in 1:8){
      ff <- list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
                                    pattern = "^[^(Gene)]", full.names = T)[i]
      isric.i <- raster(ff)
      #define output paths
      filename <- str_split(list.files("/data/sPlot/users/Francesco/Ancillary_Data/ISRIC/", 
               pattern = "^[^(Gene)]", full.names = T)[i], pattern="//")[[1]][2]
      output.ff1 <- gsub(pattern=".tif", replacement=".csv", filename)
      output.ff2 <- gsub(pattern=".tif", replacement="_sd.csv", filename)
      # Run PredExtr function - sink to file
      PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.mean, 
                             ncores = 4, typp = "raster", output.ff1)
      PredExtr(x.shp = header.shp.path, toextract = isric.i, myfunction = robust.sd, 
                             ncores = 4, typp = "raster", output.ff2)
    }

    Reimport and assemble output.

    ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
    ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
    isric.files <- list.files(path="../_derived/output_pred/", 
                              pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"), 
                              full.names=T)
    
    isric.out <- do.call(cbind, lapply(isric.files,  
                                       function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                 V1 = col_double())) %>% 
                                           column_to_rownames("X1")}))
    colnames(isric.out) <- ISRIC.layer.names
    #same for sd values
    isric.sd.files <- list.files(path="../_derived/output_pred/", 
                              pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"), 
                              full.names=T)
    isric.sd.out <- do.call(cbind, lapply(isric.sd.files, 
                                          function(x) {read_csv(x, col_types = cols(X1 = col_character(),
                                                                                     V1 = col_double())) %>% 
                                               column_to_rownames("X1")}))
    colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")

    Show summaries

    tmp.sum <- get.summary(isric.out)
    
    knitr::kable(tmp.sum, 
      caption="Summary statistics for isric mean statistics", digits = 3) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")
    tmp.sum <- get.summary(isric.sd.out)
    
    knitr::kable(tmp.sum, 
      caption="Summary statistics for isric s.d. statistics", digits = 3) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")

    4 Elevation variability

    Reimport output (calculated in 04_buildHeader)

    elevation.out <- read_csv("../_derived/elevatr/elevation.out.csv")

    5 Create output and export

    soilclim <- header %>% 
      dplyr::select(PlotObservationID) %>% 
      left_join(elevation.out %>% 
                  dplyr::select(PlotObservationID, Elevation_median, Elevation_q2.5, Elevation_q97.5, Elevation_DEM.res=DEM.res), 
                by="PlotObservationID") %>% 
      left_join(header.shp@data %>% 
                  dplyr::select(PlotObservationID) %>% 
                  bind_cols(chelsa.out) %>%
                  bind_cols(chelsa.sd.out) %>% 
                  bind_cols(isric.out) %>% 
                  bind_cols(isric.sd.out) %>% 
                  distinct(), 
                by="PlotObservationID")
    knitr::kable(soilclim %>% 
                   sample_n(20), 
      caption="Show environmenal info for 20 randomly selected plots ", digits = 3) %>%
        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                      full_width = F, position = "center")
    save(soilclim, file = "../_output/SoilClim_sPlot3.RData")

    ANNEX 1 - Code to Extract predictors