From e26c043e830ec6083b99acc9eddcb8f3cd680c8c Mon Sep 17 00:00:00 2001 From: Francesco Sabatini <francesco.sabatini@idiv.de> Date: Tue, 10 Mar 2020 16:25:05 +0100 Subject: [PATCH] Various bugs fixed, increased cross-consistency --- code/04_buildHeader.Rmd | 202 ++------------------------------- code/05_ExtractEnvironment.Rmd | 13 +-- code/06_buildDT.Rmd | 26 +++-- code/07_buildCWMs.Rmd | 5 +- 4 files changed, 36 insertions(+), 210 deletions(-) diff --git a/code/04_buildHeader.Rmd b/code/04_buildHeader.Rmd index 54aac93..3d4a3f8 100644 --- a/code/04_buildHeader.Rmd +++ b/code/04_buildHeader.Rmd @@ -282,7 +282,7 @@ header <- header %>% ``` -# 4 Assign plots to spatial descriptors +## 4 Assign plots to spatial descriptors Create spatial point dataframe for sPlot data to intersect with spatial layers ```{r} header.shp <- header %>% @@ -293,10 +293,10 @@ header.shp <- SpatialPointsDataFrame(coords= header.shp %>% data=data.frame(PlotObservationID= header.shp$PlotObservationID, loc.uncert=header.shp$`Location uncertainty (m)`, `GIVD ID`=header.shp$`GIVD ID`)) -#writeOGR(header.shp, dsn="../derived", layer="header.shp", driver="ESRI Shapefile", overwrite_layer = T) +writeOGR(header.shp, dsn="../_derived/", layer="header.shp", driver="ESRI Shapefile", overwrite_layer = T) ``` -## 4.1 Assign to Continents +### 4.1 Assign to Continents Download and manipulate map of continents ```{r} sPDF <- rworldmap::getMap(resolution="coarse") @@ -369,7 +369,7 @@ knitr::kable(header %>% full_width = F, position = "center") ``` -## 4.2 Assign to sBiomes +### 4.2 Assign to sBiomes Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks. ```{r eval=F} sBiomes <- readOGR("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp") @@ -418,7 +418,7 @@ knitr::kable(header %>% full_width = F, position = "center") ``` -## 4.3 Extract WWF Ecoregion +### 4.3 Extract WWF Ecoregions Extract ecoregion name and ID from [Ecoregions of the World](https://www.worldwildlife.org/publications/terrestrial-ecoregions-of-the-world). Olson et al. 2001 [(BioScience)](https://academic.oup.com/bioscience/article/51/11/933/227116). Computation was performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks. ```{r eval=F} @@ -470,15 +470,17 @@ Summarize: ```{r message=F, echo=F} knitr::kable(header %>% group_by(Ecoregion) %>% - summarize(num.plot=n()), - caption="Number of plots per Ecoregion") %>% + summarize(num.plot=n()) %>% + arrange(desc(num.plot)) %>% + slice(1:30), + caption="Number of plots in the 30 best represented Ecoregions") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") ``` -## 4.4 Extract elevation +### 4.4 Extract elevation Extract elevation for each plot. Loops over tiles of 1 x 1°, projects to mercator, and extract elevation for plot coordinates, as well as 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package [elevatr](https://cran.r-project.org/web/packages/elevatr/vignettes/introduction_to_elevatr.html#get_raster_elevation_data), which uses the [Terrain Tiles on Amazon Web Services](https://registry.opendata.aws/terrain-tiles/). Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described [here](https://github.com/tilezen/joerd/blob/master/docs/data-sources.md). \newline @@ -794,190 +796,10 @@ knitr::kable(header %>% slice(1:20), ## Supplementary Material ### ANNEX 1 - Ancillary function - `PredExtr` -```{r} -PredExtr <- function(x.shp, myfunction=NA, output, - toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){ - print("Load Packages") - require(foreach) - require(parallel) - require(doParallel) - require(raster) - require(rgeos) - require(rgdal) - - print(paste("Loading", typp, "data :", toextract)) - print(paste("output will be:", output)) - if(typp=="raster"){ mypredictor <- raster(toextract)} else - mypredictor <- readOGR(toextract) - header.shp <-readOGR(x.shp) - crs(mypredictor) <- crs(header.shp) #should be verified beforehand! - - ## Divide in chunks if requested - if(chunkn>1 & !is.na(chunk.i)){ - print(paste("divide in chunks and run on chunk n.", chunk.i)) - indices <- 1:length(header.shp) - chunks <- split(indices, sort(indices%%chunkn)) - header.shp <- header.shp[chunks[[chunk.i]],] - } - - # define ancillary functions - robust.mean <- function(x){mean(x, rm.na=T)} - robust.mode <- function(x){if(any(x!=0)) { - a <- x[which(x!=0)] #exclude zero (i.e. NAs) - return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else - return(NA) } - - print("go parallel") - cl <- makeForkCluster(ncores, outfile="" ) - registerDoParallel(cl) - - print("start main foreach loop") - if(typp=="raster"){ - out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% { - tmp <- raster::extract(mypredictor, header.shp[i,], - buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=myfunction) - } - write.csv(out, file = output) - } else { - out <- sp::over(x=header.shp, y=mypredictor) - toassign <- header.shp[which(is.na(out[,1])),] - print(paste(length(toassign), "plots not matched directely - seek for NN")) - if(length(toassign)>0){ - - nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% { - print(i) - nearest.tmp <- tryCatch(geosphere::dist2Line(header.shp[i,], mypredictor), - error = function(e){data.frame(distance=NA, lon=NA, lat=NA,ID=NA)} - ) - #nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor) - return(nearest.tmp) - } - out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],] - } - write.csv(out, file=output) - } - stopCluster(cl) - } +```{r, code = readLines("A98_PredictorsExtract.R")} ``` ### ANNEX 2 - Ancillary function - `ElevationExtract` -```{r} - -ElevationExtract <- function(header, output, ncores, chunk.i){ - print("load packages") - require(tidyverse) - - require(rgdal) - require(sp) - require(rgeos) - require(raster) - require(rworldmap) - require(elevatr) - - require(parallel) - require(doParallel) - - print("Import header and divide in tiles") - header.shp <- readOGR(header) - header.tiles <- header.shp@data %>% - bind_cols(as.data.frame(header.shp@coords)) %>% - rename(PlotObservationID=PltObID, Longitude=coords.x1, Latitude=coords.x2) %>% - mutate(lc_ncrt=abs(lc_ncrt)) %>% - filter(lc_ncrt <= 50000) %>% - mutate_at(.vars=vars(Longitude, Latitude), - .funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>% - mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile))) - - - print("Get continent") - sPDF <- rworldmap::getMap(resolution="high") - continent.high <- sPDF[,"continent"] - crs(continent.high) <- CRS("+init=epsg:4326") - continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean") - continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m -+nadgrids=@null +no_defs")) - - print("go parallel") - cl <- makeForkCluster(ncores, outfile="") - registerDoParallel(ncores) - - clusterEvalQ(cl, { - library(rgdal) - library(raster) - library(sp) - library(elevatr) - library(dplyr) - }) - - print("create list of tiles still to do") - myfiles <- list.files(path = output, pattern = "[A-Za-z]*_[0-9]+\\.RData$") - done <- NULL - done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles)))) - todo <- 1:nlevels(header.tiles$tilenam) - if(length(done)>0) {todo <- todo[-which(todo %in% done)]} - todo <- sample(todo, replace=F) - print(paste(length(todo), "tiles to do")) - - print("divide in chunks") - #divide in chunks #should take around 40' - indices <- 1:length(todo) - todo.chunks <- split(indices, sort(indices%%99)) - - print(paste("start main foreach loop on chunk n=", chunk.i)) - foreach(i = todo.cunks[[chunk.i]]) %dopar% { - print(paste("doing", i)) - #create sp and project data - if(nrow(header.tiles %>% - filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next() - sp.tile <- SpatialPointsDataFrame(coords=header.tiles %>% - filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% - dplyr::select(Longitude, Latitude), - data=header.tiles %>% - filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>% - dplyr::select(-Longitude, -Latitude), - proj4string = CRS("+init=epsg:4326")) - sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 - +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null - +no_defs ")) #project to mercator - - #retrieve dem raster - tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$lc_ncrt), clip="bbox"), - error = function(e){e} - ) - if(!exists("raster.tile")) { - raster.tile <- NA - save(raster.tile, file = paste(output, "elevation_tile_", i, "failed.RData", sep="")) - message(paste("tile", i, "doesn't work!, skip to next")) - rm(raster.tile) - next - } - # clip dem tile with continent shape - raster.tile <- mask(raster.tile, continent.high.merc) - - #extract and summarize elevation data - elev.tile <- raster::extract(raster.tile, sp.tile, small=T) - elev.tile.buffer <- raster::extract(raster.tile, sp.tile, - buffer=sp.tile@data$lc_ncrt, - small=T) - tmp <- round(mapply( quantile, - x=elev.tile.buffer, - #center=elev.tile, - probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)), - #lc_ncrt=sp.tile$lc_ncrt, - na.rm=T)) - elev.q95 <- setNames(data.frame(matrix(tmp, ncol = 3, nrow = length(elev.tile.buffer))), - c("Elevation_q2.5", "Elevation_median", "Elevation_q97.5")) - output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID, - elevation=round(elev.tile), - elev.q95, - DEM.res=res(raster.tile)[1]) - - #save output - save(output.tile, file = paste(output, "elevation_tile_", i, ".RData", sep="")) - print(paste(i, "done")) - } - stopCluster(cl) - } - +```{r, code = readLines("A97_ElevationExtract.R")} ``` ### ANNEX 3 - SessionInfo() ```{r} diff --git a/code/05_ExtractEnvironment.Rmd b/code/05_ExtractEnvironment.Rmd index 466af83..8997cbe 100644 --- a/code/05_ExtractEnvironment.Rmd +++ b/code/05_ExtractEnvironment.Rmd @@ -17,7 +17,6 @@ output: html_document This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens. - ```{r results="hide", message=F, warning=F} knitr::opts_chunk$set(echo = TRUE) library(tidyverse) @@ -60,8 +59,6 @@ get.summary <- function(x){x %>% mutate(stat=factor(stat, levels=c("no.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% spread(key=stat, value=value) } - - ``` ## 1 Import header data @@ -158,7 +155,7 @@ chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files, function(x) {read_csv(x, col_types = cols(X1 = col_character(), V1 = col_double())) %>% column_to_rownames("X1")})) -colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "_sd") +colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd") ``` Show summaries ```{r, echo=F} @@ -237,9 +234,9 @@ for(i in 1:8){ Reimport and assemble output. ```{r, message=F, warning=F} ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT") -ISRIC.layer.names <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll") +ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll") isric.files <- list.files(path="../_derived/output_pred/", - pattern=paste0(paste0(ISRIC.layer.names, ".csv$"), collapse="|"), + pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"), full.names=T) isric.out <- do.call(cbind, lapply(isric.files, @@ -249,13 +246,13 @@ isric.out <- do.call(cbind, lapply(isric.files, colnames(isric.out) <- ISRIC.layer.names #same for sd values isric.sd.files <- list.files(path="../_derived/output_pred/", - pattern=paste0(paste0(ISRIC.layer.names, "_sd.csv$"), collapse="|"), + pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"), full.names=T) isric.sd.out <- do.call(cbind, lapply(isric.sd.files, function(x) {read_csv(x, col_types = cols(X1 = col_character(), V1 = col_double())) %>% column_to_rownames("X1")})) -colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "_sd") +colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd") ``` Show summaries ```{r, echo=F} diff --git a/code/06_buildDT.Rmd b/code/06_buildDT.Rmd index 599322a..1c8e0a3 100644 --- a/code/06_buildDT.Rmd +++ b/code/06_buildDT.Rmd @@ -13,6 +13,7 @@ output: html_document MEMO!! WHAT TO DO WITH LAYER WHEN IS CONSISTENTLY ZERO IN A PLOT? CHANGE TO NA? WHAT TO DO INSTEAD WHEN LAYER==0 IN A PLOT WHERE LAYER INFO IS OTHERWISE AVAILABLE? +!!! ADD Explanation of fields!!! **Timestamp:** `r date()` @@ -41,7 +42,7 @@ write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_ Search and replace unclosed quotation marks and escape them. Run in Linux terminal ```{bash, eval=F} # escape all double quotation marks. Run in Linux terminal -# sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv +# sed 's/"/\\"/g' sPlot_3_0_2_species.csv > sPlot_3_0_2_species_test.csv ``` ## Import data Table @@ -98,7 +99,10 @@ DT1 <- DT0 %>% # Simplify Rank_correct mutate(Rank_correct=fct_collapse(Rank_correct, lower=c("subspecies", "variety", "infraspecies", "race", "forma"))) %>% - mutate(Rank_correct=fct_explicit_na(Rank_correct, "No_match")) + mutate(Rank_correct=fct_explicit_na(Rank_correct, "No_match")) %>% + mutate(Name_short=replace(Name_short, + list=Name_short=="No suitable", + values=NA)) ``` ## Explore name matching based on Backbone v1.2 @@ -306,9 +310,10 @@ To make the cover data more user friendly, I simplify the way cover is stored, s ```{r} # Create Ab_scale field DT1 <- DT1 %>% - mutate(Ab_scale = ifelse(`Cover code` %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "x") & !is.na(x_), - `Cover code`, - "CoverPerc")) %>% + mutate(Ab_scale = ifelse(`Cover code` %in% + c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "x") & !is.na(x_), + `Cover code`, + "CoverPerc")) %>% mutate(Ab_scale = ifelse(Ab_scale =="x", "pa", Ab_scale)) ``` @@ -341,16 +346,18 @@ Transform these plots to p\\a and correct field `Ab_scale`. Note: the column `Ab DT1 <- DT1 %>% mutate(Ab_scale=replace(Ab_scale, list=PlotObservationID %in% mixed, - values="pa")) %>% + values="mixed")) %>% #Create additional field Abundance to avoid overwriting original data - mutate(Abundance =ifelse(Ab_scale %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF"), + mutate(Abundance =ifelse(Ab_scale %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "x"), x_, `Cover %`)) %>% mutate(Abundance=replace(Abundance, list=PlotObservationID %in% mixed, values=1)) ``` -Double check and summarize abundance_scales + + +Double check and summarize `abundance_scales` ```{r} scale_check <- DT1 %>% distinct(PlotObservationID, Layer, Ab_scale) %>% @@ -359,7 +366,7 @@ scale_check <- DT1 %>% unique(Ab_scale), "Multiple_scales")) -nrow(scale_check)== length(unique(DT2$PlotObservationID)) +nrow(scale_check)== length(unique(DT1$PlotObservationID)) table(scale_check$Ab_scale_combined) ``` @@ -405,7 +412,6 @@ knitr::kable(DT2 %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "center") ``` -***!!! ADD Explanation of fields!!!*** ```{r} diff --git a/code/07_buildCWMs.Rmd b/code/07_buildCWMs.Rmd index 1c4aa8c..0a19bfd 100644 --- a/code/07_buildCWMs.Rmd +++ b/code/07_buildCWMs.Rmd @@ -763,7 +763,7 @@ Cross check with sPlot's 5-class (incomplete) native classification deriving fro ```{r} cross.check <- header %>% dplyr::select(PlotObservationID, Forest) %>% - left_join(plot.gf %>% + left_join(plot.vegtype %>% dplyr::select(PlotObservationID, is.forest, is.non.forest) %>% rename(Forest=is.forest, Other=is.non.forest) %>% gather(isfor_isnonfor, value, -PlotObservationID) %>% @@ -831,7 +831,8 @@ save(header, file="../_output/header_splot3.0.RData") # Appendix ## Growth forms of most common species -```{r, code = readLines("../_derived/Species_missingGF_complete.csv")} +```{r comment=''} +cat(readLines("../_derived/Species_missingGF_complete.csv"), sep = '\n') ``` ## SessionInfo ```{r} -- GitLab