Skip to content
Snippets Groups Projects
Commit e26c043e authored by Francesco Sabatini's avatar Francesco Sabatini
Browse files

Various bugs fixed, increased cross-consistency

parent dfb3ce89
Branches
No related tags found
No related merge requests found
......@@ -282,7 +282,7 @@ header <- header %>%
```
# 4 Assign plots to spatial descriptors
## 4 Assign plots to spatial descriptors
Create spatial point dataframe for sPlot data to intersect with spatial layers
```{r}
header.shp <- header %>%
......@@ -293,10 +293,10 @@ header.shp <- SpatialPointsDataFrame(coords= header.shp %>%
data=data.frame(PlotObservationID= header.shp$PlotObservationID,
loc.uncert=header.shp$`Location uncertainty (m)`,
`GIVD ID`=header.shp$`GIVD ID`))
#writeOGR(header.shp, dsn="../derived", layer="header.shp", driver="ESRI Shapefile", overwrite_layer = T)
writeOGR(header.shp, dsn="../_derived/", layer="header.shp", driver="ESRI Shapefile", overwrite_layer = T)
```
## 4.1 Assign to Continents
### 4.1 Assign to Continents
Download and manipulate map of continents
```{r}
sPDF <- rworldmap::getMap(resolution="coarse")
......@@ -369,7 +369,7 @@ knitr::kable(header %>%
full_width = F, position = "center")
```
## 4.2 Assign to sBiomes
### 4.2 Assign to sBiomes
Performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks.
```{r eval=F}
sBiomes <- readOGR("/data/sPlot/users/Francesco/Ancillary_Data/Biomes_sPlot/sBiomes.shp")
......@@ -418,7 +418,7 @@ knitr::kable(header %>%
full_width = F, position = "center")
```
## 4.3 Extract WWF Ecoregion
### 4.3 Extract WWF Ecoregions
Extract ecoregion name and ID from [Ecoregions of the World](https://www.worldwildlife.org/publications/terrestrial-ecoregions-of-the-world). Olson et al. 2001 [(BioScience)](https://academic.oup.com/bioscience/article/51/11/933/227116).
Computation was performed in EVE HPC cluster using function `A98_PredictorsExtract.R`. Divided in 99 chunks.
```{r eval=F}
......@@ -470,15 +470,17 @@ Summarize:
```{r message=F, echo=F}
knitr::kable(header %>%
group_by(Ecoregion) %>%
summarize(num.plot=n()),
caption="Number of plots per Ecoregion") %>%
summarize(num.plot=n()) %>%
arrange(desc(num.plot)) %>%
slice(1:30),
caption="Number of plots in the 30 best represented Ecoregions") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
```
## 4.4 Extract elevation
### 4.4 Extract elevation
Extract elevation for each plot. Loops over tiles of 1 x 1°, projects to mercator, and extract elevation for plot coordinates, as well as 2.5, 50, and 97.5 quantiles for a buffer area having a radius equal to the location uncertainty of each plot (but only if location uncertainty < 50 km). DEM derive from package [elevatr](https://cran.r-project.org/web/packages/elevatr/vignettes/introduction_to_elevatr.html#get_raster_elevation_data), which uses the [Terrain Tiles on Amazon Web Services](https://registry.opendata.aws/terrain-tiles/). Resolutions of DEM rasters vary by region. I set a zoom factor z=10, which corresponds to ~ 75-150 m. Sources are: SRTM, data.gov.at in Austria, NRCAN in Canada, SRTM, NED/3DEP 1/3 arcsec, data.gov.uk in United Kingdom, INEGI in Mexico, ArcticDEM in latitudes above 60°, LINZ in New Zealand, Kartverket in Norway, as described [here](https://github.com/tilezen/joerd/blob/master/docs/data-sources.md).
\newline
......@@ -794,190 +796,10 @@ knitr::kable(header %>% slice(1:20),
## Supplementary Material
### ANNEX 1 - Ancillary function - `PredExtr`
```{r}
PredExtr <- function(x.shp, myfunction=NA, output,
toextract, typp=c("raster", "shp"), ncores, chunkn=1, chunk.i=NA){
print("Load Packages")
require(foreach)
require(parallel)
require(doParallel)
require(raster)
require(rgeos)
require(rgdal)
print(paste("Loading", typp, "data :", toextract))
print(paste("output will be:", output))
if(typp=="raster"){ mypredictor <- raster(toextract)} else
mypredictor <- readOGR(toextract)
header.shp <-readOGR(x.shp)
crs(mypredictor) <- crs(header.shp) #should be verified beforehand!
## Divide in chunks if requested
if(chunkn>1 & !is.na(chunk.i)){
print(paste("divide in chunks and run on chunk n.", chunk.i))
indices <- 1:length(header.shp)
chunks <- split(indices, sort(indices%%chunkn))
header.shp <- header.shp[chunks[[chunk.i]],]
}
# define ancillary functions
robust.mean <- function(x){mean(x, rm.na=T)}
robust.mode <- function(x){if(any(x!=0)) {
a <- x[which(x!=0)] #exclude zero (i.e. NAs)
return(as.numeric(names(sort(table(a), decreasing=T))[1]))} else
return(NA) }
print("go parallel")
cl <- makeForkCluster(ncores, outfile="" )
registerDoParallel(cl)
print("start main foreach loop")
if(typp=="raster"){
out <- foreach(i=1:length(header.shp), .packages=c('raster'), .combine=rbind) %dopar% {
tmp <- raster::extract(mypredictor, header.shp[i,],
buffer=min(10000, max(250, header.shp@data$loc.uncert[i])), fun=myfunction)
}
write.csv(out, file = output)
} else {
out <- sp::over(x=header.shp, y=mypredictor)
toassign <- header.shp[which(is.na(out[,1])),]
print(paste(length(toassign), "plots not matched directely - seek for NN"))
if(length(toassign)>0){
nearest <- foreach(i=1:length(toassign), .packages=c('raster'), .combine=rbind) %dopar% {
print(i)
nearest.tmp <- tryCatch(geosphere::dist2Line(header.shp[i,], mypredictor),
error = function(e){data.frame(distance=NA, lon=NA, lat=NA,ID=NA)}
)
#nearest.tmp <- geosphere::dist2Line(toassign[i,], mypredictor)
return(nearest.tmp)
}
out[which(is.na(out[,1])),] <- mypredictor@data[nearest[,"ID"],]
}
write.csv(out, file=output)
}
stopCluster(cl)
}
```{r, code = readLines("A98_PredictorsExtract.R")}
```
### ANNEX 2 - Ancillary function - `ElevationExtract`
```{r}
ElevationExtract <- function(header, output, ncores, chunk.i){
print("load packages")
require(tidyverse)
require(rgdal)
require(sp)
require(rgeos)
require(raster)
require(rworldmap)
require(elevatr)
require(parallel)
require(doParallel)
print("Import header and divide in tiles")
header.shp <- readOGR(header)
header.tiles <- header.shp@data %>%
bind_cols(as.data.frame(header.shp@coords)) %>%
rename(PlotObservationID=PltObID, Longitude=coords.x1, Latitude=coords.x2) %>%
mutate(lc_ncrt=abs(lc_ncrt)) %>%
filter(lc_ncrt <= 50000) %>%
mutate_at(.vars=vars(Longitude, Latitude),
.funs=list(tile=~cut(., breaks = seq(-180,180, by=.2)))) %>%
mutate(tilenam=factor(paste(Longitude_tile, Latitude_tile)))
print("Get continent")
sPDF <- rworldmap::getMap(resolution="high")
continent.high <- sPDF[,"continent"]
crs(continent.high) <- CRS("+init=epsg:4326")
continent.high@data$continent <- fct_recode(continent.high@data$continent, "South America"="South America and the Caribbean")
continent.high.merc <- spTransform(continent.high, CRS( "+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m
+nadgrids=@null +no_defs"))
print("go parallel")
cl <- makeForkCluster(ncores, outfile="")
registerDoParallel(ncores)
clusterEvalQ(cl, {
library(rgdal)
library(raster)
library(sp)
library(elevatr)
library(dplyr)
})
print("create list of tiles still to do")
myfiles <- list.files(path = output, pattern = "[A-Za-z]*_[0-9]+\\.RData$")
done <- NULL
done <- as.numeric(unlist(regmatches(myfiles, gregexpr("[[:digit:]]+", myfiles))))
todo <- 1:nlevels(header.tiles$tilenam)
if(length(done)>0) {todo <- todo[-which(todo %in% done)]}
todo <- sample(todo, replace=F)
print(paste(length(todo), "tiles to do"))
print("divide in chunks")
#divide in chunks #should take around 40'
indices <- 1:length(todo)
todo.chunks <- split(indices, sort(indices%%99))
print(paste("start main foreach loop on chunk n=", chunk.i))
foreach(i = todo.cunks[[chunk.i]]) %dopar% {
print(paste("doing", i))
#create sp and project data
if(nrow(header.tiles %>%
filter(tilenam %in% levels(header.tiles$tilenam)[i])) ==0 ) next()
sp.tile <- SpatialPointsDataFrame(coords=header.tiles %>%
filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
dplyr::select(Longitude, Latitude),
data=header.tiles %>%
filter(tilenam %in% levels(header.tiles$tilenam)[i]) %>%
dplyr::select(-Longitude, -Latitude),
proj4string = CRS("+init=epsg:4326"))
sp.tile <- spTransform(sp.tile, CRSobj = CRS("+init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
+lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
+no_defs ")) #project to mercator
#retrieve dem raster
tryCatch(raster.tile <- get_elev_raster(sp.tile, z=9, expand=max(sp.tile$lc_ncrt), clip="bbox"),
error = function(e){e}
)
if(!exists("raster.tile")) {
raster.tile <- NA
save(raster.tile, file = paste(output, "elevation_tile_", i, "failed.RData", sep=""))
message(paste("tile", i, "doesn't work!, skip to next"))
rm(raster.tile)
next
}
# clip dem tile with continent shape
raster.tile <- mask(raster.tile, continent.high.merc)
#extract and summarize elevation data
elev.tile <- raster::extract(raster.tile, sp.tile, small=T)
elev.tile.buffer <- raster::extract(raster.tile, sp.tile,
buffer=sp.tile@data$lc_ncrt,
small=T)
tmp <- round(mapply( quantile,
x=elev.tile.buffer,
#center=elev.tile,
probs=rep(c(0.025, 0.5, 0.975), each=length(elev.tile)),
#lc_ncrt=sp.tile$lc_ncrt,
na.rm=T))
elev.q95 <- setNames(data.frame(matrix(tmp, ncol = 3, nrow = length(elev.tile.buffer))),
c("Elevation_q2.5", "Elevation_median", "Elevation_q97.5"))
output.tile <- data.frame(PlotObservationID=sp.tile$PlotObservationID,
elevation=round(elev.tile),
elev.q95,
DEM.res=res(raster.tile)[1])
#save output
save(output.tile, file = paste(output, "elevation_tile_", i, ".RData", sep=""))
print(paste(i, "done"))
}
stopCluster(cl)
}
```{r, code = readLines("A97_ElevationExtract.R")}
```
### ANNEX 3 - SessionInfo()
```{r}
......
......@@ -17,7 +17,6 @@ output: html_document
This report documents how environmental variables were extracted when constructing sPlot 3.0. It is based on dataset sPlot_3.0.2, received on 24/07/2019 from Stephan Hennekens.
```{r results="hide", message=F, warning=F}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
......@@ -60,8 +59,6 @@ get.summary <- function(x){x %>%
mutate(stat=factor(stat, levels=c("no.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>%
spread(key=stat, value=value)
}
```
## 1 Import header data
......@@ -158,7 +155,7 @@ chelsa.sd.out <- do.call(cbind, lapply(chelsa.sd.files,
function(x) {read_csv(x, col_types = cols(X1 = col_character(),
V1 = col_double())) %>%
column_to_rownames("X1")}))
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "_sd")
colnames(chelsa.sd.out) <- paste0("bio", stringr::str_pad(1:19, width=2, side="left", pad="0"), "sd")
```
Show summaries
```{r, echo=F}
......@@ -237,9 +234,9 @@ for(i in 1:8){
Reimport and assemble output.
```{r, message=F, warning=F}
ISRIC.layer.names <- c("BLDFIE", "CECSOL","CLYPPT","CRFVOL","ORCDRC","PHIHOX","SLTPPT","SNDPPT")
ISRIC.layer.names <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
ISRIC.layer.names1 <- paste0(ISRIC.layer.names, "_M_sl2_250m_ll")
isric.files <- list.files(path="../_derived/output_pred/",
pattern=paste0(paste0(ISRIC.layer.names, ".csv$"), collapse="|"),
pattern=paste0(paste0(ISRIC.layer.names1, ".csv$"), collapse="|"),
full.names=T)
isric.out <- do.call(cbind, lapply(isric.files,
......@@ -249,13 +246,13 @@ isric.out <- do.call(cbind, lapply(isric.files,
colnames(isric.out) <- ISRIC.layer.names
#same for sd values
isric.sd.files <- list.files(path="../_derived/output_pred/",
pattern=paste0(paste0(ISRIC.layer.names, "_sd.csv$"), collapse="|"),
pattern=paste0(paste0(ISRIC.layer.names1, "_sd.csv$"), collapse="|"),
full.names=T)
isric.sd.out <- do.call(cbind, lapply(isric.sd.files,
function(x) {read_csv(x, col_types = cols(X1 = col_character(),
V1 = col_double())) %>%
column_to_rownames("X1")}))
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "_sd")
colnames(isric.sd.out) <- paste0(ISRIC.layer.names, "sd")
```
Show summaries
```{r, echo=F}
......
......@@ -13,6 +13,7 @@ output: html_document
MEMO!! WHAT TO DO WITH LAYER WHEN IS CONSISTENTLY ZERO IN A PLOT? CHANGE TO NA?
WHAT TO DO INSTEAD WHEN LAYER==0 IN A PLOT WHERE LAYER INFO IS OTHERWISE AVAILABLE?
!!! ADD Explanation of fields!!!
**Timestamp:** `r date()`
......@@ -41,7 +42,7 @@ write("R_USER = /data/sPlot/users/Francesco/_tmp", file=file.path(Sys.getenv('R_
Search and replace unclosed quotation marks and escape them. Run in Linux terminal
```{bash, eval=F}
# escape all double quotation marks. Run in Linux terminal
# sed 's/"/\\"/g' sPlot_3_0_2_header.csv > sPlot_3_0_2_header_test.csv
# sed 's/"/\\"/g' sPlot_3_0_2_species.csv > sPlot_3_0_2_species_test.csv
```
## Import data Table
......@@ -98,7 +99,10 @@ DT1 <- DT0 %>%
# Simplify Rank_correct
mutate(Rank_correct=fct_collapse(Rank_correct,
lower=c("subspecies", "variety", "infraspecies", "race", "forma"))) %>%
mutate(Rank_correct=fct_explicit_na(Rank_correct, "No_match"))
mutate(Rank_correct=fct_explicit_na(Rank_correct, "No_match")) %>%
mutate(Name_short=replace(Name_short,
list=Name_short=="No suitable",
values=NA))
```
## Explore name matching based on Backbone v1.2
......@@ -306,9 +310,10 @@ To make the cover data more user friendly, I simplify the way cover is stored, s
```{r}
# Create Ab_scale field
DT1 <- DT1 %>%
mutate(Ab_scale = ifelse(`Cover code` %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "x") & !is.na(x_),
`Cover code`,
"CoverPerc")) %>%
mutate(Ab_scale = ifelse(`Cover code` %in%
c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "x") & !is.na(x_),
`Cover code`,
"CoverPerc")) %>%
mutate(Ab_scale = ifelse(Ab_scale =="x", "pa", Ab_scale))
```
......@@ -341,16 +346,18 @@ Transform these plots to p\\a and correct field `Ab_scale`. Note: the column `Ab
DT1 <- DT1 %>%
mutate(Ab_scale=replace(Ab_scale,
list=PlotObservationID %in% mixed,
values="pa")) %>%
values="mixed")) %>%
#Create additional field Abundance to avoid overwriting original data
mutate(Abundance =ifelse(Ab_scale %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF"),
mutate(Abundance =ifelse(Ab_scale %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "x"),
x_, `Cover %`)) %>%
mutate(Abundance=replace(Abundance,
list=PlotObservationID %in% mixed,
values=1))
```
Double check and summarize abundance_scales
Double check and summarize `abundance_scales`
```{r}
scale_check <- DT1 %>%
distinct(PlotObservationID, Layer, Ab_scale) %>%
......@@ -359,7 +366,7 @@ scale_check <- DT1 %>%
unique(Ab_scale),
"Multiple_scales"))
nrow(scale_check)== length(unique(DT2$PlotObservationID))
nrow(scale_check)== length(unique(DT1$PlotObservationID))
table(scale_check$Ab_scale_combined)
```
......@@ -405,7 +412,6 @@ knitr::kable(DT2 %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center")
```
***!!! ADD Explanation of fields!!!***
```{r}
......
......@@ -763,7 +763,7 @@ Cross check with sPlot's 5-class (incomplete) native classification deriving fro
```{r}
cross.check <- header %>%
dplyr::select(PlotObservationID, Forest) %>%
left_join(plot.gf %>%
left_join(plot.vegtype %>%
dplyr::select(PlotObservationID, is.forest, is.non.forest) %>%
rename(Forest=is.forest, Other=is.non.forest) %>%
gather(isfor_isnonfor, value, -PlotObservationID) %>%
......@@ -831,7 +831,8 @@ save(header, file="../_output/header_splot3.0.RData")
# Appendix
## Growth forms of most common species
```{r, code = readLines("../_derived/Species_missingGF_complete.csv")}
```{r comment=''}
cat(readLines("../_derived/Species_missingGF_complete.csv"), sep = '\n')
```
## SessionInfo
```{r}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment