diff --git a/code/04_buildHeader.Rmd b/code/04_buildHeader.Rmd
index 3d4a3f8bb3a2ffb26e2abc4660f5c027bcc279ce..60122fb93c3322c7d3a9ddb370e05b6f44ad7cb9 100644
--- a/code/04_buildHeader.Rmd
+++ b/code/04_buildHeader.Rmd
@@ -783,7 +783,7 @@ header <- header %>%
     Forest:Naturalness, ESY, 
     #Vegetation structure
                 "Cover total (%)":"Maximum height cryptogams (mm)")
-save(header, file = "../_output/header_splot3.0.RData")
+save(header, file = "../_output/header_sPlot3.0.RData")
 
 ```
 
diff --git a/code/05_ExtractEnvironment.Rmd b/code/05_ExtractEnvironment.Rmd
index 8997cbe07b6066d8ad638de2e6db722074a8a2ac..473e97d311c575f48fc4af0331d5d4aa182c57fd 100644
--- a/code/05_ExtractEnvironment.Rmd
+++ b/code/05_ExtractEnvironment.Rmd
@@ -46,7 +46,7 @@ rasterOptions(tmpdir="/data/sPlot/users/Francesco/_tmp")
 
 #Ancillary variables
 get.summary <- function(x){x %>% 
-    summarize_all(.funs=list(no.NAs=~sum(is.na(.)), 
+    summarize_all(.funs=list(num.NAs=~sum(is.na(.)), 
                              min=~min(., na.rm=T),
                              q025=~quantile(., 0.25, na.rm=T), 
                              q50=~quantile(., 0.50, na.rm=T), 
@@ -56,7 +56,7 @@ get.summary <- function(x){x %>%
                              sd=~sd(., na.rm=T))) %>% 
     gather(variable, value) %>% 
     separate(variable, sep="_", into=c("variable", "stat")) %>% 
-    mutate(stat=factor(stat, levels=c("no.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
+    mutate(stat=factor(stat, levels=c("num.NAs", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
     spread(key=stat, value=value)
 }
 ```
@@ -64,7 +64,7 @@ get.summary <- function(x){x %>%
 ## 1 Import header data
 
 ```{r}
-load("../_output/header_splot3.0.RData")
+load("../_output/header_sPlot3.0.RData")
 ```
 
 Create spatial point dataframe for sPlot data to intersect with spatial layers. Include the fiels `Location uncertainty` to allow for fuzzy matching. Each plot was intersected with the corresponding layer of environmental (soil, climate) attributes accounting for their location uncertainty (minimum set to 250m). For computing reasons, the maximum location uncertainty was set to 10km. The user should therefore be aware that the variability in climate and soil features estimations may be underestimated for plots with very high location uncertainty.
@@ -162,7 +162,7 @@ Show summaries
 tmp.sum <- get.summary(chelsa.out)
 
 knitr::kable(tmp.sum, 
-  caption="Summary statistics for chelsa mean statistics") %>%
+  caption="Summary statistics for chelsa mean statistics", digits = 3) %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
@@ -259,16 +259,16 @@ Show summaries
 tmp.sum <- get.summary(isric.out)
 
 knitr::kable(tmp.sum, 
-  caption="Summary statistics for chelsa mean statistics") %>%
+  caption="Summary statistics for isric mean statistics", digits = 3) %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
 
 ```{r, echo=F}
-tmp.sum <- get.summary(chelsa.sd.out)
+tmp.sum <- get.summary(isric.sd.out)
 
 knitr::kable(tmp.sum, 
-  caption="Summary statistics for chelsa s.d. statistics") %>%
+  caption="Summary statistics for isric s.d. statistics", digits = 3) %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
@@ -288,14 +288,23 @@ soilclim <- header %>%
               distinct(), 
             by="PlotObservationID")
 
-save(soilclim, file = "../_output/SoilClim_sPlot3.RData")
 ```
 
+```{r, echo=F}
+knitr::kable(soilclim %>% 
+               sample_n(20), 
+  caption="Show environmenal info for 20 randomly selected plots ", digits = 3) %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
 
+```{r}
+save(soilclim, file = "../_output/SoilClim_sPlot3.RData")
+```
 
 
 
-## ANNEX 1 -
+## ANNEX 1 - Code to Extract predictors
 ```{r, code = readLines("A98_PredictorsExtract.R")}
 ```
 
diff --git a/code/06_buildDT.Rmd b/code/06_buildDT.Rmd
index 5ef7d36411abe5cd9c291b5986133870c59b34d2..32501c15716991ce912489bd2af114f542bb11ae 100644
--- a/code/06_buildDT.Rmd
+++ b/code/06_buildDT.Rmd
@@ -120,7 +120,7 @@ name.check <- DT1 %>%
 
 ```{r, echo=F}
 knitr::kable(name.check %>% sample_n(30), 
-  caption="Check 30 random species names from DT after matching to backbone") %>%
+  caption="Check 30 random species names from DT that changed name after matching to backbone") %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
@@ -141,7 +141,7 @@ name.check.freq <- DT1 %>%
 
 ```{r, echo=F}
 knitr::kable(name.check.freq %>% slice(1:40), 
-  caption="Check 40 most common species names from DT after matching to backbone") %>%
+  caption="Check 40 most common species names from DT that changed name after matching to backbone") %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
@@ -281,7 +281,7 @@ table(DT1$`Taxon group`, exclude=NULL)
 nunknown <- DT1 %>% filter(`Taxon group`=="Unknown") %>% nrow()
 ```
 
-After cross-checking all sources of information, the number of taxa not having `Taxon group` information decreased to `r nunknown` species.
+After cross-checking all sources of information, the number of taxa not having `Taxon group` information decreased to `r nunknown` entries
 
 ```{r, echo=F, eval=F}
 #Check the most frequent genera for which we don't have taxon group info
@@ -316,12 +316,9 @@ DT1 <- DT1 %>%
                              c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF") & !is.na(x_), 
                            `Cover code`, 
                            "CoverPerc"))  
-
-#%>% 
-#  mutate(Ab_scale = ifelse(Ab_scale =="x", "pa", Ab_scale)) 
 ```
 
-Fix some errors. There are some plots where only p\\a information is available (`Cover code`=="x"), but have zeros in the field `Cover %`. Consider this as presence\\absence and transform `Cover %` to 1.  
+Fix some errors. There are some plots where only p\\a information is available (`Cover code=="x"`), but have all zeros in the field `Cover %`. Consider this as presence\\absence and transform `Cover %` to 1.  
 ```{r}
 DT1 <- DT1 %>% 
   mutate(`Cover %`=replace(`Cover %`, 
@@ -333,7 +330,7 @@ DT1 <- DT1 %>%
                                    pull(PlotObservationID))), 
                            values=1))
 ```
-For all plot-layer combinations where only p\\a information is available (`Cover code`=="x"), and all the entries of the field `Cover % == 1`. Consider this as presence\\absence and transform `Ab_scale` to "pa". This is done to avoid confusion with plots where `Cover code=="x"` but "x" has to be intended as a class in the cover scale used. For p\\a plots, replace the field `Cover %` with 0, and assign the value 1 to the field `x_`.  
+Consider all plot-layer combinations where `Cover code=="x"`, and all the entries of the field `Cover % == 1` as presence\\absence data, and transform `Ab_scale` to "pa". This is done to avoid confusion with plots where `Cover code=="x"` but "x" has to be intended as a class in the cover scale used. For p\\a plots, replace the field `Cover %` with NA, and assign the value 1 to the field `x_`.  
 ```{r}
 #plots with at least one entry in Cover code=="x"
 sel <- DT1 %>% 
@@ -389,7 +386,7 @@ DT1 <- DT1 %>%
 
 
 
-Double check and summarize `abundance_scales`
+Double check and summarize `Ab_scales`
 ```{r}
 scale_check <- DT1 %>% 
   distinct(PlotObservationID, Layer, Ab_scale) %>% 
@@ -414,11 +411,13 @@ DT1 <- DT1 %>%
             by=c("PlotObservationID")) %>% 
   mutate(Relative.cover=Abundance/tot.abundance)
 
+# check: there should be no plot where the sum of all relative covers !=0
 DT1 %>% 
-  group_by(PlotObservationID, Layer) %>% 
+  group_by(PlotObservationID) %>% 
   summarize(tot.cover=sum(Relative.cover), 
             num.layers=sum(unique(Layer))) %>% 
-  filter(tot.cover != num.layers)
+  filter(tot.cover != num.layers) %>% 
+  nrow()
 ```
 
 
@@ -450,7 +449,7 @@ knitr::kable(DT2 %>%
 save(DT2, file = "../_output/DT_sPlot3.0.RData")
 ```
 
-##SessionInfo
+## SessionInfo
 ```{r, echo=F}
 sessionInfo()
 ```
diff --git a/code/07_buildCWMs.Rmd b/code/07_buildCWMs.Rmd
index 76dcac89760712873e0355edf28dbca580f805bb..08223dea9cc6056189694479524d51582b641297 100644
--- a/code/07_buildCWMs.Rmd
+++ b/code/07_buildCWMs.Rmd
@@ -26,6 +26,7 @@ library(knitr)
 library(kableExtra)
 library(stringr)
 library(caret)
+library(viridis)
 ```
 
 # 1 Data import, preparation and cleaning 
@@ -302,8 +303,6 @@ combine.cover <- function(x){
 }
 
 DT2.comb <- DT2 %>% 
-  filter(PlotObservationID %in% sample(unique(DT2$PlotObservationID), 10000, replace=F)) %>% 
-  #filter(PlotObservationID==365) %>% 
   group_by(PlotObservationID, species, Rank_correct) %>% 
   summarize(Relative.cover=combine.cover(Relative.cover)) %>%
   ungroup() %>% 
@@ -412,7 +411,7 @@ knitr::kable(CWM %>%
                   full_width = F, position = "center")
 ```
 Scatterplot comparing coverage of traits values across plots, when based on relative cover and when based on proportion of species richness
-```{r, eval=T, fig.align="center", fig.width=5, fig.height=4, cache=T, warning=F}
+```{r, eval=T, fig.align="center", fig.width=6, fig.height=4, cache=T, warning=F}
 ggplot(data=CWM %>% 
          #all variables have the same coverage. Showcase with LDMC
          filter(variable=="LDMC_mean"), aes(x=trait.coverage, y=prop.sp.with.trait, col=log(sp.richness))) + 
@@ -423,19 +422,37 @@ ggplot(data=CWM %>%
   scale_color_viridis() + 
   theme_bw() +
   xlab("Trait coverage (Relative  cover)") + 
-  ylab("Trait coverage (Proportion of species)")
+  ylab("Trait coverage (Proportion of species)") + 
+  coord_equal()
 ```
 
-```{r}
-CWM %>% 
+```{r, warning=F}
+CWM.coverage <- CWM %>% 
   filter(variable=="LDMC_mean") %>% 
-  dplyr::select()
-  
+  summarize_at(.vars=vars(trait.coverage, prop.sp.with.trait),
+                .funs=list(min=~min(., na.rm=T),
+                           q025=~quantile(., 0.25, na.rm=T), 
+                           q50=~quantile(., 0.50, na.rm=T), 
+                           q75=~quantile(., .75, na.rm=T), 
+                           max=~max(., na.rm=T), 
+                           mean=~mean(., na.rm=T), 
+                           sd=~sd(., na.rm=T))) %>% 
+  gather(key=variable, value=value) %>% 
+  separate(variable, sep="_", into=c("metric", "stat")) %>% 
+  mutate(stat=factor(stat, levels=c("min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
+  spread(key=stat, value=value) 
+
 ```
 
+```{r, echo=F}
+knitr::kable(CWM.coverage, 
+  caption="Summary of plot-level coverage of CWM and CWVs", digits = 3) %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
 
 Calculate summary statistics for CWMs and CWVs
-```{r}
+```{r, warning=F}
 CWM.summary <- CWM %>% 
   rename(myvar=variable) %>% 
   group_by(myvar) %>% 
@@ -498,17 +515,16 @@ save(try.combined.means, CWM, file="../_output/Traits_CWMs_sPlot3.RData")
 
 
 # 3 Classify plots in `is.forest` or `is.non.forest` based on species traits
-sPlot has two independent systems for classifying plots to vegetation types. The first, classifies plots into forest and non-forest, based on the share of trees, and the layering of vegetation. The second system classifies plots into broad habitat types and relies on the expert opinion of data contributors. This is, unfortunately, not consistently available across all plots, being the large majority of classified plots only available for Europe. These broad habitat types are coded using 5, non-mutually exclusive dummy variables:  
-1) Forest - F  
-2) Grassland - G  
-3) Shrubland - S  
-4) Sparse vegetation - B (Bare)  
-5) Wetland - W  
-A plot may belong to more than one formation, e.g. a Savannah is categorized as Forest + Grassland (FG).  
-\newline\newline
-Derive the `if.forest` and `is.non.forest` classification of plots.    
-
-## 3.1 Derive species level information on Growth Forms.
+sPlot has two independent systems for classifying plots to vegetation types. The first relies on the expert opinion of data contributors and classifies plots into broad habitat types. These broad habitat types are coded using 5, non-mutually exclusive dummy variables:  
+1) Forest  
+2) Grassland  
+3) Shrubland  
+4) Sparse vegetation  
+5) Wetland  
+A plot may belong to more than one formation, e.g. a Savannah is categorized as Forest + Grassland (FG). This system is, unfortunately, not consistently available across all plots, being the large majority of classified plots only available for Europe.  
+There is therefore the need to give at least some indication to the remaining unclassified plots. To achieve this, already from v2.1, sPlot started using a classification into forest and non-forest, based on the share of trees, and the layering of vegetation. Here, we derived the (mutually exclusive) `is.forest` and `is.non.forest` classification of plots.    
+
+## 3.1 Derive species level information on Growth Forms
 We used different sources of information:  
 1) Data from the gap-filled trait matrix  
 2) Manual cleaning of the most common species for which growth trait info is not available  
@@ -547,7 +563,7 @@ write_csv(top.gf.nas %>%
             filter(n>1000), 
   path="../_derived/Species_missingGF.csv")
 ```
-The first `r nrow(top.gf.nas)` species account for `r sum(top.gf.nas %>% filter(n>1000) %>% pull(n))/sum(top.gf.nas$n)*100`% of the missing records. Assign growth forms manually, reimport and coalesce into `DT.gf`
+The first `r nrow(top.gf.nas)` species account for `r round(sum(top.gf.nas %>% filter(n>1000) %>% pull(n))/sum(top.gf.nas$n)*100,2)`% of the missing records. Assign growth forms manually, reimport and coalesce into `DT.gf`
 
 ```{r}
 gf.manual <- read_csv("../_derived/Species_missingGF_complete.csv")
@@ -561,6 +577,7 @@ DT.gf <- DT.gf %>%
 ```
 After manual completion, the number of records without growth form information decresead to `r sum(is.na(DT.gf$GrowthForm))`.  
 \newline\newline
+
 Step 3: Import additional data on growth-form from TRY (Accessed 10 March 2020).  
 All public data on growth form downloaded. First take care of unmatched quotation marks in the txt file. Do this from command line.
 ```{bash, eval=F}
@@ -691,7 +708,7 @@ GF %>%
 ```
 
 
-## 3.2 Perform actual classification of plots
+## 3.2 Classify plots into forest\\non-forest
 Define a plot as forest if:  
 1) Has a total cover of the the tree layer >=25% (from header)  
 2) Has a total cover in Layer 1 >=25% (from DT)