diff --git a/code/07_buildCWMs.Rmd b/code/07_buildCWMs.Rmd
index 0a19bfd6ef86ef6361f75fc2ba54fe0fefa78d11..76dcac89760712873e0355edf28dbca580f805bb 100644
--- a/code/07_buildCWMs.Rmd
+++ b/code/07_buildCWMs.Rmd
@@ -16,8 +16,7 @@ output: html_document
 **Revised:**  
 **version:** 1.0
   
-This reports documents the construction of Community Weighted Means (CWMs) and Variance (CWVs), complementing species composition data from sPlot 3.0 and Plant functional traits from TRY 5.0. 
-Functional Traits were received by [Jens Kattge](jkattge@bgc-jena.mpg.de) on Jan 21, 2020. 
+This reports documents 1) the construction of Community Weighted Means (CWMs) and Variance (CWVs); and 2) the classification of plots into forest\\non-forest based on species growth forms. It complements species composition data from sPlot 3.0 and gap-filled plant functional traits from TRY 5.0, as received by [Jens Kattge](jkattge@bgc-jena.mpg.de) on Jan 21, 2020. 
 
 ```{r results="hide", message=F, warning=F}
 library(tidyverse)
@@ -29,7 +28,7 @@ library(stringr)
 library(caret)
 ```
 
-# Import data
+# 1 Data import, preparation and cleaning 
 ```{r}
 load("../_output/DT_sPlot3.0.RData")
 load("../_output/Backbone3.0.RData")
@@ -37,13 +36,16 @@ load("../_output/Backbone3.0.RData")
 
 Import TRY data
 ```{r, warning=F, message=F}
+# Species, Genus, Family
 try.species <- read_csv(
   "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv",
   locale = locale(encoding = "latin1")) 
+# Original data without gap-filling. With species and trait lables
 try.allinfo <- read_csv(
   "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv", 
   locale = locale(encoding = "latin1"), 
                         col_types = paste0(c("dddccccc",rep("c", 84)), collapse=""))
+# Individual-level gap-filled data - order as in try.allinfo
 try.individuals0 <- read_csv(
   "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv", 
   locale = locale(encoding = "latin1"))
@@ -75,7 +77,7 @@ There are `r nrow(try.species)` individual observations from `r nrow(try.species
 ```
 
 
-## Attach resolved names from Backbone
+## 1.2 Attach resolved names from Backbone
 ```{r}
 try.species.names <- try.allinfo %>% 
   dplyr::select(Species, Genus, GrowthForm) %>% 
@@ -101,7 +103,7 @@ Out of the `r nrow(sPlot.species)` standardizes species names in sPlot 3.0, `r n
 
 
 
-## Create legend of trait names
+## 1.3 Create legend of trait names
 ```{r, warning=F}
 trait.legend <- data.frame(full=try.allinfo %>% 
                              dplyr::select(starts_with("StdValue_")) %>% 
@@ -157,7 +159,7 @@ try.individuals <- try.individuals0 %>%
               rename_at(col.from, .funs=function(x) col.to)
 ```
 
-## Fix some known errors in the gap-filled matrix
+## 1.3 Fix some known errors in the gap-filled matrix
 Check traits at the individual level. There are some traits with unexpected negative entries:
 ```{r}
 try.species.names %>% 
@@ -201,12 +203,12 @@ This results in the exclusion of `r length(unique(c(toexclude, toexclude2, toexc
 
 
 
-## Calculate species and genus level trait means and sd
+## 1.4 Calculate species and genus level trait means and sd
 ```{r}
 ## Calculate species level trait means and sd. 
 try.species.means <- try.individuals %>% 
   group_by(Name_short) %>% 
-  #Add a field to indivate the number of observation per taxon
+  #Add a field to indicate the number of observation per taxon
   left_join(x={.} %>% 
               summarize(n=n()), 
             y={.} %>% 
@@ -229,7 +231,15 @@ try.genus.means <- try.individuals %>%
 ```
 The average number of observations per species and genus is `r round(mean(try.species.means$n),1)` and `r round(mean(try.genus.means$n),1)`, respectively. As many as `r sum(try.species.means$n==1)` species have only one observation (`r sum(try.genus.means$n==1)` at the genus level).  
 
-## Match taxa based on species, if available, or Genus
+```{r, echo=F}
+knitr::kable(try.species.means %>% 
+               sample_n(15), 
+  caption="Example of trait means for 15 randomly selected species", digits = 3) %>%
+    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
+                  full_width = F, position = "center")
+```
+
+## 1.5 Match taxa based on species, if available, or Genus
 Combined the trait means based on species and genera into a single object, and check how many of these taxa match to the (resolved) species names in `DT2`.
 ```{r, warning=F}
 try.combined.means <- try.genus.means %>% 
@@ -250,7 +260,7 @@ total.matches <- DT2 %>%
 ```
 The total number of matched taxa (either at species, or genus level) is `r total.matches`.  
 
-### Calculate summary statistics for species- and genus-level mean traits
+## 1.6 Calculate summary statistics for species- and genus-level mean traits
 ```{r}
 mysummary <- try.combined.means %>% 
                group_by(Rank_correct) %>% 
@@ -272,13 +282,13 @@ mysummary <- try.combined.means %>%
 
 ```{r, echo=F}
 knitr::kable(mysummary, 
-  caption="Summary statistics for each trait, when summarized across species or genera") %>%
+  caption="Summary statistics for each trait, when summarized across species or genera", digits = 3) %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
 
 
-## Calculate CWMs and CWVs for each plot
+# 2 Calculate CWMs and CWVs for each plot
 Merge vegetation layers, where necessary. Combine cover values across layers
 ```{r}
 #Ancillary function
@@ -292,9 +302,20 @@ combine.cover <- function(x){
 }
 
 DT2.comb <- DT2 %>% 
-  group_by(PlotObservationID, species, Rank_correct) %>%
+  filter(PlotObservationID %in% sample(unique(DT2$PlotObservationID), 10000, replace=F)) %>% 
+  #filter(PlotObservationID==365) %>% 
+  group_by(PlotObservationID, species, Rank_correct) %>% 
   summarize(Relative.cover=combine.cover(Relative.cover)) %>%
-  ungroup()
+  ungroup() %>% 
+  # re-normalize to 100%
+  left_join(x=., 
+            y={.} %>% 
+              group_by(PlotObservationID) %>% 
+              summarize(Tot.cover=sum(Relative.cover)), 
+            by="PlotObservationID") %>% 
+  mutate(Relative.cover=Relative.cover/Tot.cover) %>% 
+  dplyr::select(-Tot.cover)
+
 ```
 
 Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading results, CWM is calculated ONLY for plots for which we have some abundance information. All plots where `Ab_scale`=="pa" in **ANY** of the layers are therefore excluded. 
@@ -362,6 +383,7 @@ CWM3 <- CWM0 %>%
 ## Calculate proportion of species having traits
 CWM4 <- CWM0 %>%
   group_by(PlotObservationID) %>%
+  #distinct(PlotObservationID, species, .keep_all = T) %>% 
   summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
                .funs = list(~sum(!is.na(.)))) %>%
   dplyr::select(PlotObservationID, order(colnames(.))) %>%
@@ -380,16 +402,37 @@ CWM <- CWM1 %>%
   arrange(PlotObservationID)
 
 ```
-### Explore CWM output
-
+### 2.1 Explore CWM output
 ```{r, echo=F}
 knitr::kable(CWM %>% 
                filter(PlotObservationID %in%
                         sample(unique(DT2$PlotObservationID), 3, replace=F)), 
-  caption="Community weighted means of 3 randomly selected plots") %>%
+  caption="Community weighted means of 3 randomly selected plots", digits = 3) %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
+Scatterplot comparing coverage of traits values across plots, when based on relative cover and when based on proportion of species richness
+```{r, eval=T, fig.align="center", fig.width=5, fig.height=4, cache=T, warning=F}
+ggplot(data=CWM %>% 
+         #all variables have the same coverage. Showcase with LDMC
+         filter(variable=="LDMC_mean"), aes(x=trait.coverage, y=prop.sp.with.trait, col=log(sp.richness))) + 
+  geom_point(pch="+", alpha=1/3) + 
+  geom_abline(intercept = 0, slope=1, col=2, lty=2, lwd=.7) + 
+  xlim(c(0,1)) + 
+  ylim(c(0,1)) + 
+  scale_color_viridis() + 
+  theme_bw() +
+  xlab("Trait coverage (Relative  cover)") + 
+  ylab("Trait coverage (Proportion of species)")
+```
+
+```{r}
+CWM %>% 
+  filter(variable=="LDMC_mean") %>% 
+  dplyr::select()
+  
+```
+
 
 Calculate summary statistics for CWMs and CWVs
 ```{r}
@@ -415,7 +458,7 @@ CWM.summary <- CWM %>%
 
 ```{r, echo=F}
 knitr::kable(CWM.summary, 
-  caption="Summary of CWMs and CWVs across all plots") %>%
+  caption="Summary of CWMs and CWVs across all plots", digits = 3) %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
@@ -443,18 +486,18 @@ coverage.summary <- CWM %>%
 
 ```{r, echo=F}
 knitr::kable(coverage.summary, 
-  caption="Summary of CWMs and CWVs across all plots") %>%
+  caption="Summary of CWMs and CWVs across all plots", digits = 3) %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
 
-### Export CWM and species mean trait values
+### 2.2 Export CWM and species mean trait values
 ```{r}
-save(try.combined.means, CWM, file="../_output/Traits_CWMs.RData")
+save(try.combined.means, CWM, file="../_output/Traits_CWMs_sPlot3.RData")
 ```
 
 
-## Classify plots in `is.forest` or `is.non.forest` based on species traits
+# 3 Classify plots in `is.forest` or `is.non.forest` based on species traits
 sPlot has two independent systems for classifying plots to vegetation types. The first, classifies plots into forest and non-forest, based on the share of trees, and the layering of vegetation. The second system classifies plots into broad habitat types and relies on the expert opinion of data contributors. This is, unfortunately, not consistently available across all plots, being the large majority of classified plots only available for Europe. These broad habitat types are coded using 5, non-mutually exclusive dummy variables:  
 1) Forest - F  
 2) Grassland - G  
@@ -465,7 +508,7 @@ A plot may belong to more than one formation, e.g. a Savannah is categorized as
 \newline\newline
 Derive the `if.forest` and `is.non.forest` classification of plots.    
 
-### Derive species level information on Growth Forms.
+## 3.1 Derive species level information on Growth Forms.
 We used different sources of information:  
 1) Data from the gap-filled trait matrix  
 2) Manual cleaning of the most common species for which growth trait info is not available  
@@ -518,13 +561,14 @@ DT.gf <- DT.gf %>%
 ```
 After manual completion, the number of records without growth form information decresead to `r sum(is.na(DT.gf$GrowthForm))`.  
 \newline\newline
-Step 3: Import additional data on growth-form from TRY (Accessed 10 March 2020). All public data on growth form downloaded. First take care of unmatched quotation marks in the txt file. Do this from command line.
+Step 3: Import additional data on growth-form from TRY (Accessed 10 March 2020).  
+All public data on growth form downloaded. First take care of unmatched quotation marks in the txt file. Do this from command line.
 ```{bash, eval=F}
 # escape all unmatched quotation marks. Run in Linux terminal
 #sed 's/"/\\"/g' 8854.txt > 8854_test.csv
 #sed "s/'/\\'/g" 8854.txt > 8854_test.csv
 ```
-
+Information on growth form is not organized and has a myriad of levels. Extract and simplify to the set of few types used so far. In case a species is attributed to multiple growth forms use a majority vote. 
 ```{r, message=F}
 all.gf0 <- read_delim("../_input/TRY5.0_v1.1/8854_test.txt", delim="\t") 
 
@@ -540,9 +584,15 @@ all.gf <- all.gf0 %>%
                                        "vine|climber|liana|carnivore|epiphyte|^succulent|lichen|parasite|
                                        hydrohalophyte|aquatic|cactous|parasitic|hydrophytes|carnivorous"), 
                                        "other")) %>%
-  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, list=str_detect(GrowthForm0, "tree|conifer|^woody$|palmoid|mangrove|gymnosperm"), "tree")) %>% 
-  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, list=str_detect(GrowthForm0, "shrub|scrub|bamboo"), "shrub")) %>%
-  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, list=str_detect(GrowthForm0, "herb|sedge|graminoid|fern|forb|herbaceous|grass|chaemaephyte|geophyte|annual"), "herb")) %>%
+  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
+                                       list=str_detect(GrowthForm0, 
+                                                       "tree|conifer|^woody$|palmoid|mangrove|gymnosperm"), "tree")) %>% 
+  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
+                                       list=str_detect(GrowthForm0, "shrub|scrub|bamboo"), "shrub")) %>%
+  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
+                                       list=str_detect(GrowthForm0,
+                                                       "herb|sedge|graminoid|fern|forb|herbaceous|grass|chaemaephyte|geophyte|annual"),
+                                       "herb")) %>%
   mutate(GrowthForm_simplified=ifelse(GrowthForm_simplified %in% c("other", "herb", "shrub", "tree"), 
                                       GrowthForm_simplified, NA)) %>% 
   filter(!is.na(GrowthForm_simplified)) 
@@ -594,7 +644,7 @@ knitr::kable(DT.gf %>%
               distinct(species, GrowthForm, PlantHeight_mean) %>% 
               group_by(GrowthForm) %>% 
               summarize(Height=mean(PlantHeight_mean, na.rm=T)), 
-  caption="Average height per growth form") %>%
+  caption="Average height per growth form", digits = 3) %>%
     kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                   full_width = F, position = "center")
 ```
@@ -641,7 +691,7 @@ GF %>%
 ```
 
 
-### Perform actual classification of plots
+## 3.2 Perform actual classification of plots
 Define a plot as forest if:  
 1) Has a total cover of the the tree layer >=25% (from header)  
 2) Has a total cover in Layer 1 >=25% (from DT)  
@@ -656,9 +706,16 @@ The first three criteria are declined to define non forest as follows:
 Criteria 2 and 3 only apply to plots having cover data in percentage.  
 Reimport header file
 ```{r}
-load("../_output/header_splot3.0.RData")
+load("../_output/header_sPlot3.0.RData")
+```
+
+```{r echo=F}
+#delete is.forest is.non.forest columns, in case of overwrite
+header <- header %>% 
+    dplyr::select(PlotObservationID:ESY, `Cover total (%)`:`Maximum height cryptogams (mm)`)
 ```
 
+
 ```{r}
 # Criterium 1
 plot.vegtype1 <- header %>% 
@@ -758,7 +815,7 @@ plot.vegtype <- header %>%
 table(plot.vegtype %>% dplyr::select(is.forest, is.non.forest), exclude=NULL)
 ```
 
-### Cross-check and validate
+## 3.3 Cross-check and validate
 Cross check with sPlot's 5-class (incomplete) native classification deriving from data contributors. Build a Confusion matrix.
 ```{r}
 cross.check <- header %>% 
@@ -805,7 +862,7 @@ Through the process described above, we managed to classify `r plot.vegtype %>%
 \newline\newline
 The total number of plots with attribution to forest\\non-forest (either coming from sPlot's native classification, or from the process above) is: `r header.vegtype %>% dplyr::select(-PlotObservationID) %>% filter(rowMeans(is.na(.)) < 1) %>% nrow()`.
 
-### Export and update other objects
+# 4 Export and update other objects
 ```{r}
 sPlot.traits <- sPlot.species %>% 
   arrange(species) %>% 
@@ -816,7 +873,7 @@ sPlot.traits <- sPlot.species %>%
               rename(species=Taxon_name), by="species") %>% 
   dplyr::select(-Rank_correct)
   
-save(try.combined.means, CWM, sPlot.traits, file="../_output/Traits_CWMs.RData")
+save(try.combined.means, CWM, sPlot.traits, file="../_output/Traits_CWMs_sPlot3.RData")
 
 header <- header %>% 
   left_join(plot.vegtype %>% 
@@ -824,7 +881,7 @@ header <- header %>%
             by="PlotObservationID") %>% 
   dplyr::select(PlotObservationID:ESY, is.forest:is.non.forest, everything())
 
-save(header, file="../_output/header_splot3.0.RData")
+save(header, file="../_output/header_sPlot3.0.RData")
 ```