From e68beb15de0a8d5b82bb2f30f6b980a151d876bb Mon Sep 17 00:00:00 2001
From: Francesco Sabatini <francesco.sabatini@idiv.de>
Date: Thu, 12 Mar 2020 14:24:56 +0100
Subject: [PATCH] Corrected attribution of Ab_scale to pa

---
 code/06_buildDT.Rmd | 66 +++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 17 deletions(-)

diff --git a/code/06_buildDT.Rmd b/code/06_buildDT.Rmd
index 1c8e0a3..5ef7d36 100644
--- a/code/06_buildDT.Rmd
+++ b/code/06_buildDT.Rmd
@@ -194,21 +194,22 @@ table(DT1$`Taxon group`, exclude=NULL)
 ```
 
 Check species with conflicting `Taxon group` information and fix manually.
-```{r, eval=F}
+```{r, eval=T}
 #check for conflicts in attribution of genera to Taxon groups
-conflict <- DT1 %>% 
+DT1 %>% 
   filter(!is.na(Name_short)) %>% 
-  dplyr::select(Genus, `Taxon group`) %>% 
   filter(!is.na(`Taxon group`)) %>% 
+  distinct(Name_short, `Taxon group`) %>% 
+  mutate(Genus=word(Name_short,1)) %>% 
+  dplyr::select(Genus, `Taxon group`) %>% 
   distinct() %>% 
   group_by(Genus) %>% 
   summarize(n=n()) %>% 
   filter(n>1) %>% 
-  arrange(desc(n)) %>% 
-  pull(Genus)
+  arrange(desc(n))
 ```
 
-Manually fix some known problems in `Taxon group` attribution. Some list of taxa (e.g., `lichen.genera`, `mushroom.genera`) derive from the `Backbone`.
+Manually fix some known problems in `Taxon group` attribution. Some lists of taxa (e.g., `lichen.genera`, `mushroom.genera`) were defined when building the `Backbone`.
 ```{r}
 #Attach genus info
 DT1 <- DT1 %>% 
@@ -249,7 +250,7 @@ table(DT1$`Taxon group`, exclude=NULL)
 ```
 
 
-Delete all records of fungi, and use lists of genera to fix additional problems. While in the previous round the matching was done on the resolved Genus name, here the match is based on the unresolved Genus name.
+Delete all records of fungi, and use lists of genera to fix additional problems. While in the previous round the matching was done on the resolved Genus name, here the match is based on unresolved Genus names.
 ```{r}
 DT1 <- DT1 %>% 
   dplyr::select(-Genus) %>% 
@@ -270,7 +271,8 @@ DT1 <- DT1 %>%
                                list=Genus %in% vascular.gen, 
                                values="Vascular plant")) %>% 
   mutate(`Taxon group` = fct_explicit_na(`Taxon group`, "Unknown")) %>% 
-  filter(`Taxon group`!="Mushroom")# %>% 
+  filter(`Taxon group`!="Mushroom") %>%
+  mutate(`Taxon group`=factor(`Taxon group`))
   #dplyr::select(-Genus)
 
 table(DT1$`Taxon group`, exclude=NULL)
@@ -282,7 +284,7 @@ nunknown <- DT1 %>% filter(`Taxon group`=="Unknown") %>% nrow()
 After cross-checking all sources of information, the number of taxa not having `Taxon group` information decreased to `r nunknown` species.
 
 ```{r, echo=F, eval=F}
-#Check the most frequent species for which we don't have taxon group info
+#Check the most frequent genera for which we don't have taxon group info
 DT1 %>% 
   filter(`Taxon group` == "Unknown") %>% 
   group_by(Genus) %>% 
@@ -311,10 +313,12 @@ To make the cover data more user friendly, I simplify the way cover is stored, s
 # Create Ab_scale field
 DT1 <- DT1 %>% 
   mutate(Ab_scale = ifelse(`Cover code` %in% 
-                             c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "x") & !is.na(x_), 
+                             c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF") & !is.na(x_), 
                            `Cover code`, 
-                           "CoverPerc")) %>% 
-  mutate(Ab_scale = ifelse(Ab_scale =="x", "pa", Ab_scale)) 
+                           "CoverPerc"))  
+
+#%>% 
+#  mutate(Ab_scale = ifelse(Ab_scale =="x", "pa", Ab_scale)) 
 ```
 
 Fix some errors. There are some plots where only p\\a information is available (`Cover code`=="x"), but have zeros in the field `Cover %`. Consider this as presence\\absence and transform `Cover %` to 1.  
@@ -329,6 +333,29 @@ DT1 <- DT1 %>%
                                    pull(PlotObservationID))), 
                            values=1))
 ```
+For all plot-layer combinations where only p\\a information is available (`Cover code`=="x") and all the entries of the field `Cover %` are equal to 1, consider this as presence\\absence and transform `Ab_scale` to "pa". This is done to avoid confusion with plots where `Cover code`=="x" but "x" is intended as a class in the cover scale used. For p\\a plots, replace the field `Cover %` with NA, and assign the value 1 to the field `x_`.  
+```{r}
+# IDs of plots with at least one entry where `Cover code` == "x"
+sel <- DT1 %>% 
+  filter(`Cover code`=="x") %>% 
+  distinct(PlotObservationID) %>% 
+  pull(PlotObservationID)
+
+DT1 <- DT1 %>% 
+  left_join(DT1 %>%  # one to.pa flag per plot-layer combination
+              filter(PlotObservationID %in% sel) %>% 
+              group_by(PlotObservationID, Layer) %>% 
+              mutate(to.pa= all(`Cover %`==1 & `Cover code`=="x")) %>%  # TRUE only if every entry of the layer is coded "x" with cover 1
+              distinct(PlotObservationID, Layer, to.pa), 
+            by=c("PlotObservationID", "Layer")) %>% 
+  replace_na(list(to.pa=FALSE)) %>%  # unmatched plot-layers (and NA flags) are not p\a
+  mutate(Ab_scale=ifelse(to.pa, "pa", Ab_scale)) %>% 
+  mutate(`Cover %`=ifelse(to.pa, NA, `Cover %`)) %>%  # drop percentage cover for p\a layers
+  mutate(x_=ifelse(to.pa, 1, x_)) %>%  # presence is stored as 1 in x_
+  dplyr::select(-to.pa)  # to.pa is only a temporary helper column
+```
+
+
 There are also some plots having different cover scales in the same layer. They are not many, and I will reduce their cover value to p\\a.  
 Find these plots first:
 ```{r}
@@ -346,9 +373,14 @@ Transform these plots to p\\a and correct field `Ab_scale`. Note: the column `Ab
 DT1 <- DT1 %>% 
   mutate(Ab_scale=replace(Ab_scale, 
                            list=PlotObservationID %in% mixed, 
-                           values="mixed")) %>% 
+                           values="mixed")) %>%
+  mutate(`Cover %`=replace(`Cover %`, 
+                           list=Ab_scale=="mixed",
+                           values=NA)) %>% 
+  mutate(x_=replace(x_,  list=Ab_scale=="mixed", values=1)) %>% 
+  mutate(Ab_scale=replace(Ab_scale, list=Ab_scale=="mixed", values="pa")) %>% 
   #Create additional field Abundance to avoid overwriting original data
-  mutate(Abundance =ifelse(Ab_scale %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "x"), 
+  mutate(Abundance =ifelse(Ab_scale %in% c("x_BA", "x_IC", "x_SC", "x_IV", "x_RF", "pa"), 
                           x_, `Cover %`)) %>% 
   mutate(Abundance=replace(Abundance, 
                            list=PlotObservationID %in% mixed, 
@@ -371,15 +403,15 @@ table(scale_check$Ab_scale_combined)
 ```
 
 
-Transform abundances to relative abundance, on a layer by layer basis. For consistency with the previous version of sPlot, this field is called `Relative cover`.  
+Transform abundances to relative abundance. For consistency with the previous version of sPlot, this field is called `Relative cover`.  
 *Watch out* - Even plots with p\\a information are transformed to relative cover.  
 ```{r}
 DT1 <- DT1 %>% 
   left_join(x=., 
             y={.} %>%
-              group_by(PlotObservationID, Layer) %>% 
+              group_by(PlotObservationID) %>% 
               summarize(tot.abundance=sum(Abundance)), 
-            by=c("PlotObservationID", "Layer")) %>% 
+            by=c("PlotObservationID")) %>% 
   mutate(Relative.cover=Abundance/tot.abundance)
 
 DT1 %>% 
-- 
GitLab