From 6ac780f087d3f035fa44b41beae2fb3f3152e219 Mon Sep 17 00:00:00 2001
From: Francesco Sabatini <francesco.sabatini@idiv.de>
Date: Wed, 22 Apr 2020 18:53:18 +0200
Subject: [PATCH] 01 Proceduro to select best trait combo based on C.I.

---
 01_Mesobromion.R | 204 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 172 insertions(+), 32 deletions(-)

diff --git a/01_Mesobromion.R b/01_Mesobromion.R
index 2ad93f9..52967c8 100644
--- a/01_Mesobromion.R
+++ b/01_Mesobromion.R
@@ -1,6 +1,9 @@
 setwd("/data/sPlot/users/Helge/HIDDEN")
 library(tidyverse)
 library(foreign)
+library(gridExtra)
+library(grid)
+
 source("99_HIDDEN_functions.R")
 
 
@@ -24,6 +27,32 @@ dim(traits0) #907 obs. of  75 variables:
 traits0 <- traits0 %>% 
   dplyr::select_if(~mean(!is.na(.)) >= 0.88) # 907 x 67
 
+### Transform binary traits to 0-1
+traits.asym.binary <- c('LEB_F_Makrophanerophyt','LEB_F_Nanophanerophyt',
+                        'LEB_F_Hemikryptophyt','LEB_F_Geophyt','LEB_F_Hemiphanerophyt','LEB_F_Therophyt',
+                        'LEB_F_Hydrophyt','LEB_F_Pseudophanerophyt','LEB_F_Chamaephyt',
+                        'LEB_D_plurienn_pollakanth','LBE_D_plurienn_hapaxanth','LEB_D_annuell',
+                        'LEB_D_bienn','V_VER_absent','V_VER_Wurzelspross','V_VER_Ausläufer','
+                        V_VER_Rhizom','V_VER_Innovationsknopse.mit.Wurzelknolle',
+                        'V_VER_Innovationsknospe.mit.Speicherwurzel','V_VER_Ausläuferknolle',
+                        'V_VER_Brutsprösschen','V_VER_Fragmentation','V_VER_Turio','V_VER_Sprossknolle',
+                        'V_VER_phyllogener_Spross','V_VER_Rhizompleiokorm','V_VER_Zwiebel',
+                        'V_VER_Ausläuferrhizom','V_VER_Ausläuferzwiebel','V_VER_Bulbille',
+                        'ROS_T_rosettenlose.Pflanzen','ROS_T_Halbrosettenpflanze','ROS_T_Ganzrosettenpflanzen',
+                        'BL_AUSD_immergrün','BL_AUSD_sommergrün','BL_AUSD_vorsommergrün',
+                        'BL_AUSD_überwinternd_grün','BL_ANAT_skleromorph','BL_ANAT_mesomorph',
+                        'BL_ANAT_hygromorph','BL_ANAT_hydromorph','BL_ANAT_blattsukkulent','BL_ANAT_helomorph',
+                        'BL_FORM_gelappt_gefiedert_gefingert_mehrfach','BL_FORM_Vollblatt_Normalblatt',
+                        'BL_FORM_Grasblatt_Langblatt','BL_FORM_nadelförmig','BL_FORM_röhrig',
+                        'BL_FORM_schuppenförmig','BL_FORM_schwertförmig','REPR_T_Samen_Sporen',
+                        'REPR_T_vegetativ','BLU_KL_WIND','BLU_KL_POLLEN',
+                        'BLU_KL_NEKTAR_HONIG_INSEKTEN','STRAT_T_C','STRAT_T_CR','STRAT_T_CS',
+                        'STRAT_T_CSR','STRAT_T_R','STRAT_T_S','STRAT_T_SR')
+
+traits0 <- traits0 %>% 
+  mutate_at(.vars=vars(any_of(traits.asym.binary)), 
+            .funs=~(.>0)*1)
+
 
 ## Import traits from TRY and match to species
 load("/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.tree.Rdata")
@@ -152,6 +181,9 @@ write_delim(species, path="_data/Mesobromion/species.out.10perc.txt", delim="\t"
 write_delim(traits, path="_data/Mesobromion/traits.out.10perc.txt", delim="\t")
 
 
+
+
+
 #### CORRELATION BETWEEN FUZZY WEIGHTED AND BEALS MATRICES  #### #### #### 
 #### WAS RUN IN THE CLUSTER WITH THE SCRIPT 01b_MesobromionCluster.R
 
@@ -160,7 +192,7 @@ write_delim(traits, path="_data/Mesobromion/traits.out.10perc.txt", delim="\t")
 ############################
 ## calculate corr between species composition matrix and traits
 species <- read_delim("_data/Mesobromion/species.out.10perc.txt", delim="\t")
-traits <- read_delim("_data/Mesobromion/traits.out.10perc.sign.txt", delim="\t")
+traits <- read_delim("_data/Mesobromion/traits.out.10perc.txt", delim="\t")
 
 traits <- traits %>% 
   column_to_rownames("species0") # %>% 
@@ -169,11 +201,11 @@ traits <- traits %>%
 
 
 #### ## Import output #### 
-myfilelist <- list.files(path="_derived/Mesobromion/", pattern="HIDDEN_signpair_[0-9]+.RData", full.names = T)
+myfilelist <- list.files(path="_derived/Mesobromion/", pattern="HIDDEN_sign_[0-9]+.RData", full.names = T)
 dataFiles = purrr::map(myfilelist, function(x){get(load(x))}) 
 corXY = bind_rows(dataFiles) %>% 
   as.tbl()
-rm(corXY.output, dataFiles)
+rm( dataFiles)
 
 trait.labs <- data.frame(Trait.comb=1:ncol(traits), 
                  trait.name=colnames(traits))#[-which(colnames(traits) %in% c("species", "species0"))])
@@ -196,73 +228,181 @@ corXY.ci <- corXY %>%
   mutate(sign_minus=greater.than.perm<0.025)  %>% 
   mutate(Trait.comb2=Trait.comb) %>% 
   #split strings of trait combinations and add labels
-  separate(Trait.comb2, into=paste0("trait", 1:7)) %>% 
-  mutate_at(.vars=vars(trait1:trait7),
+  separate(Trait.comb2, into=paste0("trait", 1:15)) %>% 
+  mutate_at(.vars=vars(trait1:trait15),
             .funs=~factor(., levels=trait.labs$Trait.comb, labels=trait.labs$trait.name)) %>% 
   rowwise() %>% 
   mutate(ntraits= length(unlist(strsplit(Trait.comb, "_")))) %>% 
   ungroup() %>% 
   arrange(ntraits, Coef.obs) %>% 
-  dplyr::select(Trait.comb, Test, n, ntraits, everything())
+  dplyr::select(Trait.comb, Test, n, ntraits, everything()) %>% 
+  mutate_at(.vars=vars(starts_with("sign")), 
+            .funs=~factor(.*1, levels=c(0,1), labels=c("FALSE", "TRUE")))
   
-corXY.ci.sign <- corXY.ci 
-corXY.ci.pair <- corXY.ci
-
-corXY.ci <- bind_rows(corXY.ci.sign, corXY.ci.pair)
-
-corXY.ci %>% 
-  arrange(ntraits, Trait.comb, Coef.obs) %>% 
-  dplyr::select(Trait.comb, Test, n, ntraits, everything())
+#corXY.ci %>% 
+#  arrange(ntraits, Trait.comb, Coef.obs) %>% 
+# dplyr::select(Trait.comb, Test, n, ntraits, everything())
 
 traits.sign.alone <- corXY.ci %>% 
   filter(ntraits==1) %>% 
   filter(sign_plus==T) %>% 
   pull(trait1)
 
+### filter combinations where all traits belong to traits.sign.alone
 mydata <- corXY.ci %>% 
+  filter_at(.vars=vars(trait1:trait15), all_vars(. %in% traits.sign.alone | is.na(.)))%>% 
+  mutate(seq=1:n())
+
+
+### Plot combinations one by one
+mydata.byone <- mydata %>% 
   filter(ntraits==1)%>% 
   mutate(seq=1:n())
-ggplot(data=mydata) + 
+
+
+top.first <- ggplot(data=mydata.byone) + 
   geom_segment(aes(x=q025, xend=q975, y=seq, yend=seq, 
                    col=sign_plus)) + 
   geom_point(aes(x=Coef.obs, y=seq), pch=15) + 
+  scale_y_continuous(breaks=mydata.byone$seq, 
+                     labels=mydata.byone$trait1, name=NULL)  +  
+  scale_x_continuous(name="RD correlation") + 
+  labs(fill = "Significant")
+
+tt2 <- ttheme_minimal()
+topfirst.leg <- cowplot::plot_grid(top.first,
+                         tableGrob(trait.labs %>% 
+                                     dplyr::select(trait.name), theme=tt2), 
+                         nrow=1, rel_widths=c(0.65, 0.35))
+
+
+ggsave(filename = "_pics/TopFirstPredictors_CI.png", dpi=400, 
+       width=6, height=5, topfirst.leg)
+
+allpredictors <- top.first %+% (mydata) +
   scale_y_continuous(breaks=mydata$seq, 
-                     labels=mydata$trait1)  +  
-  scale_x_continuous(name="RV correlation")
+                     labels=mydata$Trait.comb) + 
+  theme_classic() + 
+  theme(axis.text.y = element_text(size=3)) #+ 
+  #facet_wrap(.~ntraits, scales = "free_y",  nrow=2)
 
+ggsave(filename = "_pics/All_predictors_sign_individually_CI_unfaceted.png", dpi=400, 
+       width=6, height=30, allpredictors)
+  
 
-mydata <- corXY.ci %>% 
-  #filter(ntraits==2)%>% 
+
+get.best <- function(x, N){
+  out <- x %>% 
+    filter(ntraits==N) %>% 
+    filter(sign_plus==TRUE) %>% 
+    arrange(desc(Coef.obs)) %>% 
+    slice(1) %>% 
+    pull(Trait.comb)
+  lab <- trait.labs %>%
+    filter(Trait.comb %in% strsplit(out, split = "_")[[1]]) %>% 
+    pull(trait.name)
+  return(list(Trait.comb=out, trait.name=lab))
+}
+
+top.one.by.one <- get.best(mydata, N=1)
+
+
+maxtraits <- 10
+for(nn in 1:maxtraits){
+  if(nn==1) {
+    best.at.1 <- get.best(mydata, N=nn)
+    newdata <- mydata %>% 
+      filter_at(.vars=vars(trait1:trait15),
+                .vars_predicate = any_vars(. %in% best.at.1$trait.name | is.na(.))) 
+    new.best.row <- newdata %>% 
+      filter(Trait.comb==best.at.1$Trait.comb) 
+    upper <- best.row$q975
+    lower <- best.row$q025
+    print(paste("new best at nn", nn, best.at.1$trait.name))
+  }
+  if(nn>1){
+    better <- list()
+    better$Trait.comb <- newdata %>% 
+       filter(ntraits==nn) %>% 
+       filter(q025>upper) %>% 
+       arrange(desc(Coef.obs)) %>% 
+       slice(1) %>% 
+       pull(Trait.comb)
+
+     if(length(better$Trait.comb>0)){
+       better$trait.name <- trait.labs %>%
+         filter(Trait.comb %in% strsplit(better$Trait.comb, split = "_")[[1]]) %>% 
+         pull(trait.name)
+       
+       newdata <- newdata %>% 
+         rowwise() %>% 
+         mutate(nmatching= sum(unlist(strsplit(Trait.comb, "_")) %in% 
+                                 unlist(strsplit(better$Trait.comb, "_")),
+                                 na.rm=T)) %>% 
+         ungroup() %>% 
+         filter(nmatching==nn)
+       
+       new.best.row <- newdata %>% 
+         filter(Trait.comb==better$Trait.comb) 
+       upper <- new.best.row$q975
+       lower <- new.best.row$q025
+       print(paste("new best at nn", nn, paste(better$trait.name, collapse=" ")))
+       best <- better
+     }
+  }
+}
+
+mydata.best <- mydata %>% 
+  filter_at(.vars=vars(trait1:trait15), 
+            .vars_predicate = all_vars(. %in% best$trait.name | is.na(.))) %>% 
+  filter(ntraits <= length(best$trait.name)) %>% 
+  arrange(ntraits, Coef.obs) %>% 
+  mutate(seq=1:n())
+
+(top.all <- top.first %+% mydata.best + 
+    scale_y_continuous(breaks=mydata.best$seq, 
+                       labels=mydata.best$Trait.comb) + 
+    theme(axis.text.y = element_text(size=3)))
+
+ggsave(filename = "_pics/Best8_AllCombinations_CI.png", dpi=400, 
+       width=6, height=10, top.all)
+
+mydata.out <- mydata %>%
+  #filter(ntraits<3)%>% 
   #filter(sign_plus==T) %>% 
   #filter(trait1 %in% traits.sign.alone & (is.na(trait2) | trait2 %in% traits.sign.alone)) %>% 
   mutate(seq=1:nrow(.)) # %>% 
 #  arrange(desc(obs)) %>% 
 #  slice(1:20)
-top7 <- ggplot(data=mydata %>% 
-                 rename(Significant=sign_plus)) + 
+
+top10 <- ggplot(data=mydata.out %>% 
+                  rename(Significant=sign_plus)) + 
   geom_segment(aes(x=q025, xend=q975, y=seq, yend=seq, 
                    col=Significant)) + 
-  geom_point(aes(x=Coef.obs, y=seq), pch=15) + 
+  geom_point(aes(x=Coef.obs, y=seq), pch=15) +
+  facet_wrap(.~ntraits, nrow=2, scales = "free_y") + 
   scale_y_continuous(breaks=mydata$seq, 
                      labels=mydata$Trait.comb, 
                      name="Trait combination") +  
-  scale_x_continuous(name="RV correlation") + 
+  scale_x_continuous(name="RD correlation") + 
   theme(legend.position="bottom", legend.box = "horizontal")
 
-library(gridExtra)
-library(grid)
-tt2 <- ttheme_minimal()
-top7.leg <- cowplot::plot_grid(top7,
-                         tableGrob(trait.labs %>% dplyr::select(trait.name), theme=tt2), 
-                         nrow=1, rel_widths=c(0.65, 0.35))
+#library(gridExtra)
+#library(grid)
+#tt2 <- ttheme_minimal()
+#top7.leg <- cowplot::plot_grid(top7,
+#                         tableGrob(trait.labs %>% dplyr::select(trait.name), theme=tt2), 
+#                         nrow=1, rel_widths=c(0.65, 0.35))
+#
+#ggsave(filename = "_pics/Top07Predictors_CI.png", dpi=400, 
+#       width=8, height=11, top7.leg)
 
-ggsave(filename = "_pics/Top7Predictors_CI.png", dpi=400, 
-       width=8, height=11, top7.leg)
 
 
 
 #traits.sign <- traits %>% 
-#  dplyr::select(species0, traits.sign.alone)
+#  rownames_to_column("species0") %>% 
+#  dplyr::select(species0, all_of(traits.sign.alone))
 #write_delim(traits.sign, path="_data/Mesobromion/traits.out.10perc.sign.txt", delim="\t")
 
 break()
-- 
GitLab