From 6ac780f087d3f035fa44b41beae2fb3f3152e219 Mon Sep 17 00:00:00 2001 From: Francesco Sabatini <francesco.sabatini@idiv.de> Date: Wed, 22 Apr 2020 18:53:18 +0200 Subject: [PATCH] 01 Proceduro to select best trait combo based on C.I. --- 01_Mesobromion.R | 204 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 172 insertions(+), 32 deletions(-) diff --git a/01_Mesobromion.R b/01_Mesobromion.R index 2ad93f9..52967c8 100644 --- a/01_Mesobromion.R +++ b/01_Mesobromion.R @@ -1,6 +1,9 @@ setwd("/data/sPlot/users/Helge/HIDDEN") library(tidyverse) library(foreign) +library(gridExtra) +library(grid) + source("99_HIDDEN_functions.R") @@ -24,6 +27,32 @@ dim(traits0) #907 obs. of 75 variables: traits0 <- traits0 %>% dplyr::select_if(~mean(!is.na(.)) >= 0.88) # 907 x 67 +### Transform binary traits to 0-1 +traits.asym.binary <- c('LEB_F_Makrophanerophyt','LEB_F_Nanophanerophyt', + 'LEB_F_Hemikryptophyt','LEB_F_Geophyt','LEB_F_Hemiphanerophyt','LEB_F_Therophyt', + 'LEB_F_Hydrophyt','LEB_F_Pseudophanerophyt','LEB_F_Chamaephyt', + 'LEB_D_plurienn_pollakanth','LBE_D_plurienn_hapaxanth','LEB_D_annuell', + 'LEB_D_bienn','V_VER_absent','V_VER_Wurzelspross','V_VER_Ausläufer',' + V_VER_Rhizom','V_VER_Innovationsknopse.mit.Wurzelknolle', + 'V_VER_Innovationsknospe.mit.Speicherwurzel','V_VER_Ausläuferknolle', + 'V_VER_Brutsprösschen','V_VER_Fragmentation','V_VER_Turio','V_VER_Sprossknolle', + 'V_VER_phyllogener_Spross','V_VER_Rhizompleiokorm','V_VER_Zwiebel', + 'V_VER_Ausläuferrhizom','V_VER_Ausläuferzwiebel','V_VER_Bulbille', + 'ROS_T_rosettenlose.Pflanzen','ROS_T_Halbrosettenpflanze','ROS_T_Ganzrosettenpflanzen', + 'BL_AUSD_immergrün','BL_AUSD_sommergrün','BL_AUSD_vorsommergrün', + 'BL_AUSD_überwinternd_grün','BL_ANAT_skleromorph','BL_ANAT_mesomorph', + 'BL_ANAT_hygromorph','BL_ANAT_hydromorph','BL_ANAT_blattsukkulent','BL_ANAT_helomorph', + 'BL_FORM_gelappt_gefiedert_gefingert_mehrfach','BL_FORM_Vollblatt_Normalblatt', + 'BL_FORM_Grasblatt_Langblatt','BL_FORM_nadelförmig','BL_FORM_röhrig', + 'BL_FORM_schuppenförmig','BL_FORM_schwertförmig','REPR_T_Samen_Sporen', + 'REPR_T_vegetativ','BLU_KL_WIND','BLU_KL_POLLEN', + 'BLU_KL_NEKTAR_HONIG_INSEKTEN','STRAT_T_C','STRAT_T_CR','STRAT_T_CS', + 'STRAT_T_CSR','STRAT_T_R','STRAT_T_S','STRAT_T_SR') + +traits0 <- traits0 %>% + mutate_at(.vars=vars(any_of(traits.asym.binary)), + .funs=~(.>0)*1) + ## Import traits from TRY and match to species load("/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.tree.Rdata") @@ -152,6 +181,9 @@ write_delim(species, path="_data/Mesobromion/species.out.10perc.txt", delim="\t" write_delim(traits, path="_data/Mesobromion/traits.out.10perc.txt", delim="\t") + + + #### CORRELATION BETWEEN FUZZY WEIGHTED AND BEALS MATRICES #### #### #### #### WAS RUN IN THE CLUSTER WITH THE SCRIPT 01b_MesobromionCluster.R @@ -160,7 +192,7 @@ write_delim(traits, path="_data/Mesobromion/traits.out.10perc.txt", delim="\t") ############################ ## calculate corr between species composition matrix and traits species <- read_delim("_data/Mesobromion/species.out.10perc.txt", delim="\t") -traits <- read_delim("_data/Mesobromion/traits.out.10perc.sign.txt", delim="\t") +traits <- read_delim("_data/Mesobromion/traits.out.10perc.txt", delim="\t") traits <- traits %>% column_to_rownames("species0") # %>% @@ -169,11 +201,11 @@ traits <- traits %>% #### ## Import output #### -myfilelist <- list.files(path="_derived/Mesobromion/", pattern="HIDDEN_signpair_[0-9]+.RData", full.names = T) +myfilelist <- list.files(path="_derived/Mesobromion/", pattern="HIDDEN_sign_[0-9]+.RData", full.names = T) dataFiles = purrr::map(myfilelist, function(x){get(load(x))}) corXY = bind_rows(dataFiles) %>% as.tbl() -rm(corXY.output, dataFiles) +rm( dataFiles) trait.labs <- data.frame(Trait.comb=1:ncol(traits), trait.name=colnames(traits))#[-which(colnames(traits) %in% c("species", "species0"))]) @@ -196,73 +228,181 @@ corXY.ci <- corXY %>% mutate(sign_minus=greater.than.perm<0.025) %>% mutate(Trait.comb2=Trait.comb) %>% #split strings of trait combinations and add labels - separate(Trait.comb2, into=paste0("trait", 1:7)) %>% - mutate_at(.vars=vars(trait1:trait7), + separate(Trait.comb2, into=paste0("trait", 1:15)) %>% + mutate_at(.vars=vars(trait1:trait15), .funs=~factor(., levels=trait.labs$Trait.comb, labels=trait.labs$trait.name)) %>% rowwise() %>% mutate(ntraits= length(unlist(strsplit(Trait.comb, "_")))) %>% ungroup() %>% arrange(ntraits, Coef.obs) %>% - dplyr::select(Trait.comb, Test, n, ntraits, everything()) + dplyr::select(Trait.comb, Test, n, ntraits, everything()) %>% + mutate_at(.vars=vars(starts_with("sign")), + .funs=~factor(.*1, levels=c(0,1), labels=c("FALSE", "TRUE"))) -corXY.ci.sign <- corXY.ci -corXY.ci.pair <- corXY.ci - -corXY.ci <- bind_rows(corXY.ci.sign, corXY.ci.pair) - -corXY.ci %>% - arrange(ntraits, Trait.comb, Coef.obs) %>% - dplyr::select(Trait.comb, Test, n, ntraits, everything()) +#corXY.ci %>% +# arrange(ntraits, Trait.comb, Coef.obs) %>% +# dplyr::select(Trait.comb, Test, n, ntraits, everything()) traits.sign.alone <- corXY.ci %>% filter(ntraits==1) %>% filter(sign_plus==T) %>% pull(trait1) +### filter combinations where all traits belong to traits.sign.alone mydata <- corXY.ci %>% + filter_at(.vars=vars(trait1:trait15), all_vars(. %in% traits.sign.alone | is.na(.)))%>% + mutate(seq=1:n()) + + +### Plot combinations one by one +mydata.byone <- mydata %>% filter(ntraits==1)%>% mutate(seq=1:n()) -ggplot(data=mydata) + + + +top.first <- ggplot(data=mydata.byone) + geom_segment(aes(x=q025, xend=q975, y=seq, yend=seq, col=sign_plus)) + geom_point(aes(x=Coef.obs, y=seq), pch=15) + + scale_y_continuous(breaks=mydata.byone$seq, + labels=mydata.byone$trait1, name=NULL) + + scale_x_continuous(name="RD correlation") + + labs(fill = "Significant") + +tt2 <- ttheme_minimal() +topfirst.leg <- cowplot::plot_grid(top.first, + tableGrob(trait.labs %>% + dplyr::select(trait.name), theme=tt2), + nrow=1, rel_widths=c(0.65, 0.35)) + + +ggsave(filename = "_pics/TopFirstPredictors_CI.png", dpi=400, + width=6, height=5, topfirst.leg) + +allpredictors <- top.first %+% (mydata) + scale_y_continuous(breaks=mydata$seq, - labels=mydata$trait1) + - scale_x_continuous(name="RV correlation") + labels=mydata$Trait.comb) + + theme_classic() + + theme(axis.text.y = element_text(size=3)) #+ + #facet_wrap(.~ntraits, scales = "free_y", nrow=2) +ggsave(filename = "_pics/All_predictors_sign_individually_CI_unfaceted.png", dpi=400, + width=6, height=30, allpredictors) + -mydata <- corXY.ci %>% - #filter(ntraits==2)%>% + +get.best <- function(x, N){ + out <- x %>% + filter(ntraits==N) %>% + filter(sign_plus==TRUE) %>% + arrange(desc(Coef.obs)) %>% + slice(1) %>% + pull(Trait.comb) + lab <- trait.labs %>% + filter(Trait.comb %in% strsplit(out, split = "_")[[1]]) %>% + pull(trait.name) + return(list(Trait.comb=out, trait.name=lab)) +} + +top.one.by.one <- get.best(mydata, N=1) + + +maxtraits <- 10 +for(nn in 1:maxtraits){ + if(nn==1) { + best.at.1 <- get.best(mydata, N=nn) + newdata <- mydata %>% + filter_at(.vars=vars(trait1:trait15), + .vars_predicate = any_vars(. %in% best.at.1$trait.name | is.na(.))) + new.best.row <- newdata %>% + filter(Trait.comb==best.at.1$Trait.comb) + upper <- best.row$q975 + lower <- best.row$q025 + print(paste("new best at nn", nn, best.at.1$trait.name)) + } + if(nn>1){ + better <- list() + better$Trait.comb <- newdata %>% + filter(ntraits==nn) %>% + filter(q025>upper) %>% + arrange(desc(Coef.obs)) %>% + slice(1) %>% + pull(Trait.comb) + + if(length(better$Trait.comb>0)){ + better$trait.name <- trait.labs %>% + filter(Trait.comb %in% strsplit(better$Trait.comb, split = "_")[[1]]) %>% + pull(trait.name) + + newdata <- newdata %>% + rowwise() %>% + mutate(nmatching= sum(unlist(strsplit(Trait.comb, "_")) %in% + unlist(strsplit(better$Trait.comb, "_")), + na.rm=T)) %>% + ungroup() %>% + filter(nmatching==nn) + + new.best.row <- newdata %>% + filter(Trait.comb==better$Trait.comb) + upper <- new.best.row$q975 + lower <- new.best.row$q025 + print(paste("new best at nn", nn, paste(better$trait.name, collapse=" "))) + best <- better + } + } +} + +mydata.best <- mydata %>% + filter_at(.vars=vars(trait1:trait15), + .vars_predicate = all_vars(. %in% best$trait.name | is.na(.))) %>% + filter(ntraits <= length(best$trait.name)) %>% + arrange(ntraits, Coef.obs) %>% + mutate(seq=1:n()) + +(top.all <- top.first %+% mydata.best + + scale_y_continuous(breaks=mydata.best$seq, + labels=mydata.best$Trait.comb) + + theme(axis.text.y = element_text(size=3))) + +ggsave(filename = "_pics/Best8_AllCombinations_CI.png", dpi=400, + width=6, height=10, top.all) + +mydata.out <- mydata %>% + #filter(ntraits<3)%>% #filter(sign_plus==T) %>% #filter(trait1 %in% traits.sign.alone & (is.na(trait2) | trait2 %in% traits.sign.alone)) %>% mutate(seq=1:nrow(.)) # %>% # arrange(desc(obs)) %>% # slice(1:20) -top7 <- ggplot(data=mydata %>% - rename(Significant=sign_plus)) + + +top10 <- ggplot(data=mydata.out %>% + rename(Significant=sign_plus)) + geom_segment(aes(x=q025, xend=q975, y=seq, yend=seq, col=Significant)) + - geom_point(aes(x=Coef.obs, y=seq), pch=15) + + geom_point(aes(x=Coef.obs, y=seq), pch=15) + + facet_wrap(.~ntraits, nrow=2, scales = "free_y") + scale_y_continuous(breaks=mydata$seq, labels=mydata$Trait.comb, name="Trait combination") + - scale_x_continuous(name="RV correlation") + + scale_x_continuous(name="RD correlation") + theme(legend.position="bottom", legend.box = "horizontal") -library(gridExtra) -library(grid) -tt2 <- ttheme_minimal() -top7.leg <- cowplot::plot_grid(top7, - tableGrob(trait.labs %>% dplyr::select(trait.name), theme=tt2), - nrow=1, rel_widths=c(0.65, 0.35)) +#library(gridExtra) +#library(grid) +#tt2 <- ttheme_minimal() +#top7.leg <- cowplot::plot_grid(top7, +# tableGrob(trait.labs %>% dplyr::select(trait.name), theme=tt2), +# nrow=1, rel_widths=c(0.65, 0.35)) +# +#ggsave(filename = "_pics/Top07Predictors_CI.png", dpi=400, +# width=8, height=11, top7.leg) -ggsave(filename = "_pics/Top7Predictors_CI.png", dpi=400, - width=8, height=11, top7.leg) #traits.sign <- traits %>% -# dplyr::select(species0, traits.sign.alone) +# rownames_to_column("species0") %>% +# dplyr::select(species0, all_of(traits.sign.alone)) #write_delim(traits.sign, path="_data/Mesobromion/traits.out.10perc.sign.txt", delim="\t") break() -- GitLab