From 6150554e4bfafcc3639c7c06d1e10bf56ceead8f Mon Sep 17 00:00:00 2001
From: Francesco Sabatini <francesco.sabatini@idiv.de>
Date: Thu, 12 Nov 2020 12:25:58 +0100
Subject: [PATCH] Added graph of SampleSize rXY vs rXW

---
 03_Figures_Simulations.R | 68 +++++++++++++++++++++++++++++++++-------
 1 file changed, 57 insertions(+), 11 deletions(-)

diff --git a/03_Figures_Simulations.R b/03_Figures_Simulations.R
index ba103fa..fa2bdf1 100644
--- a/03_Figures_Simulations.R
+++ b/03_Figures_Simulations.R
@@ -4,13 +4,16 @@
 library(tidyverse)
 
 #### import function
-FormatData <- function(myfiles){
+ FormatData <- function(myfiles){
   output <- NULL
   for(ff in myfiles){
     iter <- gsub(pattern="/Summary.txt$", replacement="", ff) 
+    iter <- strsplit(iter, split = "/")[[1]]
+    iter <- iter[[length(iter)]]
     iter <- strsplit(iter, split = "_")[[1]]
     iter <- as.integer(unlist(regmatches(iter, gregexpr("[[:digit:]]+", iter))))
-    tmp <- read_delim(paste(mypath, ff, sep="/"), delim="\t", col_names = F) %>% 
+    if(length(iter) < 4){ iter <- c(iter, NA)}  ## pad with NA when the name yields fewer than 4 numbers; iter[[4]] is read below as SampleN
+    tmp <- read_delim(ff, delim="\t", col_names = FALSE) %>%
       dplyr::select(-X1, -X3, -X5, -X9, -X11, -X13) %>% 
       rename(simulated=X2, trait=X4, envir=X6, stat.type=X7, stat.obs=X8, pvalue=X10, SES=X12, exp.med=X14 ) %>% 
       mutate(stat.type=gsub(pattern = "^r\\(", replacement="", stat.type)) %>%
@@ -21,13 +24,14 @@ FormatData <- function(myfiles){
       mutate(main=iter[[1]]) %>% 
       mutate(inter=iter[[2]]) %>% 
       mutate(corr=iter[[3]]) %>% 
+      mutate(SampleN=iter[[4]]) %>% 
       dplyr::select(main:corr, everything())
     output <- bind_rows(output, tmp)
   }
   
   outp.summary <- output %>% 
     dplyr::filter(!stat.type %in% c("TY", "XY", "XY.T", "XY.TR")) %>% 
-    group_by(main, inter, corr, trait, envir, stat.type) %>% 
+    group_by(main, inter, corr, trait, SampleN, envir, stat.type) %>% 
     summarize(stat.obs.med=median(stat.obs),
               power=mean(pvalue<=0.05),
               SES.med=median(SES), 
@@ -35,14 +39,18 @@ FormatData <- function(myfiles){
               nsim=n()) %>% 
     bind_rows(output %>% 
                 dplyr::filter(stat.type %in% c("TY", "XY", "XY.T", "XY.TR")) %>% 
-                group_by(main, inter, corr, trait, stat.type) %>% 
+                group_by(main, inter, corr, trait, SampleN, stat.type) %>% 
                 summarize(stat.obs.med=median(stat.obs),
                           power=mean(pvalue<=0.05),
                           SES.med=median(SES), 
                           exp.med.med=median(exp.med), 
                           nsim=n())) %>% 
     dplyr::select(main:stat.type, nsim, stat.obs.med:exp.med.med) %>% 
-    arrange(stat.type, main, inter, corr, trait, envir)
+    arrange(stat.type, main, inter, corr, trait, envir) %>%
+    ungroup() %>%
+    ## drop columns in which every value is NA or "" (element-wise test, so the predicate is never NA)
+    select_if(~ !all(is.na(.) | . == ""))
+
   return(outp.summary)
 }
 
@@ -55,7 +63,7 @@ get.ntraits <- function(x){
 
 #### FIGURE 2 #####
 mypath <- "_data/Experiment_02Mar2020_FactorInteraction&TraitCorr"
-myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = T)
+myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = TRUE, full.names = TRUE) ## full paths: FormatData reads each file as given
 outp.summary <- FormatData(myfiles) 
 
 
@@ -108,7 +116,7 @@ ggsave(filename="_pics/R1/Fig2_CorrInte_02March.png", width=6, height=5, device=
 
 #### FIGURE 3 #####
 mypath <- "_data/Experiment_04Mar2020_TraitNumber"
-myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = T)
+myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = TRUE, full.names = TRUE) ## full paths: FormatData reads each file as given
 outp.summary <- FormatData(myfiles) %>% 
   rename(ntraits=inter)
 
@@ -210,7 +218,7 @@ ggsave(filename="_pics/R1/Fig3_TraitNumber.png",
 
 #### FIGURE S4 #####
 mypath <- "_data/Experiment_08Jul2020_TraitSuppression&TraitCorr"
-myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = T)
+myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = TRUE, full.names = TRUE) ## full paths: FormatData reads each file as given
 outp.summary <- FormatData(myfiles) 
 
 
@@ -252,7 +260,7 @@ ggsave(filename="_pics/R1/FigS4_Extra_CorrInte_08Jul20.png", width=6, height=5,
 
 #### FIGURE SXXX - XW ####
 mypath <- "_data/Experiment_30Oct2020_FactorInteraction&TraitCorr_XW"
-myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = T)
+myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = TRUE, full.names = TRUE) ## full paths: FormatData reads each file as given
 outp.summary <- FormatData(myfiles) 
 
 mypalette <- palette(c("#e41a1c",  #1 - red)
@@ -295,7 +303,7 @@ ggsave(filename="_pics/R1/FigSXXX_CorrInte_30November_XW.png", width=6, height=5
 
 ### FIGURE SXXY - SBM ####
 mypath <- "_data/Experiment_30Oct2020_FactorInteraction&TraitCorr_XW"
-myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = T)
+myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = TRUE, full.names = TRUE) ## full paths: FormatData reads each file as given
 outp.summary <- FormatData(myfiles) 
 
 mypalette <- palette(c("#e41a1c",  #1 - red)
@@ -337,7 +345,7 @@ ggsave(filename="_pics/R1/FigSXXY_CorrInte_30November_SbM.png", width=6, height=
 
 #### FIGURE SXXO - XE ####
 mypath <- "_data/Experiment_02Mar2020_FactorInteraction&TraitCorr"
-myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = T)
+myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = TRUE, full.names = TRUE) ## full paths: FormatData reads each file as given
 outp.summary <- FormatData(myfiles) 
 
 mypalette <- palette(c("#e41a1c",  #1 - red)
@@ -378,6 +386,44 @@ ggplot(data=outp.summary %>%
 ggsave(filename="_pics/R1/FigSXXO_CorrInte_02March_XE.png", width=6, height=5, device="png", dpi = 300, last_plot())
 
 
+#### FIGURE SXXP rXY & rXW vs Sample Size ####
+mypath <- "_data/Experiment_30Oct2020_FactorInteraction&TraitCorr_XY_SampleSize_Main=040_Inter=00_Corr=00_v21169/"
+myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = TRUE, full.names = TRUE)
+myfiles <- myfiles[!grepl("_new", x=myfiles)] ## TODO(review): drops every path containing '_new' - confirm these runs should be excluded
+outp.summary1 <- FormatData(myfiles)
+## NOTE(review): unlike the XY list above, the XW list below is not filtered for '_new' - confirm this is intended
+mypath2 <- "_data/Experiment_30Oct2020_FactorInteraction&TraitCorr_XW_SampleSize_Main=040_Inter=00_Corr=00_v21169/"
+myfiles2 <- list.files(path=mypath2, pattern = "Summary.txt", recursive = TRUE, full.names = TRUE)
+outp.summary2 <- FormatData(myfiles2) %>%
+  mutate(stat.type=ifelse(stat.type=="XY", "XW", stat.type)) ## relabel the XY rows of the XW experiment so both runs can share one table
+outp.summary <- outp.summary1 %>%
+  bind_rows(outp.summary2)
+
+## Line plot of 'power' against SampleN for trait t1, one line per statistic (r(XY) vs r(XW))
+ggplot(data= outp.summary %>%
+         mutate(main=main/100) %>%
+         dplyr::filter(stat.type %in% c("XY", "XW")) %>%
+         mutate(stat.type=factor(stat.type, levels=c("XY", "XW"), labels=c("r(XY)", "r(XW)"))) %>%
+         dplyr::filter(trait %in% c("1", "2", "1 2", "3", "1 3", "2 3")) %>%
+         mutate(trait=factor(trait, levels=c("1", "2", "3", "1 2", "1 3", "2 3"),
+                             labels=c("t1", "t2", "tn", "t1 t2", "t1 tn", "t2 tn"))) %>%
+         dplyr::filter(trait=="t1"))+
+  geom_line(aes(x=SampleN, y=power, group=stat.type, col=stat.type)) +
+  scale_color_manual("Statistics",
+                     values=c("#e41a1c",  # red
+                              "#377eb8"   # blue
+                              )) +
+  theme_bw() +
+  scale_x_continuous(name="Sample size") +
+  scale_y_continuous(name="Prop. of significant tests") +
+  theme(panel.grid = element_blank(),
+        legend.position = c(0.8, 0.2))
+## NOTE(review): the file name below says '02March' but the data come from the 30Oct2020 experiment - confirm
+ggsave(filename="_pics/R1/FigSXXP_CorrInte_02March_SampleSize.png",
+       width=3, height=3, device="png", dpi = 300, plot = last_plot())
+
+
+
 
 
 
-- 
GitLab