Skip to content
Snippets Groups Projects
Commit f16e242d authored by Francesco Sabatini's avatar Francesco Sabatini
Browse files

Cleaned the house

parent 77cc017e
Branches
No related tags found
No related merge requests found
setwd("/data/sPlot/users/Helge/HIDDEN")
library(tidyverse)
library(SYNCSA)
library(vegan)
library(abind)
library(ade4)
source("99_HIDDEN_functions.R")
#library(MatrixCorrelation)
### Import data
#Bi <- read_delim("_data/B_Simulated.txt", n_max = 50, delim="\t", col_names = F)[,1:2]
#Bi <- read_delim("_data/Two_Simulated_Datasets/B_Simulated.txt", n_max = 50, delim="\t", col_names = F)[,1:2]
#Bi <- read_delim("_data/SimulatedData_Neutral/B_Simulated.txt", n_max = 50, delim="\t", col_names = F)[,1:2]
Bi <- read_delim("_data/SimulatedData_PartialFiltering_Feedback/B_Simulated.txt", n_max = 50, delim="\t", col_names = F)[,1:5]
Bi <- Bi %>%
#dplyr::rename(trait1=X1, trait2=X2) %>%
mutate(rown = paste0("Sp", 1:50)) %>%
column_to_rownames("rown")
colnames(Bi) <- paste0("traits", 1:ncol(Bi))
Bi <- as.matrix(Bi)
#Wi <- read_delim("_data/W_Simulated.txt", n_max = 200, delim="\t", col_names = F)[,1:50]
#Wi <- read_delim("_data/Two_Simulated_Datasets/W_Simulated_B.txt", n_max = 200, delim="\t", col_names = F)[,1:50]
#Wi <- read_delim("_data/SimulatedData_Neutral/W_Simulated.txt", n_max = 200, delim="\t", col_names = F)[,1:50]
Wi <- read_delim("_data/SimulatedData_PartialFiltering_Feedback/W_Simulated.txt", n_max = 200, delim="\t", col_names = F)[,1:50]
Wi <- Wi %>%
mutate(rown = paste0("Site", 1:200)) %>%
column_to_rownames("rown")
colnames(Wi) <- paste0("Sp", 1:50)
Wi <- as.matrix(Wi)
#Ei <- read_delim("_data/E_Simulated.txt", n_max = 2, delim="\t", col_names = F)[,1:200]
#Ei <- read_delim("_data/Two_Simulated_Datasets/E_Simulated.txt", n_max = 200, delim="\t", col_names = F)[,1:2]
#Ei <- read_delim("_data/SimulatedData_Neutral/E_Simulated.txt", n_max = 200, delim="\t", col_names = F)[,1:2]
Ei <- read_delim("_data/SimulatedData_PartialFiltering_Feedback/E_Simulated.txt", n_max = 200, delim="\t", col_names = F)[,1:2]
Ei <- as.data.frame(Ei)
rownames(Ei) <- paste0("Site", 1:200)
colnames(Ei) <- paste0("Env", 1:2)
Ei$Env3 <- Ei$Env1*Ei$Env2
Ei <- (as.matrix(Ei))
## create list of indices for each combination of traits up to a max number of interactions
n.traits <- ncol(Bi)
max.inter.t <- 1 #ncol(Bi)
allcomb.t <- NULL
for(n.inter in 1:max.inter.t){
allcomb.t <- c(allcomb.t, combn(1:n.traits, n.inter, simplify=F))
}
## same for environmental variables
n.env <- ncol(Ei)
max.inter.env <- ncol(Ei)
allcomb.env <- NULL
for(n.inter in 1:max.inter.env){
allcomb.env <- c(allcomb.env, combn(1:n.env, n.inter, simplify=F))
}
names.t <- unlist(lapply(allcomb.t, paste, collapse="_"))
names.env <- unlist(lapply(allcomb.env, paste, collapse="_"))
comb.list <- expand.grid(names.t, names.env)
colnames(comb.list) <- c("trait", "env")
##Run on simulated data
corXY <- NULL
for(i in 1:length(allcomb.t)){
tt <- unlist(allcomb.t[i])
corXY <- rbind(corXY,
get.corXY(comm=Wi, trait=Bi, trait.sel=tt, stat="RV"))
print(i)
}
corXY %>% arrange(Test, desc(Coef))
corTE <- NULL
for(i in 1:nrow(comb.list)){
tt <- as.numeric(unlist(strsplit(as.character(comb.list[i, "trait"]), "_")))
ee <- as.numeric(unlist(strsplit(as.character(comb.list[i, "env"]), "_")))
corTE <- rbind(corTE,
get.corTE(comm=Wi, trait=Bi, envir = Ei, trait.sel = tt, env.sel=ee, stat="RV"))
print(i)
}
corTE %>% arrange(Test, desc(Coef))
corXE <- NULL
for(i in 1:nrow(comb.list)){
tt <- as.numeric(unlist(strsplit(as.character(comb.list[i, "trait"]), "_")))
ee <- as.numeric(unlist(strsplit(as.character(comb.list[i, "env"]), "_")))
corXE <- rbind(corXE,
get.corXE(comm=Wi, trait=Bi, envir = Ei, trait.sel = tt, env.sel=ee, stat="RV"))
print(i)
}
corXE %>% arrange(Test, desc(Coef))
### Create Simulated dataset and calculate corXY and corTE
#create list of permuted trait matrices
nperm <- 199
Bi.perm <- lapply(1:nperm, FUN=function(x){
tmp <- Bi[sample(1:nrow(Bi)),]
rownames(tmp) <- rownames(Bi)
return(tmp)
}
)
## corXY on permuted
corXY.perm <- matrix(NA, nrow=length(allcomb.t), ncol=nperm, dimnames = list(paste("t", names.t, sep=""),
paste("p",1:nperm, sep="")))
for(i in 1:length(allcomb.t)){
tt <- unlist(allcomb.t[i])
for(b in 1:nperm){
corXY.perm[i,b] <- get.corXY(comm=Wi, trait=Bi.perm[[b]], trait.sel=tt, stat="RV")$Coef}
print(i)
}
## transform obs to SES
corXY.perm.df <- get.SES(corXY, corXY.perm, stat="RV") %>%
rowwise() %>%
mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) #%>%
# arrange(nvar) %>%
# ungroup() %>%
# filter(grepl(pattern = "1", x = Trait.comb)) %>%
# group_by(nvar) %>%
# arrange(nvar, desc(obs)) %>%
# ungroup() %>%
# mutate(seq=row_number())
ggplot(data=corXY.perm.df) +
#geom_segment(aes(x=conf.m, xend=conf.p, y=as.numeric(as.factor(Trait.comb)), yend=as.numeric(as.factor(Trait.comb)))) +
geom_segment(aes(x=conf.m, xend=conf.p, y=seq, yend=seq)) +
geom_point(aes(x=obs, y=seq), pch=15) +
scale_y_continuous(breaks=corXY.perm.df$seq,
labels=corXY.perm.df$Trait.comb)
## corTE on permuted
corTE.perm <- array(NA, dim = list(length(allcomb.t),
nperm,
length(allcomb.env)),
dimnames = list(paste("t", names.t, sep=""),
paste("p",1:nperm, sep=""),
paste("e", names.env, sep="")))
for(i in 1:length(allcomb.t)){
tt <- unlist(allcomb.t[i])
for(e in 1:length(allcomb.env)){
ee <- unlist(allcomb.env[e])
for(b in 1:nperm){
corTE.perm[i,b,e] <- get.corTE(comm=Wi, trait=Bi.perm[[b]], envir = Ei, trait.sel=tt, env.sel=ee, stat="RV")$Coef
}
}
print(i)
}
## transform obs to SES
corTE.perm.df <- get.SES(corTE, corTE.perm, stat="RV") %>%
rowwise() %>%
mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) %>%
mutate(nenv=length(unlist(str_split(Env.comb, "_")))) %>%
ungroup() %>%
arrange(nvar) %>%
mutate(Trait.comb=factor(Trait.comb, levels=paste("t", names.t, sep=""))) %>%
#filter(grepl(pattern = "1", x = Trait.comb) & nvar==1) %>%
#filter(grepl(pattern = "1", x = Env.comb)) %>%
filter(nenv==1, SES>1.96) %>%
group_by(nvar) %>%
arrange(nvar, desc(obs)) %>%
ungroup() %>%
mutate(seq=row_number()) %>%
arrange(nvar, seq) %>%
mutate(Env.comb=factor(Env.comb))
# scatterplot with color coding
ggplot(data=corTE.perm.df) +
geom_point(aes(x=as.factor(Env.comb), y=Trait.comb, col=obs, size=obs), pch=16) +
scale_color_viridis(option="magma", direction = -1) +
#scale_x_continuous(name="Env",
# breaks=(1:3),
# labels=levels(corTE.perm.df$Env.comb), limits = c(0.5,3.5)) +
theme_bw()
# faceted spider plot
ggplot(data=corTE.perm.df) +
geom_segment(aes(x=conf.m, xend=conf.p, y=seq, yend=seq)) +
geom_point(aes(x=obs, y=seq), pch=15) +
scale_y_continuous(breaks=corTE.perm.df$seq,
labels=corTE.perm.df$Trait.comb) +
facet_wrap(factor(corTE.perm.df$Env.comb), nrow=2)
ggplot(data=corTE.perm.df) +
#geom_point(aes(x=as.factor(Env.comb), y=Trait.comb, col=obs, size=obs), pch=16) +
geom_segment(aes(x=conf.m, xend=conf.p, y=Trait.comb, yend=Trait.comb)) +
geom_point(aes(x=obs, y=Trait.comb), pch=17, cex=2) +
#scale_color_distiller(type="seq", direction = -1) +
#scale_color_viridis(option="magma", direction = -1) +
#scale_x_continuous(name="Env",
# breaks=(1:3),
# labels=levels(corTE.perm.df$Env.comb), limits = c(0.5,3.5)) +
theme_bw() +
facet_grid(Env.comb~.)
## corXE on permuted
corXE.perm <- array(NA, dim = list(length(allcomb.t),
nperm,
length(allcomb.env)),
dimnames = list(paste("t", names.t, sep=""),
paste("p",1:nperm, sep=""),
paste("e", names.env, sep="")))
for(i in 1:length(allcomb.t)){
tt <- unlist(allcomb.t[i])
for(e in 1:length(allcomb.env)){
ee <- unlist(allcomb.env[e])
for(b in 1:nperm){
corXE.perm[i,b,e] <- get.corXE(comm=Wi, trait=Bi.perm[[b]], envir = Ei, trait.sel=tt, env.sel=ee, stat="RV")$Coef
}
print(ee)
}
print(paste("i=",i, "tt=", paste(tt, collapse=" "), "done"))
}
## transform obs to SES
corXE.perm.df <- get.SES(corXE, corXE.perm, stat="RV") %>%
rowwise() %>%
mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) %>%
mutate(nenv=length(unlist(str_split(Env.comb, "_")))) %>%
ungroup() %>%
arrange(nvar) %>%
mutate(Trait.comb=factor(Trait.comb, levels=paste("t", names.t, sep=""))) %>%
#filter(grepl(pattern = "1", x = Trait.comb) & nvar==1) %>%
#filter(grepl(pattern = "1", x = Env.comb)) %>%
filter(nenv==1, SES>1.96) %>%
group_by(nvar) %>%
arrange(nvar, desc(obs)) %>%
ungroup() %>%
mutate(seq=row_number()) %>%
arrange(nvar, seq) %>%
mutate(Env.comb=factor(Env.comb))
ggplot(data=corXE.perm.df) +
#geom_point(aes(x=as.factor(Env.comb), y=Trait.comb, col=obs, size=obs), pch=16) +
geom_segment(aes(x=conf.m, xend=conf.p, y=Trait.comb, yend=Trait.comb)) +
geom_point(aes(x=obs, y=Trait.comb), pch=17, cex=2) +
#scale_color_distiller(type="seq", direction = -1) +
#scale_color_viridis(option="magma", direction = -1) +
#scale_x_continuous(name="Env",
# breaks=(1:3),
# labels=levels(corTE.perm.df$Env.comb), limits = c(0.5,3.5)) +
theme_bw() +
facet_grid(Env.comb~.)
break()
### parallel alternative to above
ncores=4
library(parallel)
library(doParallel)
cl <- makeForkCluster(ncores, outfile="" )
registerDoParallel(cl)
mybind.array <- function(x, y){
require(abind)
return(abind(x,y, along=3))
}
#foreach(i=1:length(comb.list), .combine=mybind.array ) %do% {
myarray <- foreach(i=1:5, .combine=mybind.array ) %do% {
tt <- as.numeric(unlist(strsplit(as.character(comb.list[i, "trait"]), split="_")))
ee <- as.numeric(unlist(strsplit(as.character(comb.list[i, "env"]), split="_")))
corTE.tmp <- matrix(NA, nrow=nperm, ncol=1)
for(b in 1:nperm){
corTE.tmp[b,1] <- get.corTE(comm=Wi, trait=Bi.perm[[b]], envir = Ei, trait.sel=tt, env.sel=ee, stat="RV")$Coef
}
print(i)
}
stopCluster(cl)
#### deprecated
#### Observed
corXY <- NULL #data.frame(Trait.comb=NULL, Env.comb=NULL, Test=NULL, Coef=NULL, pvalue=NULL)
corTE <- NULL
for(i in 1:length(allcomb.t)) {
ii <- unlist(allcomb.t[i])
lab.tmp <- paste(ii, collapse="_")
syn.out.tmp <- syncsa(Wi, traits = Bi[,ii, drop=F], envir = Ei,
checkdata = TRUE, ro.method = "mantel", method = "pearson", dist = "euclidean", scale = TRUE,
scale.envir = F, ranks = TRUE, put.together = NULL, na.rm = FALSE, strata = NULL,
permutations = 1, parallel = NULL, notification = TRUE)
mantel.tmp <- mantel(W.beals.d, dist(syn.out.tmp$matrices$X))
RV.tmp <- RV.rtest(W.beals, as.data.frame(syn.out.tmp$matrices$X))
prot.tmp <- protest(W.beals, syn.out.tmp$matrices$X)
corXY <- rbind(corXY,
data.frame(Trait.comb=lab.tmp, Test="Mantel", Coef=mantel.tmp$statistic, pvalue=mantel.tmp$signif),
data.frame(Trait.comb=lab.tmp, Test="RV", Coef=RV.tmp$obs, pvalue=RV.tmp$pvalue),
data.frame(Trait.comb=lab.tmp, Test="Procrustes", Coef=prot.tmp$t0, pvalue=prot.tmp$signif))
print(paste("Trait=", paste(ii, collapse="_")))
for(e in 1:length(allcomb.env)){
ee <- unlist(allcomb.env[e])
lab.env <- paste(ee, collapse="_")
mantel.tmp <- mantel(dist(syn.out.tmp$matrices$E[,ee, drop=F]), dist(syn.out.tmp$matrices$T))
RV.tmp <- RV.rtest(as.data.frame(syn.out.tmp$matrices$E[,ee, drop=F]), as.data.frame(syn.out.tmp$matrices$T))
#prot.tmp <- protest(syn.out.tmp$matrices$E[,ee, drop=F], syn.out.tmp$matrices$T)
corTE <- rbind(corTE,
data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Mantel", Coef=mantel.tmp$statistic, pvalue=mantel.tmp$signif),
data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="RV", Coef=RV.tmp$obs, pvalue=RV.tmp$pvalue)#,
#data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Procrustes", Coef=prot.tmp$t0, pvalue=prot.tmp$signif)
)
print(paste("Env=", paste(ee, collapse="_")))
}
}
####
### From Zoltan Botta-Dukat
PValue<-function(x,stat,k=99,lower=F)
# Calculation of p-values by algorithm proposed by
# Knijnenburg, T. A., L. F. A. Wessels, M. J. T. Reinders, and I. Shmulevich. 2009.
# Fewer permutations, more accurate P-values. Bioinformatics 25:i161–i168.
#
# Parameters:
# x = vector of test statistic generated by randomization algorithm
# stat = observed value of test statistic
# k = maximum number of most extreme values used for fitting generalized Pateto distribution
# lower = if H1 is that stat is lower than expected
{
require(evd)
require(DescTools)
n<-length(x)
if (n<k) stop("Error: Too few random values", "\n")
if (lower)
{
m<-max(c(x,stat))
x<-m-x
stat<-m-stat
}
p<-(sum(stat<x)+sum(stat==x)/2)/n
if (p<0.05)
{
x<-sort(x,decreasing=T)
k<-99
repeat
{
tresh<-(x[k]+x[k+1])/2
M<-fpot(x, tresh,std.err=F)
AD.test<-AndersonDarlingTest(x[1:k], null = "pgpd",loc=tresh,scale=M$estimate[1],shape=M$estimate[2])
if (AD.test$p.value>0.05) break
k<-k-1
if (k==0) break
}
#if (k==0) stop("Error: Generalized Pareto distribution cannot be fitted", "\n")
if (k==0) {
p <- NA
} else {
p<-pgpd(tresh,scale=M$estimate[1],shape=M$estimate[2])
}
}
return(p)
}
PValue(Coeff.var.random[,1,3],stat=coeff.var[1,4])
x <- Coeff.var.random[,1,3]
stat <- coeff.var[1,4]
## this script extracts all the summary.txt files from all subfolder
## and summarizes the output for each run, trait x environment combination, and statistics
## It then plots the summarized output
library(tidyverse)
mypath <- "_data/Experiment_04Mar2020"
myfiles <- list.files(path=mypath, pattern = "Summary.txt", recursive = T)
output <- NULL
for(ff in myfiles){
iter <- gsub(pattern="/Summary.txt$", replacement="", ff)
iter <- strsplit(iter, split = "_")[[1]]
iter <- as.integer(unlist(regmatches(iter, gregexpr("[[:digit:]]+", iter))))
tmp <- read_delim(paste(mypath, ff, sep="/"), delim="\t", col_names = F) %>%
dplyr::select(-X1, -X3, -X5, -X9, -X11, -X13) %>%
rename(simulated=X2, trait=X4, envir=X6, stat.type=X7, stat.obs=X8, pvalue=X10, SES=X12, exp.med=X14 ) %>%
mutate(stat.type=gsub(pattern = "^r\\(", replacement="", stat.type)) %>%
mutate(stat.type=gsub(pattern = "\\)\\=$", replacement="", stat.type)) %>%
mutate(stat.type=gsub(pattern = "_", replacement="\\.", stat.type)) %>%
mutate(trait=gsub(pattern="[[:space:]]+$", replacement="", trait)) %>%
mutate(envir=gsub(pattern="[[:space:]]+$", replacement="", envir)) %>%
mutate(main=iter[[1]]) %>%
mutate(ntraits=iter[[2]]) %>%
mutate(corr=iter[[3]]) %>%
dplyr::select(main:corr, everything())
output <- bind_rows(output, tmp)
}
outp.summary <- output %>%
dplyr::filter(!stat.type %in% c("XY", "XY.T", "XY.TR")) %>%
group_by(main, ntraits, corr, trait, envir, stat.type) %>%
summarize(stat.obs.med=median(stat.obs),
power=mean(pvalue<=0.05),
SES.med=median(SES),
exp.med.med=median(exp.med),
nsim=n()) %>%
bind_rows(output %>%
dplyr::filter(stat.type %in% c("XY", "XY.T", "XY.TR")) %>%
group_by(main, ntraits, corr, trait, stat.type) %>%
summarize(stat.obs.med=median(stat.obs),
power=mean(pvalue<=0.05),
SES.med=median(SES),
exp.med.med=median(exp.med),
nsim=n())) %>%
dplyr::select(main:stat.type, nsim, stat.obs.med:exp.med.med) %>%
arrange(stat.type, main, ntraits, corr, trait, envir)
outp.summary
get.ntraits <- function(x){
tmp <- str_split(x, pattern = " ")[[1]]
return(length(tmp))
}
## plotting power for XY with corr
ggplot(data=outp.summary %>%
ungroup() %>%
rowwise() %>%
mutate(sel.ntraits=factor(get.ntraits(trait))) %>%
ungroup() %>%
dplyr::filter(stat.type=="XY") %>%
#filter(ntraits==3) %>%
#dplyr::filter(trait %in% c("1", "2", "1 2", "3")) %>%
#dplyr::filter(trait %in% c("1", "2", "3")) %>%
#dplyr::filter(trait %in% c("1", "2", "3", "1 2", "1 2 3")) %>%
mutate(ntraits=as.factor(ntraits))) +
geom_line(aes(x=main, y=power, group=trait, col=trait)) +
#scale_colour_brewer(palette = "Dark2") +
facet_grid(sel.ntraits~ntraits) +
theme_bw() +
theme(panel.grid = element_blank())
ggsave(filename="_data/Experiment_04Mar2020/corXY_obs_Exp04March2020.png", width=6, height=5, device="png", dpi = 300, last_plot())
## plotting non-parametric SES for XY with corr
ggplot(data=outp.summary %>%
ungroup() %>%
rowwise() %>%
mutate(sel.ntraits=factor(get.ntraits(trait))) %>%
ungroup() %>%
dplyr::filter(stat.type=="XY") %>%
filter(ntraits==3) %>%
#dplyr::filter(trait %in% c("1", "2", "1 2", "3")) %>%
#dplyr::filter(trait %in% c("1", "2", "3")) %>%
#dplyr::filter(trait %in% c("1", "2", "3", "1 2", "1 2 3")) %>%
mutate(ntraits=as.factor(ntraits))) +
geom_line(aes(x=main, y=SES.med, group=trait, col=trait)) +
#scale_colour_brewer(palette = "Dark2") +
#facet_grid(sel.ntraits~ntraits, scales = "free") +
theme_bw() +
theme(panel.grid = element_blank())
ggsave(filename="_data/Experiment_04Mar2020/corXY_SES_Exp04March2020.png", width=6, height=5, device="png", dpi = 300, last_plot())
## plotting XE
ggplot(data=outp.summary %>%
ungroup() %>%
dplyr::filter(stat.type=="XE") %>%
#dplyr::filter(envir=="1") %>%
filter(corr==0) %>%
filter(trait %in% c("1","2","3", "1 2")) %>%
mutate(ntraits=as.factor(ntraits))
#mutate(group0=paste("t", trait, " - e", envir))
) +
geom_line(aes(x=main, y=power, group=trait, col=trait)) +
scale_colour_brewer(palette = "Dark2") +
facet_grid(envir~ntraits) +
theme_bw() +
theme(panel.grid = element_blank())
## plotting XY.T
ggplot(data=outp.summary %>%
ungroup() %>%
dplyr::filter(stat.type=="XY.T") %>%
dplyr::filter(trait %in% c("1", "2", "1 2", "3")) %>%
mutate(ntraits=as.factor(ntraits))) +
geom_line(aes(x=main, y=power, group=trait, col=trait)) +
scale_colour_brewer(palette = "Dark2") +
facet_grid(corr~ntraits) +
theme_bw() +
theme(panel.grid = element_blank())
## plotting XY.TR
ggplot(data=outp.summary %>%
ungroup() %>%
dplyr::filter(stat.type=="XY.TR") %>%
dplyr::filter(trait %in% c("1", "2", "1 2", "3")) %>%
mutate(ntraits=as.factor(ntraits))) +
geom_line(aes(x=main, y=power, group=trait, col=trait)) +
scale_colour_brewer(palette = "Dark2") +
facet_grid(corr~ntraits) +
theme_bw() +
theme(panel.grid = element_blank())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment