diff --git a/01b_MesobromionCluster.R b/01b_MesobromionCluster.R new file mode 100644 index 0000000000000000000000000000000000000000..375929f826a88e4d98404f35790188b58fbf25b8 --- /dev/null +++ b/01b_MesobromionCluster.R @@ -0,0 +1,223 @@ +#Functions to define correlations between Beals, SYNCSA, ENV +# W - Sp. composition +# X - fuzzy weighted +# Y - Beals +# B - traits +# E - Environment + + +require(tidyverse) +require(SYNCSA) +require(vegan) +require(abind) +require(ade4) +require(parallel) +require(doParallel) + +#### Function 1 - CorXY #### +get.corXY <- function(comm, traits, trait.sel="all", stat=c("mantel", "RV", "procrustes")){ + if(identical(trait.sel, "all")) {trait.sel <- 1:ncol(traits)} + ii <- trait.sel + lab.tmp <- paste(ii, collapse="_") + ### delete potential missing species + if(any(colSums(comm)==0)){ + empty <- which(colSums(comm)==0) + traits <- traits[-empty,] + comm <- comm[,-empty] + } + syn.out.tmp <- matrix.x(comm=comm, traits=traits[,ii,drop=F], scale=T)$matrix.X + W.beals <- as.data.frame(beals(comm, include=T, type=2)) + corXY <- NULL + if("mantel" %in% stat){ + W.beals.d <- dist(W.beals) + mantel.tmp <- mantel(W.beals.d, dist(syn.out.tmp[])) + corXY <- rbind(corXY, + data.frame(Trait.comb=lab.tmp, Test="Mantel", Coef=mantel.tmp$statistic, pvalue=mantel.tmp$signif)) + } + if("RV" %in% stat){ + RV.tmp <- RV.rtest(W.beals, as.data.frame(syn.out.tmp)) + corXY <- rbind(corXY, + data.frame(Trait.comb=lab.tmp, Test="RV", Coef=RV.tmp$obs, pvalue=RV.tmp$pvalue)) + } + if("procrustes" %in% stat){ + prot.tmp <- protest(W.beals, syn.out.tmp) + corXY <- rbind(corXY, + data.frame(Trait.comb=lab.tmp, Test="Procrustes", Coef=prot.tmp$t0, pvalue=prot.tmp$signif)) + } + return(corXY) +} + + + +#### Function 2 - CorTE #### +get.corTE <- function(comm, traits, envir, trait.sel="all", env.sel="all", stat=c("mantel", "RV")){ + if(identical(trait.sel, "all")) {trait.sel <- 1:ncol(traits)} + if(identical(env.sel, "all" )) {env.sel <- 1:ncol(envir)} + ii <- trait.sel + lab.tmp <- paste(ii, collapse="_") + ## delete potential missing species + if(any(colSums(comm)==0)){ + empty <- which(colSums(comm)==0) + traits <- traits[-empty,] + comm <- comm[,-empty] + } + syn.out.tmp <- matrix.t(comm=comm, traits=traits, scale=T)$matrix.T + ee <- env.sel + lab.env <- paste(ee, collapse="_") + corTE <- NULL + if("mantel" %in% stat){ + mantel.tmp <- mantel(dist(envir[,ee, drop=F]), dist(syn.out.tmp[,ii,drop=F])) + corTE <- rbind(corTE, + data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Mantel", Coef=mantel.tmp$statistic, pvalue=mantel.tmp$signif)) + } + if("RV" %in% stat){ + RV.tmp <- RV.rtest(as.data.frame(envir[,ee, drop=F]), as.data.frame(syn.out.tmp[,ii,drop=F])) + corTE <- rbind(corTE, + data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="RV", Coef=RV.tmp$obs, pvalue=RV.tmp$pvalue)) + } + if("procrustes" %in% stat){ + prot.tmp <- protest(envir[,ee, drop=F], syn.out.tmp[,ii,drop=F]) + corTE <- rbind(corTE, + data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Procrustes", Coef=prot.tmp$t0, pvalue=prot.tmp$signif)) + } + return(corTE) +} + + +#### Function 3 - CorXE #### +get.corXE <- function(comm, traits, envir, trait.sel="all", env.sel="all", stat=c("mantel", "RV", "procrustes")){ + if(identical(trait.sel, "all")) {trait.sel <- 1:ncol(traits)} + if(identical(env.sel, "all" )) {env.sel <- 1:ncol(envir)} + ii <- trait.sel + lab.tmp <- paste(ii, collapse="_") + ### delete potential missing species + if(any(colSums(comm)==0)){ + empty <- which(colSums(comm)==0) + traits <- traits[-empty,] + comm <- comm[,-empty] + } + syn.out.tmp <- matrix.x(comm=comm, traits=traits[,ii,drop=F], scale=T)$matrix.X + ee <- env.sel + lab.env <- paste(ee, collapse="_") + corXE <- NULL + if("mantel" %in% stat){ + mantel.tmp <- mantel(dist(envir[,ee, drop=F]), dist(syn.out.tmp[])) + corXE <- rbind(corXE, + data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Mantel", Coef=mantel.tmp$statistic, pvalue=mantel.tmp$signif)) + } + if("RV" %in% stat){ + RV.tmp <- RV.rtest(as.data.frame(envir[,ee, drop=F]), as.data.frame(syn.out.tmp)) + corXE <- rbind(corXE, + data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="RV", Coef=RV.tmp$obs, pvalue=RV.tmp$pvalue)) + } + if("procrustes" %in% stat){ + prot.tmp <- protest(envir[,ee, drop=F], syn.out.tmp) + corXE <- rbind(corXE, + data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Procrustes", Coef=prot.tmp$t0, pvalue=prot.tmp$signif)) + } + return(corXE) +} + + + +### Get SES (both parametric and non parametric) +#obs.df = output from one of the get functions above +#perm.df = df traitCombination X permutations with the chosen statistic of correlation on permuteda data + +get.SES <- function(obs.df, perm.df, stat="RV") { + nperm <- dim(perm.df)[2] + SES.out <- as.data.frame.table(perm.df) %>% + rename(Trait.comb=Var1, perm=Var2, coef=Freq ) %>% + group_by(Trait.comb) %>% + left_join(obs.df %>% + filter(Test==stat) %>% + dplyr::select(-pvalue, -Test) %>% + mutate(Trait.comb=paste0("t", Trait.comb)) %>% + rename(obs=Coef), + by=c("Trait.comb")) %>% + summarize(q15 = quantile(coef, probs = 0.15865), + q50 = quantile(coef, .5), + q84 = quantile(coef, 0.84135), + mean.perm=mean(coef), + sd.perm=sd(coef), + greater.than.obs=sum( (abs(coef)>=abs(obs))/n())) %>% + left_join(obs.df %>% + filter(Test==stat) %>% + dplyr::select(-pvalue, -Test) %>% + mutate(Trait.comb=paste0("t", Trait.comb)) %>% + rename(obs=Coef), + by=c("Trait.comb")) %>% + mutate(SES.np= ifelse(obs>=q50, (obs-q50)/(q84-q50), (obs-q50)/(q50-q15))) %>% + mutate(SES=(obs-mean.perm)/sd.perm) %>% + arrange(desc(SES.np)) %>% + mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>% + mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm))) + return(SES.out) +} + + + + +Mesobromion <- function(species.path, traits.path, output, myfunction="get.corXY", max.inter.t, chunkn, chunk.i, nperm=99){ + + myfunction <- get(myfunction) + ## calculate corr between species composition matrix and traits + species <- read_delim(species.path, delim="\t") %>% + as.data.frame() + traits <- read_delim(traits.path, delim="\t") %>% + as.data.frame() + + + traits <- traits %>% + column_to_rownames("species0") %>% + rename_all(.funs=~gsub(pattern=".mean$", replacement="", x=.)) + + ## create list of indices for each combination of traits up to a max number of interactions + n.traits <- ncol(traits) + allcomb.t <- NULL + for(n.inter in 1:max.inter.t){ + allcomb.t <- c(allcomb.t, combn(1:n.traits, n.inter, simplify=F)) + } + nall <- length(allcomb.t) + + ## Divide in chunks if requested + if(chunkn>1 & !is.na(chunk.i)){ + print(paste("divide in", chunkn, "chunks and run on chunk n.", chunk.i)) + indices <- 1:length(allcomb.t) + chunks <- split(indices, sort(indices%%chunkn)) + allcomb.t <- allcomb.t[chunks[[chunk.i]]] + } + print(paste("Running on", length(allcomb.t),"out of", nall, "combinations")) + names.t <- unlist(lapply(allcomb.t, paste, collapse="_")) + + print("Run on observed data") + cor.obs <- NULL + for(i in 1:length(allcomb.t)) { + tt <- unlist(allcomb.t[i]) + cor.obs <- rbind(cor.obs, + myfunction(comm=species, trait=traits, trait.sel=tt, stat="RV")) + print(i) + } + + print("Permute traits data") + set.seed(1984) + traits.perm <- lapply(1:nperm, FUN=function(x){ + tmp <- traits[sample(1:nrow(traits)),] + rownames(tmp) <- rownames(traits) + return(tmp)} + ) + + print("Run on permuted data") + cor.perm <- matrix(NA, nrow=length(allcomb.t), ncol=nperm, dimnames = list(paste("t", names.t, sep=""), + paste("p",1:nperm, sep=""))) + for(j in 1:length(allcomb.t)){ + tt <- unlist(allcomb.t[j]) + for(b in 1:nperm){ + cor.perm[j,b] <- myfunction(comm=species, trait=traits.perm[[b]], trait.sel=tt, stat="RV")$Coef + print(paste("trait", tt, "perm", b)) + } + } + + #save(corXY, file="_data/Mesobromion/corXY/corXY.RData") + save(cor.obs, cor.perm, file = paste(output, "_", chunk.i, ".RData", sep="")) +} diff --git a/cli_01b.r b/cli_01b.r new file mode 100644 index 0000000000000000000000000000000000000000..de8fbe042371cbef8da324746e1fd61953ac5c99 --- /dev/null +++ b/cli_01b.r @@ -0,0 +1,81 @@ +library(optparse) + +# ------------------------------------------------------------------------------ +# defaults +# ------------------------------------------------------------------------------ + +default.max.inter.t <- 1 +default.nperm <- 99 +# ------------------------------------------------------------------------------ +# parsing arguments +# ------------------------------------------------------------------------------ + +options <- list ( + + make_option( + opt_str = c("-m", "--max.inter.t"), + dest = "max.inter.t", + type = "integer", + default = default.max.inter.t, + help = "Max number of interactions for traits", + metavar = "4" + ), + + make_option( + opt_str = c("-n", "--chunkn"), + dest = "chunkn", + type = "integer", + default = 1, + help = "How many chunks?", + metavar = "4" + ), + + make_option( + opt_str = c("-i", "--chunk.i"), + dest = "chunk.i", + type = "integer", + default = NA, + help = "Which chunk?", + metavar = "4" + ), + make_option( + opt_str = c("-p", "--nperm"), + dest = "nperm", + type = "integer", + default = default.nperm, + help = "How many permutations?", + metavar = "4" + ) +) + +parser <- OptionParser( + usage = "Rscript %prog [options] data dt_beals header output", + option_list = options, + description = "\nan awesome R script", + epilogue = "use with caution, the awesomeness might slap you in the face!" +) + +cli <- parse_args(parser, positional_arguments = 4) + +# ------------------------------------------------------------------------------ +# assign a few shortcuts +# ------------------------------------------------------------------------------ + +species.path <- cli$args[1] +traits.path <- cli$args[2] +output <- cli$args[3] +myfunction <- cli$args[4] +max.inter.t <- cli$options$max.inter.t +chunkn <- cli$options$chunkn +chunk.i <- cli$options$chunk.i +nperm <- cli$options$nperm + +# ------------------------------------------------------------------------------ +# actual program +# ------------------------------------------------------------------------------ + +source("01b_MesobromionCluster.R") + +Mesobromion(species.path, traits.path, output, myfunction, max.inter.t, chunkn, chunk.i, nperm) + + diff --git a/session.R b/session.R new file mode 100644 index 0000000000000000000000000000000000000000..16ae1cb097dab7d1a16c70a22a678d642df2ee1e --- /dev/null +++ b/session.R @@ -0,0 +1,12 @@ + +source("01b_MesobromionCluster.R") +species.path <- "_data/Mesobromion/species.out.10perc.txt" +traits.path <- "_data/Mesobromion/traits.out.10perc.txt" +output <- "test" +myfunction <- "get.corXY" +max.inter.t <- 1 +chunkn <- 40 +chunk.i <- 1 +nperm <- 99 + +Mesobromion(species.path, traits.path, output, myfunction, max.inter.t, chunkn, chunk.i, nperm) diff --git a/submit_01b.sh b/submit_01b.sh new file mode 100644 index 0000000000000000000000000000000000000000..b089bea4a21af80817b78e6c686f8a978c09bfc7 --- /dev/null +++ b/submit_01b.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +#$ -S /bin/bash +#$ -N HIDDEN + +#$ -o /work/$USER/$JOB_NAME-$JOB_ID-$TASK_ID.log +#$ -j y + +#$ -l h_rt=00:30:00:00 +#$ -l h_vmem=1G + +#$ -pe smp 8-32 + +#$ -cwd + +#$ -t 1-99 + + +module load R + +output=/data/splot/HIDDEN/output/HIDDEN + +Rscript \ + cli_01b.r \ + --max.inter.t 1 \ + --chunk.i $SGE_TASK_ID \ + --chunkn 20 \ + --nperm 99 \ + /data/splot/HIDDEN/species.out.10perc.txt \ + /data/splot/HIDDEN/traits.out.10perc.txt \ + "$output" \ + "get.corXY"