diff --git a/01_Mesobromion.R b/01_Mesobromion.R index 52967c870e9a623cd3c301ecab30f39d689e3108..05490087f78db4ed46c9f9308ff20a44b68ffb21 100644 --- a/01_Mesobromion.R +++ b/01_Mesobromion.R @@ -201,7 +201,7 @@ traits <- traits %>% #### ## Import output #### -myfilelist <- list.files(path="_derived/Mesobromion/", pattern="HIDDEN_sign_[0-9]+.RData", full.names = T) +myfilelist <- list.files(path="_derived/Mesobromion/", pattern="HIDDEN_individual_[0-9]+.RData", full.names = T) dataFiles = purrr::map(myfilelist, function(x){get(load(x))}) corXY = bind_rows(dataFiles) %>% as.tbl() @@ -224,8 +224,8 @@ corXY.ci <- corXY %>% greater.than.perm=mean(Coef.obs>Coef.perm), n=n())) %>% #calculate significance using permuted correlations - mutate(sign_plus=greater.than.perm>0.975) %>% - mutate(sign_minus=greater.than.perm<0.025) %>% + mutate(sign_plus=greater.than.perm>0.995) %>% + mutate(sign_minus=greater.than.perm<0.005) %>% mutate(Trait.comb2=Trait.comb) %>% #split strings of trait combinations and add labels separate(Trait.comb2, into=paste0("trait", 1:15)) %>% @@ -352,6 +352,10 @@ for(nn in 1:maxtraits){ } } + + + + mydata.best <- mydata %>% filter_at(.vars=vars(trait1:trait15), .vars_predicate = all_vars(. %in% best$trait.name | is.na(.))) %>% diff --git a/01b_MesobromionCluster.R b/01b_MesobromionCluster.R index b7c56c41c5980d8b4a1cca85566283105afc14f3..b86f930b0dd281e1720329dbff99b9b25946a654 100644 --- a/01b_MesobromionCluster.R +++ b/01b_MesobromionCluster.R @@ -18,15 +18,63 @@ require(doParallel) source("99_HIDDEN_functions.R") -Mesobromion <- function(species.path, traits.path, output, myfunction="get.corXY.bootstrap -", max.inter.t, chunkn, chunk.i, nperm=199){ +get.ci <- function(corXY){ + corXY %>% + filter(bootstr.n==0) %>% + dplyr::select(-bootstr.n) %>% + group_by(Trait.comb) %>% + slice(1) %>% + left_join(corXY %>% + filter(bootstr.n!=0) %>% + group_by(Trait.comb) %>% + summarize(q025=quantile(Coef.obs, 0.025), + q975=quantile(Coef.obs, 0.975), + greater.than.perm=mean(Coef.obs>Coef.perm), + n=n())) %>% + #calculate significance using permuted correlations + mutate(sign_plus=greater.than.perm>0.995) %>% + mutate(sign_minus=greater.than.perm<0.005) %>% + ungroup() %>% + mutate(Trait.comb=as.character(Trait.comb)) %>% + rowwise() %>% + mutate(ntraits= length(unlist(strsplit(Trait.comb, split = "_")))) %>% + ungroup() %>% + arrange(ntraits, Coef.obs) +} + +get.best <- function(x, N){ + out <- x %>% + filter(ntraits==N) %>% + filter(sign_plus==TRUE) %>% + arrange(desc(Coef.obs)) %>% + slice(1) %>% + pull(Trait.comb) + return(out) +} + + +Mesobromion <- function(species.path, traits.path, output, myfunction="get.corXY.bootstrap", + combinations=c("all", "sequential"), max.inter.t, chunkn, chunk.i, nperm=199, ncores=1){ + + if(ncores>1){ + require(parallel) + require(doParallel) + cl <- makeForkCluster(ncores, outfile="" ) + registerDoParallel(cl) + `%myinfix%` <- `%dopar%` + } else { + `%myinfix%` <- `%do%` + } + myfunction <- get(myfunction) ## calculate corr between species composition matrix and traits species <- read_delim(species.path, delim="\t") %>% as.data.frame() traits <- read_delim(traits.path, delim="\t") %>% - as.data.frame() + as.data.frame() %>% +### TEMPORARY FOR TESTING! + dplyr::select(1:11) traits <- traits %>% @@ -35,39 +83,134 @@ Mesobromion <- function(species.path, traits.path, output, myfunction="get.corXY #temporary ### Use only a subset of traits #dplyr::select(LeafArea:Disp.unit.leng) - ## create list of indices for each combination of traits up to a max number of interactions - n.traits <- ncol(traits) - allcomb.t <- NULL - for(n.inter in 1:max.inter.t){ - allcomb.t <- c(allcomb.t, combn(1:n.traits, n.inter, simplify=F)) - } - nall <- length(allcomb.t) - - ## Divide in chunks if requested - if(chunkn>1 & !is.na(chunk.i)){ - print(paste("divide in", chunkn, "chunks and run on chunk n.", chunk.i)) + if(combinations=="all") { + ## create list of indices for each combination of traits up to a max number of interactions + n.traits <- ncol(traits) + allcomb.t <- NULL + for(n.inter in 1:max.inter.t){ + allcomb.t <- c(allcomb.t, combn(1:n.traits, n.inter, simplify=F)) + } + nall <- length(allcomb.t) + + ## Divide in chunks if requested + if(chunkn>1 & !is.na(chunk.i)){ + print(paste("Test all Combo - divide in", chunkn, "chunks and run on chunk n.", chunk.i)) indices <- 1:length(allcomb.t) chunks <- split(indices, sort(indices%%chunkn)) allcomb.t <- allcomb.t[chunks[[chunk.i]]] } - print(paste("Running on", length(allcomb.t),"out of", nall, "combinations")) - names.t <- unlist(lapply(allcomb.t, paste, collapse="_")) + print(paste("Running on", length(allcomb.t),"out of", nall, "combinations")) + #names.t <- unlist(lapply(allcomb.t, paste, collapse="_")) + + print("Start main loop") + corXY.output <- NULL + corXY.output <- foreach(i=1:length(allcomb.t), .combine=rbind) %myinfix% { + tt <- unlist(allcomb.t[i]) + corXY.output <- rbind(corXY.output, + get.corXY.bootstrap(comm=species, trait=traits, + trait.sel=tt, bootstrap=nperm)) + } + save(corXY.output, file = paste(output, "_.RData", sep="")) + } + - print("Start main loop") -# cor.obs <- NA -# cor.bootstr <- matrix(NA, nrow=length(allcomb.t), ncol=nperm, -# dimnames = list(paste("t", names.t, sep=""), paste("p",1:nperm, sep=""))) -# cor.perm <- matrix(NA, nrow=length(allcomb.t), ncol=nperm, -# dimnames = list(paste("t", names.t, sep=""), paste("p",1:nperm, sep=""))) - - corXY.output <- NULL - for(i in 1:length(allcomb.t)) { - tt <- unlist(allcomb.t[i]) - #bootstrap species matrix -# set.seed(1984) - corXY.output <- rbind(corXY.output, - get.corXY.bootstrap(comm=species, trait=traits, - trait.sel=tt, bootstrap=nperm)) - save(corXY.output, file = paste(output, "_", chunk.i, ".RData", sep="")) + if(combinations=="sequential"){ + corXY.output <- NULL + for(nround in 1:max.inter.t){ + ## select combination of traits based on best + if(nround==1) { + allcomb.t <- lapply(1:ncol(traits), function(x){return(x)}) + nall <- length(allcomb.t) + } else { + best.split <- as.numeric(unlist(strsplit(best, "_"))) + max.inter.t <- nround-1 + #n.traits <- ncol(traits) + list.traits <- as.numeric(traits.sign.alone) + list.traits <- list.traits[-which(list.traits %in% best.split)] ## list of remaining traits without best + if(length(list.traits) < max.inter.t) { + save(corXY.output, file = paste(output, ".RData", sep="")) + if(ncores>1) {stopCluster(cl)} + stop("Tested all combo of significant traits") + } + allcomb.t <- NULL + #for(n.inter in 1:max.inter.t){ + allcomb.t <- c(allcomb.t, combn(list.traits, max.inter.t, simplify=F)) + #} + #add best to all combo + allcomb.t <- lapply(allcomb.t, function(x){return(c(best.split, x))}) + nall <- length(allcomb.t) + } + + ## Divide in chunks if requested + if(chunkn>1 & !is.na(chunk.i)){ + print(paste("nround", nround, "--- divide in", chunkn, "chunks and run on chunk n.", chunk.i)) + indices <- 1:length(allcomb.t) + chunks <- split(indices, sort(indices%%chunkn)) + } else {chunks <- list(1:length(allcomb.t))} + #names.t <- unlist(lapply(allcomb.t, paste, collapse="_")) + + ## start main foreach loop + print(paste("Start main loop - round", nround)) + corXY.output <- foreach(ch=chunks, .combine=rbind) %myinfix% { + allcomb.i <- allcomb.t[unlist(ch)] + print(paste("Running on", length(allcomb.i),"out of", nall, "combinations")) + + for(i in 1:length(allcomb.i)){ + tt <- unlist(allcomb.i[i]) + corXY.output <- rbind(corXY.output, + get.corXY.bootstrap(comm=species, trait=traits, + trait.sel=tt, bootstrap=nperm)) + } + return(corXY.output) + } + #Intermediate save + #save(corXY.output, file = paste(output, "_", nround, ".RData", sep="")) + + print(paste("Summarize after round", nround, "and find best, significant trait combos")) + corXY.ci <- get.ci(corXY.output) + ##get best or new best + if(nround==1) { + traits.sign.alone <- corXY.ci %>% + filter(ntraits==1) %>% + filter(sign_plus==T) %>% + pull(Trait.comb) + if(length(traits.sign.alone)==0) { + stop("No trait is significant taken singularly")} + print(paste("Traits significant taken singularly:", paste(traits.sign.alone, collapse=" "))) + best <- get.best(corXY.ci, N=1) + best.row <- corXY.ci %>% + filter(Trait.comb==best) + upper <- best.row$q975 + lower <- best.row$q025 + print(paste("new best at nround=", nround, "is trait combination", best)) + } + + if(nround>1){ + better <- corXY.ci %>% + filter(ntraits==nround) %>% + filter(q025>upper) %>% + arrange(desc(Coef.obs)) %>% + slice(1) %>% + pull(Trait.comb) + + if(length(better>0)){ + new.best.row <- corXY.ci %>% + #rowwise() %>% + #mutate(nmatching= sum(unlist(strsplit(Trait.comb, "_")) %in% + # unlist(strsplit(better, "_")), + # na.rm=T)) %>% + #ungroup() %>% + #filter(nmatching==nround) %>% + filter(Trait.comb==better) + upper <- new.best.row$q975 + lower <- new.best.row$q025 + best <- better + print(paste("new best at round=", nround, "is trait combination", best)) + } else {print(paste("no new best at round=", nround))} + } + } + save(corXY.output, file = paste(output, ".RData", sep="")) + } + if(ncores>1){stopCluster(cl)} } diff --git a/99_HIDDEN_functions.R b/99_HIDDEN_functions.R index 8ca71e6b29097ddc929d6b0882ad053de63575b2..1e01fcb40966c86570e80e4d1759ed6f6554df18 100644 --- a/99_HIDDEN_functions.R +++ b/99_HIDDEN_functions.R @@ -95,7 +95,7 @@ get.corXY.bootstrap <- function(comm, traits, trait.sel="all", bootstrap=199){ corXY <- rbind(corXY, data.frame(Trait.comb=lab.tmp, bootstr.n=b, Test="RD", Coef.obs=RD.tmp, Coef.perm=RD.perm.tmp)) - print(paste("trait", paste0(ii, collapse="_"), "perm", b)) + if(b %in% round(seq(1,bootstrap, length.out=10))){print(paste("trait", paste0(ii, collapse="_"), "perm", b))} } return(corXY) } diff --git a/cli_01b.r b/cli_01b.r index de8fbe042371cbef8da324746e1fd61953ac5c99..1396a6bab4131cc8e693c2c9590d66bd95cd7789 100644 --- a/cli_01b.r +++ b/cli_01b.r @@ -6,12 +6,22 @@ library(optparse) default.max.inter.t <- 1 default.nperm <- 99 +default.ncores <- 1 # ------------------------------------------------------------------------------ # parsing arguments # ------------------------------------------------------------------------------ options <- list ( + make_option( + opt_str = c("-c", "--ncores"), + dest = "ncores", + type = "integer", + default = default.ncores, + help = paste0("number of CPU cores to use, defaults to ", default.ncores), + metavar = "4" + ), + make_option( opt_str = c("-m", "--max.inter.t"), dest = "max.inter.t", @@ -65,10 +75,12 @@ species.path <- cli$args[1] traits.path <- cli$args[2] output <- cli$args[3] myfunction <- cli$args[4] +combinations <- cli$args[5] max.inter.t <- cli$options$max.inter.t chunkn <- cli$options$chunkn chunk.i <- cli$options$chunk.i nperm <- cli$options$nperm +ncores <- cli$options$ncores # ------------------------------------------------------------------------------ # actual program @@ -76,6 +88,8 @@ nperm <- cli$options$nperm source("01b_MesobromionCluster.R") -Mesobromion(species.path, traits.path, output, myfunction, max.inter.t, chunkn, chunk.i, nperm) - +Mesobromion(species.path, traits.path, output, myfunction, combinations, max.inter.t, + chunkn, chunk.i, nperm) +Mesobromion(species.path, traits.path, output, myfunction, combinations, max.inter.t, + chunkn, chunk.i, nperm, ncores) diff --git a/session.R b/session.R index cf3d70006660235e535ce6f9b46ab6b983da5a99..9f8962ae448b200dd671fed33868d809e8e88514 100644 --- a/session.R +++ b/session.R @@ -1,12 +1,16 @@ -source("01b_MesobromionCluster.R") species.path <- "_data/Mesobromion/species.out.10perc.txt" traits.path <- "_data/Mesobromion/traits.out.10perc.txt" output <- "test" myfunction <- "get.corXY.bootstrap" -max.inter.t <- 1 -chunkn <- 40 -chunk.i <- 1 -nperm <- 3 +max.inter.t <- 4 +chunkn <- 1 +chunk.i <- NA +nperm <- 25 +ncores <- 10 +combinations <- "sequential" -Mesobromion(species.path, traits.path, output, myfunction, max.inter.t, chunkn, chunk.i, nperm) +source("01b_MesobromionCluster.R") +#Mesobromion(species.path, traits.path, output, myfunction, max.inter.t, chunkn, chunk.i, nperm) +Mesobromion(species.path, traits.path, output, myfunction, combinations, max.inter.t, + chunkn, chunk.i, nperm, ncores) diff --git a/submit_01b.sh b/submit_01b.sh index 8b002580b9f54800f13361d09a7ad7c4ee4c22d2..0cd1fc46af6035d5b69f32275fc35209fd67fe22 100644 --- a/submit_01b.sh +++ b/submit_01b.sh @@ -3,17 +3,17 @@ #$ -S /bin/bash #$ -N HIDDEN -#$ -o /work/$USER/$JOB_NAME-$JOB_ID-$TASK_ID.log +#$ -o /work/$USER/$JOB_NAME-$JOB_ID.log #$ -j y -#$ -l h_rt=00:30:00:00 +#$ -l h_rt=00:45:00:00 #$ -l h_vmem=1G #$ -pe smp 1-1 #$ -cwd -#$ -t 1-1 +#$ -t 24-32 module load R @@ -22,11 +22,12 @@ output=/data/splot/HIDDEN/output/HIDDEN Rscript \ cli_01b.r \ - --max.inter.t 2 \ - --chunk.i $SGE_TASK_ID \ - --chunkn 300 \ + --max.inter.t 20 \ + --chunk.i NA \ + --chunkn 32 \ --nperm 999 \ /data/splot/HIDDEN/species.out.10perc.txt \ /data/splot/HIDDEN/traits.out.10perc.txt \ "$output" \ - "get.corXY.bootstrap" + "get.corXY.bootstrap" \ + "sequential"