diff --git a/00_testing.R b/00_testing.R index 0c454db23dcbd2aad5254a6a9a4992d3af1b50e4..490bd32be3308e8b653f03c1dee73ea7c1f9b1e4 100644 --- a/00_testing.R +++ b/00_testing.R @@ -1,10 +1,11 @@ - setwd("/data/sPlot/users/Helge/HIDDEN") library(tidyverse) library(SYNCSA) library(vegan) library(abind) library(ade4) +source("99_HIDDEN_functions.R") + #library(MatrixCorrelation) ### Import data @@ -39,12 +40,7 @@ colnames(Ei) <- paste0("Env", 1:2) Ei$Env3 <- Ei$Env1*Ei$Env2 Ei <- (as.matrix(Ei)) -#### run beals -#W.beals <- as.data.frame(beals(Wi, include=T, type=2)) -#W.beals.d <- dist(W.beals) -#W.beals <- W.beals[,which(colSums(W.beals)>0)] -#W.beals <- cbind(W.beals, matrix(0, nrow=200, ncol=7)) -#X.syn.out <- cbind(syn.out$matrices$X, matrix(0, nrow=200, ncol=7)) + ## create list of indices for each combination of traits up to a max number of interactions @@ -69,10 +65,7 @@ colnames(comb.list) <- c("trait", "env") -get.corXE(comm=Wi, envir=Ei, traits=Bi) - - -##Run on observed data +##Run on simulated data corXY <- NULL for(i in 1:length(allcomb.t)){ tt <- unlist(allcomb.t[i]) @@ -120,10 +113,6 @@ Bi.perm <- lapply(1:nperm, FUN=function(x){ - - - - ## corXY on permuted corXY.perm <- matrix(NA, nrow=length(allcomb.t), ncol=nperm, dimnames = list(paste("t", names.t, sep=""), paste("p",1:nperm, sep=""))) @@ -134,33 +123,10 @@ for(i in 1:length(allcomb.t)){ print(i) } + + ## transform obs to SES -corXY.perm.df <- as.data.frame.table(corXY.perm) %>% - rename(Trait.comb=Var1, perm=Var2, coef=Freq ) %>% - group_by(Trait.comb) %>% - left_join(corXY %>% - filter(Test=="RV") %>% - dplyr::select(-pvalue, -Test) %>% - mutate(Trait.comb=paste0("t", Trait.comb)) %>% - rename(obs=Coef), - by=c("Trait.comb")) %>% - summarize(q15 = quantile(coef, probs = 0.15865), - q50 =quantile(coef, .5), - q84 = quantile(coef, 0.84135), - mean.perm=mean(coef), - sd.perm=sd(coef), - greater.obs=sum( (abs(coef)>=abs(obs))/n())) %>% - left_join(corXY %>% - filter(Test=="RV") %>% - dplyr::select(-pvalue, -Test) %>% - mutate(Trait.comb=paste0("t", Trait.comb)) %>% - rename(obs=Coef), - by=c("Trait.comb")) %>% - mutate(SES.np= ifelse(obs>=q50, (obs-q50)/(q84-q50), (obs-q50)/(q50-q15))) %>% - mutate(SES=(obs-mean.perm)/sd.perm) %>% - arrange(desc(SES.np)) %>% - mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>% - mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm))) %>% +corXY.perm.df <- get.SES(corXY, corXY.perm, stat="RV") %>% rowwise() %>% mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) #%>% # arrange(nvar) %>% @@ -204,23 +170,8 @@ for(i in 1:length(allcomb.t)){ } -#### Not updated to Asymmetrical/non parametric SES --- TRANSFORM TO FUNCTION! ## transform obs to SES -corTE.perm.df <- as.data.frame.table(corTE.perm) %>% - rename(Trait.comb=Var1, perm=Var2, Env.comb=Var3, coef=Freq ) %>% - group_by(Trait.comb, Env.comb) %>% - summarize(mean.perm=mean(coef), sd.perm=sd(coef)) %>% - left_join(corTE %>% - filter(Test=="RV") %>% - dplyr::select(-pvalue, -Test) %>% - mutate(Trait.comb=paste0("t", Trait.comb)) %>% - mutate(Env.comb=paste0("e", Env.comb)) %>% - rename(obs=Coef), - by=c("Trait.comb", "Env.comb")) %>% - mutate(SES=(obs-mean.perm)/sd.perm) %>% - arrange(desc(SES)) %>% - mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>% - mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm))) %>% +corTE.perm.df <- get.SES(corTE, corTE.perm, stat="RV") %>% rowwise() %>% mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) %>% mutate(nenv=length(unlist(str_split(Env.comb, "_")))) %>% @@ -292,21 +243,7 @@ for(i in 1:length(allcomb.t)){ } ## transform obs to SES -corXE.perm.df <- as.data.frame.table(corXE.perm) %>% - rename(Trait.comb=Var1, perm=Var2, Env.comb=Var3, coef=Freq ) %>% - group_by(Trait.comb, Env.comb) %>% - summarize(mean.perm=mean(coef), sd.perm=sd(coef)) %>% - left_join(corXE %>% - filter(Test=="RV") %>% - dplyr::select(-pvalue, -Test) %>% - mutate(Trait.comb=paste0("t", Trait.comb)) %>% - mutate(Env.comb=paste0("e", Env.comb)) %>% - rename(obs=Coef), - by=c("Trait.comb", "Env.comb")) %>% - mutate(SES=(obs-mean.perm)/sd.perm) %>% - arrange(desc(SES)) %>% - mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>% - mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm))) %>% +corXE.perm.df <- get.SES(corXE, corXE.perm, stat="RV") %>% rowwise() %>% mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) %>% mutate(nenv=length(unlist(str_split(Env.comb, "_")))) %>% diff --git a/01_Mesobromion.R b/01_Mesobromion.R index 1fe51bed71e750b554bf3ea14398b73877bde722..22d7759830ee4e069be814e9c61d5ae537cb9247 100644 --- a/01_Mesobromion.R +++ b/01_Mesobromion.R @@ -1,6 +1,7 @@ +setwd("/data/sPlot/users/Helge/HIDDEN") library(tidyverse) - -#setwd("c:\\Daten\\iDiv3\\sPlot\\ValerioPillar\\IAVS-Lyon\\Data\\") #choose your working directory +library(foreign) +source("99_HIDDEN_functions.R") #### 1. traits data @@ -13,7 +14,12 @@ traits0 <- read_delim("_data/Mesobromion/traits3.txt", delim =";", col_names = T mutate(species=gsub(pattern=" agg | x | spec$| agg$| s | Sec | ", replacement=" ", x=species)) %>% mutate(species=gsub(pattern=" $", replacement = "", x = species)) %>% mutate(species=ifelse(is.na(word(species, 1, 2)), species, word(species, 1, 2))) -str(traits) #907 obs. of 85 variables: +str(traits0) #907 obs. of 85 variables: + +#keep only traits with >=88 completeness +traits0 <- traits0 %>% + dplyr::select_if(~mean(!is.na(.)) >= 0.88) #907 obs 79 variables + ## Import traits from TRY and match to species load("/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.tree.Rdata") @@ -23,19 +29,18 @@ alltry <- TRY.all.mean.sd.3.by.genus.species.tree %>% traits <- traits0 %>% ungroup() %>% - dplyr::select(species, species0) %>% + #dplyr::select(species, species0) %>% left_join(alltry %>% rename(species=StandSpeciesName), by="species") %>% filter(!is.na(LeafArea.mean)) -dim(traits) #[1] 805 19 +dim(traits) #[1] 805 97 ##### 2. Header Data -library(foreign) env0 <- read_delim("_data/Mesobromion/GVRD_MES2_site.csv", delim = ",") -str(env) #6868 obs. of 6 variables: +str(env0) #6868 obs. of 6 variables: set.seed(1984) header <- "/data/sPlot/users/Francesco/Project_11/Germany/_data/tvhabita.dbf" @@ -47,8 +52,9 @@ env.all <- env0 %>% filter(!is.na(LAT)) %>% filter(!(LAT==0 | LON==0)) -env <- env.all %>% - sample_frac(0.1) #Take 10% subset +env <- env.all +#env <- env.all %>% +# sample_frac(0.1) #Take 10% subset @@ -59,19 +65,88 @@ species0 <- read.table("_data/Mesobromion/GVRD_Mes2_veg1.csv", sep = ",", header dim(species0) #6868 obs. of 907 variables: rownames(species0) <- env0$RELEVE_NR +## select only plots already selected in env species <- env %>% dplyr::select(RELEVE_NR) %>% left_join(species0 %>% mutate(RELEVE_NR=env0$RELEVE_NR), by="RELEVE_NR") %>% + column_to_rownames("RELEVE_NR")# %>% + #dplyr::select(traits$species0) + +dim(species) # [1] 5810 907 + +releve08trait <- species %>% + rownames_to_column("RELEVE_NR") %>% + reshape2::melt(.id="RELEVE_NR") %>% + rename(species0=variable, pres=value) %>% + as.tbl() %>% + filter(pres>0) %>% + arrange(RELEVE_NR) %>% + ## attach traits + left_join(traits %>% + dplyr::select(-species), by="species0") %>% + mutate_at(.vars = vars(PR_STAT_Indigen:Wood.vessel.length.mean), + .funs = list(~if_else(is.na(.),0,1) * pres)) %>% + group_by(RELEVE_NR) %>% + summarize_at(.vars= vars(PR_STAT_Indigen:Wood.vessel.length.mean), + .funs = list(~mean(.))) %>% + dplyr::select(RELEVE_NR, order(colnames(.))) %>% + reshape2::melt(id.vars="RELEVE_NR", value.name="trait.coverage") %>% + group_by(RELEVE_NR) %>% + summarize(ntraits08=sum(trait.coverage>=0.8)) %>% + #select only those traits where at least 75 traits have a coverage of >0.8 of the species + #filter(ntraits08==95) %>% dim() + pull(RELEVE_NR) + +set.seed(1984) +releve08trait.samp <- sample(releve08trait, round(length(releve08trait)/10), replace=F) +species <- species %>% + rownames_to_column("RELEVE_NR") %>% + filter(RELEVE_NR %in% releve08trait.samp) %>% column_to_rownames("RELEVE_NR") %>% - dplyr::select(traits$species0) + dplyr::select(traits %>% pull(species0)) %>% + #select only species occurring it at least one plot + dplyr::select(names(which(colSums(.)!=0)) ) + + +env <- env %>% + filter(RELEVE_NR %in% releve08trait.samp) + +traits <- traits %>% + dplyr::select(-species) %>% + dplyr::select(species0, everything()) %>% + filter(species0 %in% colnames(species)) + +dim(species) #581 508 +dim(traits) #508 96 +dim(env) #581 8 + + +## Select only plots where >90% of species have trait info [TRY] +# releve08trait <- species %>% +# rownames_to_column("RELEVE_NR") %>% +# reshape2::melt(.id="RELEVE_NR") %>% +# rename(species0=variable, pres=value) %>% +# filter(pres>0) %>% +# arrange(RELEVE_NR) %>% +# ## attach traits +# left_join(traits %>% +# dplyr::select(-species, LeafArea.mean), by="species0") %>% +# group_by(RELEVE_NR) %>% +# summarize(trait.coverage=mean(!is.na(LeafArea.mean))) %>% +# filter(trait.coverage<0.8) %>% +# pull(RELEVE_NR) +# + + + + -dim(species) # [1] 581 805 #same as in the trait matrix ##export for Valerio write_delim(species, path="_data/Mesobromion/species.out.10perc.txt", delim="\t") -write_delim(traits %>% dplyr::select(-species), path="_data/Mesobromion/traits.out.10perc.txt", delim="\t") +write_delim(traits, path="_data/Mesobromion/traits.out.10perc.txt", delim="\t") @@ -79,20 +154,17 @@ write_delim(traits %>% dplyr::select(-species), path="_data/Mesobromion/traits.o ############################# ## calculate corr between species composition matrix and traits -library(SYNCSA) -library(vegan) -library(abind) -library(ade4) +species <- read_delim("_data/Mesobromion/species.out.10perc.txt", delim="\t") +traits <- read_delim("_data/Mesobromion/traits.out.10perc.txt", delim="\t") traits <- traits %>% - dplyr::select(-species) %>% column_to_rownames("species0") %>% - rename_all(.funs=~gsub(pattern=".mean$", replacement="", x=.)) %>% - dplyr::select(PlantHeight, LeafC.perdrymass, LeafN, StemDens, Stem.cond.dens, Seed.num.rep.unit, SLA) + rename_all(.funs=~gsub(pattern=".mean$", replacement="", x=.))# %>% + #dplyr::select(PlantHeight, LeafC.perdrymass, LeafN, StemDens, Stem.cond.dens, Seed.num.rep.unit, SLA) ## create list of indices for each combination of traits up to a max number of interactions n.traits <- ncol(traits) -max.inter.t <- 7 +max.inter.t <- 1 allcomb.t <- NULL for(n.inter in 1:max.inter.t){ allcomb.t <- c(allcomb.t, combn(1:n.traits, n.inter, simplify=F)) @@ -107,8 +179,8 @@ require(doParallel) cl <- makeForkCluster(8, outfile="") registerDoParallel(cl) -#corXY <- NULL -corXY <- foreach(i=1:length(allcomb.t), .packages=c('SYNCSA', "vegan", "ade4", "tidyverse"), .combine=rbind) %dopar% { +corXY <- NULL +corXY <- foreach(i=1:length(allcomb.t), .packages=c('SYNCSA', "vegan", "ade4", "tidyverse"), .combine=rbind) %do% { #for(i in 1:length(allcomb.t)){ tt <- unlist(allcomb.t[i]) corXY <- get.corXY(comm=species, trait=traits, trait.sel=tt, stat="RV") @@ -123,6 +195,7 @@ corXY %>% arrange(Test, desc(Coef)) save(corXY, file="_data/Mesobromion/corXY/corXY.RData") +aa <- matrix.x(comm=species, trait=traits, trait.sel=tt, stat="RV") diff --git a/99_HIDDEN_functions.R b/99_HIDDEN_functions.R index 82584f79a25fe5d22144e446e0ed43128d5beeea..774927d4f07aa31d9a0c9989ad7a38580d881d15 100644 --- a/99_HIDDEN_functions.R +++ b/99_HIDDEN_functions.R @@ -115,4 +115,44 @@ get.corXE <- function(comm, traits, envir, trait.sel="all", env.sel="all", stat= data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Procrustes", Coef=prot.tmp$t0, pvalue=prot.tmp$signif)) } return(corXE) -} \ No newline at end of file +} + + + +### Get SES (both parametric and non parametric) +#obs.df = output from one of the get functions above +#perm.df = df traitCombination X permutations with the chosen statistic of correlation on permuteda data + +get.SES <- function(obs.df, perm.df, stat="RV") { + nperm <- dim(perm.df)[2] + SES.out <- as.data.frame.table(perm.df) %>% + rename(Trait.comb=Var1, perm=Var2, coef=Freq ) %>% + group_by(Trait.comb) %>% + left_join(obs.df %>% + filter(Test==stat) %>% + dplyr::select(-pvalue, -Test) %>% + mutate(Trait.comb=paste0("t", Trait.comb)) %>% + rename(obs=Coef), + by=c("Trait.comb")) %>% + summarize(q15 = quantile(coef, probs = 0.15865), + q50 = quantile(coef, .5), + q84 = quantile(coef, 0.84135), + mean.perm=mean(coef), + sd.perm=sd(coef), + greater.than.obs=sum( (abs(coef)>=abs(obs))/n())) %>% + left_join(obs.df %>% + filter(Test==stat) %>% + dplyr::select(-pvalue, -Test) %>% + mutate(Trait.comb=paste0("t", Trait.comb)) %>% + rename(obs=Coef), + by=c("Trait.comb")) %>% + mutate(SES.np= ifelse(obs>=q50, (obs-q50)/(q84-q50), (obs-q50)/(q50-q15))) %>% + mutate(SES=(obs-mean.perm)/sd.perm) %>% + arrange(desc(SES.np)) %>% + mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>% + mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm))) + return(SES.out) +} + + +