Skip to content
Snippets Groups Projects
Commit ce501916 authored by Francesco Sabatini's avatar Francesco Sabatini
Browse files

Data preparation, exclude plots without enough sp with traits

parent 1f761692
Branches
No related tags found
No related merge requests found
setwd("/data/sPlot/users/Helge/HIDDEN") setwd("/data/sPlot/users/Helge/HIDDEN")
library(tidyverse) library(tidyverse)
library(SYNCSA) library(SYNCSA)
library(vegan) library(vegan)
library(abind) library(abind)
library(ade4) library(ade4)
source("99_HIDDEN_functions.R")
#library(MatrixCorrelation) #library(MatrixCorrelation)
### Import data ### Import data
...@@ -39,12 +40,7 @@ colnames(Ei) <- paste0("Env", 1:2) ...@@ -39,12 +40,7 @@ colnames(Ei) <- paste0("Env", 1:2)
Ei$Env3 <- Ei$Env1*Ei$Env2 Ei$Env3 <- Ei$Env1*Ei$Env2
Ei <- (as.matrix(Ei)) Ei <- (as.matrix(Ei))
#### run beals
#W.beals <- as.data.frame(beals(Wi, include=T, type=2))
#W.beals.d <- dist(W.beals)
#W.beals <- W.beals[,which(colSums(W.beals)>0)]
#W.beals <- cbind(W.beals, matrix(0, nrow=200, ncol=7))
#X.syn.out <- cbind(syn.out$matrices$X, matrix(0, nrow=200, ncol=7))
## create list of indices for each combination of traits up to a max number of interactions ## create list of indices for each combination of traits up to a max number of interactions
...@@ -69,10 +65,7 @@ colnames(comb.list) <- c("trait", "env") ...@@ -69,10 +65,7 @@ colnames(comb.list) <- c("trait", "env")
get.corXE(comm=Wi, envir=Ei, traits=Bi) ##Run on simulated data
##Run on observed data
corXY <- NULL corXY <- NULL
for(i in 1:length(allcomb.t)){ for(i in 1:length(allcomb.t)){
tt <- unlist(allcomb.t[i]) tt <- unlist(allcomb.t[i])
...@@ -120,10 +113,6 @@ Bi.perm <- lapply(1:nperm, FUN=function(x){ ...@@ -120,10 +113,6 @@ Bi.perm <- lapply(1:nperm, FUN=function(x){
## corXY on permuted ## corXY on permuted
corXY.perm <- matrix(NA, nrow=length(allcomb.t), ncol=nperm, dimnames = list(paste("t", names.t, sep=""), corXY.perm <- matrix(NA, nrow=length(allcomb.t), ncol=nperm, dimnames = list(paste("t", names.t, sep=""),
paste("p",1:nperm, sep=""))) paste("p",1:nperm, sep="")))
...@@ -134,33 +123,10 @@ for(i in 1:length(allcomb.t)){ ...@@ -134,33 +123,10 @@ for(i in 1:length(allcomb.t)){
print(i) print(i)
} }
## transform obs to SES ## transform obs to SES
corXY.perm.df <- as.data.frame.table(corXY.perm) %>% corXY.perm.df <- get.SES(corXY, corXY.perm, stat="RV") %>%
rename(Trait.comb=Var1, perm=Var2, coef=Freq ) %>%
group_by(Trait.comb) %>%
left_join(corXY %>%
filter(Test=="RV") %>%
dplyr::select(-pvalue, -Test) %>%
mutate(Trait.comb=paste0("t", Trait.comb)) %>%
rename(obs=Coef),
by=c("Trait.comb")) %>%
summarize(q15 = quantile(coef, probs = 0.15865),
q50 =quantile(coef, .5),
q84 = quantile(coef, 0.84135),
mean.perm=mean(coef),
sd.perm=sd(coef),
greater.obs=sum( (abs(coef)>=abs(obs))/n())) %>%
left_join(corXY %>%
filter(Test=="RV") %>%
dplyr::select(-pvalue, -Test) %>%
mutate(Trait.comb=paste0("t", Trait.comb)) %>%
rename(obs=Coef),
by=c("Trait.comb")) %>%
mutate(SES.np= ifelse(obs>=q50, (obs-q50)/(q84-q50), (obs-q50)/(q50-q15))) %>%
mutate(SES=(obs-mean.perm)/sd.perm) %>%
arrange(desc(SES.np)) %>%
mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>%
mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm))) %>%
rowwise() %>% rowwise() %>%
mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) #%>% mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) #%>%
# arrange(nvar) %>% # arrange(nvar) %>%
...@@ -204,23 +170,8 @@ for(i in 1:length(allcomb.t)){ ...@@ -204,23 +170,8 @@ for(i in 1:length(allcomb.t)){
} }
#### Not updated to Asymmetrical/non parametric SES --- TRANSFORM TO FUNCTION!
## transform obs to SES ## transform obs to SES
corTE.perm.df <- as.data.frame.table(corTE.perm) %>% corTE.perm.df <- get.SES(corTE, corTE.perm, stat="RV") %>%
rename(Trait.comb=Var1, perm=Var2, Env.comb=Var3, coef=Freq ) %>%
group_by(Trait.comb, Env.comb) %>%
summarize(mean.perm=mean(coef), sd.perm=sd(coef)) %>%
left_join(corTE %>%
filter(Test=="RV") %>%
dplyr::select(-pvalue, -Test) %>%
mutate(Trait.comb=paste0("t", Trait.comb)) %>%
mutate(Env.comb=paste0("e", Env.comb)) %>%
rename(obs=Coef),
by=c("Trait.comb", "Env.comb")) %>%
mutate(SES=(obs-mean.perm)/sd.perm) %>%
arrange(desc(SES)) %>%
mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>%
mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm))) %>%
rowwise() %>% rowwise() %>%
mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) %>% mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) %>%
mutate(nenv=length(unlist(str_split(Env.comb, "_")))) %>% mutate(nenv=length(unlist(str_split(Env.comb, "_")))) %>%
...@@ -292,21 +243,7 @@ for(i in 1:length(allcomb.t)){ ...@@ -292,21 +243,7 @@ for(i in 1:length(allcomb.t)){
} }
## transform obs to SES ## transform obs to SES
corXE.perm.df <- as.data.frame.table(corXE.perm) %>% corXE.perm.df <- get.SES(corXE, corXE.perm, stat="RV") %>%
rename(Trait.comb=Var1, perm=Var2, Env.comb=Var3, coef=Freq ) %>%
group_by(Trait.comb, Env.comb) %>%
summarize(mean.perm=mean(coef), sd.perm=sd(coef)) %>%
left_join(corXE %>%
filter(Test=="RV") %>%
dplyr::select(-pvalue, -Test) %>%
mutate(Trait.comb=paste0("t", Trait.comb)) %>%
mutate(Env.comb=paste0("e", Env.comb)) %>%
rename(obs=Coef),
by=c("Trait.comb", "Env.comb")) %>%
mutate(SES=(obs-mean.perm)/sd.perm) %>%
arrange(desc(SES)) %>%
mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>%
mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm))) %>%
rowwise() %>% rowwise() %>%
mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) %>% mutate(nvar=length(unlist(str_split(Trait.comb, "_")))) %>%
mutate(nenv=length(unlist(str_split(Env.comb, "_")))) %>% mutate(nenv=length(unlist(str_split(Env.comb, "_")))) %>%
......
setwd("/data/sPlot/users/Helge/HIDDEN")
library(tidyverse) library(tidyverse)
library(foreign)
#setwd("c:\\Daten\\iDiv3\\sPlot\\ValerioPillar\\IAVS-Lyon\\Data\\") #choose your working directory source("99_HIDDEN_functions.R")
#### 1. traits data #### 1. traits data
...@@ -13,7 +14,12 @@ traits0 <- read_delim("_data/Mesobromion/traits3.txt", delim =";", col_names = T ...@@ -13,7 +14,12 @@ traits0 <- read_delim("_data/Mesobromion/traits3.txt", delim =";", col_names = T
mutate(species=gsub(pattern=" agg | x | spec$| agg$| s | Sec | ", replacement=" ", x=species)) %>% mutate(species=gsub(pattern=" agg | x | spec$| agg$| s | Sec | ", replacement=" ", x=species)) %>%
mutate(species=gsub(pattern=" $", replacement = "", x = species)) %>% mutate(species=gsub(pattern=" $", replacement = "", x = species)) %>%
mutate(species=ifelse(is.na(word(species, 1, 2)), species, word(species, 1, 2))) mutate(species=ifelse(is.na(word(species, 1, 2)), species, word(species, 1, 2)))
str(traits) #907 obs. of 85 variables: str(traits0) #907 obs. of 85 variables:
#keep only traits with >=88 completeness
traits0 <- traits0 %>%
dplyr::select_if(~mean(!is.na(.)) >= 0.88) #907 obs 79 variables
## Import traits from TRY and match to species ## Import traits from TRY and match to species
load("/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.tree.Rdata") load("/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.tree.Rdata")
...@@ -23,19 +29,18 @@ alltry <- TRY.all.mean.sd.3.by.genus.species.tree %>% ...@@ -23,19 +29,18 @@ alltry <- TRY.all.mean.sd.3.by.genus.species.tree %>%
traits <- traits0 %>% traits <- traits0 %>%
ungroup() %>% ungroup() %>%
dplyr::select(species, species0) %>% #dplyr::select(species, species0) %>%
left_join(alltry %>% left_join(alltry %>%
rename(species=StandSpeciesName), rename(species=StandSpeciesName),
by="species") %>% by="species") %>%
filter(!is.na(LeafArea.mean)) filter(!is.na(LeafArea.mean))
dim(traits) #[1] 805 19 dim(traits) #[1] 805 97
##### 2. Header Data ##### 2. Header Data
library(foreign)
env0 <- read_delim("_data/Mesobromion/GVRD_MES2_site.csv", delim = ",") env0 <- read_delim("_data/Mesobromion/GVRD_MES2_site.csv", delim = ",")
str(env) #6868 obs. of 6 variables: str(env0) #6868 obs. of 6 variables:
set.seed(1984) set.seed(1984)
header <- "/data/sPlot/users/Francesco/Project_11/Germany/_data/tvhabita.dbf" header <- "/data/sPlot/users/Francesco/Project_11/Germany/_data/tvhabita.dbf"
...@@ -47,8 +52,9 @@ env.all <- env0 %>% ...@@ -47,8 +52,9 @@ env.all <- env0 %>%
filter(!is.na(LAT)) %>% filter(!is.na(LAT)) %>%
filter(!(LAT==0 | LON==0)) filter(!(LAT==0 | LON==0))
env <- env.all %>% env <- env.all
sample_frac(0.1) #Take 10% subset #env <- env.all %>%
# sample_frac(0.1) #Take 10% subset
...@@ -59,19 +65,88 @@ species0 <- read.table("_data/Mesobromion/GVRD_Mes2_veg1.csv", sep = ",", header ...@@ -59,19 +65,88 @@ species0 <- read.table("_data/Mesobromion/GVRD_Mes2_veg1.csv", sep = ",", header
dim(species0) #6868 obs. of 907 variables: dim(species0) #6868 obs. of 907 variables:
rownames(species0) <- env0$RELEVE_NR rownames(species0) <- env0$RELEVE_NR
## select only plots already selected in env
species <- env %>% species <- env %>%
dplyr::select(RELEVE_NR) %>% dplyr::select(RELEVE_NR) %>%
left_join(species0 %>% left_join(species0 %>%
mutate(RELEVE_NR=env0$RELEVE_NR), mutate(RELEVE_NR=env0$RELEVE_NR),
by="RELEVE_NR") %>% by="RELEVE_NR") %>%
column_to_rownames("RELEVE_NR")# %>%
#dplyr::select(traits$species0)
dim(species) # [1] 5810 907
releve08trait <- species %>%
rownames_to_column("RELEVE_NR") %>%
reshape2::melt(.id="RELEVE_NR") %>%
rename(species0=variable, pres=value) %>%
as.tbl() %>%
filter(pres>0) %>%
arrange(RELEVE_NR) %>%
## attach traits
left_join(traits %>%
dplyr::select(-species), by="species0") %>%
mutate_at(.vars = vars(PR_STAT_Indigen:Wood.vessel.length.mean),
.funs = list(~if_else(is.na(.),0,1) * pres)) %>%
group_by(RELEVE_NR) %>%
summarize_at(.vars= vars(PR_STAT_Indigen:Wood.vessel.length.mean),
.funs = list(~mean(.))) %>%
dplyr::select(RELEVE_NR, order(colnames(.))) %>%
reshape2::melt(id.vars="RELEVE_NR", value.name="trait.coverage") %>%
group_by(RELEVE_NR) %>%
summarize(ntraits08=sum(trait.coverage>=0.8)) %>%
#select only those traits where at least 75 traits have a coverage of >0.8 of the species
#filter(ntraits08==95) %>% dim()
pull(RELEVE_NR)
set.seed(1984)
releve08trait.samp <- sample(releve08trait, round(length(releve08trait)/10), replace=F)
species <- species %>%
rownames_to_column("RELEVE_NR") %>%
filter(RELEVE_NR %in% releve08trait.samp) %>%
column_to_rownames("RELEVE_NR") %>% column_to_rownames("RELEVE_NR") %>%
dplyr::select(traits$species0) dplyr::select(traits %>% pull(species0)) %>%
#select only species occurring it at least one plot
dplyr::select(names(which(colSums(.)!=0)) )
env <- env %>%
filter(RELEVE_NR %in% releve08trait.samp)
traits <- traits %>%
dplyr::select(-species) %>%
dplyr::select(species0, everything()) %>%
filter(species0 %in% colnames(species))
dim(species) #581 508
dim(traits) #508 96
dim(env) #581 8
## Select only plots where >90% of species have trait info [TRY]
# releve08trait <- species %>%
# rownames_to_column("RELEVE_NR") %>%
# reshape2::melt(.id="RELEVE_NR") %>%
# rename(species0=variable, pres=value) %>%
# filter(pres>0) %>%
# arrange(RELEVE_NR) %>%
# ## attach traits
# left_join(traits %>%
# dplyr::select(-species, LeafArea.mean), by="species0") %>%
# group_by(RELEVE_NR) %>%
# summarize(trait.coverage=mean(!is.na(LeafArea.mean))) %>%
# filter(trait.coverage<0.8) %>%
# pull(RELEVE_NR)
#
dim(species) # [1] 581 805 #same as in the trait matrix
##export for Valerio ##export for Valerio
write_delim(species, path="_data/Mesobromion/species.out.10perc.txt", delim="\t") write_delim(species, path="_data/Mesobromion/species.out.10perc.txt", delim="\t")
write_delim(traits %>% dplyr::select(-species), path="_data/Mesobromion/traits.out.10perc.txt", delim="\t") write_delim(traits, path="_data/Mesobromion/traits.out.10perc.txt", delim="\t")
...@@ -79,20 +154,17 @@ write_delim(traits %>% dplyr::select(-species), path="_data/Mesobromion/traits.o ...@@ -79,20 +154,17 @@ write_delim(traits %>% dplyr::select(-species), path="_data/Mesobromion/traits.o
############################# #############################
## calculate corr between species composition matrix and traits ## calculate corr between species composition matrix and traits
library(SYNCSA) species <- read_delim("_data/Mesobromion/species.out.10perc.txt", delim="\t")
library(vegan) traits <- read_delim("_data/Mesobromion/traits.out.10perc.txt", delim="\t")
library(abind)
library(ade4)
traits <- traits %>% traits <- traits %>%
dplyr::select(-species) %>%
column_to_rownames("species0") %>% column_to_rownames("species0") %>%
rename_all(.funs=~gsub(pattern=".mean$", replacement="", x=.)) %>% rename_all(.funs=~gsub(pattern=".mean$", replacement="", x=.))# %>%
dplyr::select(PlantHeight, LeafC.perdrymass, LeafN, StemDens, Stem.cond.dens, Seed.num.rep.unit, SLA) #dplyr::select(PlantHeight, LeafC.perdrymass, LeafN, StemDens, Stem.cond.dens, Seed.num.rep.unit, SLA)
## create list of indices for each combination of traits up to a max number of interactions ## create list of indices for each combination of traits up to a max number of interactions
n.traits <- ncol(traits) n.traits <- ncol(traits)
max.inter.t <- 7 max.inter.t <- 1
allcomb.t <- NULL allcomb.t <- NULL
for(n.inter in 1:max.inter.t){ for(n.inter in 1:max.inter.t){
allcomb.t <- c(allcomb.t, combn(1:n.traits, n.inter, simplify=F)) allcomb.t <- c(allcomb.t, combn(1:n.traits, n.inter, simplify=F))
...@@ -107,8 +179,8 @@ require(doParallel) ...@@ -107,8 +179,8 @@ require(doParallel)
cl <- makeForkCluster(8, outfile="") cl <- makeForkCluster(8, outfile="")
registerDoParallel(cl) registerDoParallel(cl)
#corXY <- NULL corXY <- NULL
corXY <- foreach(i=1:length(allcomb.t), .packages=c('SYNCSA', "vegan", "ade4", "tidyverse"), .combine=rbind) %dopar% { corXY <- foreach(i=1:length(allcomb.t), .packages=c('SYNCSA', "vegan", "ade4", "tidyverse"), .combine=rbind) %do% {
#for(i in 1:length(allcomb.t)){ #for(i in 1:length(allcomb.t)){
tt <- unlist(allcomb.t[i]) tt <- unlist(allcomb.t[i])
corXY <- get.corXY(comm=species, trait=traits, trait.sel=tt, stat="RV") corXY <- get.corXY(comm=species, trait=traits, trait.sel=tt, stat="RV")
...@@ -123,6 +195,7 @@ corXY %>% arrange(Test, desc(Coef)) ...@@ -123,6 +195,7 @@ corXY %>% arrange(Test, desc(Coef))
save(corXY, file="_data/Mesobromion/corXY/corXY.RData") save(corXY, file="_data/Mesobromion/corXY/corXY.RData")
aa <- matrix.x(comm=species, trait=traits, trait.sel=tt, stat="RV")
......
...@@ -115,4 +115,44 @@ get.corXE <- function(comm, traits, envir, trait.sel="all", env.sel="all", stat= ...@@ -115,4 +115,44 @@ get.corXE <- function(comm, traits, envir, trait.sel="all", env.sel="all", stat=
data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Procrustes", Coef=prot.tmp$t0, pvalue=prot.tmp$signif)) data.frame(Trait.comb=lab.tmp, Env.comb=lab.env, Test="Procrustes", Coef=prot.tmp$t0, pvalue=prot.tmp$signif))
} }
return(corXE) return(corXE)
} }
\ No newline at end of file
### Get SES (both parametric and non parametric)
#obs.df = output from one of the get functions above
#perm.df = df traitCombination X permutations with the chosen statistic of correlation on permuteda data
get.SES <- function(obs.df, perm.df, stat="RV") {
nperm <- dim(perm.df)[2]
SES.out <- as.data.frame.table(perm.df) %>%
rename(Trait.comb=Var1, perm=Var2, coef=Freq ) %>%
group_by(Trait.comb) %>%
left_join(obs.df %>%
filter(Test==stat) %>%
dplyr::select(-pvalue, -Test) %>%
mutate(Trait.comb=paste0("t", Trait.comb)) %>%
rename(obs=Coef),
by=c("Trait.comb")) %>%
summarize(q15 = quantile(coef, probs = 0.15865),
q50 = quantile(coef, .5),
q84 = quantile(coef, 0.84135),
mean.perm=mean(coef),
sd.perm=sd(coef),
greater.than.obs=sum( (abs(coef)>=abs(obs))/n())) %>%
left_join(obs.df %>%
filter(Test==stat) %>%
dplyr::select(-pvalue, -Test) %>%
mutate(Trait.comb=paste0("t", Trait.comb)) %>%
rename(obs=Coef),
by=c("Trait.comb")) %>%
mutate(SES.np= ifelse(obs>=q50, (obs-q50)/(q84-q50), (obs-q50)/(q50-q15))) %>%
mutate(SES=(obs-mean.perm)/sd.perm) %>%
arrange(desc(SES.np)) %>%
mutate(conf.m=obs-1.65*(sd.perm/sqrt(nperm))) %>%
mutate(conf.p=obs+1.65*(sd.perm/sqrt(nperm)))
return(SES.out)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment