Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
00_Mesobromion_DataPreparation.R 14.37 KiB
setwd("/data/sPlot/users/Helge/HIDDEN")
library(tidyverse)
library(foreign)
library(gridExtra)
library(grid)
library(vegan)

source("99_HIDDEN_functions.R")

##### PART 1 ####
#### 1. traits data  #### 

traits0 <- read_delim("_data/Mesobromion/traits3.txt", delim =";", col_names = T, locale = locale(encoding = 'latin1')) %>% 
  dplyr::select(- c("PR_STAT_Indigen", "PR_STAT_Neophyt", "PR_STAT_Archaeophyt", "URBAN_urbanophob", "URBAN_maessig_urbanophob", 
                    "URBAN_urbanoneutral", "URBAN_maessig_urbanophil", "URBAN_urbanophil", "WUH_von", "WUH_bis", "ARL_c_I_von", "ARL_c_I_bis", 
                    "BL_ANAT_hydromorph")) %>%  #empty trait
  mutate(species0=species) %>% 
  rowwise() %>% 
  # quick and dirty clean up names
  mutate(species=gsub(pattern="_", replacement = " ", x = species)) %>% 
  mutate(species=gsub(pattern=" agg | x | spec$| agg$| s | Sec |  ", replacement=" ", x=species)) %>% 
  mutate(species=gsub(pattern=" $", replacement = "", x = species)) %>% 
  mutate(species=ifelse(is.na(word(species, 1, 2)), species, word(species, 1, 2)))
dim(traits0) #907 obs. of  75 variables:



#keep only traits with >=88 completeness
traits0 <- traits0 %>% 
  dplyr::select_if(~mean(!is.na(.)) >= 0.88) # 907 x 67





### Transform binary traits to 0-1
traits.asym.binary <- c('LEB_F_Makrophanerophyt','LEB_F_Nanophanerophyt',
                        'LEB_F_Hemikryptophyt','LEB_F_Geophyt','LEB_F_Hemiphanerophyt','LEB_F_Therophyt',
                        'LEB_F_Hydrophyt','LEB_F_Pseudophanerophyt','LEB_F_Chamaephyt',
                        'LEB_D_plurienn_pollakanth','LBE_D_plurienn_hapaxanth','LEB_D_annuell',
                        'LEB_D_bienn','V_VER_absent','V_VER_Wurzelspross','V_VER_Ausläufer',
                        'V_VER_Rhizom','V_VER_Innovationsknopse.mit.Wurzelknolle',
                        'V_VER_Innovationsknospe.mit.Speicherwurzel','V_VER_Ausläuferknolle',
                        'V_VER_Brutsprösschen','V_VER_Fragmentation','V_VER_Turio','V_VER_Sprossknolle',
                        'V_VER_phyllogener_Spross','V_VER_Rhizompleiokorm','V_VER_Zwiebel',
                        'V_VER_Ausläuferrhizom','V_VER_Ausläuferzwiebel','V_VER_Bulbille',
                        'ROS_T_rosettenlose.Pflanzen','ROS_T_Halbrosettenpflanze','ROS_T_Ganzrosettenpflanzen',
                        'BL_AUSD_immergrün','BL_AUSD_sommergrün','BL_AUSD_vorsommergrün',
                        'BL_AUSD_überwinternd_grün','BL_ANAT_skleromorph','BL_ANAT_mesomorph',
                        'BL_ANAT_hygromorph','BL_ANAT_hydromorph','BL_ANAT_blattsukkulent','BL_ANAT_helomorph',
                        'BL_FORM_gelappt_gefiedert_gefingert_mehrfach','BL_FORM_Vollblatt_Normalblatt',
                        'BL_FORM_Grasblatt_Langblatt','BL_FORM_nadelförmig','BL_FORM_röhrig',
                        'BL_FORM_schuppenförmig','BL_FORM_schwertförmig','REPR_T_Samen_Sporen',
                        'REPR_T_vegetativ','BLU_KL_WIND','BLU_KL_POLLEN',
                        'BLU_KL_NEKTAR_HONIG_INSEKTEN','STRAT_T_C','STRAT_T_CR','STRAT_T_CS',
                        'STRAT_T_CSR','STRAT_T_R','STRAT_T_S','STRAT_T_SR')

traits0 <- traits0 %>% 
  mutate_at(.vars=vars(any_of(traits.asym.binary)), 
            .funs=~(.>0)*1)


## Import traits from TRY and match to species
load("/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.tree.Rdata")
alltry <- TRY.all.mean.sd.3.by.genus.species.tree %>% 
  dplyr::select(!ends_with(".sd")) %>% 
  dplyr::select(StandSpeciesName, LeafArea.mean:Wood.vessel.length.mean) %>% 
  dplyr::select(-Wood.vessel.length.mean, -StemDens.mean, -Stem.cond.dens.mean) %>% 
  rename_all(.funs=~gsub(pattern=".mean$", replacement="", x=.))
all.traits <- traits0 %>% 
  ungroup() %>% 
  #dplyr::select(species, species0) %>% 
  left_join(alltry %>% 
              rename(species=StandSpeciesName), 
            by="species") 
traits <- all.traits %>% 
  filter(!is.na(LeafArea))
dim(all.traits) #[1] 907  81
dim(traits) #[1] 805  2



##### 2. Header Data #### 
env0 <- read_delim("_data/Mesobromion/GVRD_MES2_site.csv", delim = ",")
str(env0) #6868 obs. of  6 variables:

set.seed(1984)
header   <- "/data/sPlot/users/Francesco/Project_11/Germany/_data/tvhabita.dbf"
env  <- env0 %>% 
  left_join(foreign::read.dbf(header) %>% 
              as.data.frame() %>% 
              dplyr::select(RELEVE_NR, LAT, LON),
            by="RELEVE_NR") %>% 
  filter(!is.na(LAT)) %>%
  filter(!(LAT==0 | LON==0)) 

env.all <- env



### 3. Import species data #### 
# columns in species correspond to those in env
# there is no PlotObservationID (yet)
species0 <- read.table("_data/Mesobromion/GVRD_Mes2_veg1.csv", sep = ",", header=T)
dim(species0) #6868 obs. of  907 variables:
rownames(species0) <- env0$RELEVE_NR

## select only plots already selected in env
species <- env %>% 
  dplyr::select(RELEVE_NR) %>% 
  left_join(species0 %>%
              mutate(RELEVE_NR=env0$RELEVE_NR), 
            by="RELEVE_NR") %>% 
  column_to_rownames("RELEVE_NR") %>% 
  ## delete species not appearing in any plot
  dplyr::select(colnames(.)[which(colSums(.)!=0)])
#dplyr::select(traits$species0)

dim(species) # [1] 5810 881

releve08trait <- species %>% 
  rownames_to_column("RELEVE_NR") %>% 
  reshape2::melt(.id="RELEVE_NR") %>% 
  rename(species0=variable, pres=value) %>% 
  as.tbl() %>% 
  filter(pres>0) %>% 
  arrange(RELEVE_NR)  %>% 
  ## attach traits 
  left_join(traits %>% 
              dplyr::select(-species), by="species0") %>% 
  mutate_at(.vars = vars(LEB_F_Makrophanerophyt:Disp.unit.leng), 
            .funs = list(~if_else(is.na(.),0,1) * pres)) %>%
  group_by(RELEVE_NR) %>%
  summarize_at(.vars= vars(LEB_F_Makrophanerophyt:Disp.unit.leng),
               .funs = list(~mean(.))) %>%
  dplyr::select(RELEVE_NR, order(colnames(.))) %>%
  reshape2::melt(id.vars="RELEVE_NR", value.name="trait.coverage") %>% 
  group_by(RELEVE_NR) %>% 
  summarize(ntraits08=mean(trait.coverage>=0.8)) %>% 
  #select only those releves where we have a coverage of >0.8 for all traits
  filter(ntraits08==1) %>% 
  pull(RELEVE_NR)

set.seed(1984)
releve08trait.samp <- sample(releve08trait, round(length(releve08trait)/10), replace=F)
species <- species %>% 
  rownames_to_column("RELEVE_NR") %>% 
  filter(RELEVE_NR %in% releve08trait.samp) %>% 
  #column_to_rownames("RELEVE_NR") %>% 
  #as.tbl() %>% 
  dplyr::select(RELEVE_NR, one_of(traits$species0))


env <- env %>% 
  filter(RELEVE_NR %in% releve08trait.samp)

traits <- traits %>% 
  dplyr::select(-species) %>% 
  dplyr::select(species0, everything()) %>% 
  filter(species0 %in% colnames(species))


recode.traits <- function(x){
  ## recode traits to numeric
  #recode binary traits to nominal
  
  robust.mean <- function(x1,x2=NA,x3=NA,x4=NA){
    x <- c(x1,x2,x3,x4)
    if(any(!is.na(x))){mean(x, na.rm=T)} else {NA}
  }
  colnames(x)[which(colnames(x)=="LBE_D_plurienn_hapaxanth")] <- "LEB_D_plurienn_hapaxanth"
  x <- x %>% 
    mutate(BLU_KL_NEKTAR_HONIG_INSEKTEN=replace(BLU_KL_NEKTAR_HONIG_INSEKTEN, 
                                                list=species0 %in% c("Convallaria_majalis", "Maianthemum_bifolium"),
                                                values=0))
  
  x <- x %>% 
    as.tbl() %>% 
    dplyr::select(-starts_with("BL_FORM"), -starts_with("REPR_T"), -starts_with("BLU_KL"), -starts_with("STRAT_T"), -starts_with("BL_AUSD")) %>% 
    left_join(x %>% 
                dplyr::select(species0, `BL_AUSD_immergrün`:`BL_AUSD_überwinternd_grün`, REPR_T_Samen_Sporen:STRAT_T_SR) %>% 
                gather(key=Trait, value="value", -species0) %>% 
                separate(Trait, into = c("Trait", "Organ", "Level"), sep = "_", extra = "merge") %>% 
                unite(Trait, Trait, Organ) %>% 
                filter(value==1) %>% 
                dplyr::select(-value) %>% 
                spread(Trait, Level) %>% 
                mutate_at(.vars=vars(BL_AUSD:STRAT_T), 
                          .funs=~as.factor(.)), 
              by="species0")
  
  out <- x %>% 
    dplyr::select(-starts_with("BL_ANAT"), -starts_with("LEB_D"), -starts_with("ROS_T")) %>% 
    left_join(x %>% 
                dplyr::select(species0, starts_with("BL_ANAT")) %>% 
                mutate(BL_ANAT_helomorph=ifelse(BL_ANAT_helomorph==1, 1, NA)) %>% 
                mutate(BL_ANAT_hygromorph=ifelse(BL_ANAT_hygromorph==1, 2, NA)) %>% 
                mutate(BL_ANAT_mesomorph=ifelse(BL_ANAT_mesomorph==1, 3, NA)) %>% 
                mutate(BL_ANAT_skleromorph=ifelse(BL_ANAT_skleromorph==1, 4, NA)) %>% 
                rowwise() %>% 
                mutate(BL_ANAT=robust.mean(BL_ANAT_helomorph, BL_ANAT_hygromorph, BL_ANAT_mesomorph, BL_ANAT_skleromorph)) %>% 
                ungroup() %>% 
                dplyr::select(species0, BL_ANAT, BL_ANAT_blattsukkulent), 
              by="species0") %>% 
    left_join(x %>% 
                dplyr::select(species0, starts_with("LEB_D"))  %>%
                rowwise() %>% 
                mutate(LEB_D_plurienn=max(LEB_D_plurienn_pollakanth + LEB_D_plurienn_hapaxanth, na.rm=T)) %>% 
                ungroup() %>% 
                mutate(LEB_D_plurienn=ifelse(LEB_D_plurienn==1, 3, NA)) %>% 
                mutate(LEB_D_annuell=ifelse(LEB_D_annuell==1, 1, NA)) %>% 
                mutate(LEB_D_bienn =ifelse(LEB_D_bienn==1, 2, NA)) %>% 
                rowwise() %>% 
                mutate(LEB_D=robust.mean(LEB_D_annuell, LEB_D_bienn, LEB_D_plurienn)) %>% 
                ungroup() %>% 
                dplyr::select(species0, LEB_D), 
              by="species0") %>% 
    left_join(x %>% 
                dplyr::select(species0, starts_with("ROS_T")) %>% 
                mutate(ROS_T=ROS_T_Ganzrosettenpflanzen) %>% 
                mutate(ROS_T=replace(ROS_T, 
                                     list=ROS_T_Halbrosettenpflanze==1, 
                                     values=0.5)) %>% 
                mutate(ROS_T=replace(ROS_T, 
                                     list=ROS_T_rosettenlose.Pflanzen==1, 
                                     values=0)) %>% 
                dplyr::select(species0, ROS_T), 
              by="species0") 
  return(out)
}
traits <- recode.traits(traits)
 

### ordered factors


dim(species) #558 783
dim(traits) #783 53
dim(env) #558 8




######4.  Extract Environmental Factors ######
### CHELSA
library(raster)
library(sp)
Temp <- raster("../../Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_01.tif")
Prec <- raster("../../Francesco/Ancillary_Data/CHELSA/CHELSA_bio10_12.tif")
PHIPHOX <- raster("../../Francesco/Ancillary_Data/ISRIC/PHIHOX_M_sl2_250m_ll.tif")
ORCDRC <- raster("../../Francesco/Ancillary_Data/ISRIC/ORCDRC_M_sl2_250m_ll.tif")


env.sp <- SpatialPointsDataFrame(coords=env %>% dplyr::select(LON, LAT), 
                                 data=env %>% dplyr::select(-LON, -LAT), 
                                 proj4string = raster::crs("+proj=longlat +datum=WGS84 +no_defs")) 
env <- env %>% 
  mutate(Temp=raster::extract(Temp, env.sp)/10) %>% 
  mutate(Prec=raster::extract(Prec, env.sp)) %>% 
  mutate(PHIPHOX=raster::extract(PHIPHOX, env.sp)/10) %>% 
  mutate(ORCDRC=raster::extract(ORCDRC, env.sp)) 



## Select only plots where >90% of species have trait info [TRY]
# releve08trait <- species %>% 
#   rownames_to_column("RELEVE_NR") %>% 
#   reshape2::melt(.id="RELEVE_NR") %>% 
#   rename(species0=variable, pres=value) %>% 
#   filter(pres>0) %>% 
#   arrange(RELEVE_NR)  %>% 
#   ## attach traits 
#   left_join(traits %>% 
#               dplyr::select(-species, LeafArea.mean), by="species0") %>% 
#   group_by(RELEVE_NR) %>% 
#   summarize(trait.coverage=mean(!is.na(LeafArea.mean))) %>% 
#   filter(trait.coverage<0.8) %>% 
#   pull(RELEVE_NR)
# 






##export for Valerio
write_delim(species, path="_data/Mesobromion/species.out.10perc.txt", delim="\t")
write_delim(traits, path="_data/Mesobromion/traits.out.10perc.cov.txt", delim="\t")
write_delim(env, path="_data/Mesobromion/env.10perc.cov.txt", delim="\t")


## version without missing species
empty <- which(colSums(species[,-1])==0)
traits_nozero <- traits[-empty,]
species_nozero <- species[,-(empty+1)]

write_delim(species_nozero , path="_data/Mesobromion/species.out.10perc_nozero.txt", delim="\t")
write_delim(traits_nozero, path="_data/Mesobromion/traits.out.10perc_nozero.txt", delim="\t")

write_delim(species %>% 
              dplyr::select(RELEVE_NR), 
            path="_derived/Mesobromion/ReleveList.txt", delim="\t")

### version with cover values ### 4/08/2020
species.proz <- read_csv("_data/Mesobromion/GVRD_Mes2_proz.csv", locale = locale(encoding = 'latin1'))
species.proz$RELEVE_NR <- env0$RELEVE_NR
species.proz <- species.proz %>% 
  filter(RELEVE_NR %in% (species %>% pull(RELEVE_NR))) %>% 
  #transform percentage cover to relative.cover
  mutate(sumVar = rowSums(.[-1])) %>% 
  mutate_at(.vars=vars(-RELEVE_NR), 
            .funs=~./sumVar) %>% 
  dplyr::select(-sumVar) %>% 
  ## delete species not appearing in any plot
  dplyr::select(colnames(.)[which(colSums(.)!=0)])
dim(species.proz) #[1] 558 533
write_delim(species.proz , path="_data/Mesobromion/species.out.10perc.cov.txt", delim="\t")

## align traits to species in species.proz
traits.proz <- recode.traits(all.traits)
traits.proz <- data.frame(species=colnames(species.proz)[-1] ) %>% 
  ### clean species names in both data.frames
  mutate(species0=as.character(species)) %>% 
  rowwise() %>% 
  # quick and dirty clean up names
  mutate(species=gsub(pattern="_agg_|_x_|_spec$|_agg$|_s_|_Sec_|__", replacement="_", x=species)) %>% 
  mutate(species=gsub(pattern="_$", replacement = "", x = species)) %>% 
  mutate(species=ifelse(is.na(word(species, 1, 2)), species, word(species, 1, 2))) %>% 
  ungroup() %>% 
  left_join(traits.proz %>% 
              mutate(species=species0) %>% 
              rowwise() %>% 
              mutate(species=gsub(pattern="_agg_|_x_|_spec$|_agg$|_s_|_Sec_|__", replacement="_", x=species)) %>% 
              mutate(species=gsub(pattern="_$", replacement = "", x = species)) %>% 
              mutate(species=ifelse(is.na(word(species, 1, 2)), species, word(species, 1, 2))) %>% 
              ungroup() %>% 
              dplyr::select(-species0),
            by="species") %>% 
  dplyr::select(-species) %>% 
  dplyr::select(`species0`, everything())

##check for species without trait info
traits.proz %>% 
  filter_at(.vars=vars(-"species0"), 
            all_vars(is.na(.))) %>% 
  dim() ## [1] 16 53 # species with no trait info
write_delim(traits.cov, path="_data/Mesobromion/traits.out.10perc.cov.txt", delim="\t")

#### CORRELATION BETWEEN FUZZY WEIGHTED AND BEALS MATRICES
#### WAS RUN IN THE CLUSTER WITH THE SCRIPT 01b_MesobromionCluster.R