Skip to content
Snippets Groups Projects
Select Git revision
  • e03551b4519c2bc80125d8b1dbd91d8c2bc2d8f3
  • master default protected
  • beta
  • dev
  • andrewssobral-patch-1
  • update
  • thomas-fork
  • 2.0
  • v3.2.0
  • v3.1.0
  • v3.0
  • bgslib_py27_ocv3_win64
  • bgslib_java_2.0.0
  • bgslib_console_2.0.0
  • bgslib_matlab_win64_2.0.0
  • bgslib_qtgui_2.0.0
  • 2.0.0
  • bgs_console_2.0.0
  • bgs_matlab_win64_2.0.0
  • bgs_qtgui_2.0.0
  • v1.9.2_x86_mfc_gui
  • v1.9.2_x64_java_gui
  • v1.9.2_x86_java_gui
23 results

run_demo.sh

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    Trait-Environment-Relationships2.R 83.63 KiB
    getwd()
    ###### Traits #####
    # TRY 2.0
    load("data/TRY.all.mean.sd.2.Rdata")
    str(TRY.all.mean.sd.2)
    dim(TRY.all.mean.sd.2)
    # 40791 obs. of  38 variables
    # tests
    any(is.na(TRY.all.mean.sd.2$SLA.sd)) #T
    
    # TRY 3.0
    TRY3.0 <- read.csv("data/Export_sPlot_2016_09_12.csv")
    str(TRY3.0)
    levels(TRY3.0$Species)[100] # "Abronia villosa", just an example
    which(TRY3.0$Species==levels(TRY3.0$Species)[100]) # an example of one species with several (7) observations
    TRY3.0[which(TRY3.0$Species==levels(TRY3.0$Species)[100]),c(1:19)] # trait values differ between observations
    any(is.na(TRY3.0[,c(2:19)])) #F
    any(TRY3.0[,c(2:19)]==0) #F
    
    for (i in 2:19){
      TRY3.0[,i] <- log(TRY3.0[,i])
    }
    any(is.na(TRY3.0[,c(2:19)])) #F
    
    TRY.all.n.3 <- aggregate(TRY3.0[,1], by=list(TRY3.0$Species), FUN=length)
    head(TRY.all.n.3)
    str(TRY.all.n.3) #59319 obs. of  2 variables:
    names(TRY.all.n.3)
    names(TRY.all.n.3) <- c("StandSpeciesName","n")
    
    TRY.all.mean.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$Species), FUN=mean)
    str(TRY.all.mean.3) #59319 obs. of  19 variables:
    names(TRY.all.mean.3)
    ' [1] "Group.1" "X1"      "X4"      "X11"     "X13"     "X14"     "X15"     "X18"     "X26"     "X27"     "X47"    
    [12] "X50"     "X56"     "X78"     "X138"    "X163"    "X169"    "X237"    "X282"  '
    names(TRY.all.mean.3) <- c("StandSpeciesName","LeafArea.mean", "StemDens.mean", "SLA.mean", "LeafC.perdrymass.mean",
                               "LeafN.mean","LeafP.mean", "PlantHeight.mean", "SeedMass.mean", "Seed.length.mean",
                               "LDMC.mean","LeafNperArea.mean", "LeafNPratio.mean", "Leaf.delta.15N.mean", 
                               "Seed.num.rep.unit.mean", "Leaffreshmass.mean", "Stem.cond.dens.mean", 
                               "Disp.unit.leng.mean", "Wood.vessel.length.mean")
    any(is.na(TRY.all.mean.3$SLA.mean)) #F
    
    TRY.all.sd.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$Species), FUN=sd)
    str(TRY.all.sd.3) #59319 obs. of  19 variables:
    names(TRY.all.sd.3)
    names(TRY.all.sd.3) <- c("StandSpeciesName","LeafArea.sd", "StemDens.sd", "SLA.sd", "LeafC.perdrymass.sd",
                               "LeafN.sd","LeafP.sd", "PlantHeight.sd", "SeedMass.sd", "Seed.length.sd",
                               "LDMC.sd","LeafNperArea.sd", "LeafNPratio.sd", "Leaf.delta.15N.sd", 
                               "Seed.num.rep.unit.sd", "Leaffreshmass.sd", "Stem.cond.dens.sd", 
                               "Disp.unit.leng.sd", "Wood.vessel.length.sd")
    any(is.na(TRY.all.sd.3$SLA.sd)) #T
    TRY.all.mean.sd.3 <- data.frame(TRY.all.n.3,TRY.all.mean.3[,c(2:19)],TRY.all.sd.3[,c(2:19)])
    str(TRY.all.mean.sd.3) #59319 obs. of  38 variables
    'data.frame:	59319 obs. of  38 variables:
    $ StandSpeciesName       : Factor w/ 59319 levels "Aa sp","Abarema adenophora",..: 1 2 3 4 5 6 7 8 9 10 ...
    $ n                      : int  1 4 1 1 38 4 86 7 5 95 ...
    $ LeafArea.mean          : num  6.61 6.96 7.04 7.11 6.75 ...
    $ StemDens.mean          : num  -0.822 -0.427 -0.57 -0.619 -0.543 ...
    $ SLA.mean               : num  2.24 2.38 2.41 2.56 2.65 ...
    $ LeafC.perdrymass.mean  : num  6.17 6.26 6.19 6.17 6.18 ...
    $ LeafN.mean             : num  2.92 3.19 3.25 3.41 3.23 ...
    $ LeafP.mean             : num  -0.0143 0.3403 -0.1557 -0.0999 -0.217 ...
    $ PlantHeight.mean       : num  -0.498 2.874 2.544 2.293 2.394 ...
    $ SeedMass.mean          : num  -4.08 4.49 4.27 4.23 4.13 ...
    $ Seed.length.mean       : num  -0.317 2.021 1.88 1.92 1.912 ...
    $ LDMC.mean              : num  -1.471 -0.798 -0.938 -1.087 -1.111 ...
    $ LeafNperArea.mean      : num  0.896 0.834 0.838 0.804 0.617 ...
    $ LeafNPratio.mean       : num  2.54 3.26 3.49 3.55 3.57 ...
    $ Leaf.delta.15N.mean    : num  0.38 1.258 0.925 -0.169 0.719 ...
    $ Seed.num.rep.unit.mean : num  9.56 1.64 1.39 1.18 1.32 ...
    $ Leaffreshmass.mean     : num  -1.3303 -0.2516 -0.0799 -0.0889 -0.4985 ...
    $ Stem.cond.dens.mean    : num  3.67 2.14 1.94 2.08 2.06 ...
    $ Disp.unit.leng.mean    : num  -0.555 2.916 2.676 2.689 2.842 ...
    $ Wood.vessel.length.mean: num  6.13 5.73 6 6.06 6.21 ...
    $ LeafArea.sd            : num  NA 0.0303 NA NA 0.0556 ...
    $ StemDens.sd            : num  NA 0.1101 NA NA 0.0123 ...
    $ SLA.sd                 : num  NA 0.1063 NA NA 0.0695 ...
    $ LeafC.perdrymass.sd    : num  NA 0.0086 NA NA 0.00511 ...
    $ LeafN.sd               : num  NA 0.0638 NA NA 0.0113 ...
    $ LeafP.sd               : num  NA 0.3936 NA NA 0.0232 ...
    $ PlantHeight.sd         : num  NA 0.0626 NA NA 0.1806 ...
    $ SeedMass.sd            : num  NA 0.0315 NA NA 0.0787 ...
    $ Seed.length.sd         : num  NA 0.0457 NA NA 0.0193 ...
    $ LDMC.sd                : num  NA 0.0553 NA NA 0.0573 ...
    $ LeafNperArea.sd        : num  NA 0.098 NA NA 0.068 ...
    $ LeafNPratio.sd         : num  NA 0.291 NA NA 0.0304 ...
    $ Leaf.delta.15N.sd      : num  NA 0.1651 NA NA 0.0398 ...
    $ Seed.num.rep.unit.sd   : num  NA 0.211 NA NA 0.259 ...
    $ Leaffreshmass.sd       : num  NA 0.0748 NA NA 0.0571 ...
    $ Stem.cond.dens.sd      : num  NA 0.1389 NA NA 0.0349 ...
    $ Disp.unit.leng.sd      : num  NA 0.0927 NA NA 0.0293 ...
    $ Wood.vessel.length.sd  : num  NA 0.1466 NA NA 0.0271 ...'
    
    plot(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.3)
    #summary(lm(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2))
    #abline(lm(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2),col="blue", lwd=2)
    #sqrt(0.3415)
    
    #TRY.all.mean.sd.3 <- TRY.all.mean.sd.3[,-39]
    save(TRY.all.mean.sd.3, file="TRY.all.mean.sd.3.Rdata")
    
    ###### Backbone #####
    load("data/backbone.v.2.splot.try3.Rdata")
    # alternatively:
    #sPlot3 <- read.csv(paste("data/", "backbone.v.2.splot.try3.csv", 
    #                         sep = ""), sep=",", quote = "\"'")
    str(backbone.splot.try3) 
    # data.frame':	122901 obs. of  33 variables:
    
    ###### Harmonized plot data ######
    DT <- read.csv("data/plots_without_unified_cover_scales_20160120a.csv")
    DT <- read.csv("data/plots_without_unified_cover_scales_20160120a.csv")
    library(data.table)
    DT <- data.table(DT)
    colnames(DT)
    colnames(DT)[9] <- "Cover"
    setkey(DT,PlotObservationID)
    
    ###### Header data #######
    splot.header <- read.csv("data/sPlot_14_04_2015_coord.csv")
    str(splot.header)
    'data.frame:	1117940 obs. of  3 variables:
    $ PlotObservationID: int  1 2 3 4 5 6 7 8 9 10 ...
    $ Longitude        : num  -154 -154 -154 -154 -154 ...
    $ Latitude         : num  62.4 62.4 62.4 62.4 62.4 ...'
    
    ##### Bioclim data ####
    splot.bioclim <- read.csv(paste("data/","sPlot.csv", sep = ""), sep = ";", na.strings=c("","NA"))
    str(splot.bioclim)
    #data.frame':	1117382 obs. of  52 variables:
    
    ####################
    ##### Matching #####
    #index6 <- match(TRY.all.mean.sd.2$StandSpeciesName,backbone.splot.try3$names.sPlot.TRY)
    index6 <- match(TRY.all.mean.sd.3$StandSpeciesName,backbone.splot.try3$names.sPlot.TRY)
    any(is.na(index6)) #T
    # not all species in the trait list are also in the backbone
    TRY.all.mean.sd.3$StandSpeciesName[is.na(index6)]
    # 16 species in TRY are not in the backbone
    '[1] Albizia greveanaÊ              AlnusÊgymnothyrsus             Carapanaœba sp                
    [4] ChionanthusÊsp                 Clerodendron nudiflorumÊ       Copa’ba sp                    
    [7] Copa’fera sp                   Corynocarpus laevigatusÊ       Enterospermum madagascarienseÊ
    [10] Hierochlo‘ odorata             Ing‡ sp                        Norrisia maiorÊ               
    [13] NothofagusÊbrassospora         QuercusÊsp                     ShoreaÊanthoshorea            
    [16] ShoreaÊsp  '
    TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Albizia greveanaÊ",] # n=10
    TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Albizia greveana",] # 0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Albizia greveanaÊ"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Albizia greveana"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="AlnusÊgymnothyrsus"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Alnus gymnothyrsus"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Carapanaœba sp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Carapanauba sp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Carapanaúba sp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="ChionanthusÊsp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Chionanthus sp"] #1
    TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="ChionanthusÊsp",] # n=10
    TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Chionanthus sp",] # n=15
    DT$species[DT$species=="Chionanthus sp" & !is.na(DT$species)] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Clerodendron nudiflorumÊ"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Clerodendron nudiflorum"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Copa’ba sp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Copaiba sp"] #1
    TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Copa’ba sp",] # n=0, strange, there should be at least one
    TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Copaiba sp",] # n=1
    DT$species[DT$species=="Copaiba sp" & !is.na(DT$species)] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Copa’fera sp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Copaifera sp"] #1
    DT$species[DT$species=="Copaifera sp" & !is.na(DT$species)] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Corynocarpus laevigatusÊ"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Corynocarpus laevigatus"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Enterospermum madagascarienseÊ"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Enterospermum madagascariense"] #1
    DT$species[DT$species=="Enterospermum madagascariense" & !is.na(DT$species)] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Hierochlo‘ odorata"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Hierochloe odorata"] #1
    TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Hierochlo‘ odorata",] # n=0, strange, there should be at least one
    TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Hierochloe odorata",] # n=63
    DT$species[DT$species=="Hierochloe odorata" & !is.na(DT$species)] #545
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Ing‡ sp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Inga sp"] #1
    DT$species[DT$species=="Inga sp" & !is.na(DT$species)] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Norrisia maiorÊ"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Norrisia maior"] #1
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="NothofagusÊbrassospora"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Nothofagus brassospora"] #1
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="QuercusÊsp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Quercus sp"] #1
    DT$species[DT$species=="Quercus sp" & !is.na(DT$species)] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="ShoreaÊanthoshorea"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Shorea anthoshorea"] #1
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="ShoreaÊsp"] #0
    backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Shorea sp"] #1
    DT$species[DT$species=="Shorea sp" & !is.na(DT$species)] #0
    # none of the species is in sPlot, thus we can take them out
    
    ### data cleaning ###
    length(TRY.all.mean.sd.3$StandSpeciesName[is.na(TRY.all.mean.sd.3$StandSpeciesName)])
    # No record is NA!
    #TRY.all.mean.sd.2[which(is.na(TRY.all.mean.sd.2$StandSpeciesName)),]
    # record no. 7947 has StandSpeciesName = NA
    #TRY.all.mean.sd.2 <- TRY.all.mean.sd.2[-which(is.na(TRY.all.mean.sd.2$StandSpeciesName)),]
    
    TRY.all.mean.sd.3$species <- backbone.splot.try3$name.short.correct[index6]
    dim(TRY.all.mean.sd.3) # 59319    39
    
    
    index1 <- match(DT$PlotObservationID, splot.header$PlotObservationID)
    any(is.na(index1)) #F
    length(index1) #  24241941
    length(unique(index1[!is.na(index1)])) #1117940
    
    index2 <- match(splot.header$PlotObservationID,DT$PlotObservationID)
    any(is.na(index2)) #F
    length(index2) #   1117940
    length(unique(index2[!is.na(index2)])) #  1117940
    dim(splot.header)
    # 1117940      3
    identical(as.character(splot.header$PlotObservationID),as.character(DT$PlotObservationID[index2]))
    # T
    
    any(is.na(DT$species)) #T
    # that should not be the case!
    length(DT$species[!is.na(DT$species)]) #24221565
    length(DT$species) #24241941
    24241941-24221565 # 20376 NA names!!!
    # it gives nonsense to match them with traits
    index7 <- match(DT$species,TRY.all.mean.sd.3$species)
    length(index7) #24241941
    length(index7[!is.na(index7)]) 
    # 21839463, with TRY3.0
    # 21040927, with TRY2.0
    # 19841429, with first TRY version
    24241941 - 21839463 # 2402478 entries have no traits
    (24241941 - 2402478)/24241941*100 
    # which are 90.08958% of all entries
    # previously: 86.79555% with TRY2.0
    # previously: 18.15247 % and with first TRY version
    
    # reduce DT, and splot.header
    #splot.header <- splot.header[!is.na(DT$species[index2]),]
    #dim(splot.header) #1117476       3
    DT <- DT[!is.na(DT$species),]
    index7 <- match(DT$species,TRY.all.mean.sd.3$species)
    length(index7) #24221565
    
    
    ### CWM ###
    mean(TRY.all.mean.sd.3$SLA.mean) # 2.634988
    min(TRY.all.mean.sd.3$SLA.mean) # 0.03072317
    max(TRY.all.mean.sd.3$SLA.mean) # 5.182365
    # example
    DT$trait <- NA
    DT$trait <- TRY.all.mean.sd.3$SLA.mean[index7]
    length(DT$trait[!is.na(DT$trait)]) # 21819087
    length(DT$trait[is.na(DT$trait)]) #   2402478
    str(DT)
    
    colnames(TRY.all.mean.sd.3)
    CWM2 <-  DT[,list(CWM.SLA = weighted.mean(trait,Cover,na.rm = T)),by=PlotObservationID]
    # I have checked that we do not need to prefilter only those entries that have a trait value
    # this works fine
    str(CWM2)
    dim(CWM2) #1117898
    which(colnames(TRY.all.mean.sd.3)=="LeafArea.mean") # 3
    which(colnames(TRY.all.mean.sd.3)=="Wood.vessel.length.mean") # 20
    CWM <- array(NA,c(dim(CWM2)[1],18),dimnames=list(CWM2$PlotObservationID,
                                                  colnames(TRY.all.mean.sd.3)[3:20]))
    rm(CWM2)
    for (i in 1:18){
      DT$trait <- NA
      DT$trait <- TRY.all.mean.sd.3[index7,i+2]
      CWM[,i] <-  DT[,list(CWM.trait= weighted.mean(trait,Relative.cover,na.rm = T)),by=PlotObservationID]$CWM.trait
      
    }
    str(CWM) #num [1:1117898, 1:18]
    CWM[1:20,]
    tail(CWM)
    write.csv(CWM,file = "CWM_TRY3.csv", row.names = T)
    
    CWM <- read.csv("C://Daten//iDiv2//splot2//CWM_TRY3.csv")
    str(CWM) #1117898 obs. of  19 variables
    dimnames(CWM)[[1]] <- CWM$X
    CWM <- CWM[,-1]
    tail(CWM)
    
    ##### MAP ####
    index3 <- match(dimnames(CWM)[[1]],splot.header$PlotObservationID)
    length(index3) #1117898
    any(is.na(index3)) # F
    #length(index3[is.na(index3)]) #422
    splot.header2 <- splot.header[index3,]
    'CWM1$Longitude <- splot.header$Longitude[index10]
    CWM1$Latitude <- splot.header$Latitude[index10]
    '
    str(CWM) # num [1:1117898, 1:18]
    CWM <- CWM[!(is.na(splot.header2$Longitude)|is.na(splot.header2$Latitude)|is.na(CWM[,"SLA.mean"])),]
    str(CWM) # num [1:1112287, 1:18]
    1112287/1117898*100 # 99.49808% of plots have traits, Long and Lat
    any(is.na(CWM)) #F
    # match header again
    index4 <- match(dimnames(CWM)[[1]],splot.header$PlotObservationID)
    length(index4) #1112287
    any(is.na(index4)) # F
    splot.header2 <- splot.header[index4,]
    # now in the same sequence as CWM
    
    library(raster)
    CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
    coords <- cbind(splot.header2$Longitude, splot.header2$Latitude)
    coords <- SpatialPoints(coords, proj4string=CRSlonlat)
    map.CWM <- SpatialPointsDataFrame(coords, as.data.frame(CWM), proj4string=CRSlonlat)
    str(map.CWM)
    
    library(maptools)
    data(wrld_simpl)
    plot(wrld_simpl)
    points(coordinates(map.CWM), cex=0.1, col="red", pch=16)
    # sPlot2withSLA.png
    max(coordinates(map.CWM)[,1]) # 179.5901
    max(coordinates(map.CWM)[,2]) # 80.14912
    min(coordinates(map.CWM)[,1]) # -162.7414
    min(coordinates(map.CWM)[,2]) # -64.78
    
    
    r <- raster(nrows=180, ncols=360, xmn=-180, xmx=180, ymn=-90, ymx=90)
    str(r)
    raster.map.SLA <- rasterize(map.CWM,r,map.CWM$SLA.mean, run="mean")
    str(raster.map.SLA)
    plot(wrld_simpl)
    plot(raster.map.SLA, add=T)
    # SLAgridded1.png
    
    library(rworldmap)
    raster.map.SLA.grid <- as(raster.map.SLA, 'SpatialGridDataFrame')
    library(RColorBrewer)
    # Set a color ramp
    rf <- brewer.pal(8, 'Spectral')
    par(mar=c(0, 0, 0, 8) + 0.1)
    mapParams <- mapGriddedData(raster.map.SLA.grid,addLegend=F, 
                                numCats = 8, colourPalette=rf)
    str(mapParams)
    mapParams$cutVector
    # addMapLegend(plottedData=raster.map.SLA.grid,legendShrink=0.5)
    #do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.5,
    #                legendIntervals="data", legendMar = 2 ) )
    #do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.3,
    #                         legendIntervals="page", legendMar = 4, legendShrink=1.7,horizontal=F))
    do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.3,
                             legendIntervals="page", legendMar =2, horizontal=F))
    
    #round(as.numeric(exp(mapParams$cutVector)),1)
    mtext(round(as.numeric(exp(mapParams$cutVector)),1),side=4,line=-1.5, 
          at=seq(-200,230, by=51.3), las=2)
    #mtext("ln(SLA)",side=1,line=0, at=230, las=1)
    #mtext("ln(SLA)",side=4,line=-2,at=-170, las=2)
    # SLAgridded2.png
    par(mar=c(5, 4, 4, 2) + 0.1)
    
    ############################################################
    # Regression with Bioclim
    ############################################################
    'BIO1 = Annual Mean Temperature
    BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp))
    BIO3 = Isothermality (BIO2/BIO7) (* 100)
    BIO4 = Temperature Seasonality (standard deviation *100)
    BIO5 = Max Temperature of Warmest Month
    BIO6 = Min Temperature of Coldest Month
    BIO7 = Temperature Annual Range (BIO5-BIO6)
    BIO8 = Mean Temperature of Wettest Quarter
    BIO9 = Mean Temperature of Driest Quarter
    BIO10 = Mean Temperature of Warmest Quarter
    BIO11 = Mean Temperature of Coldest Quarter
    BIO12 = Annual Precipitation
    BIO13 = Precipitation of Wettest Month
    BIO14 = Precipitation of Driest Month
    BIO15 = Precipitation Seasonality (Coefficient of Variation)
    BIO16 = Precipitation of Wettest Quarter
    BIO17 = Precipitation of Driest Quarter
    BIO18 = Precipitation of Warmest Quarter
    BIO19 = Precipitation of Coldest Quarter'
    
    index11 <- match(splot.header2$PlotObservationID,splot.bioclim$PLOT_ID)
    length(index11) # 1112287
    length(index11[!is.na(index11)]) # 1112287
    
    #index12 <- match(CWM2$PlotObservationID, splot.header$PlotObservationID)
    #any(is.na(index12)) #F
    str(splot.bioclim$BIO_01)
    plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
         cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
    model1 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11])
    model2 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11]+I(splot.bioclim$BIO_01[index11]^2))
    summary(model1)
    'Coefficients:
                                    Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                    2.929e+00  8.700e-04    3367   <2e-16 ***
    splot.bioclim$BIO_01[index11] -9.720e-04  8.603e-06    -113   <2e-16 ***
    Multiple R-squared:  0.01135,	Adjusted R-squared:  0.01135 
    F-statistic: 1.277e+04 on 1 and 1112285 DF,  p-value: < 2.2e-16
    '
    summary(model2)
    'Coefficients:
                                         Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                         2.791e+00  1.059e-03  2635.2   <2e-16 ***
    splot.bioclim$BIO_01[index11]       2.446e-03  1.765e-05   138.6   <2e-16 ***
    I(splot.bioclim$BIO_01[index11]^2) -1.673e-05  7.593e-08  -220.3   <2e-16 ***
    Multiple R-squared:  0.05269
    '
    abline(model1, lwd=3, col="blue")
    model2$coefficients
    x.new <- seq(-100,300,1)
    y.new <- model2$coefficients[1]+model2$coefficients[2]*x.new+model2$coefficients[3]*x.new^2
    lines(x.new,y.new, lwd=3, col="blue")
    # SLA_BIO1.png
    
    # devide in quantiles
    quantile(splot.bioclim$BIO_01[index11],probs = seq(0, 1, 1/3))
    '       0% 33.33333% 66.66667%      100% 
         -227        79        99       303 '
    # -> most points (i.e. one third) are in the temperate zone between 7.9 and 9.9°C
    x1 <- splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=79]
    model1a <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]<=79]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=79])
    summary(model1a)
    'Coefficients:
                                                                        Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                                                        2.688e+00  1.178e-03    2282   <2e-16 ***
    splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] <= 79] 2.826e-03  2.004e-05     141   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 0.382 on 372215 degrees of freedom
    Multiple R-squared:  0.05069,	Adjusted R-squared:  0.05068 
    F-statistic: 1.987e+04 on 1 and 372215 DF,  p-value: < 2.2e-16'
    model1b <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]<=99]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=99])
    summary(model1b)
    'Coefficients:
                                                                        Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                                                        2.695e+00  1.112e-03  2422.8   <2e-16 ***
    splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] <= 99] 2.709e-03  1.463e-05   185.1   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 0.3796 on 750485 degrees of freedom
    Multiple R-squared:  0.04367,	Adjusted R-squared:  0.04367 
    F-statistic: 3.427e+04 on 1 and 750485 DF,  p-value: < 2.2e-16'
    model1c <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]>99]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]>99])
    summary(model1c)
    'Coefficients:
                                                                        Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                                                        3.210e+00  2.425e-03  1323.3   <2e-16 ***
    splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] > 99] -3.417e-03  1.739e-05  -196.5   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 0.4157 on 361798 degrees of freedom
    Multiple R-squared:  0.09646,	Adjusted R-squared:  0.09646 
    F-statistic: 3.862e+04 on 1 and 361798 DF,  p-value: < 2.2e-16'
    
    plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
         cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
    x.new <- seq(-100,79,1)
    y.new <- model1a$coefficients[1]+model1a$coefficients[2]*x.new
    lines(x.new,y.new, lwd=3, col="blue")
    x.new <- seq(79,99,1)
    y.new <- model1b$coefficients[1]+model1b$coefficients[2]*x.new
    lines(x.new,y.new, lwd=3, col="green")
    x.new <- seq(99,300,1)
    y.new <- model1c$coefficients[1]+model1c$coefficients[2]*x.new
    lines(x.new,y.new, lwd=3, col="red")
    
    ### calculate a GAM
    library(gam)
    x <- splot.bioclim$BIO_01[index11]
    model3 <- gam(CWM[,"SLA.mean"]~lo(x, span=0.5))
    summary(model3)
    '    Null Deviance: 207269.5 on 1111306 degrees of freedom
    Residual Deviance: 187452.7 on 1111302 degrees of freedom
    AIC: 1175898
    Anova for Parametric Effects
                           Df Sum Sq Mean Sq F value    Pr(>F)    
    lo(x, span = 0.5)       1   3238  3238.0   19197 < 2.2e-16 ***
    Residuals         1111302 187453     0.2
    Anova for Nonparametric Effects
                      Npar Df Npar F     Pr(F)    
    (Intercept)                                   
    lo(x, span = 0.5)     2.7  37028 < 2.2e-16 ***
    '
    
    x.new <- seq(-100,300,1)
    y.new <- predict(model3, data.frame(x=x.new), se = F)
    # se=T does not work
    plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
         cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
    lines(x.new,y.new, lwd=3, col="blue")
    
    ### N to P ratio
    model4 <- lm(CWM[,"LeafNPratio.mean"]~splot.bioclim$BIO_01[index11])
    summary(model4)
    'Coefficients:
                                   Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                   2.334e+00  4.243e-04    5501   <2e-16 ***
    splot.bioclim$BIO_01[index11] 1.313e-03  4.196e-06     313   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 0.1986 on 1112285 degrees of freedom
    Multiple R-squared:  0.08096,	Adjusted R-squared:  0.08096 
    F-statistic: 9.799e+04 on 1 and 1112285 DF,  p-value: < 2.2e-16
    '
    plot(CWM[,"LeafNPratio.mean"]~splot.bioclim$BIO_01[index11],
         cex=0.05, xlab="BIO_01", ylab="CWM ln(Leaf N to P ratio)",cex.lab=1.5, cex.axis=1.5)
    abline(model4, lwd=3, col="blue")
    
    ### turn the regression around
    # does not make sense
    model5 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"SLA.mean"])
    summary(model5)
    '                   Estimate Std. Error t value Pr(>|t|)    
    (Intercept)       127.41676    0.28026   454.6   <2e-16 ***
    CWM[, "SLA.mean"] -12.97407    0.09769  -132.8   <2e-16 ***
    Multiple R-squared:  0.01562,	Adjusted R-squared:  0.01562 '
    
    model6 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"LeafNPratio.mean"])
    summary(model6)
    'Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
    (Intercept)               -72.7479     0.4591  -158.5   <2e-16 ***
    CWM[, "LeafNPratio.mean"]  67.2535     0.1883   357.2   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 42.46 on 1111305 degrees of freedom
    Multiple R-squared:  0.103,	Adjusted R-squared:  0.103 '
    
    model7 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"SLA.mean"]+CWM[,"LeafNPratio.mean"])
    summary(model7)
    '                           Estimate Std. Error t value Pr(>|t|)    
    (Intercept)               -41.59378    0.55839  -74.49   <2e-16 ***
    CWM[, "SLA.mean"]          -9.08978    0.09355  -97.17   <2e-16 ***
    CWM[, "LeafNPratio.mean"]  65.04103    0.18885  344.40   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 42.28 on 1111304 degrees of freedom
    Multiple R-squared:  0.1106,	Adjusted R-squared:  0.1106 '
    x <- splot.bioclim$BIO_01[index11]
    model8 <- lm(x~CWM[,"LeafNPratio.mean"]*CWM[,"SLA.mean"])
    summary(model8)
    'Coefficients:
                                                 Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                                 -432.2500     2.2645  -190.9   <2e-16 ***
    CWM[, "LeafNPratio.mean"]                    227.1681     0.9304   244.2   <2e-16 ***
    CWM[, "SLA.mean"]                            135.8572     0.8202   165.6   <2e-16 ***
    CWM[, "LeafNPratio.mean"]:CWM[, "SLA.mean"]  -60.2280     0.3386  -177.9   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 41.69 on 1111303 degrees of freedom
    Multiple R-squared:  0.1352,	Adjusted R-squared:  0.1352 '
    
    library(scatterplot3d) # für 3D-Plots
    s3d1 <- scatterplot3d(CWM[,"SLA.mean"],CWM[,"LeafNPratio.mean"], x, 
                          xlab="CWM ln(Leaf N to P ratio)", ylab="CWM ln(SLA)", zlab="BIO_1",
                                  pch=1,cex.symbols=0.05,highlight.3d=T,type="p")
    #fit <- lm(AnzF ~ Prop.n+Prop.ba,data=data6[data6$Sp=="Qupe",])
    s3d1$plane3d(model8)
    # does not work
    ### producing the plane with points
    min(CWM[,"SLA.mean"])
    max(CWM[,"SLA.mean"])
    x2 <- sort(rep(seq(0.1, 5, by=0.1),35))
    min(CWM[,"LeafNPratio.mean"])
    max(CWM[,"LeafNPratio.mean"])
    y2 <- rep(seq(1, 4.4, by=0.1),50)
    func <- function(x,y) { -432.2500 + 227.1681 * x + 135.8572*y -60.2280*x*y}
    z2 <- func(x2,y2)
    s3d1 <- scatterplot3d(CWM[,"SLA.mean"],CWM[,"LeafNPratio.mean"], x, 
                          xlab="CWM ln(Leaf N to P ratio)", ylab="CWM ln(SLA)", zlab="BIO_1",
                          pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=70,scale.y=0.5)
    s3d1$points3d(x2,y2,z2,col="blue", type="p", pch=20, cex=0.5)
    # 
    x <- splot.bioclim$BIO_01[index11]
    model7 <- lm(CWM[,"SLA.mean"]~x*CWM[,"LeafNPratio.mean"])
    summary(model7)
    '                              Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                  1.948e+00  9.377e-03   207.8   <2e-16 ***
    x                            1.278e-02  7.985e-05   160.0   <2e-16 ***
    CWM[, "LeafNPratio.mean"]    3.916e-01  3.856e-03   101.6   <2e-16 ***
    x:CWM[, "LeafNPratio.mean"] -5.472e-03  3.166e-05  -172.8   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 0.4213 on 1111303 degrees of freedom
    Multiple R-squared:  0.04842,	Adjusted R-squared:  0.04842 '
    min(x)
    max(x)
    x2 <- sort(rep(seq(-100, 290, by=10),35))
    min(CWM[,"LeafNPratio.mean"])
    max(CWM[,"LeafNPratio.mean"])
    y2 <- rep(seq(1, 4.4, by=0.1),40)
    func <- function(x,y) {  1.948e+00  +  1.278e-02 * x + 3.916e-01*y -5.472e-03*x*y}
    z2 <- func(x2,y2)
    s3d1 <- scatterplot3d(x,CWM[,"LeafNPratio.mean"], CWM[,"SLA.mean"], 
                          xlab="BIO_1", ylab="CWM ln(Leaf N to P ratio)", zlab="CWM ln(SLA)", 
                          pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=50,scale.y=1)
    s3d1$points3d(x2,y2,z2,col="blue", type="p", pch=20, cex=0.5)
    # final plot
    # some BIO_01 and LeafNPratio combinations do not exist
    ?cut
    x.cut <- cut(x, breaks=seq(-110, 290, by=10))
    y.cut <- cut(CWM[,"LeafNPratio.mean"],breaks=seq(0.9, 4.4, by=0.1))
    z.cut <- table(x.cut,y.cut)
    head(z.cut)
    dim(z.cut) #40 35
    z2.cut <- table(x2,y2)
    head(z2.cut)
    dim(z2.cut) #40 35
    
    #library(reshape2)
    z.cut <- melt(z.cut)
    str(z.cut) # 1400 obs. of  3 variables:
    head(z.cut)
    z2.cut <- melt(z2.cut)
    str(z2.cut) #1400 obs. of  3 variables:
    head(z2.cut)
    z2.cut$value <- 0
    z2.cut$value <- z.cut$value
    z2.cut$xy <- paste(z2.cut$x2,z2.cut$y2,sep="_")
    #z3.cut <- acast(z2.cut, x2~y2)
    #head(z3.cut)
    xy <- paste(x2,y2,sep="_")
    index20 <- match(xy,z2.cut$xy)
    values.present <- vector(mode="logical", length=length(x2))
    values.present <- z2.cut$value[index20] > 0
    table(values.present)
    'z.cut$value[z.cut$x.cut==x.cut[1] & z.cut$y.cut==y.cut[1]]
    values.present <- function(z.cut,x,y){
      z.cut$value[z.cut$x.cut==x & z.cut$y.cut==y]
    }
    x1 <- data.frame(x,x.cut, y=CWM[,"LeafNPratio.mean"],y.cut)
    head(x1)
    z1 <- apply(x1,1,FUN=values.present(z.cut, x1$x.cut, x1$y.cut))
    values.present(z.cut, x1$x.cut[1], x1$y.cut[1])
    '
    
    s3d1 <- scatterplot3d(x,CWM[,"LeafNPratio.mean"], CWM[,"SLA.mean"], 
                          xlab="BIO_1", ylab="CWM ln(Leaf N to P ratio)", zlab="CWM ln(SLA)", 
                          pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=-10,scale.y=0.8)
    s3d1$points3d(x2[values.present],y2[values.present],z2[values.present],col="blue", type="p", pch=20, cex=0.5)
    
    
    #### Collect all CWM regressions ####
    names(splot.bioclim) # from 4 (BIO_01 to 49 T_DEC
    CWM.bioclim <- array(NA,c(46,18,5),dimnames=list(names(splot.bioclim)[4:49],dimnames(CWM)[[2]],
                          c("p_lin","r2_lin","p_qua1", "p_qua2","r2_qua")))
    for (i in 1:46){
      for (j in 1:18){
        model1 <- lm(CWM[,j]~splot.bioclim[index11,i+3])
        CWM.bioclim[i,j,1] <- summary(model1)$coefficients[2,4]
        CWM.bioclim[i,j,2] <- summary(model1)$r.squared
        model2 <- lm(CWM[,j]~splot.bioclim[index11,i+3]+I(splot.bioclim[index11,i+3]^2))
        CWM.bioclim[i,j,3] <- summary(model2)$coefficients[2,4]
        CWM.bioclim[i,j,4] <- summary(model2)$coefficients[3,4]
        CWM.bioclim[i,j,5] <- summary(model2)$r.squared
      }
    }
    str(CWM.bioclim) #num [1:46, 1:18, 1:5]
    write.csv(data.frame(names(splot.bioclim)[4:49],CWM.bioclim[,,2]),file = "CWM_bioclim_r2_lin2.csv", row.names = FALSE)
    write.csv(data.frame(names(splot.bioclim)[4:49],CWM.bioclim[,,5]),file = "CWM_bioclim_r2_qua2.csv", row.names = FALSE)
    
    # for SLA
    which(CWM.bioclim[,3,2]==max(CWM.bioclim[,3,2])) # BIO_02 
    which(CWM.bioclim[,3,5]==max(CWM.bioclim[,3,5])) # BIO_02 
    which(CWM.bioclim[,,2]==max(CWM.bioclim[,,2]),arr.ind = TRUE) 
    #       row col
    #BIO_02   2   3
    CWM.bioclim[2,3,2] #0.1113908
    which(CWM.bioclim[,,5]==max(CWM.bioclim[,,5]),arr.ind = TRUE) 
    #    row col
    #PET  22   2
    CWM.bioclim[22,2,5] # 0.1407318
    names(splot.bioclim)[2+3] #"BIO_02"
    dimnames(CWM)[[2]][9] #"LeafNperArea.mean"
    plot(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11],
         cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per Area)",cex.lab=1.5, cex.axis=1.5)
    model1 <- lm(CWM[,9]~splot.bioclim[index11,2+3])
    abline(model1, lwd=3, col="blue")
    # MeanNperArea_BIO_02.png
    png(filename="MeanNperArea_BIO_02.png", width=800, height=800, units = "px", bg="white")
      plot(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11],
           cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per Area)",cex.lab=1.5, cex.axis=1.5)
      #model1 <- lm(CWM[,9]~splot.bioclim[index11,2+3])
      abline(model1, lwd=3, col="blue")
    dev.off() # creates a file
    png(filename="SLA_BIO_02.png", width=800, height=800, units = "px", bg="white")
      plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_02[index11],
        cex=0.05, xlab="BIO_02", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
      model1 <- lm(CWM[,1]~splot.bioclim[index11,2+3])
    abline(model1, lwd=3, col="blue")
    dev.off() # creates a file
    
    #### PCA of all CWM and all Bioclim variables ####
    library(vegan)
    pca1 <- rda(CWM,scale=T)
    plot(pca1)
    head(summary(pca1),100)
    'Partitioning of correlations:
                  Inertia Proportion
    Total              18          1
    Unconstrained      18          1
    
    Eigenvalues, and their contribution to the correlations 
    
    Importance of components:
                             PC1    PC2    PC3     PC4     PC5     PC6     PC7     PC8     PC9    PC10    PC11    PC12    PC13
    Eigenvalue            5.3194 3.5487 2.0227 1.37785 1.27883 1.00296 0.71963 0.66827 0.56198 0.49244 0.34211 0.26158 0.19518
    Proportion Explained  0.2955 0.1971 0.1124 0.07655 0.07105 0.05572 0.03998 0.03713 0.03122 0.02736 0.01901 0.01453 0.01084
    Cumulative Proportion 0.2955 0.4927 0.6050 0.68159 0.75264 0.80836 0.84834 0.88546 0.91668 0.94404 0.96305 0.97758 0.98842
                             PC14    PC15    PC16    PC17     PC18
    Eigenvalue            0.07591 0.05484 0.05067 0.01766 0.009314
    Proportion Explained  0.00422 0.00305 0.00281 0.00098 0.000520
    Cumulative Proportion 0.99264 0.99569 0.99850 0.99948 1.000000'
    str(summary(pca1))
    #text(summary(pca1)$species[,c(1,2)],dimnames(summary(pca1)$species)[[1]])
    traitnames <- dimnames(summary(pca1)$species)[[1]]
    traitnames <- substr(traitnames,1,nchar(traitnames)-5)
    traitcoord <- summary(pca1)$species[,c(1,2)]
    plotcoord <- summary(pca1)$sites[,c(1,2)]
    which(traitnames=="Seed.length") #9
    traitcoord[9,2] <- traitcoord[9,2]+0.3
    which(traitnames=="LDMC") #10
    traitcoord[10,2] <- traitcoord[10,2]-0.3
    ordiplot(pca1, type="n", ylim=c(-10,10))
    text(traitcoord,traitnames, col="red", cex=1)
    points(plotcoord*30, cex=0.05)
    #env1 <- envfit(pca1,splot.bioclim[index11,c(4:49)],permutations = 99)
    #with permutations = 999:
    #Error: cannot allocate vector of size 4.1 Gb
    plot(env1)
    png(filename="PCA1.png", width=1000, height=1000, units = "px", bg="white")
      ordiplot(pca1, type="n")
      text(traitcoord,traitnames)
      plot(env1)
    dev.off() # creates a file
    'rm(env1)
    rm(backbone.splot.try3)
    rm(model1)
    rm(model2)
    rm(pca1)
    rm(splot.header)
    rm(splot.header2)
    rm(traitdist)
    rm(coords)
    rm(TRY.all.mean.sd.2)
    rm(index7)
    rm(raster.map.SLA)
    rm(trait.vec)
    rm(FD)
    rm(splot.bioclim)
    '
    gc()
    
    ### produce a PCA with kernel density
    # From Díaz et al. 2016, Nature
    library(ks)
    str(plotcoord) #num [1:1111307, 1:2]
    H <- Hpi(x=plotcoord)      # optimal bandwidth estimation
    # does not work
    est<- kde(x=plotcoord, H=H, compute.cont=TRUE)     # kernel density estimation
    
    library(raster)
    library(fBasics)
    # Color palette
    #plotcoord <- plotcoord*30
    min(plotcoord[,1]) #-0.3202043
    max(plotcoord[,1]) # 0.3078603
    min(plotcoord[,2]) #-0.5899272
    max(plotcoord[,2]) # 0.2530614
    x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
    y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
    z.cut <- table(x.cut,y.cut)
    head(z.cut)
    dim(z.cut) #62 84
    z.cut <- melt(z.cut)
    str(z.cut) # 5208 obs. of  3 variables:
    head(z.cut)
    z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
    str(x.cut)
    length(x.cut) # 1112287
    xy <- paste(x.cut,y.cut,sep="_")
    index30 <- match(xy,z.cut$xy)
    length(index30)
    any(is.na(index30)) # T
    head(log10(z.cut$value[index30]))
    values.present <- vector(mode="numeric", length=length(xy))
    min(log10(z.cut$value[index30]), na.rm=T) #0
    max(log10(z.cut$value[index30]), na.rm=T) # 3.992156
    values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                            4, by=0.4))
    #values.present <- z.cut$value[index30]
    ordiplot(pca1, type="n")
    #points(plotcoord*30, cex=0.05, col=rainbowPalette(n=10,  
    #        s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1)[values.cut])
    points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                        gamma = 2.2, alpha = NULL))[values.cut])
    text(traitcoord,traitnames, col="red", cex=1)
    
    
    ext.factor <- 30
    r.pca <- raster(nrows=300, ncols=300, xmn=min(plotcoord[,1])*ext.factor, xmx=max(plotcoord[,2])*ext.factor, 
                    ymn=min(plotcoord[,1])*ext.factor, ymx=max(plotcoord[,2]*ext.factor))
    # attention, xmn and ymn as well as xmx and ymx have to be the same, otherwise the raster is shifted
    r.pca.raster <- rasterize(plotcoord*ext.factor, r.pca, fun="count")
    str(r.pca.raster)
    plot(log10(r.pca.raster), asp=0, col=seqPalette(n=10, name=c("Blues")), 
         main="", cex.main=0.7, xlab="",  ylab="", add=T)
    plot(log10(r.pca.raster), asp=0, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
        gamma = 2.2, alpha = NULL)), 
         main="", cex.main=0.7, xlab="",  ylab="", add=T)
    plot(log10(r.pca.raster), asp=0, col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1), 
         main="", cex.main=0.7, xlab="",  ylab="", add=T, legend=F)
    
    '## graph for the PCA ##
    ordiplot(pca1, type="n", xlim=c(-10,13), ylim=c(-10,10))
    rasterImage(as.raster(r.pca.raster), -10, -10, 20, 10, interpolate = F,
                col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1))
    plot(log10(r.pca.raster), asp=0, col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1), 
         main="", cex.main=0.7, xlab="",  ylab="", add=T, legend=F)
    text(traitcoord,traitnames, col="red", cex=1)
    '
    
    
    rda1 <- rda(CWM~.,splot.bioclim[index11,c(4:49)],scale=T)
    plot(rda1)
    head(summary(rda1), 100)
    rda1
    'Partitioning of correlations:
                  Inertia Proportion
    Total          18.000     1.0000
    Constrained     2.321     0.1289
    Unconstrained  15.679     0.8711
    
    Eigenvalues, and their contribution to the correlations 
    
    Importance of components:
    RDA1    RDA2    RDA3    RDA4    RDA5    RDA6    RDA7    RDA8    RDA9    RDA10    RDA11    RDA12
    Eigenvalue            1.20610 0.57369 0.23626 0.15021 0.06339 0.02077 0.02014 0.01570 0.01164 0.008565 0.007286 0.002948
    Proportion Explained  0.06701 0.03187 0.01313 0.00835 0.00352 0.00115 0.00112 0.00087 0.00065 0.000480 0.000400 0.000160
    Cumulative Proportion 0.06701 0.09888 0.11200 0.12035 0.12387 0.12502 0.12614 0.12701 0.12766 0.128140 0.128540 0.128710
    RDA13   RDA14     RDA15     RDA16     RDA17     RDA18    PC1    PC2    PC3    PC4     PC5     PC6
    Eigenvalue            0.002155 0.00111 0.0002949 0.0001558 0.0000665 3.596e-05 4.2977 3.0445 1.8700 1.1826 1.10560 0.94049
    Proportion Explained  0.000120 0.00006 0.0000200 0.0000100 0.0000000 0.000e+00 0.2388 0.1691 0.1039 0.0657 0.06142 0.05225
    Cumulative Proportion 0.128830 0.12889 0.1289000 0.1289100 0.1289200 1.289e-01 0.3677 0.5368 0.6407 0.7064 0.76783 0.82008
    PC7     PC8     PC9    PC10    PC11    PC12   PC13    PC14    PC15    PC16    PC17     PC18
    Eigenvalue            0.70216 0.61116 0.50930 0.46537 0.32289 0.24648 0.1782 0.07293 0.05366 0.04986 0.01738 0.009213
    Proportion Explained  0.03901 0.03395 0.02829 0.02585 0.01794 0.01369 0.0099 0.00405 0.00298 0.00277 0.00097 0.000510
    Cumulative Proportion 0.85909 0.89304 0.92134 0.94719 0.96513 0.97882 0.9887 0.99277 0.99575 0.99852 0.99949 1.000000'
    #anova(rda1, perm=99)
    #anova(rda1, by="terms", permutations = how(nperm=9))
    library(parallel) 
    detectCores() #32
    # Set up the cluster
    #clusterType <- if(length(find.package("snow", quiet = TRUE))) "SOCK" else "PSOCK"
    #clust <- try(makeCluster(getOption("cl.cores", 2), type = clusterType))
    cl <- makeCluster(10)
    #registerDoParallel(cl) 
    options(na.action = "na.pass")
    #clusterExport(cl, "results") 
    
    #print(system.time(anova(rda1, by="terms", perm=99, parallel = getOption("mc.cores"))))
    print(system.time(out1 <- anova(rda1, by="margin", permutations =99, parallel = cl)))
    'user    system   elapsed 
    5304.802  1311.088 14522.410
    '
    out1
    
    traitnames <- dimnames(summary(rda1)$species)[[1]]
    traitnames <- substr(traitnames,1,nchar(traitnames)-5)
    traitcoord <- summary(rda1)$species[,c(1,2)]
    bioclimcoord <- summary(rda1)$biplot[,c(1,2)]
    which(traitnames=="LeafNperArea") #9
    traitcoord[9,1] <- traitcoord[9,1]+0.5
    which(traitnames=="StemDens") #5
    traitcoord[5,2] <- traitcoord[5,2]+0.3
    which(traitnames=="Disp.unit.leng") #18
    traitcoord[18,2] <- traitcoord[18,2]-0.3
    ordiplot(rda1, type="n", xlim=c(-7,7))
    arrows(rep(0,45),rep(0,45),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="cadetblue4", length=0.1)
    text(bioclimcoord*8,dimnames(bioclimcoord)[[1]], col="cadetblue4", cex=0.8)
    text(traitcoord,traitnames, col="red")
    # copied as first RDA graph into ppt
    
    #rda1 <- rda(CWM ~ ., splot.bioclim[index11,c(4:49)],scale=T)
    
    ### Stepwise RDA ###
    rda2 <- rda(CWM ~ 1,splot.bioclim[index11,c(4:49)],scale=T)
    #rda3 <- step(rda2, scope = formula(rda1), test = "perm")
    rda3 <- ordiR2step(rda2, scope=formula(rda1), direction = "both", Pin = 0.05, R2scope = F, permutations = how(nperm = 99), trace = TRUE)
    #TRY2.0: CWM ~ GDD5 + BIO_02 + BIO_12 + T_MAY + P_MAY + T_AUG ....
    #TRY3.0: CWM ~ GDD5 + BIO_02 + BIO_12 + T_MAY + P_MAY + T_SEP + P_DEC +      P_FEB 
    #rm(rda1)
    
    # now rebuild the RDA sequentially
    pca1 <- rda(CWM,scale=T)
    pca1
    5.527+ 3.906+ 1.953+ 1.507+ 1.375+ 0.886+ 0.726+ 0.623+ 0.499 +0.360+ 0.295+ 0.131+ 0.087+ 0.077+ 0.048 
    (5.527+ 3.906)/18 # 52.40556%
    
    traitnames <- dimnames(summary(pca1)$species)[[1]]
    traitnames <- substr(traitnames,1,nchar(traitnames)-5)
    traitcoord <- summary(pca1)$species[,c(1,2)]
    which(traitnames=="SeedMass") #3
    traitcoord[3,2] <- traitcoord[3,2]-0.5
    ordiplot(pca1, type="n")
    text(traitcoord,traitnames, col="red")
    # copied as second RDA graph into ppt
    
    
    rda4 <- rda(CWM ~ GDD5,splot.bioclim[index11,c(4:49)],scale=T)
    rda4
    traitnames <- dimnames(summary(rda4)$species)[[1]]
    traitnames <- substr(traitnames,1,nchar(traitnames)-5)
    traitcoord <- summary(rda4)$species[,c(1,2)]
    plotcoord <- summary(rda4)$sites[,c(1,2)]
    str(plotcoord) #num [1:1112287, 1:2]
    bioclimcoord <- summary(rda4)$biplot[,c(1,2)]
    
    min(plotcoord[,1]) #-0.683271
    max(plotcoord[,1]) # 0.8519885
    min(plotcoord[,2]) #-0.3398233
    max(plotcoord[,2]) # 0.3047364
    x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
    y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
    z.cut <- table(x.cut,y.cut)
    head(z.cut)
    dim(z.cut) #153  64
    z.cut <- melt(z.cut)
    str(z.cut) # 6550 obs. of  3 variables:
    head(z.cut)
    z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
    str(x.cut)
    length(x.cut) # 1112287
    xy <- paste(x.cut,y.cut,sep="_")
    index30 <- match(xy,z.cut$xy)
    length(index30)
    any(is.na(index30)) #T
    head(log10(z.cut$value[index30]))
    values.present <- vector(mode="numeric", length=length(xy))
    min(log10(z.cut$value[index30]), na.rm=T) #0
    max(log10(z.cut$value[index30]), na.rm=T) # 3.947336
    values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                             4, by=0.4))
         
    
    ordiplot(rda4, type="n")
    points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                       gamma = 2.2, alpha = NULL))[values.cut])
    
    arrows(rep(0,1),rep(0,1),bioclimcoord[1]*8,bioclimcoord[2]*8, col="blue", length=0.1)
    text(bioclimcoord[1]*8+0.8,bioclimcoord[2]*8-0.2,"GDD5", col="blue", cex=1)
    text(traitcoord,traitnames, col="red")
    # copied as third RDA graph into ppt
    
    rda5 <- rda(CWM ~ GDD5+BIO_02,splot.bioclim[index11,c(4:49)],scale=T)
    rda5
    traitnames <- dimnames(summary(rda5)$species)[[1]]
    traitnames <- substr(traitnames,1,nchar(traitnames)-5)
    traitcoord <- summary(rda5)$species[,c(1,2)]
    bioclimcoord <- summary(rda5)$biplot[,c(1,2)]
    plotcoord <- summary(rda5)$sites[,c(1,2)]
    str(plotcoord) #num [1:1112287, 1:2]
    x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
    y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
    z.cut <- table(x.cut,y.cut)
    head(z.cut)
    dim(z.cut) #141 205
    z.cut <- melt(z.cut)
    str(z.cut) # 6550 obs. of  3 variables:
    head(z.cut)
    z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
    str(x.cut)
    length(x.cut) # 1111307
    xy <- paste(x.cut,y.cut,sep="_")
    index30 <- match(xy,z.cut$xy)
    length(index30)
    any(is.na(index30))
    head(log10(z.cut$value[index30]))
    values.present <- vector(mode="numeric", length=length(xy))
    min(log10(z.cut$value[index30]), na.rm=T) #0
    max(log10(z.cut$value[index30]), na.rm=T) # 3.228913
    values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                              4, by=0.4))
    
    
    
    ordiplot(rda5, type="n", xlim=c(-10,10), ylim=c(-10,10))
    points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                       gamma = 2.2, alpha = NULL))[values.cut])
    arrows(rep(0,2),rep(0,2),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="blue", length=0.1)
    text(bioclimcoord*8,dimnames(bioclimcoord)[[1]], col="blue", cex=1)
    text(traitcoord,traitnames, col="red")
    # copied as fourth RDA graph into ppt
    
    
    plot(CWM[,"LeafNPratio.mean"]~splot.bioclim$GDD5[index11],
         cex=0.05, xlab="GDD5", ylab="CWM ln(leaf N to P ratio)",cex.lab=1.5, cex.axis=1.5)
    model1 <- lm(CWM[,"LeafNPratio.mean"]~splot.bioclim$GDD5[index11])
    summary(model1)
    '                             Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                 2.334e+00  3.795e-04  6149.5   <2e-16 ***
    splot.bioclim$GDD5[index11] 5.757e-06  1.595e-08   360.9   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 0.196 on 1112285 degrees of freedom
    Multiple R-squared:  0.1048,	Adjusted R-squared:  0.1048 
    F-statistic: 1.303e+05 on 1 and 1112285 DF,  p-value: < 2.2e-16
    '
    abline(model1, lwd=3, col="blue")
    # LeafNPRatio_GDD5
    
    plot(CWM[,"SLA.mean"]~jitter(splot.bioclim$BIO_02[index11]),
         cex=0.05, xlab="BIO_02", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
    model1 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_02[index11])
    summary(model1)
    '                                Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                    3.399e+00  1.448e-03  2348.2   <2e-16 ***
    splot.bioclim$BIO_02[index11] -6.253e-03  1.551e-05  -403.3   <2e-16 ***
    
    Residual standard error: 0.4034 on 1111305 degrees of freedom
    Multiple R-squared:  0.1277,	Adjusted R-squared:  0.1277 
    '
    abline(model1, lwd=3, col="blue")
    # SLA_BIO_02
    
    plot(CWM[,"LeafNperArea.mean"]~jitter(splot.bioclim$BIO_02[index11]),
         cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per area)",cex.lab=1.5, cex.axis=1.5)
    model1 <- lm(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11])
    summary(model1)
    '                                Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                   -5.989e-02  8.475e-04  -70.67   <2e-16 ***
    splot.bioclim$BIO_02[index11]  3.696e-03  9.079e-06  407.09   <2e-16 ***
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Residual standard error: 0.2362 on 1111305 degrees of freedom
    Multiple R-squared:  0.1298,	Adjusted R-squared:  0.1298 
    '
    abline(model1, lwd=3, col="blue")
    # LeafNper_BIO_02
    
    ##### FD calculations ####
    j <- 3
    nam <- DT$species[DT$PlotObservationID==j]
    abu <- DT$Relative.cover[DT$PlotObservationID==j]
    trait <- DT$trait[DT$PlotObservationID==j]
    abundance.weighted=T
    
    FD.fun <- function(trait, abu){
      res <- as.double(NA)
      if (length(trait[!is.na(trait)])>0){
        res <- 0
        #nam <- as.character(nam[!is.na(trait)])
        abu <- as.data.frame(abu[!is.na(trait)])
        #rownames(abu) <- nam
        trait <- as.data.frame(trait[!is.na(trait)])
        #rownames(trait) <- nam
        dis <- dist(trait,method="euclidean")
        if (sum(dis)>1){
          sqrt.dis <- sqrt(dis)
          # taking the square root is required to obtain the same result from the divc function
          # as divc takes the square of distances values, as suggested by
          # Champely & Chessel (2002, Env. Ecol. Stat. 9: 167-177)
          #abu <- as.data.frame(abu)
          res <- unlist(divc(abu, sqrt.dis))
          #Rao's Q: sumi sumj (dist ij prop i prop j)'
        } 
      }
      res
    }
    
    mpd.fun <- function(nam, trait, abu,abundance.weighted){
      #res <- numeric(1) # carries result from the function
      res <- as.double(NA)
      if (length(trait[!is.na(trait)])>0){
      #  dis <- as.matrix(dist(t(t1),method="euclidean"))   
        abu <- t(abu)
        colnames(abu) <- nam
        trait <- t(trait)
        colnames(trait) <- nam
        #dis <- as.matrix(vegdist(t(t1),method="euclidean", na.rm=T))
        dis <- as.matrix(dist(t(trait),method="euclidean"))
        res <- mpd(abu, dis, abundance.weighted=abundance.weighted)
      }
      res
    }
    
    # from mpd source code
    'N <- dim(samp)[1]
    mpd <- numeric(N)
    for (i in 1:N) {
      sppInSample <- names(samp[i, samp[i, ] > 0])
      if (length(sppInSample) > 1) {
        sample.dis <- dis[sppInSample, sppInSample]
        if (abundance.weighted) {
          sample.weights <- t(as.matrix(samp[i, sppInSample, 
                                             drop = FALSE])) %*% as.matrix(samp[i, sppInSample, 
                                                                                drop = FALSE])
          mpd[i] <- weighted.mean(sample.dis, sample.weights)
        }
        else {
          mpd[i] <- mean(sample.dis[lower.tri(sample.dis)])
        }
      }
      else {
        mpd[i] <- NA
      }
    }
    mpd
    '
    # from Oliver, who used picante
    # does not accept missing values in the function
    library(picante)
    # define function for mean pairwise distance
    'mpd.fun.dt <- function(abu, dis, nam, abundance.weighted){
      abu <- t(abu)
      colnames(abu) <- nam
      res <- mpd(abu, dis, abundance.weighted=abundance.weighted)
    }
    # define funtion of mean nearest neighbor distance
    mntd.fun.dt <- function(abu, dis, nam, abundance.weighted){
      abu <- t(abu)
      colnames(abu) <- nam
      res <- mntd(abu, dis, abundance.weighted=abundance.weighted)
      res
    }
    species.list <- unique(DT$species)
    any(is.na(species.list)) #T, there is one NA
    length(species.list) #60909
    species.list <- species.list[!is.na(species.list)]
    length(species.list) #60908
    index8 <- match(species.list,TRY.all.mean.sd.2$species)
    str(index8) # 60908
    length(index8[!is.na(index8)]) #21001
    #species.list2 <- TRY.all.mean.sd.2$species[index8]
    #species.list2 <- species.list2[!is.na(species.list2)]
    #length(species.list2) # 21001
    TRY.all.mean.sd.2.na <- TRY.all.mean.sd.2[index8,]
    str(TRY.all.mean.sd.2.na)
    TRY.all.mean.sd.2.na$species2 <- species.list
    rm(index9)
    '
    FD <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]],
                                                     colnames(TRY.all.mean.sd.2)[3:20]))
    str(FD) #logi [1:1117898, 1:18] 
    'for (i in 1:18){
      # create distance matrix
      #trait.vec <- TRY.all.mean.sd.2.na[,i+2]
      #names(trait.vec) <- TRY.all.mean.sd.2.na$species2
      #trait.dist.mat <- as.matrix(dist(trait.vec))
      # too large for my notebook
      #mpd[,i] <-  DT[,list(mpd.trait= mpd.fun.dt(Relative.cover, trait.dist.mat, species, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
      print(i)  
      DT$trait <- NA
      DT$trait <- TRY.all.mean.sd.2[index7,i+2]
      FD[,i] <-  DT[,list(mpd.trait= mpd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
    }
    write.csv(FD,file = "FD2.csv", row.names = T)
    '
    for (i in 1:18){
      # create distance matrix
      #trait.vec <- TRY.all.mean.sd.2.na[,i+2]
      #names(trait.vec) <- TRY.all.mean.sd.2.na$species2
      #trait.dist.mat <- as.matrix(dist(trait.vec))
      # too large for my notebook
      #mpd[,i] <-  DT[,list(mpd.trait= mpd.fun.dt(Relative.cover, trait.dist.mat, species, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
      print(i)  
      DT$trait <- NA
      DT$trait <- TRY.all.mean.sd.2[index7,i+2]
      # recalculate relative cover for this trait
      FD[,i] <-  DT[,list(FD.trait= FD.fun(trait, Relative.cover)),by=PlotObservationID]$FD.trait
    }
    write.csv(FD,file = "FD3.csv", row.names = T)
    
    #FD <- read.csv("C://Daten//iDiv2//splot2//FD3.csv")
    FD <- read.csv("C://Daten//iDiv2//splot2//splot2.0//FD_TRY3.csv")
    str(FD)
    dimnames(FD)[[1]] <- FD$X
    FD <- FD[,-1]
    tail(FD)
    
    #### match FD to splot.header ####
    index12 <- match(dimnames(FD)[[1]],splot.header$PlotObservationID)
    length(index12) #1117898
    any(is.na(index12)) # F
    splot.header3 <- splot.header[index12,]
    str(FD) # num [1:1117898, 1:18]
    FD <- FD[!(is.na(splot.header3$Longitude)|is.na(splot.header3$Latitude)|is.na(FD[,"SLA.mean"])),]
    str(FD) # num [1:1112287 , 1:18]
    1112287 /1117898*100 # 99.49808 % of plots have traits, Long and Lat
    any(is.na(FD)) #F
    # match header again
    index13 <- match(dimnames(FD)[[1]],splot.header$PlotObservationID)
    length(index13) #1112287
    any(is.na(index13)) # F
    splot.header3 <- splot.header[index13,]
    # now in the same sequence as FD
    
    
    #### Collect all FD regressions ####
    names(splot.bioclim) # from 4 BIO_01 to 49 T_MAR
    index14 <- match(splot.header3$PlotObservationID,splot.bioclim$PLOT_ID)
    length(index14) # 1111307
    length(index14[!is.na(index14)]) # 1111307
    
    FD.bioclim <- array(NA,c(46,18,5),dimnames=list(names(splot.bioclim)[4:49],dimnames(FD)[[2]],
                                          c("p_lin","r2_lin","p_qua1", "p_qua2","r2_qua")))
    for (i in 1:46){
      for (j in 1:18){
        model1 <- lm(FD[,j]~splot.bioclim[index14,i+3])
        FD.bioclim[i,j,1] <- summary(model1)$coefficients[2,4]
        FD.bioclim[i,j,2] <- summary(model1)$r.squared
        model2 <- lm(FD[,j]~splot.bioclim[index14,i+3]+I(splot.bioclim[index14,i+3]^2))
        FD.bioclim[i,j,3] <- summary(model2)$coefficients[2,4]
        FD.bioclim[i,j,4] <- summary(model2)$coefficients[3,4]
        FD.bioclim[i,j,5] <- summary(model2)$r.squared
      }
    }
    str(FD.bioclim) #num [1:46, 1:18, 1:5]
    write.csv(data.frame(names(splot.bioclim)[4:49],FD.bioclim[,,2]),file = "FD2_bioclim_r2_lin.csv", row.names = FALSE)
    write.csv(data.frame(names(splot.bioclim)[4:49],FD.bioclim[,,5]),file = "FD2_bioclim_r2_qua.csv", row.names = FALSE)
    
    library(vegan)
    FD.pca1 <- rda(FD,scale=T)
    FD.pca1
    '
    Eigenvalues for unconstrained axes:
      PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
    8.703 1.758 1.122 0.974 0.732 0.643 0.590 0.540 
    (Showed only 8 of all 18 unconstrained eigenvalues)
    '
    plot(FD.pca1)
    head(summary(FD.pca1))
    #text(summary(FD.pca1)$species[,c(1,2)],dimnames(summary(FD.pca1)$species)[[1]])
    traitnames <- dimnames(summary(FD.pca1)$species)[[1]]
    traitnames <- substr(traitnames,1,nchar(traitnames)-5)
    traitcoord <- summary(FD.pca1)$species[,c(1,2)]
    which(traitnames=="Seed.length") #17
    traitcoord[17,2] <- traitcoord[17,2]+0.3
    which(traitnames=="LeafC.perdrymass") #12
    traitcoord[12,2] <- traitcoord[12,2]-0.4
    which(traitnames=="Leaf.delta.15N") #13
    traitcoord[13,2] <- traitcoord[13,2]+0.4
    which(traitnames=="LeafNPratio") #11
    traitcoord[11,2] <- traitcoord[11,2]-0.3
    
    plotcoord <- summary(FD.pca1)$sites[,c(1,2)]
    str(plotcoord) #num [1:1111307, 1:2]
    x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
    y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
    z.cut <- table(x.cut,y.cut)
    head(z.cut)
    dim(z.cut) #131 50
    z.cut <- melt(z.cut)
    str(z.cut) # 6550 obs. of  3 variables:
    head(z.cut)
    z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
    str(x.cut)
    length(x.cut) # 1111307
    xy <- paste(x.cut,y.cut,sep="_")
    index30 <- match(xy,z.cut$xy)
    length(index30)
    any(is.na(index30))
    head(log10(z.cut$value[index30]))
    values.present <- vector(mode="numeric", length=length(xy))
    min(log10(z.cut$value[index30]), na.rm=T) #0
    max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
    values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                              4, by=0.4))
    
    ordiplot(FD.pca1, type="n")
    #text(traitcoord,traitnames)
    points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                       gamma = 2.2, alpha = NULL))[values.cut])
    
    #arrows(rep(0,45),rep(0,45),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="cadetblue4", length=0.1)
    text(traitcoord,traitnames, col="red")
    # copied as first RDA graph into ppt
    
    env1 <- envfit(FD.pca1,splot.bioclim[index14,c(4:49)],permutations = 0)
    env1
    #with permutations = 999:
    #Error: cannot allocate vector of size 4.1 Gb
    plot(env1)
    png(filename="PCA1.png", width=1000, height=1000, units = "px", bg="white")
    ordiplot(pca1, type="n")
    text(traitcoord,traitnames)
    plot(env1)
    dev.off() # creates a file
    
    ### Stepwise RDA ###
    ### RDA ###
    FD.rda1 <- rda(FD~.,splot.bioclim[index14,c(4:49)],scale=T)
    FD.rda1
    FD.rda2 <- rda(FD ~ 1,splot.bioclim[index14,c(4:49)],scale=T)
    FD.rda3 <- ordistep(FD.rda2, scope=formula(FD.rda1), direction = "both", Pin = 0.1, 
                permutations = how(nperm = 10), trace = TRUE)
    'Start: FD ~ 1 
    
    Df     AIC       F  Pr(>F)  
    + BIO_14  1 3191679 20602.8 0.09091 .
    + BIO_17  1 3191898 20379.5 0.09091 .
    + BIO_12  1 3197070 15125.2 0.09091 .
    + P_MAY   1 3198071 14110.8 0.09091 .
    + P_APR   1 3198194 13986.4 0.09091 .
    + BIO_18  1 3198767 13406.3 0.09091 .
    + P_MAR   1 3199448 12717.1 0.09091 .
    + P_FEB   1 3199451 12714.5 0.09091 .
    + P_JUN   1 3199643 12519.9 0.09091 .
    + BIO_05  1 3199804 12357.7 0.09091 .
    + BIO_02  1 3200062 12096.5 0.09091 .
    + P_NOV   1 3200602 11551.1 0.09091 .
    + P_DEC   1 3200823 11328.1 0.09091 .
    + P_JAN   1 3201335 10810.2 0.09091 .
    + BIO_10  1 3202194  9943.1 0.09091 .
    + T_JUL   1 3202226  9911.4 0.09091 .
    + T_AUG   1 3202685  9448.0 0.09091 .
    + GDD5    1 3202914  9217.7 0.09091 .
    + T_JUN   1 3202934  9196.7 0.09091 .
    + BIO_16  1 3203459  8668.3 0.09091 .
    + BIO_19  1 3203685  8440.1 0.09091 .
    + P_AUG   1 3204079  8043.8 0.09091 .
    + BIO_13  1 3204383  7737.1 0.09091 .
    + T_MAY   1 3204450  7669.7 0.09091 .
    + P_OCT   1 3204755  7362.4 0.09091 .
    + T_SEP   1 3204986  7130.0 0.09091 .
    + P_SEP   1 3205174  6941.4 0.09091 .
    + BIO_15  1 3205378  6735.8 0.09091 .
    + P_JUL   1 3205600  6512.4 0.09091 .
    + T_APR   1 3205746  6365.1 0.09091 .
    + BIO_01  1 3205841  6270.3 0.09091 .
    + BIO_03  1 3205882  6228.8 0.09091 .
    + T_OCT   1 3206048  6062.0 0.09091 .
    + AR      1 3206562  5545.3 0.09091 .
    + BIO_07  1 3207063  5042.2 0.09091 .
    + T_NOV   1 3207547  4555.5 0.09091 .
    + T_MAR   1 3207659  4443.2 0.09091 .
    + T_FEB   1 3208177  3923.3 0.09091 .
    + T_DEC   1 3208260  3840.3 0.09091 .
    + BIO_11  1 3208334  3765.7 0.09091 .
    + T_JAN   1 3208465  3634.3 0.09091 .
    + BIO_08  1 3208844  3254.4 0.09091 .
    + BIO_06  1 3209374  2723.0 0.09091 .
    + BIO_09  1 3209543  2553.3 0.09091 .
    + PET     1 3209642  2454.5 0.09091 .
    + BIO_04  1 3210074  2021.3 0.09091 .
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Step: FD ~ BIO_14 
    
    Df     AIC     F  Pr(>F)  
    - BIO_14  1 3212091 20603 0.09091 .
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Df     AIC       F  Pr(>F)  
    + BIO_02  1 3184224 7481.80 0.09091 .
    + BIO_03  1 3184435 7269.37 0.09091 .
    + GDD5    1 3187255 4435.29 0.09091 .
    + BIO_05  1 3187772 3915.70 0.09091 .
    + T_JAN   1 3188026 3661.00 0.09091 .
    + BIO_01  1 3188040 3647.18 0.09091 .
    + T_FEB   1 3188048 3638.72 0.09091 .
    + T_MAR   1 3188075 3612.29 0.09091 .
    + T_APR   1 3188082 3605.14 0.09091 .
    + BIO_11  1 3188088 3598.96 0.09091 .
    + T_DEC   1 3188123 3563.70 0.09091 .
    + T_NOV   1 3188208 3478.25 0.09091 .
    + T_OCT   1 3188225 3461.84 0.09091 .
    + T_MAY   1 3188357 3329.42 0.09091 .
    + BIO_06  1 3188491 3194.91 0.09091 .
    + BIO_10  1 3188609 3076.07 0.09091 .
    + T_SEP   1 3188617 3068.49 0.09091 .
    + BIO_13  1 3188664 3021.74 0.09091 .
    + T_JUN   1 3188669 3016.26 0.09091 .
    + BIO_16  1 3188709 2976.11 0.09091 .
    + P_SEP   1 3188724 2961.09 0.09091 .
    + BIO_07  1 3188728 2956.84 0.09091 .
    + BIO_12  1 3188782 2902.48 0.09091 .
    + T_AUG   1 3188788 2897.30 0.09091 .
    + T_JUL   1 3188879 2805.50 0.09091 .
    + P_AUG   1 3189181 2503.15 0.09091 .
    + BIO_18  1 3189256 2427.84 0.09091 .
    + BIO_04  1 3189277 2406.93 0.09091 .
    + P_JUL   1 3189398 2285.76 0.09091 .
    + BIO_15  1 3189605 2078.03 0.09091 .
    + P_APR   1 3189650 2032.61 0.09091 .
    + P_JUN   1 3189703 1979.86 0.09091 .
    + BIO_09  1 3189912 1770.71 0.09091 .
    + P_MAR   1 3189965 1717.19 0.09091 .
    + PET     1 3189984 1698.34 0.09091 .
    + AR      1 3190003 1678.99 0.09091 .
    + P_MAY   1 3190005 1677.55 0.09091 .
    + P_OCT   1 3190063 1619.65 0.09091 .
    + P_FEB   1 3190490 1192.28 0.09091 .
    + BIO_17  1 3190497 1184.39 0.09091 .
    + BIO_19  1 3190555 1126.59 0.09091 .
    + P_NOV   1 3190570 1111.68 0.09091 .
    + BIO_08  1 3190582 1100.05 0.09091 .
    + P_JAN   1 3190893  787.98 0.09091 .
    + P_DEC   1 3190955  726.52 0.09091 .
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    
    Step: FD ~ BIO_14 + BIO_02 
    
    Df     AIC       F  Pr(>F)  
    - BIO_02  1 3191679  7481.8 0.09091 .
    - BIO_14  1 3200062 15953.2 0.09091 .
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1'
    
    FD.rda4 <- rda(FD ~ BIO_14,splot.bioclim[index14,c(4:49)],scale=T)
    FD.rda4
    '              Inertia Proportion Rank
    Total         18.0000     1.0000     
    Constrained    0.3276     0.0182    1
    Unconstrained 17.6724     0.9818   18
    Inertia is correlations 
    
    Eigenvalues for constrained axes:
    RDA1 
    0.3276 
    
    Eigenvalues for unconstrained axes:
    PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
    8.307 1.850 1.378 0.884 0.789 0.780 0.578 0.551 
    '
    traitnames <- dimnames(summary(FD.rda4)$species)[[1]]
    traitnames <- substr(traitnames,1,nchar(traitnames)-5)
    traitcoord <- summary(FD.rda4)$species[,c(1,2)]
    bioclimcoord <- summary(FD.rda4)$biplot[,c(1,2)]
    plotcoord <- summary(FD.rda4)$sites[,c(1,2)]
    str(plotcoord) #num [1:1111307, 1:2]
    x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
    y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
    z.cut <- table(x.cut,y.cut)
    head(z.cut)
    dim(z.cut) #131 50
    z.cut <- melt(z.cut)
    str(z.cut) # 6550 obs. of  3 variables:
    head(z.cut)
    z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
    str(x.cut)
    length(x.cut) # 1111307
    xy <- paste(x.cut,y.cut,sep="_")
    index30 <- match(xy,z.cut$xy)
    length(index30)
    any(is.na(index30))
    head(log10(z.cut$value[index30]))
    values.present <- vector(mode="numeric", length=length(xy))
    min(log10(z.cut$value[index30]), na.rm=T) #0
    max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
    values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                              4, by=0.4))
    
    ordiplot(FD.rda4, type="n", xlim=c(-10,10),ylim=c(-10,12))
    points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                       gamma = 2.2, alpha = NULL))[values.cut])
    arrows(rep(0,1),rep(0,1),bioclimcoord[1]*8,bioclimcoord[2]*8, col="blue", length=0.1)
    text(bioclimcoord[1]*8,bioclimcoord[2]*8+0.5,"BIO_14", col="blue", cex=1)
    text(traitcoord,traitnames, col="red")
    # copied as third RDA graph into ppt
    
    FD.rda5 <- rda(FD ~ BIO_14+BIO_02,splot.bioclim[index14,c(4:49)],scale=T)
    FD.rda5
    '
    Inertia Proportion Rank
    Total         18.00000    1.00000     
    Constrained    0.44582    0.02477    2
    Unconstrained 17.55418    0.97523   18
    Inertia is correlations 
    
    Eigenvalues for constrained axes:
    RDA1   RDA2 
    0.3641 0.0817 
    
    Eigenvalues for unconstrained axes:
    PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
    8.267 1.816 1.377 0.880 0.788 0.780 0.568 0.530 '
    traitnames <- dimnames(summary(FD.rda5)$species)[[1]]
    traitnames <- substr(traitnames,1,nchar(traitnames)-5)
    traitcoord <- summary(FD.rda5)$species[,c(1,2)]
    bioclimcoord <- summary(FD.rda5)$biplot[,c(1,2)]
    
    plotcoord <- summary(FD.rda5)$sites[,c(1,2)]
    str(plotcoord) #num [1:1111307, 1:2]
    x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
    y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
    z.cut <- table(x.cut,y.cut)
    head(z.cut)
    dim(z.cut) #131 50
    z.cut <- melt(z.cut)
    str(z.cut) # 6550 obs. of  3 variables:
    head(z.cut)
    z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
    str(x.cut)
    length(x.cut) # 1111307
    xy <- paste(x.cut,y.cut,sep="_")
    index30 <- match(xy,z.cut$xy)
    length(index30)
    any(is.na(index30))
    head(log10(z.cut$value[index30]))
    values.present <- vector(mode="numeric", length=length(xy))
    min(log10(z.cut$value[index30]), na.rm=T) #0
    max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
    values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                              4, by=0.4))
    
    ordiplot(FD.rda5, type="n",  xlim=c(-10,10),ylim=c(-10,12))
    points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                       gamma = 2.2, alpha = NULL))[values.cut])
    arrows(rep(0,2),rep(0,2),bioclimcoord[,1]*4,bioclimcoord[,2]*4, col="blue", length=0.1)
    text(bioclimcoord*4,dimnames(bioclimcoord)[[1]], col="blue", cex=1)
    text(traitcoord,traitnames, col="red")
    # copied as fourth RDA graph into ppt
    
    plot(FD[,"Seed.length.mean"]~splot.bioclim$BIO_14[index14],
         cex=0.05, xlab="BIO_14", ylab="FD ln(seed length)",cex.lab=1.5, cex.axis=1.5)
    model1 <- lm(FD[,"Seed.length.mean"]~splot.bioclim$BIO_14[index14])
    summary(model1)
    '                               Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                   2.584e-01  3.193e-04   809.1   <2e-16 ***
    splot.bioclim$BIO_14[index14] 1.407e-04  6.813e-07   206.5   <2e-16 ***
    Multiple R-squared:  0.03697,	Adjusted R-squared:  0.03697 
    '
    abline(model1, lwd=3, col="blue")
    # 
    
    plot(FD[,"Seed.num.rep.unit.mean"]~splot.bioclim$BIO_02[index14],
         cex=0.05, xlab="BIO_02", ylab="FD ln(Seed number of reproductive unit)",cex.lab=1.5, cex.axis=1.5)
    model1 <- lm(FD[,"Seed.num.rep.unit.mean"]~splot.bioclim$BIO_02[index14])
    summary(model1)
    '                                 Estimate Std. Error t value Pr(>|t|)    
    (Intercept)                    1.251e+00  1.799e-03   695.3   <2e-16 ***
    splot.bioclim$BIO_02[index14] -3.751e-03  1.927e-05  -194.6   <2e-16 ***
    Multiple R-squared:  0.03296,	Adjusted R-squared:  0.03296  '
    abline(model1, lwd=3, col="blue")
    
    
    
    
    
    
    
    
    
    
    
    
    # mpd presence-absence
    system.time(
      mpd.sla.no.abu <-  DT[,list(mpd.sla = mntd.fun.dt(Cover.., SLA.dist.mat, StandSpeciesName, abundance.weighted=FALSE)),by=PlotObservationID]
    )
    
    
    ###########
    levels(splot.header2$COUNTRY_CO)
    levels(splot.header$Country)
    CWM2$Country <- splot.header$Country[index12]
    data(countryExData)
    str(countryExData)
    sPDF <- joinCountryData2Map(splot.header, joinCode = "NAME",nameJoinColumn = "Country")
    ?mapCountryData
    #mapCountryData(sPDF, nameColumnToPlot="BIODIVERSITY" )
    
    #### Climatic space, from Jonathon Lenoir ####
    library(fBasics)
    # Color palette
    
    # Import bio1 (MAT) of the world
    bio <- read.table(paste("/home/helge/sPlot/Bioclim/", "BIO.txt", sep = ""),sep=";", header=T)
    str(bio) #9033453 obs. of  22 variables
    r <- raster(nrows=33, ncols=33, xmn=min(bio$BIO_01), xmx=max(bio$BIO_01), ymn=min(bio$BIO_12), ymx=max(bio$BIO_12))
    mattar <- data.frame(bio$BIO_01,bio$BIO_12)
    str(mattar)
    names(mattar) <- c("bio1","bio12")
    mattar_r <- rasterize(mattar, r, fun="count")
    str(mattar_r)
    plot(log10(mattar_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
         main="N 2.5 arc-minutes cells per bin (log10 scale)", cex.main=0.7,
         xlab="MAT (°C*10)",  ylab="TAR (mm)")
    # Bio1_Bio12_world.png
    
    #plot(mattar_r/max(getValues(mattar_r), na.rm=TRUE), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), main="Relative proportion of 10 arc-minutes cells per bin", xlab="MAR (°C*10)", ylab="TAR (mm)")
    # Plot the sampling effort of sPlot across the MAT-TAR climatic space
    str(splot.bioclim)
    # Make it a spatial data frame
    CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
    splot.bioclim.coords <- cbind(splot.bioclim$X, splot.bioclim$Y)
    splot.bioclim.coords <- SpatialPoints(splot.bioclim.coords, proj4string=CRSlonlat)
    splot.bioclim2 <- data.frame(splot.bioclim$BIO_01,splot.bioclim$BIO_12)
    names(splot.bioclim2) <- c("bio1","bio12")
    #splot.bioclim2 <- SpatialPointsDataFrame(splot.bioclim.coords, splot.bioclim2, proj4string=CRSlonlat)
    #str(splot.bioclim2)
    #sPlot_mattar <- extract(bio1, coordinates(sPlot))
    #sPlot_mattar <- as.data.frame(sPlot_mattar)
    #names(sPlot_mattar) <- c("bio1")
    #sPlot_mattar$bio12 <- extract(bio12, coordinates(sPlot))
    sPlot_mattar_r <- rasterize(splot.bioclim2, r, fun="count")
    str(sPlot_mattar_r)
    plot(mattar_r, asp=0, col="grey", legend=FALSE, main="N vegetation plots per bin (log10 scale)",
         xlab="MAT (°C)", ylab="TAR (mm)")
    # Bio1_Bio12_world_grey.png
    plot(log10(sPlot_mattar_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
         add=TRUE)
    # Bio1_Bio12_sPlot_SLA.png
    
    # Build the PC1-PC2 climatic space
    load("/home/helge/sPlot/Bioclim/pca_data.RData")
    str(pca.data)
    r <- raster(nrows=33, ncols=33, xmn=min(pca.data$PC1), xmx=max(pca.data$PC1), ymn=min(pca.data$PC2), ymx=max(pca.data$PC2))
    pca_r <- rasterize(pca.data[, c("PC1", "PC2")], r, fun="count")
    plot(log10(pca_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
         main="N 2.5 arc-minutes cells per bin (log10 scale)", 
         xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
    plot(pca_r/max(getValues(pca_r), na.rm=TRUE), asp=0, 
         col=rev(divPalette(n=100, name=c("RdYlBu"))), 
         main="Relative proportion of 2.5 arc-minutes cells per bin", 
         xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
    # Plot the sampling effort of sPlot across the PC1-PC2 climatic space
    index13 <- match(splot.bioclim$RAST_ID,pca.data$RAST_ID)
    sPlot_pca <- pca.data[index13, c("PC1", "PC2")]
    sPlot_pca_r <- rasterize(sPlot_pca, r, fun="count")
    plot(pca_r, asp=0, col="grey", legend=FALSE, 
         main="N vegetation plots per bin (log10 scale)", 
         xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
    plot(log10(sPlot_pca_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
         add=TRUE)
    
    #### For Marten Winter ####
    # species x country Matrix
    species.country <- table(DT$species,splot.header$Country[index30])
    str(species.country) # int [1:60908, 1:130]
    names(dimnames(species.country))
    names(dimnames(species.country)) <- c("species","country")
    
    library(reshape2)
    ?melt.table
    species.country2 <- melt(species.country)
    str(species.country2) # 	188470 obs. of  3 variables:
    names(species.country2)
    names(species.country2)[3] <- "p_a"
    table(species.country2$p_a)
    
    species.country2$p_a[species.country2$p_a>0] <- 1
    species.country2 <- species.country2[species.country2$p_a==1,]
    str(species.country2) # 	188470 obs. of  3 variables:
    species.country2 <- species.country2[,-3]
    str(species.country2) # 188470
    write.csv(species.country2,file = 
                paste("/home/oliver/shared/", "species.country2.csv", sep = ""), row.names = FALSE)
    levels(species.country2$country) # 130
    length(species.country2$country[species.country2$country=="Algeria"])
    # 97
    sum(species.country[,2]) 
    # 799, this is the amount of records for DZA = Algeria
    sum(species.country[,126]) 
    # 1467200, sum of records for United States
    
    TaxaNat <- read.csv(paste("/home/oliver/shared/Glonaf/", "TaxaNat.csv", sep = ""))
    str(TaxaNat) #178779 obs. of  2
    
    GlonafRegion <- read.csv(paste("/home/oliver/shared/Glonaf/", "GlonafRegion.csv", sep = ""))
    str(GlonafRegion) #843 obs. of  10
    trim.trailing <- function (x) sub("\\s+$", "", x)
    # returns string w/o leading or trailing whitespace
    GlonafRegion$Country_sPlot <- as.factor(trim.trailing(as.character(GlonafRegion$Country_sPlot)))
    index14 <- match(levels(species.country2$country),levels(GlonafRegion$Country_sPlot))
    index14
    levels(GlonafRegion$Country)
    
    data.frame(levels(species.country2$country),levels(GlonafRegion$Country_sPlot)[index14])
    ## all correct
    
    index15 <- match(levels(GlonafRegion$Country_sPlot),GlonafRegion$Country_sPlot)
    index15
    index16 <- match(species.country2$country,levels(species.country2$country))
    any(is.na(index16)) # F
    length(levels(species.country2$species)) #60908
    'species.country2$species.country <- paste(species.country2$species,
    levels(species.country2$country)[index16],sep="_")
    species.country2$species.country <- paste(species.country2$species,
    levels(GlonafRegion$Country_sPlot)[index14][index16],sep="_")
    species.country2$species.country <- paste(species.country2$species,
    GlonafRegion$Country_sPlot[index15][index14][index16],sep="_")
    '
    species.country2$species.country <- paste(species.country2$species,
                                              GlonafRegion$ISO.country[index15][index14][index16],sep="_")
    str(species.country2) # 188470 obs. of  3 variables:
    head(species.country2)
    species.country2[c(160000:160100),]
    ## fits!
    
    index17 <- match(TaxaNat$Region,GlonafRegion$ID.region)
    any(is.na(index17)) # F
    
    TaxaNat$species.country <-paste(TaxaNat$Taxon2,GlonafRegion$ISO.country[index17], sep="_")
    
    index18 <- match(species.country2$species.country,TaxaNat$species.country)
    length(index18) #188470
    length(index18[!is.na(index18)]) # 9283 out of 188470 species x country occurrences
    # are alien occurrences
    9283/188470*100 # 4.925452 % species x country occurrences
    species.country2$alien <- F
    species.country2$alien[!is.na(index18)] <- T
    species.country2$ISO.country <- GlonafRegion$ISO.country[index15][index14][index16]
    str(species.country2)
    head(species.country2)
    length(species.country2$ISO.country[species.country2$ISO.country=="DZA"])
    # 12939, this cannot be
    #species.country2$ISO.country[species.country2$ISO.country=="DZA"]
    sum(species.country[,2]) 
    # 799, this is the amount of records for DZA = Algeria
    length(species.country2$ISO.country[species.country2$ISO.country=="DZA"
                                        & !is.na(species.country2$ISO.country)])
    # 97!
    
    #table(species.country2$ISO.country)
    #species.country3 <- acast(species.country2, species~ISO.country, length)
    length(species.country2$alien[species.country2$alien])
    # 9283 alien records in sPlot
    length(species.country2$alien[species.country2$alien & species.country2$ISO.country=="DZA"])
    # 3 alien records in DZA = Algeria
    length(species.country[species.country[,2]>0,2])
    # 97, however there are only 97 records in total
    length(species.country2$alien[species.country2$alien & species.country2$ISO.country=="USA"])
    # 1911 alien species in USA
    length(species.country[species.country[,126]>0,2])
    # 9933 species in total in USA
    alien.richness <- as.matrix(table(species.country2$ISO.country,species.country2$alien))[,2]
    
    species.country3 <- species.country
    species.country3[species.country3>0] <- 1
    country.richness <- colSums(species.country3)
    country.richness
    names(country.richness)
    data.frame(names(country.richness),GlonafRegion$ISO.country[index15][index14])
    country.richness2 <- data.frame(GlonafRegion$ISO.country[index15][index14],as.numeric(country.richness))
    names(country.richness2) <- c("ISO.country","richness")
    country.richness2$ISO.country
    country.richness2
    
    index19 <- match(country.richness2$ISO.country,names(alien.richness))
    index19
    alien.richness
    country.richness2$alien.richness <- alien.richness[index19]
    country.richness2
    country.richness2$alien.prop <- country.richness2$alien.richness/country.richness2$richness
    country.richness2$alien.prop
    
    sPDF <- joinCountryData2Map(country.richness2,joinCode = "ISO3",
                                nameJoinColumn = "ISO.country")
    '112 codes from your data successfully matched countries in the map
    18 codes from your data failed to match with a country code in the map
    132 codes from the map werent represented in your data
    '
    mapParams <- mapCountryData(sPDF, nameColumnToPlot="richness",addLegend=FALSE)
    do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
    mapParams <- mapCountryData(sPDF, nameColumnToPlot="richness",
                                catMethod=seq(0,10000,by=1000),addLegend=FALSE)
    do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
    
    mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.richness",
                                catMethod=seq(0,2000,by=200),addLegend=FALSE)
    do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
    
    max(country.richness2$alien.prop, na.rm=T) #0.2823779
    mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.prop",addLegend=FALSE)
    mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.prop",
                                catMethod=seq(0,0.3,by=0.02),addLegend=FALSE)
    do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
    
    
    
    
    ##################### OLD ###################
    library(spdep)
    library(maptools)
    library(hexbin)
    library(RColorBrewer)
    # Use a lighter version of sPlot focusing on mean annual temperature MAT (BIO1)
    # and total annual rainfall TAR (BIO12)
    sPlot <- splot.header[, c("LONGITUDE", "LATITUDE", "BIO_1", "BIO_12")]
    sPlot$LONGITUDE <- as.numeric(as.character(sPlot$LONGITUDE))
    sPlot$LATITUDE <- as.numeric(as.character(sPlot$LATITUDE))
    sPlot$BIO_1 <- as.numeric(sPlot$BIO_1)/10
    sPlot$BIO_12 <- as.numeric(sPlot$BIO_12)
    str(sPlot)
    # Make it a spatial data frame
    CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
    coords <- cbind(sPlot$LONGITUDE, sPlot$LATITUDE)
    coords <- SpatialPoints(coords, proj4string=CRSlonlat)
    sPlot <- SpatialPointsDataFrame(coords, sPlot, proj4string=CRSlonlat)
    str(sPlot)
    # Plot the data on the world map to check the coordinates and projection
    data(wrld_simpl)
    plot(wrld_simpl)
    points(coordinates(sPlot), cex=0.1, col="red", pch=16)
    # Set a color ramp
    rf <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
    # Plot the plot distribution within the MAT-TAR bidimentionnal climatic space
    hexbinplot(BIO_12~BIO_1, data=sPlot@data, colramp=rf, trans=log, inv=exp,
               border="white", xlab="MAT", ylab="TAR", xbins=30)
    # Display only bins with a minimum of 10 plots and a maximum of 1000 plots
    hexbinplot(BIO_12~BIO_1, data=sPlot@data, colramp=rf, trans=log, inv=exp,
               border="white", xlab="MAT", ylab="TAR", xbins=30, mincnt=10, maxcnt=1000)
    #
    
    
    
    
    
    
    
    
    ################ OLD ###################
    
    ###
    load("C:\\Daten\\iDiv\\sPlot\\pca_data.Rdata")
    str(pca.data)
    any(is.na(pca.data$PC1)) #F
    
    load("C:\\Daten\\iDiv\\sPlot\\sPlot_header_pca.RData")
    str(splot.header2)
    any(is.na(pca.data$PC1)) #F
    
    library(raster)
    r <- 
      splot_pca_r <- rasterize(splot_pca, r, run="count")
    plot(splot_pca_r, asp=0, col=rev(divPalette(n=100, name="RdY1Bu"))))
    plot(splot_r, asp=0, col=rev(divPalette(n=100, name="RdY1Bu"))))
    
    index <- which(getValues(splot_pca_r)>0)
    which(getValues(pca_r)>0)
    poinra
    getValues(splot_pca_r)[index] # 136 values
    ?sample
    splo_pca$binid <- cellFromXY(pca_r, splot_pca)
    
    
    ###########################
    load("C:\\Daten\\iDiv\\sPlot\\splot_pca2.Rdata")
    str(splot_pca)
    splot_pca <- data.table(splot_pca)
    setkey(splot_pca,binid)
    inter <- splot_pca[,list(count=length(PCA1)), by=binid]
    splot_pca2 <- splot_pca[,list(PCA1, PCA2,count=length(PCA1)), by=binid]
    
    str(splot_pca2)
    hist(splot_pca2$binid)
    inter2 <- splot_pca2[count>100,list(count=sum(PCA1)), by=binid]
    str(inter2)
    [,list(total.cover=sum(Cover..)), by=PlotObservationID]
    <- DT[,list(total.cover=sum(Cover..)), by=PlotObservationID]
    
    sample(splot_pca$PCA1,100)
    splot_pca2 <-  splot_pca[table(splot_pca$binid)>100,sample(PCA1,100),by=binid]
    str(splot_pca2)
    
    resample <- function(PCA1_,PCA2_,binid_){
      if (length(PCA1)>100) {
        
      }
      else {
        1
      }
      ]index10 <- match(species,labels(d))
      d3 <- as.dist(dist[index10,index10])
      if (sum(d3)==0){
        NA
      }
      divc(as.data.frame(rel.cover),d3)$diversity
    }
    splot_pca2 <-  splot_pca[length(PCA1)<=100,res_ID := 1]
    head(splot_pca2,100)
    table(splot_pca2$binid,splot_pca2$count, exclude=NULL)
    table(splot_pca$binid)
    splot_pca2 <-  splot_pca[,res_ID := 0]
    splot_pca2 <-  splot_pca[table(splot_pca2$binid)<=100,res_ID := 1]
    splot.selection <-  splot_pca[,list(res = resample(PCA1,PCA2,binid)),by=binid]
    #length(splot_pca[binid>100])
    hist(splot_pca$binid)
    splot.selection <-  splot_pca[,list(length(binid)),by=binid]
    splot.selection <-  splot_pca[,list(count=length(PCA1)),by=binid]
    splot.selection <-  splot_pca[,,by=binid]
    hist(splot.selection$count)
    str(splot.selection)
    str(splot_pca2)
    splot.selection <-  splot_pca[,list(nrow()),by=binid]
    splot.selection <-  splot_pca[,count:=length(binid),by=binid]
    length(splot_pca,by=binid)
    ########################################################################################
    
    ### CWM ###
    #CWM1 <-  DT[!is.na(plot.species.trait.value),list(CWM = weighted.mean(plot.species.trait.value,Relative.cover)),by=PlotObservationID]
    CWM1 <-  DT[,list(CWM = weighted.mean(plot.species.trait.value,Relative.cover)),by=PlotObservationID]
    # see http://stackoverflow.com/questions/3367190/aggregate-and-weighted-mean-in-r
    CWM1[1:100,]
    hist(CWM1$CWM)
    write.csv((x, file = paste(path.sPlot, "CWM_SLA_all.csv", sep = ""), row.names = FALSE))
    
    ### FD ###
    library(ade4)    # f?r divc (Rao's Q)
    # this does not work: ~50 GB RAM required for 40k x 40k species matrix:
    'ktab1 <- ktab.list.df(list(as.data.frame(species.trait.value[!is.na(species.trait.value)])))
    d <- dist.ktab(ktab1, type=c("Q"), option = "scaledBYrange")
    #labels(d)
    #str(d)
    #attr(d,"Labels")
    attr(d,"Labels") <- dimnames(species.trait.value)[[1]][!is.na(species.trait.value)]
    labels(d)
    any(is.na(d)) # F
    dist <- as.matrix(d)
    
    #tree.trait.weighted <- t(t(veg.KHG3.prop)  * all.traits.KHG[,j])
    #dist.sqrt <- sqrt(dist)
    d.sqrt <- sqrt(d)
    labels(d.sqrt)
    dist <- as.matrix(d.sqrt)
    '
    'Rao <- function(rel.cover,species){
    index10 <- match(species,labels(d))
    d3 <- as.dist(dist[index10,index10])
    if (sum(d3)==0){
    NA
    }
    divc(as.data.frame(rel.cover),d3)$diversity
    }
    FD.all <-  DT[!is.na(plot.species.trait.value),list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]
    '
    #species.trait.value.scaled <- species.trait.value/(max(species.trait.value)-min(species.trait.value))
    species.trait.value.scaled <- TRY.mean.sd.splot$SLA.mean/(max(TRY.mean.sd.splot$SLA.mean)-min(TRY.mean.sd.splot$SLA.mean))
    
    # scale by range, which saves time for distance calculations
    
    library(reshape2)
    
    package.size = 1000
    #number.plots.total <- length(levels(DT[!is.na(plot.species.trait.value)]$PlotObservationID)) # 705272
    number.plots.total <- length(levels(DT$PlotObservationID)) # 701611 vs. 705272
    FD.all <- array(0,number.plots.total, dimnames=list(levels(DT$PlotObservationID)))
    number.packages <-ceiling(number.plots.total/package.size) #702
    pb <- winProgressBar(title = "progress bar", min = 0, max =number.packages, width = 300)
    for (k in 1:number.packages) {
      setWinProgressBar(pb, k, title=paste("Row ",k,"of ", number.packages, " is processed"))
      ifelse (k==number.packages, package<-number.plots.total%%package.size, package <- package.size)
      #index3 <- match(DT[!is.na(plot.species.trait.value)]$PlotObservationID,levels(DT[!is.na(plot.species.trait.value)]$PlotObservationID)[seq((k-1)*package.size+1,k*package.size,1)])
      index3 <- match(DT$PlotObservationID,levels(DT$PlotObservationID)[seq((k-1)*package.size+1,k*package.size,1)])
      #DT2 <- DT[!is.na(plot.species.trait.value) & !is.na(index3)]
      #DT2 <- DT2[!is.na(plot.species.trait.value)]
      #length(index3[!is.na(index3)])
      DT2 <- DT[!is.na(index3)]
      #str(DT2)
      #veg.matrix1 <- acast(DT2, Matched.concept ~ PlotObservationID, value.var="Relative.cover", fill=0)  
      veg.matrix1 <- acast(DT2, names.stand ~ PlotObservationID, value.var="Relative.cover", fun.aggregate=sum, fill=0)  
      # fun.aggregate = sum -> sums up multiple entries per species!
      # this can happen with the unified species nomenclature!
      #length(unique(dimnames(veg.matrix1)[[2]]))# 1000
      #length(unique(DT2$PlotObservationID)) # 1000
      #veg.matrix1[veg.matrix1 > 0] <- 1
      #any(colSums(veg.matrix1)!=1) #F
      index4 <- match(dimnames(veg.matrix1)[[1]],species.list)
      species.trait.value.scaled2 <- species.trait.value.scaled[index4]
      species.list2 <- species.list[index4]
      ktab1 <- ktab.list.df(list(as.data.frame(species.trait.value.scaled2)))
      d <- dist.ktab(ktab1, type=c("Q"), option = "noscale")
      #labels(d)
      #str(d)
      #attr(d,"Labels")
      attr(d,"Labels") <- species.list2
      #labels(d)
      #any(is.na(d)) # F
      #dist <- as.matrix(d)
      #as.matrix(d)[1:8,1:8]
      d <- sqrt(d)
      #dist <- as.matrix(d)
      #rm(d)
      if (k<number.packages){
        FD.all[seq((k-1)*package.size+1,k*package.size,1)] <- 
          divc(as.data.frame(veg.matrix1),d)$diversity
        #DT2[,list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]$Rao.Q
        any(colSums(veg.matrix1)==0) # F o.K.
        any(colSums(veg.matrix1)!=1) # T ups....
        hist(colSums(veg.matrix1))
        any(rowSums(veg.matrix1)==0) # F o.K.
      } else {
        FD.all[seq((k-1)*package.size+1,number.plots.total,1)] <- 
          divc(as.data.frame(veg.matrix1),d)$diversity
        #DT2[,list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]
      }
    }
    close(pb)
    #FD.all
    write.csv(FD.all, file = paste(path.sPlot, "FDall.csv", sep = ""), row.names = FALSE)