From 01347e28fc295057634f0fc126369b073f75d552 Mon Sep 17 00:00:00 2001
From: oa00oxef <gabriella.damasceno@idiv.de>
Date: Thu, 4 Apr 2024 14:00:41 +0000
Subject: [PATCH] Delete files from sPlot 2.0

---
 code/Trait-Environment-Relationships1.R | 1859 -----------------------
 1 file changed, 1859 deletions(-)
 delete mode 100644 code/Trait-Environment-Relationships1.R

diff --git a/code/Trait-Environment-Relationships1.R b/code/Trait-Environment-Relationships1.R
deleted file mode 100644
index 7e20b4e..0000000
--- a/code/Trait-Environment-Relationships1.R
+++ /dev/null
@@ -1,1859 +0,0 @@
-getwd()
-###### Traits #####
-load("data/TRY.all.mean.sd.2.Rdata")
-str(TRY.all.mean.sd.2)
-# 40791 obs. of  38 variables
-# tests
-
-plot(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2)
-#summary(lm(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2))
-#abline(lm(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2),col="blue", lwd=2)
-#sqrt(0.3415)
-
-###### Backbone #####
-load("data/backbone.v.2.splot.try3.Rdata")
-# alternatively:
-#sPlot3 <- read.csv(paste("data/", "backbone.v.2.splot.try3.csv", 
-#                         sep = ""), sep=",", quote = "\"'")
-str(backbone.splot.try3) 
-# data.frame':	122901 obs. of  33 variables:
-
-###### Harmonized plot data ######
-DT <- read.csv("data/plots_without_unified_cover_scales_20160120a.csv")
-library(data.table)
-DT <- data.table(DT)
-colnames(DT)
-colnames(DT)[9] <- "Cover"
-setkey(DT,PlotObservationID)
-
-###### Header data #######
-splot.header <- read.csv("data/sPlot_14_04_2015_coord.csv")
-str(splot.header)
-'data.frame:	1117940 obs. of  3 variables:
-$ PlotObservationID: int  1 2 3 4 5 6 7 8 9 10 ...
-$ Longitude        : num  -154 -154 -154 -154 -154 ...
-$ Latitude         : num  62.4 62.4 62.4 62.4 62.4 ...'
-
-##### Bioclim data ####
-splot.bioclim <- read.csv(paste("data/","sPlot.csv", sep = ""), sep = ";", na.strings=c("","NA"))
-str(splot.bioclim)
-#data.frame':	1117382 obs. of  52 variables:
-
-####################
-##### Matching #####
-index6 <- match(TRY.all.mean.sd.2$StandSpeciesName,backbone.splot.try3$names.sPlot.TRY)
-any(is.na(index6)) #T
-# not all species in the trait list are also in the backbone
-TRY.all.mean.sd.2$StandSpeciesName[is.na(index6)]
-# 3959 species in TRY are not in the backbone
-
-### data cleaning ###
-length(TRY.all.mean.sd.2$StandSpeciesName[is.na(TRY.all.mean.sd.2$StandSpeciesName)])
-# 1 record is NA
-TRY.all.mean.sd.2[which(is.na(TRY.all.mean.sd.2$StandSpeciesName)),]
-# record no. 7947 has StandSpeciesName = NA
-TRY.all.mean.sd.2 <- TRY.all.mean.sd.2[-which(is.na(TRY.all.mean.sd.2$StandSpeciesName)),]
-str(TRY.all.mean.sd.2) #40790 obs. of  38 variables
-# 1 record less
-'data.frame:	40790 obs. of  38 variables:
-$ StandSpeciesName       : Factor w/ 40790 levels "Abarema adenophora",..: 965 3066 3128 3445 4239 7449 7467 7829 8665 12419 ...
-$ n                      : int  23 32 15 8 21 32 20 31 7 8 ...
-$ SLA.mean               : num  1.42 1.83 1.92 2.7 2.31 ...
-$ PlantHeight.mean       : num  1.282 2.92 0.585 0.488 1.1 ...
-$ SeedMass.mean          : num  0.0656 2.7757 2.6427 -2.6794 -2.4111 ...
-$ LDMC.mean              : num  -0.765 -1.367 -1.009 -1.228 -0.963 ...
-$ StemDens.mean          : num  -0.298 -0.524 -0.322 -0.69 -0.592 ...
-$ LeafArea.mean          : num  3.39 8.36 5.26 3.28 5.16 ...
-$ LeafN.mean             : num  2.33 2.5 2.21 2.95 3.17 ...
-$ LeafP.mean             : num  0.118 0.074 -0.127 0.792 0.421 ...
-$ LeafNperArea.mean      : num  0.692 0.485 0.381 0.452 0.709 ...
-$ Leaffreshmass.mean     : num  -2.325 0.112 -1.916 -3.777 -2.714 ...
-$ LeafNPratio.mean       : num  2.23 2.32 2.24 2.14 2.81 ...
-$ LeafC.perdrymass.mean  : num  6.14 6.25 6.31 6.1 6.12 ...
-$ Leaf.delta.15N.mean    : num  2.68 2.44 2.32 2.78 2.72 ...
-$ Stem.cond.dens.mean    : num  5.44 4.82 5.44 6.41 6.69 ...
-$ Seed.num.rep.unit.mean : num  4.57 4.34 1.48 9.87 8.48 ...
-$ Wood.vessel.length.mean: num  6.32 6.17 6.18 6.13 6.16 ...
-$ Seed.length.mean       : num  1.18 1.45 1.78 0.16 1.21 ...
-$ Disp.unit.leng.mean    : num  1.173 2.119 2.064 0.039 1.287 ...
-$ SLA.sd                 : num  0.1105 0.1754 0.3161 0.0694 0.0596 ...
-$ PlantHeight.sd         : num  0.185 0.347 1.296 0.274 0.137 ...
-$ SeedMass.sd            : num  0.288 1.016 0.234 0.201 0.317 ...
-$ LDMC.sd                : num  0.038 0.0469 0.0449 0.0433 0.0321 ...
-$ StemDens.sd            : num  0.0629 0.0582 0.0511 0.0787 0.0635 ...
-$ LeafArea.sd            : num  0.876 0.235 1.89 0.135 0.196 ...
-$ LeafN.sd               : num  0.0857 0.0902 0.1229 0.0493 0.0443 ...
-$ LeafP.sd               : num  0.2173 0.0974 0.142 0.0356 0.0611 ...
-$ LeafNperArea.sd        : num  0.0983 0.0916 0.065 0.0425 0.053 ...
-$ Leaffreshmass.sd       : num  0.454 0.189 0.776 0.116 0.144 ...
-$ LeafNPratio.sd         : num  0.1921 0.0522 0.1184 0.032 0.0477 ...
-$ LeafC.perdrymass.sd    : num  0.00741 0.00767 0.00706 0.00634 0.00469 ...
-$ Leaf.delta.15N.sd      : num  0.0363 0.0376 0.0382 0.0296 0.0198 ...
-$ Stem.cond.dens.sd      : num  0.0996 0.3641 0.2232 0.1064 0.1064 ...
-$ Seed.num.rep.unit.sd   : num  0.319 0.51 0.684 0.295 0.408 ...
-$ Wood.vessel.length.sd  : num  0.0435 0.0518 0.0673 0.0392 0.0291 ...
-$ Seed.length.sd         : num  0.0508 0.0744 0.0992 0.0817 0.0651 ...
-$ Disp.unit.leng.sd      : num  0.0721 0.1367 0.1409 0.0423 0.0754 ...
-'
-
-index6 <- match(TRY.all.mean.sd.2$StandSpeciesName,backbone.splot.try3$names.sPlot.TRY)
-any(is.na(index6)) #T
-TRY.all.mean.sd.2$StandSpeciesName[is.na(index6)]
-# 3958 species
-TRY.all.mean.sd.2$species <- backbone.splot.try3$name.short.correct[index6]
-dim(TRY.all.mean.sd.2) # 40790    39
-
-
-index1 <- match(DT$PlotObservationID, splot.header$PlotObservationID)
-any(is.na(index1)) #F
-length(index1) #  24241941
-length(unique(index1[!is.na(index1)])) #1117940
-
-index2 <- match(splot.header$PlotObservationID,DT$PlotObservationID)
-any(is.na(index2)) #F
-length(index2) #   1117940
-length(unique(index2[!is.na(index2)])) #  1117940
-dim(splot.header)
-# 1117940      3
-identical(as.character(splot.header$PlotObservationID),as.character(DT$PlotObservationID[index2]))
-# T
-
-any(is.na(DT$species)) #T
-# that should not be the case!
-length(DT$species[!is.na(DT$species)]) #24221565
-length(DT$species) #24241941
-24241941-24221565 # 20376 NA names!!!
-# it gives nonsense to match them with traits
-index7 <- match(DT$species,TRY.all.mean.sd.2$species)
-length(index7) #24241941
-length(index7[!is.na(index7)]) #21040927, with previous TRY version that was: 19841429
-24241941 - 21040927 # 3201014 entries have no traits
-(24241941 - 3201014)/24241941*100 
-# which are   86.79555% of plots that have traits,
-# previously: 18.15247 %
-
-# reduce DT, and splot.header
-#splot.header <- splot.header[!is.na(DT$species[index2]),]
-#dim(splot.header) #1117476       3
-DT <- DT[!is.na(DT$species),]
-index7 <- match(DT$species,TRY.all.mean.sd.2$species)
-length(index7) #24221565
-
-
-### CWM ###
-mean(TRY.all.mean.sd.2$SLA.mean) # 2.611657
-min(TRY.all.mean.sd.2$SLA.mean) # -1.297031
-max(TRY.all.mean.sd.2$SLA.mean) # 5.893029
-# example
-DT$trait <- NA
-DT$trait <- TRY.all.mean.sd.2$SLA.mean[index7]
-length(DT$trait[!is.na(DT$trait)]) # 21020551
-length(DT$trait[is.na(DT$trait)]) #   3201014
-str(DT)
-
-colnames(TRY.all.mean.sd.2)
-CWM2 <-  DT[,list(CWM.SLA = weighted.mean(trait,Cover,na.rm = T)),by=PlotObservationID]
-# I have checked that we do not need to prefilter only those entries that have a trait value
-# this works fine
-str(CWM2)
-dim(CWM2) #1117898
-which(colnames(TRY.all.mean.sd.2)=="SLA.mean") # 3
-which(colnames(TRY.all.mean.sd.2)=="Disp.unit.leng.mean") # 20
-CWM <- array(NA,c(dim(CWM2)[1],18),dimnames=list(CWM2$PlotObservationID,
-                                              colnames(TRY.all.mean.sd.2)[3:20]))
-rm(CWM2)
-for (i in 1:18){
-  DT$trait <- NA
-  DT$trait <- TRY.all.mean.sd.2[index7,i+2]
-  CWM[,i] <-  DT[,list(CWM.trait= weighted.mean(trait,Relative.cover,na.rm = T)),by=PlotObservationID]$CWM.trait
-  
-}
-str(CWM) #num [1:1117898, 1:18]
-CWM[1:20,]
-tail(CWM)
-write.csv(CWM,file = "CWM.csv", row.names = T)
-
-CWM <- read.csv("C://Daten//iDiv2//splot2//CWM.csv")
-str(CWM) #1117898 obs. of  19 variables
-dimnames(CWM)[[1]] <- CWM$X
-CWM <- CWM[,-1]
-tail(CWM)
-
-##### MAP ####
-index3 <- match(dimnames(CWM)[[1]],splot.header$PlotObservationID)
-length(index3) #1117898
-any(is.na(index3)) # F
-#length(index3[is.na(index3)]) #422
-splot.header2 <- splot.header[index3,]
-'CWM1$Longitude <- splot.header$Longitude[index10]
-CWM1$Latitude <- splot.header$Latitude[index10]
-'
-str(CWM) # num [1:1117898, 1:18]
-CWM <- CWM[!(is.na(splot.header2$Longitude)|is.na(splot.header2$Latitude)|is.na(CWM[,"SLA.mean"])),]
-str(CWM) # num [1:1111307, 1:18]
-1111307/1117898*100 # 99.41041% of plots have traits, Long and Lat
-any(is.na(CWM)) #F
-# match header again
-index4 <- match(dimnames(CWM)[[1]],splot.header$PlotObservationID)
-length(index4) #1111307
-any(is.na(index4)) # F
-splot.header2 <- splot.header[index4,]
-# now in the same sequence as CWM
-
-library(raster)
-CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
-coords <- cbind(splot.header2$Longitude, splot.header2$Latitude)
-coords <- SpatialPoints(coords, proj4string=CRSlonlat)
-map.CWM <- SpatialPointsDataFrame(coords, as.data.frame(CWM), proj4string=CRSlonlat)
-str(map.CWM)
-
-library(maptools)
-data(wrld_simpl)
-plot(wrld_simpl)
-points(coordinates(map.CWM), cex=0.1, col="red", pch=16)
-# sPlot2withSLA.png
-max(coordinates(map.CWM)[,1]) # 179.5901
-max(coordinates(map.CWM)[,2]) # 80.14912
-min(coordinates(map.CWM)[,1]) # -162.7414
-min(coordinates(map.CWM)[,2]) # -64.78
-
-
-r <- raster(nrows=180, ncols=360, xmn=-180, xmx=180, ymn=-90, ymx=90)
-str(r)
-raster.map.SLA <- rasterize(map.CWM,r,map.CWM$SLA.mean, run="mean")
-str(raster.map.SLA)
-plot(wrld_simpl)
-plot(raster.map.SLA, add=T)
-# SLAgridded1.png
-
-library(rworldmap)
-raster.map.SLA.grid <- as(raster.map.SLA, 'SpatialGridDataFrame')
-library(RColorBrewer)
-# Set a color ramp
-rf <- brewer.pal(8, 'Spectral')
-mapParams <- mapGriddedData(raster.map.SLA.grid,addLegend=F, 
-                            numCats = 8, colourPalette=rf)
-str(mapParams)
-mapParams$cutVector
-# addMapLegend(plottedData=raster.map.SLA.grid,legendShrink=0.5)
-#do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.5,
-#                legendIntervals="data", legendMar = 2 ) )
-do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.3,
-                         legendIntervals="page", legendMar = 4, legendShrink=1.7,horizontal=F))
-#round(as.numeric(exp(mapParams$cutVector)),1)
-mtext(round(as.numeric(exp(mapParams$cutVector)),1),side=4,line=-2, 
-      at=seq(-150,170, by=39), las=2)
-#mtext("ln(SLA)",side=1,line=0, at=230, las=1)
-#mtext("ln(SLA)",side=4,line=-2,at=-170, las=2)
-# SLAgridded2.png
-
-############################################################
-# Regression with Bioclim
-############################################################
-'BIO1 = Annual Mean Temperature
-BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp))
-BIO3 = Isothermality (BIO2/BIO7) (* 100)
-BIO4 = Temperature Seasonality (standard deviation *100)
-BIO5 = Max Temperature of Warmest Month
-BIO6 = Min Temperature of Coldest Month
-BIO7 = Temperature Annual Range (BIO5-BIO6)
-BIO8 = Mean Temperature of Wettest Quarter
-BIO9 = Mean Temperature of Driest Quarter
-BIO10 = Mean Temperature of Warmest Quarter
-BIO11 = Mean Temperature of Coldest Quarter
-BIO12 = Annual Precipitation
-BIO13 = Precipitation of Wettest Month
-BIO14 = Precipitation of Driest Month
-BIO15 = Precipitation Seasonality (Coefficient of Variation)
-BIO16 = Precipitation of Wettest Quarter
-BIO17 = Precipitation of Driest Quarter
-BIO18 = Precipitation of Warmest Quarter
-BIO19 = Precipitation of Coldest Quarter'
-
-index11 <- match(splot.header2$PlotObservationID,splot.bioclim$PLOT_ID)
-length(index11) # 1111307
-length(index11[!is.na(index11)]) # 1111307
-
-#index12 <- match(CWM2$PlotObservationID, splot.header$PlotObservationID)
-#any(is.na(index12)) #F
-str(splot.bioclim$BIO_01)
-plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
-     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
-model1 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11])
-model2 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11]+I(splot.bioclim$BIO_01[index11]^2))
-summary(model1)
-'Coefficients:
-                                Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                    2.945e+00  9.167e-04  3212.9   <2e-16 ***
-splot.bioclim$BIO_01[index11] -1.204e-03  9.067e-06  -132.8   <2e-16 ***
-Multiple R-squared:  0.01562,	Adjusted R-squared:  0.01562 
-F-statistic: 1.764e+04 on 1 and 1111305 DF,  p-value: < 2.2e-16
-'
-summary(model2)
-'                                     Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                         2.806e+00  1.113e-03  2520.3   <2e-16 ***
-splot.bioclim$BIO_01[index11]       2.188e-03  1.857e-05   117.8   <2e-16 ***
-I(splot.bioclim$BIO_01[index11]^2) -1.657e-05  7.985e-08  -207.6   <2e-16 ***
-Multiple R-squared:  0.05241
-'
-abline(model1, lwd=3, col="blue")
-model2$coefficients
-x.new <- seq(-100,300,1)
-y.new <- model2$coefficients[1]+model2$coefficients[2]*x.new+model2$coefficients[3]*x.new^2
-lines(x.new,y.new, lwd=3, col="blue")
-# SLA_BIO1.png
-
-# devide in quantiles
-quantile(splot.bioclim$BIO_01[index11],probs = seq(0, 1, 1/3))
-'       0% 33.33333% 66.66667%      100% 
-     -227        79        99       303 '
-# -> most points (i.e. one third) are in the temperate zone between 7.9 and 9.9°C
-x1 <- splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=79]
-model1a <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]<=79]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=79])
-summary(model1a)
-'Coefficients:
-                                                                    Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                                                        2.697e+00  1.244e-03  2167.2   <2e-16 ***
-splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] <= 79] 2.466e-03  2.117e-05   116.5   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 0.4027 on 371873 degrees of freedom
-Multiple R-squared:  0.03519,	Adjusted R-squared:  0.03519 
-F-statistic: 1.356e+04 on 1 and 371873 DF,  p-value: < 2.2e-16'
-model1b <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]<=99]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=99])
-summary(model1b)
-'                                                                    Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                                                        2.692e+00  1.161e-03  2319.8   <2e-16 ***
-splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] <= 99] 2.777e-03  1.527e-05   181.9   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 0.3954 on 750010 degrees of freedom
-Multiple R-squared:  0.04224,	Adjusted R-squared:  0.04224 
-F-statistic: 3.308e+04 on 1 and 750010 DF,  p-value: < 2.2e-16'
-model1c <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]>99]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]>99])
-summary(model1c)
-'                                                                    Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                                                        3.236e+00  2.587e-03  1251.1   <2e-16 ***
-splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] > 99] -3.774e-03  1.855e-05  -203.4   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 0.443 on 361293 degrees of freedom
-Multiple R-squared:  0.1028,	Adjusted R-squared:  0.1028 
-F-statistic: 4.138e+04 on 1 and 361293 DF,  p-value: < 2.2e-16
-'
-
-plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
-     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
-x.new <- seq(-100,79,1)
-y.new <- model1a$coefficients[1]+model1a$coefficients[2]*x.new
-lines(x.new,y.new, lwd=3, col="blue")
-x.new <- seq(79,99,1)
-y.new <- model1b$coefficients[1]+model1b$coefficients[2]*x.new
-lines(x.new,y.new, lwd=3, col="green")
-x.new <- seq(99,300,1)
-y.new <- model1c$coefficients[1]+model1c$coefficients[2]*x.new
-lines(x.new,y.new, lwd=3, col="red")
-
-### calculate a GAM
-library(gam)
-x <- splot.bioclim$BIO_01[index11]
-model3 <- gam(CWM[,"SLA.mean"]~lo(x, span=0.5))
-summary(model3)
-'    Null Deviance: 207269.5 on 1111306 degrees of freedom
-Residual Deviance: 187452.7 on 1111302 degrees of freedom
-AIC: 1175898
-Anova for Parametric Effects
-                       Df Sum Sq Mean Sq F value    Pr(>F)    
-lo(x, span = 0.5)       1   3238  3238.0   19197 < 2.2e-16 ***
-Residuals         1111302 187453     0.2
-Anova for Nonparametric Effects
-                  Npar Df Npar F     Pr(F)    
-(Intercept)                                   
-lo(x, span = 0.5)     2.7  37028 < 2.2e-16 ***
-'
-
-x.new <- seq(-100,300,1)
-y.new <- predict(model3, data.frame(x=x.new), se = F)
-# se=T does not work
-plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
-     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
-lines(x.new,y.new, lwd=3, col="blue")
-
-### N to P ratio
-model4 <- lm(CWM[,"LeafNPratio.mean"]~splot.bioclim$BIO_01[index11])
-summary(model4)
-'                               Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                   2.290e+00  4.334e-04  5284.0   <2e-16 ***
-splot.bioclim$BIO_01[index11] 1.531e-03  4.287e-06   357.2   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 0.2026 on 1111305 degrees of freedom
-Multiple R-squared:  0.103,	Adjusted R-squared:  0.103  
-'
-plot(CWM[,"LeafNPratio.mean"]~splot.bioclim$BIO_01[index11],
-     cex=0.05, xlab="BIO_01", ylab="CWM ln(Leaf N to P ratio)",cex.lab=1.5, cex.axis=1.5)
-abline(model4, lwd=3, col="blue")
-
-### turn the regression around
-# does not make sense
-model5 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"SLA.mean"])
-summary(model5)
-'                   Estimate Std. Error t value Pr(>|t|)    
-(Intercept)       127.41676    0.28026   454.6   <2e-16 ***
-CWM[, "SLA.mean"] -12.97407    0.09769  -132.8   <2e-16 ***
-Multiple R-squared:  0.01562,	Adjusted R-squared:  0.01562 '
-
-model6 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"LeafNPratio.mean"])
-summary(model6)
-'Coefficients:
-                          Estimate Std. Error t value Pr(>|t|)    
-(Intercept)               -72.7479     0.4591  -158.5   <2e-16 ***
-CWM[, "LeafNPratio.mean"]  67.2535     0.1883   357.2   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 42.46 on 1111305 degrees of freedom
-Multiple R-squared:  0.103,	Adjusted R-squared:  0.103 '
-
-model7 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"SLA.mean"]+CWM[,"LeafNPratio.mean"])
-summary(model7)
-'                           Estimate Std. Error t value Pr(>|t|)    
-(Intercept)               -41.59378    0.55839  -74.49   <2e-16 ***
-CWM[, "SLA.mean"]          -9.08978    0.09355  -97.17   <2e-16 ***
-CWM[, "LeafNPratio.mean"]  65.04103    0.18885  344.40   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 42.28 on 1111304 degrees of freedom
-Multiple R-squared:  0.1106,	Adjusted R-squared:  0.1106 '
-x <- splot.bioclim$BIO_01[index11]
-model8 <- lm(x~CWM[,"LeafNPratio.mean"]*CWM[,"SLA.mean"])
-summary(model8)
-'Coefficients:
-                                             Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                                 -432.2500     2.2645  -190.9   <2e-16 ***
-CWM[, "LeafNPratio.mean"]                    227.1681     0.9304   244.2   <2e-16 ***
-CWM[, "SLA.mean"]                            135.8572     0.8202   165.6   <2e-16 ***
-CWM[, "LeafNPratio.mean"]:CWM[, "SLA.mean"]  -60.2280     0.3386  -177.9   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 41.69 on 1111303 degrees of freedom
-Multiple R-squared:  0.1352,	Adjusted R-squared:  0.1352 '
-
-library(scatterplot3d) # für 3D-Plots
-s3d1 <- scatterplot3d(CWM[,"SLA.mean"],CWM[,"LeafNPratio.mean"], x, 
-                      xlab="CWM ln(Leaf N to P ratio)", ylab="CWM ln(SLA)", zlab="BIO_1",
-                              pch=1,cex.symbols=0.05,highlight.3d=T,type="p")
-#fit <- lm(AnzF ~ Prop.n+Prop.ba,data=data6[data6$Sp=="Qupe",])
-s3d1$plane3d(model8)
-# does not work
-### producing the plane with points
-min(CWM[,"SLA.mean"])
-max(CWM[,"SLA.mean"])
-x2 <- sort(rep(seq(0.1, 5, by=0.1),35))
-min(CWM[,"LeafNPratio.mean"])
-max(CWM[,"LeafNPratio.mean"])
-y2 <- rep(seq(1, 4.4, by=0.1),50)
-func <- function(x,y) { -432.2500 + 227.1681 * x + 135.8572*y -60.2280*x*y}
-z2 <- func(x2,y2)
-s3d1 <- scatterplot3d(CWM[,"SLA.mean"],CWM[,"LeafNPratio.mean"], x, 
-                      xlab="CWM ln(Leaf N to P ratio)", ylab="CWM ln(SLA)", zlab="BIO_1",
-                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=70,scale.y=0.5)
-s3d1$points3d(x2,y2,z2,col="blue", type="p", pch=20, cex=0.5)
-# 
-x <- splot.bioclim$BIO_01[index11]
-model7 <- lm(CWM[,"SLA.mean"]~x*CWM[,"LeafNPratio.mean"])
-summary(model7)
-'                              Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                  1.948e+00  9.377e-03   207.8   <2e-16 ***
-x                            1.278e-02  7.985e-05   160.0   <2e-16 ***
-CWM[, "LeafNPratio.mean"]    3.916e-01  3.856e-03   101.6   <2e-16 ***
-x:CWM[, "LeafNPratio.mean"] -5.472e-03  3.166e-05  -172.8   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 0.4213 on 1111303 degrees of freedom
-Multiple R-squared:  0.04842,	Adjusted R-squared:  0.04842 '
-min(x)
-max(x)
-x2 <- sort(rep(seq(-100, 290, by=10),35))
-min(CWM[,"LeafNPratio.mean"])
-max(CWM[,"LeafNPratio.mean"])
-y2 <- rep(seq(1, 4.4, by=0.1),40)
-func <- function(x,y) {  1.948e+00  +  1.278e-02 * x + 3.916e-01*y -5.472e-03*x*y}
-z2 <- func(x2,y2)
-s3d1 <- scatterplot3d(x,CWM[,"LeafNPratio.mean"], CWM[,"SLA.mean"], 
-                      xlab="BIO_1", ylab="CWM ln(Leaf N to P ratio)", zlab="CWM ln(SLA)", 
-                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=50,scale.y=1)
-s3d1$points3d(x2,y2,z2,col="blue", type="p", pch=20, cex=0.5)
-# final plot
-# some BIO_01 and LeafNPratio combinations do not exist
-?cut
-x.cut <- cut(x, breaks=seq(-110, 290, by=10))
-y.cut <- cut(CWM[,"LeafNPratio.mean"],breaks=seq(0.9, 4.4, by=0.1))
-z.cut <- table(x.cut,y.cut)
-head(z.cut)
-dim(z.cut) #40 35
-z2.cut <- table(x2,y2)
-head(z2.cut)
-dim(z2.cut) #40 35
-
-#library(reshape2)
-z.cut <- melt(z.cut)
-str(z.cut) # 1400 obs. of  3 variables:
-head(z.cut)
-z2.cut <- melt(z2.cut)
-str(z2.cut) #1400 obs. of  3 variables:
-head(z2.cut)
-z2.cut$value <- 0
-z2.cut$value <- z.cut$value
-z2.cut$xy <- paste(z2.cut$x2,z2.cut$y2,sep="_")
-#z3.cut <- acast(z2.cut, x2~y2)
-#head(z3.cut)
-xy <- paste(x2,y2,sep="_")
-index20 <- match(xy,z2.cut$xy)
-values.present <- vector(mode="logical", length=length(x2))
-values.present <- z2.cut$value[index20] > 0
-table(values.present)
-'z.cut$value[z.cut$x.cut==x.cut[1] & z.cut$y.cut==y.cut[1]]
-values.present <- function(z.cut,x,y){
-  z.cut$value[z.cut$x.cut==x & z.cut$y.cut==y]
-}
-x1 <- data.frame(x,x.cut, y=CWM[,"LeafNPratio.mean"],y.cut)
-head(x1)
-z1 <- apply(x1,1,FUN=values.present(z.cut, x1$x.cut, x1$y.cut))
-values.present(z.cut, x1$x.cut[1], x1$y.cut[1])
-'
-
-s3d1 <- scatterplot3d(x,CWM[,"LeafNPratio.mean"], CWM[,"SLA.mean"], 
-                      xlab="BIO_1", ylab="CWM ln(Leaf N to P ratio)", zlab="CWM ln(SLA)", 
-                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=-10,scale.y=0.8)
-s3d1$points3d(x2[values.present],y2[values.present],z2[values.present],col="blue", type="p", pch=20, cex=0.5)
-
-
-#### Collect all CWM regressions ####
-names(splot.bioclim) # from 4 (BIO_01 to 49 T_MAR
-CWM.bioclim <- array(NA,c(46,18,5),dimnames=list(names(splot.bioclim)[4:49],dimnames(CWM)[[2]],
-                      c("p_lin","r2_lin","p_qua1", "p_qua2","r2_qua")))
-for (i in 1:46){
-  for (j in 1:18){
-    model1 <- lm(CWM[,j]~splot.bioclim[index11,i+3])
-    CWM.bioclim[i,j,1] <- summary(model1)$coefficients[2,4]
-    CWM.bioclim[i,j,2] <- summary(model1)$r.squared
-    model2 <- lm(CWM[,j]~splot.bioclim[index11,i+3]+I(splot.bioclim[index11,i+3]^2))
-    CWM.bioclim[i,j,3] <- summary(model2)$coefficients[2,4]
-    CWM.bioclim[i,j,4] <- summary(model2)$coefficients[3,4]
-    CWM.bioclim[i,j,5] <- summary(model2)$r.squared
-  }
-}
-str(CWM.bioclim) #num [1:46, 1:18, 1:5]
-write.csv(data.frame(names(splot.bioclim)[4:49],CWM.bioclim[,,2]),file = "CWM_bioclim_r2_lin.csv", row.names = FALSE)
-write.csv(data.frame(names(splot.bioclim)[4:49],CWM.bioclim[,,5]),file = "CWM_bioclim_r2_qua.csv", row.names = FALSE)
-
-
-which(CWM.bioclim[,1,2]==max(CWM.bioclim[,1,2])) # BIO_02 
-which(CWM.bioclim[,1,5]==max(CWM.bioclim[,1,5])) # BIO_02 
-which(CWM.bioclim[,,2]==max(CWM.bioclim[,,2]),arr.ind = TRUE) 
-#       row col
-#BIO_02   2   9
-CWM.bioclim[2,9,2] #0.1297723
-which(CWM.bioclim[,,5]==max(CWM.bioclim[,,5]),arr.ind = TRUE) 
-#       row col
-#BIO_02   2   9
-CWM.bioclim[2,9,5] # 0.1563736
-names(splot.bioclim)[2+3] #"BIO_02"
-dimnames(CWM)[[2]][9] #"LeafNperArea.mean"
-plot(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11],
-     cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per Area)",cex.lab=1.5, cex.axis=1.5)
-model1 <- lm(CWM[,9]~splot.bioclim[index11,2+3])
-abline(model1, lwd=3, col="blue")
-# MeanNperArea_BIO_02.png
-png(filename="MeanNperArea_BIO_02.png", width=800, height=800, units = "px", bg="white")
-  plot(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11],
-       cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per Area)",cex.lab=1.5, cex.axis=1.5)
-  #model1 <- lm(CWM[,9]~splot.bioclim[index11,2+3])
-  abline(model1, lwd=3, col="blue")
-dev.off() # creates a file
-png(filename="SLA_BIO_02.png", width=800, height=800, units = "px", bg="white")
-  plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_02[index11],
-    cex=0.05, xlab="BIO_02", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
-  model1 <- lm(CWM[,1]~splot.bioclim[index11,2+3])
-abline(model1, lwd=3, col="blue")
-dev.off() # creates a file
-
-#### PCA of all CWM and all Bioclim variables ####
-library(vegan)
-pca1 <- rda(CWM,scale=T)
-plot(pca1)
-str(summary(pca1))
-#text(summary(pca1)$species[,c(1,2)],dimnames(summary(pca1)$species)[[1]])
-traitnames <- dimnames(summary(pca1)$species)[[1]]
-traitnames <- substr(traitnames,1,nchar(traitnames)-5)
-traitcoord <- summary(pca1)$species[,c(1,2)]
-which(traitnames=="SeedMass") #3
-traitcoord[3,2] <- traitcoord[3,2]-0.5
-ordiplot(pca1, type="n", ylim=c(-10,10))
-text(traitcoord,traitnames, col="red", cex=1)
-points(plotcoord*30, cex=0.05)
-#env1 <- envfit(pca1,splot.bioclim[index11,c(4:49)],permutations = 99)
-#with permutations = 999:
-#Error: cannot allocate vector of size 4.1 Gb
-plot(env1)
-png(filename="PCA1.png", width=1000, height=1000, units = "px", bg="white")
-  ordiplot(pca1, type="n")
-  text(traitcoord,traitnames)
-  plot(env1)
-dev.off() # creates a file
-'rm(env1)
-rm(backbone.splot.try3)
-rm(model1)
-rm(model2)
-rm(pca1)
-rm(splot.header)
-rm(splot.header2)
-rm(traitdist)
-rm(coords)
-rm(TRY.all.mean.sd.2)
-rm(index7)
-rm(raster.map.SLA)
-rm(trait.vec)
-rm(FD)
-rm(splot.bioclim)
-'
-gc()
-
-### produce a PCA with kernel density
-# From Díaz et al. 2016, Nature
-library(ks)
-plotcoord <- summary(pca1)$sites[,c(1,2)]
-str(plotcoord) #num [1:1111307, 1:2]
-H <- Hpi(x=plotcoord)      # optimal bandwidth estimation
-# does not work
-est<- kde(x=plotcoord, H=H, compute.cont=TRUE)     # kernel density estimation
-
-library(raster)
-library(fBasics)
-# Color palette
-#plotcoord <- plotcoord*30
-min(plotcoord[,1]) #-0.2639705
-max(plotcoord[,1]) # 0.2415964
-min(plotcoord[,2]) #-0.2595053
-max(plotcoord[,2]) # 0.3560549
-x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
-y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
-z.cut <- table(x.cut,y.cut)
-head(z.cut)
-dim(z.cut) #50 61
-z.cut <- melt(z.cut)
-str(z.cut) # 3050 obs. of  3 variables:
-head(z.cut)
-z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
-str(x.cut)
-length(x.cut) # 1111307
-xy <- paste(x.cut,y.cut,sep="_")
-index30 <- match(xy,z.cut$xy)
-length(index30)
-any(is.na(index30))
-head(log10(z.cut$value[index30]))
-values.present <- vector(mode="numeric", length=length(xy))
-min(log10(z.cut$value[index30]), na.rm=T) #0
-max(log10(z.cut$value[index30]), na.rm=T) # 3.9878
-values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
-                        4, by=0.4))
-#values.present <- z.cut$value[index30]
-ordiplot(pca1, type="n")
-#points(plotcoord*30, cex=0.05, col=rainbowPalette(n=10,  
-#        s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1)[values.cut])
-points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
-                                    gamma = 2.2, alpha = NULL))[values.cut])
-text(traitcoord,traitnames, col="red", cex=1)
-
-
-ext.factor <- 30
-r.pca <- raster(nrows=300, ncols=300, xmn=min(plotcoord[,1])*ext.factor, xmx=max(plotcoord[,2])*ext.factor, 
-                ymn=min(plotcoord[,1])*ext.factor, ymx=max(plotcoord[,2]*ext.factor))
-# attention, xmn and ymn as well as xmx and ymx have to be the same, otherwise the raster is shifted
-r.pca.raster <- rasterize(plotcoord*ext.factor, r.pca, fun="count")
-str(r.pca.raster)
-plot(log10(r.pca.raster), asp=0, col=seqPalette(n=10, name=c("Blues")), 
-     main="", cex.main=0.7, xlab="",  ylab="", add=T)
-plot(log10(r.pca.raster), asp=0, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
-    gamma = 2.2, alpha = NULL)), 
-     main="", cex.main=0.7, xlab="",  ylab="", add=T)
-plot(log10(r.pca.raster), asp=0, col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1), 
-     main="", cex.main=0.7, xlab="",  ylab="", add=T, legend=F)
-
-'## graph for the PCA ##
-ordiplot(pca1, type="n", xlim=c(-10,13), ylim=c(-10,10))
-rasterImage(as.raster(r.pca.raster), -10, -10, 20, 10, interpolate = F,
-            col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1))
-plot(log10(r.pca.raster), asp=0, col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1), 
-     main="", cex.main=0.7, xlab="",  ylab="", add=T, legend=F)
-text(traitcoord,traitnames, col="red", cex=1)
-'
-
-
-rda1 <- rda(CWM~.,splot.bioclim[index11,c(4:49)],scale=T)
-plot(rda1)
-summary(rda1)
-rda1
-'              Inertia Proportion Rank
-Total         18.0000     1.0000     
-Constrained    2.4811     0.1378   15
-Unconstrained 15.5189     0.8622   15
-Inertia is correlations 
-Eigenvalues for constrained axes:
-  RDA1   RDA2   RDA3   RDA4   RDA5   RDA6   RDA7   RDA8   RDA9  RDA10  RDA11  RDA12  RDA13  RDA14  RDA15 
-1.2230 0.6020 0.3275 0.1972 0.0653 0.0226 0.0156 0.0107 0.0059 0.0050 0.0031 0.0012 0.0010 0.0007 0.0003 
-
-Eigenvalues for unconstrained axes:
-PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10  PC11  PC12  PC13  PC14  PC15 
-4.594 3.453 1.486 1.426 1.095 0.798 0.648 0.603 0.483 0.343 0.261 0.122 0.084 0.074 0.047 
-'
-#anova(rda1, perm=99)
-#anova(rda1, by="terms", permutations = how(nperm=9))
-library(parallel) 
-detectCores() #32
-# Set up the cluster
-#clusterType <- if(length(find.package("snow", quiet = TRUE))) "SOCK" else "PSOCK"
-#clust <- try(makeCluster(getOption("cl.cores", 2), type = clusterType))
-cl <- makeCluster(10)
-#registerDoParallel(cl) 
-options(na.action = "na.pass")
-#clusterExport(cl, "results") 
-
-#print(system.time(anova(rda1, by="terms", perm=99, parallel = getOption("mc.cores"))))
-print(system.time(out1 <- anova(rda1, by="margin", permutations =99, parallel = cl)))
-'user    system   elapsed 
-5304.802  1311.088 14522.410
-'
-out1
-
-traitnames <- dimnames(summary(rda1)$species)[[1]]
-traitnames <- substr(traitnames,1,nchar(traitnames)-5)
-traitcoord <- summary(rda1)$species[,c(1,2)]
-bioclimcoord <- summary(rda1)$biplot[,c(1,2)]
-which(traitnames=="LeafNperArea") #9
-traitcoord[9,1] <- traitcoord[9,1]+0.5
-which(traitnames=="StemDens") #5
-traitcoord[5,2] <- traitcoord[5,2]+0.3
-which(traitnames=="Disp.unit.leng") #18
-traitcoord[18,2] <- traitcoord[18,2]-0.3
-ordiplot(rda1, type="n", xlim=c(-7,7))
-arrows(rep(0,45),rep(0,45),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="cadetblue4", length=0.1)
-text(bioclimcoord*8,dimnames(bioclimcoord)[[1]], col="cadetblue4", cex=0.8)
-text(traitcoord,traitnames, col="red")
-# copied as first RDA graph into ppt
-
-#rda1 <- rda(CWM ~ ., splot.bioclim[index11,c(4:49)],scale=T)
-
-### Stepwise RDA ###
-rda2 <- rda(CWM ~ 1,splot.bioclim[index11,c(4:49)],scale=T)
-#rda3 <- step(rda2, scope = formula(rda1), test = "perm")
-rda3 <- ordiR2step(rda2, scope=formula(rda1), direction = "both", Pin = 0.05, R2scope = F, permutations = how(nperm = 99), trace = TRUE)
-#CWM ~ GDD5 + BIO_02 + BIO_12 + T_MAY + P_MAY + T_AUG ....
-#rm(rda1)
-
-# now rebuild the RDA sequentially
-pca1 <- rda(CWM,scale=T)
-pca1
-5.527+ 3.906+ 1.953+ 1.507+ 1.375+ 0.886+ 0.726+ 0.623+ 0.499 +0.360+ 0.295+ 0.131+ 0.087+ 0.077+ 0.048 
-(5.527+ 3.906)/18 # 52.40556%
-
-traitnames <- dimnames(summary(pca1)$species)[[1]]
-traitnames <- substr(traitnames,1,nchar(traitnames)-5)
-traitcoord <- summary(pca1)$species[,c(1,2)]
-which(traitnames=="SeedMass") #3
-traitcoord[3,2] <- traitcoord[3,2]-0.5
-ordiplot(pca1, type="n")
-text(traitcoord,traitnames, col="red")
-# copied as second RDA graph into ppt
-
-
-rda4 <- rda(CWM ~ GDD5,splot.bioclim[index11,c(4:49)],scale=T)
-rda4
-traitnames <- dimnames(summary(rda4)$species)[[1]]
-traitnames <- substr(traitnames,1,nchar(traitnames)-5)
-traitcoord <- summary(rda4)$species[,c(1,2)]
-plotcoord <- summary(rda4)$sites[,c(1,2)]
-str(plotcoord) #num [1:1111307, 1:2]
-bioclimcoord <- summary(rda4)$biplot[,c(1,2)]
-
-min(plotcoord[,1]) #-0.5340595
-max(plotcoord[,1]) # 0.7760822
-min(plotcoord[,2]) #-0.2688765
-max(plotcoord[,2]) # 0.2326464
-x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
-y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
-z.cut <- table(x.cut,y.cut)
-head(z.cut)
-dim(z.cut) #131 50
-z.cut <- melt(z.cut)
-str(z.cut) # 6550 obs. of  3 variables:
-head(z.cut)
-z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
-str(x.cut)
-length(x.cut) # 1111307
-xy <- paste(x.cut,y.cut,sep="_")
-index30 <- match(xy,z.cut$xy)
-length(index30)
-any(is.na(index30))
-head(log10(z.cut$value[index30]))
-values.present <- vector(mode="numeric", length=length(xy))
-min(log10(z.cut$value[index30]), na.rm=T) #0
-max(log10(z.cut$value[index30]), na.rm=T) # 3.956168
-values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
-                                                         4, by=0.4))
-     
-
-ordiplot(rda4, type="n")
-points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
-                                                   gamma = 2.2, alpha = NULL))[values.cut])
-
-arrows(rep(0,1),rep(0,1),bioclimcoord[1]*8,bioclimcoord[2]*8, col="blue", length=0.1)
-text(bioclimcoord[1]*8+0.8,bioclimcoord[2]*8-0.2,"GDD5", col="blue", cex=1)
-text(traitcoord,traitnames, col="red")
-# copied as third RDA graph into ppt
-
-rda5 <- rda(CWM ~ GDD5+BIO_02,splot.bioclim[index11,c(4:49)],scale=T)
-rda5
-traitnames <- dimnames(summary(rda5)$species)[[1]]
-traitnames <- substr(traitnames,1,nchar(traitnames)-5)
-traitcoord <- summary(rda5)$species[,c(1,2)]
-bioclimcoord <- summary(rda5)$biplot[,c(1,2)]
-plotcoord <- summary(rda5)$sites[,c(1,2)]
-str(plotcoord) #num [1:1111307, 1:2]
-x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
-y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
-z.cut <- table(x.cut,y.cut)
-head(z.cut)
-dim(z.cut) #131 50
-z.cut <- melt(z.cut)
-str(z.cut) # 6550 obs. of  3 variables:
-head(z.cut)
-z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
-str(x.cut)
-length(x.cut) # 1111307
-xy <- paste(x.cut,y.cut,sep="_")
-index30 <- match(xy,z.cut$xy)
-length(index30)
-any(is.na(index30))
-head(log10(z.cut$value[index30]))
-values.present <- vector(mode="numeric", length=length(xy))
-min(log10(z.cut$value[index30]), na.rm=T) #0
-max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
-values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
-                                                          4, by=0.4))
-
-
-
-ordiplot(rda5, type="n", xlim=c(-10,10), ylim=c(-10,10))
-points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
-                                                   gamma = 2.2, alpha = NULL))[values.cut])
-arrows(rep(0,2),rep(0,2),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="blue", length=0.1)
-text(bioclimcoord*8,dimnames(bioclimcoord)[[1]], col="blue", cex=1)
-text(traitcoord,traitnames, col="red")
-# copied as fourth RDA graph into ppt
-
-
-plot(CWM[,"LeafNPratio.mean"]~splot.bioclim$GDD5[index11],
-     cex=0.05, xlab="GDD5", ylab="CWM ln(leaf N to P ratio)",cex.lab=1.5, cex.axis=1.5)
-model1 <- lm(CWM[,"LeafNPratio.mean"]~splot.bioclim$GDD5[index11])
-summary(model1)
-'                             Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                 2.297e+00  3.891e-04  5905.2   <2e-16 ***
-splot.bioclim$GDD5[index11] 6.349e-06  1.636e-08   388.1   <2e-16 ***
-Multiple R-squared:  0.1193,	Adjusted R-squared:  0.1193 
-'
-abline(model1, lwd=3, col="blue")
-# LeafNPRatio_GDD5
-
-plot(CWM[,"SLA.mean"]~jitter(splot.bioclim$BIO_02[index11]),
-     cex=0.05, xlab="BIO_02", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
-model1 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_02[index11])
-summary(model1)
-'                                Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                    3.399e+00  1.448e-03  2348.2   <2e-16 ***
-splot.bioclim$BIO_02[index11] -6.253e-03  1.551e-05  -403.3   <2e-16 ***
-
-Residual standard error: 0.4034 on 1111305 degrees of freedom
-Multiple R-squared:  0.1277,	Adjusted R-squared:  0.1277 
-'
-abline(model1, lwd=3, col="blue")
-# SLA_BIO_02
-
-plot(CWM[,"LeafNperArea.mean"]~jitter(splot.bioclim$BIO_02[index11]),
-     cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per area)",cex.lab=1.5, cex.axis=1.5)
-model1 <- lm(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11])
-summary(model1)
-'                                Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                   -5.989e-02  8.475e-04  -70.67   <2e-16 ***
-splot.bioclim$BIO_02[index11]  3.696e-03  9.079e-06  407.09   <2e-16 ***
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Residual standard error: 0.2362 on 1111305 degrees of freedom
-Multiple R-squared:  0.1298,	Adjusted R-squared:  0.1298 
-'
-abline(model1, lwd=3, col="blue")
-# LeafNper_BIO_02
-
-##### FD calculations ####
-j <- 3
-nam <- DT$species[DT$PlotObservationID==j]
-abu <- DT$Relative.cover[DT$PlotObservationID==j]
-trait <- DT$trait[DT$PlotObservationID==j]
-abundance.weighted=T
-
-FD.fun <- function(trait, abu){
-  res <- as.double(NA)
-  if (length(trait[!is.na(trait)])>0){
-    res <- 0
-    #nam <- as.character(nam[!is.na(trait)])
-    abu <- as.data.frame(abu[!is.na(trait)])
-    #rownames(abu) <- nam
-    trait <- as.data.frame(trait[!is.na(trait)])
-    #rownames(trait) <- nam
-    dis <- dist(trait,method="euclidean")
-    if (sum(dis)>1){
-      sqrt.dis <- sqrt(dis)
-      # taking the square root is required to obtain the same result from the divc function
-      # as divc takes the square of distances values, as suggested by
-      # Champely & Chessel (2002, Env. Ecol. Stat. 9: 167-177)
-      #abu <- as.data.frame(abu)
-      res <- unlist(divc(abu, sqrt.dis))
-      #Rao's Q: sumi sumj (dist ij prop i prop j)'
-    } 
-  }
-  res
-}
-
-mpd.fun <- function(nam, trait, abu,abundance.weighted){
-  #res <- numeric(1) # carries result from the function
-  res <- as.double(NA)
-  if (length(trait[!is.na(trait)])>0){
-  #  dis <- as.matrix(dist(t(t1),method="euclidean"))   
-    abu <- t(abu)
-    colnames(abu) <- nam
-    trait <- t(trait)
-    colnames(trait) <- nam
-    #dis <- as.matrix(vegdist(t(t1),method="euclidean", na.rm=T))
-    dis <- as.matrix(dist(t(trait),method="euclidean"))
-    res <- mpd(abu, dis, abundance.weighted=abundance.weighted)
-  }
-  res
-}
-
-# from mpd source code
-'N <- dim(samp)[1]
-mpd <- numeric(N)
-for (i in 1:N) {
-  sppInSample <- names(samp[i, samp[i, ] > 0])
-  if (length(sppInSample) > 1) {
-    sample.dis <- dis[sppInSample, sppInSample]
-    if (abundance.weighted) {
-      sample.weights <- t(as.matrix(samp[i, sppInSample, 
-                                         drop = FALSE])) %*% as.matrix(samp[i, sppInSample, 
-                                                                            drop = FALSE])
-      mpd[i] <- weighted.mean(sample.dis, sample.weights)
-    }
-    else {
-      mpd[i] <- mean(sample.dis[lower.tri(sample.dis)])
-    }
-  }
-  else {
-    mpd[i] <- NA
-  }
-}
-mpd
-'
-# from Oliver, who used picante
-# does not accept missing values in the function
-library(picante)
-# define function for mean pairwise distance
-'mpd.fun.dt <- function(abu, dis, nam, abundance.weighted){
-  abu <- t(abu)
-  colnames(abu) <- nam
-  res <- mpd(abu, dis, abundance.weighted=abundance.weighted)
-}
-# define funtion of mean nearest neighbor distance
-mntd.fun.dt <- function(abu, dis, nam, abundance.weighted){
-  abu <- t(abu)
-  colnames(abu) <- nam
-  res <- mntd(abu, dis, abundance.weighted=abundance.weighted)
-  res
-}
-species.list <- unique(DT$species)
-any(is.na(species.list)) #T, there is one NA
-length(species.list) #60909
-species.list <- species.list[!is.na(species.list)]
-length(species.list) #60908
-index8 <- match(species.list,TRY.all.mean.sd.2$species)
-str(index8) # 60908
-length(index8[!is.na(index8)]) #21001
-#species.list2 <- TRY.all.mean.sd.2$species[index8]
-#species.list2 <- species.list2[!is.na(species.list2)]
-#length(species.list2) # 21001
-TRY.all.mean.sd.2.na <- TRY.all.mean.sd.2[index8,]
-str(TRY.all.mean.sd.2.na)
-TRY.all.mean.sd.2.na$species2 <- species.list
-rm(index9)
-'
-FD <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]],
-                                                 colnames(TRY.all.mean.sd.2)[3:20]))
-str(FD) #logi [1:1117898, 1:18] 
-'for (i in 1:18){
-  # create distance matrix
-  #trait.vec <- TRY.all.mean.sd.2.na[,i+2]
-  #names(trait.vec) <- TRY.all.mean.sd.2.na$species2
-  #trait.dist.mat <- as.matrix(dist(trait.vec))
-  # too large for my notebook
-  #mpd[,i] <-  DT[,list(mpd.trait= mpd.fun.dt(Relative.cover, trait.dist.mat, species, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
-  print(i)  
-  DT$trait <- NA
-  DT$trait <- TRY.all.mean.sd.2[index7,i+2]
-  FD[,i] <-  DT[,list(mpd.trait= mpd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
-}
-write.csv(FD,file = "FD2.csv", row.names = T)
-'
-for (i in 1:18){
-  # create distance matrix
-  #trait.vec <- TRY.all.mean.sd.2.na[,i+2]
-  #names(trait.vec) <- TRY.all.mean.sd.2.na$species2
-  #trait.dist.mat <- as.matrix(dist(trait.vec))
-  # too large for my notebook
-  #mpd[,i] <-  DT[,list(mpd.trait= mpd.fun.dt(Relative.cover, trait.dist.mat, species, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
-  print(i)  
-  DT$trait <- NA
-  DT$trait <- TRY.all.mean.sd.2[index7,i+2]
-  # recalculate relative cover for this trait
-  FD[,i] <-  DT[,list(FD.trait= FD.fun(trait, Relative.cover)),by=PlotObservationID]$FD.trait
-}
-write.csv(FD,file = "FD3.csv", row.names = T)
-
-FD <- read.csv("C://Daten//iDiv2//splot2//FD3.csv")
-str(FD)
-dimnames(FD)[[1]] <- FD$X
-FD <- FD[,-1]
-tail(FD)
-
-#### match FD to splot.header ####
-index12 <- match(dimnames(FD)[[1]],splot.header$PlotObservationID)
-length(index12) #1117898
-any(is.na(index12)) # F
-splot.header3 <- splot.header[index12,]
-str(FD) # num [1:1117898, 1:18]
-FD <- FD[!(is.na(splot.header3$Longitude)|is.na(splot.header3$Latitude)|is.na(FD[,"SLA.mean"])),]
-str(FD) # num [1:1111307 , 1:18]
-1111307 /1117898*100 # 99.41041 % of plots have traits, Long and Lat
-any(is.na(FD)) #F
-# match header again
-index13 <- match(dimnames(FD)[[1]],splot.header$PlotObservationID)
-length(index13) #1111307
-any(is.na(index13)) # F
-splot.header3 <- splot.header[index13,]
-# now in the same sequence as FD
-
-
-#### Collect all FD regressions ####
-names(splot.bioclim) # from 4 BIO_01 to 49 T_MAR
-index14 <- match(splot.header3$PlotObservationID,splot.bioclim$PLOT_ID)
-length(index14) # 1111307
-length(index14[!is.na(index14)]) # 1111307
-
-FD.bioclim <- array(NA,c(46,18,5),dimnames=list(names(splot.bioclim)[4:49],dimnames(FD)[[2]],
-                                      c("p_lin","r2_lin","p_qua1", "p_qua2","r2_qua")))
-for (i in 1:46){
-  for (j in 1:18){
-    model1 <- lm(FD[,j]~splot.bioclim[index14,i+3])
-    FD.bioclim[i,j,1] <- summary(model1)$coefficients[2,4]
-    FD.bioclim[i,j,2] <- summary(model1)$r.squared
-    model2 <- lm(FD[,j]~splot.bioclim[index14,i+3]+I(splot.bioclim[index14,i+3]^2))
-    FD.bioclim[i,j,3] <- summary(model2)$coefficients[2,4]
-    FD.bioclim[i,j,4] <- summary(model2)$coefficients[3,4]
-    FD.bioclim[i,j,5] <- summary(model2)$r.squared
-  }
-}
-str(FD.bioclim) #num [1:46, 1:18, 1:5]
-write.csv(data.frame(names(splot.bioclim)[4:49],FD.bioclim[,,2]),file = "FD2_bioclim_r2_lin.csv", row.names = FALSE)
-write.csv(data.frame(names(splot.bioclim)[4:49],FD.bioclim[,,5]),file = "FD2_bioclim_r2_qua.csv", row.names = FALSE)
-
-library(vegan)
-FD.pca1 <- rda(FD,scale=T)
-FD.pca1
-'Eigenvalues for unconstrained axes:
-PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
-8.597 1.876 1.378 0.886 0.790 0.780 0.583 0.551 
-(Showed only 8 of all 18 unconstrained eigenvalues)
-'
-plot(FD.pca1)
-summary(FD.pca1)
-#text(summary(FD.pca1)$species[,c(1,2)],dimnames(summary(FD.pca1)$species)[[1]])
-traitnames <- dimnames(summary(FD.pca1)$species)[[1]]
-traitnames <- substr(traitnames,1,nchar(traitnames)-5)
-traitcoord <- summary(FD.pca1)$species[,c(1,2)]
-which(traitnames=="Seed.length") #17
-traitcoord[17,2] <- traitcoord[17,2]+0.3
-which(traitnames=="LeafC.perdrymass") #12
-traitcoord[12,2] <- traitcoord[12,2]-0.4
-which(traitnames=="Leaf.delta.15N") #13
-traitcoord[13,2] <- traitcoord[13,2]+0.4
-which(traitnames=="LeafNPratio") #11
-traitcoord[11,2] <- traitcoord[11,2]-0.3
-
-plotcoord <- summary(FD.pca1)$sites[,c(1,2)]
-str(plotcoord) #num [1:1111307, 1:2]
-x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
-y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
-z.cut <- table(x.cut,y.cut)
-head(z.cut)
-dim(z.cut) #131 50
-z.cut <- melt(z.cut)
-str(z.cut) # 6550 obs. of  3 variables:
-head(z.cut)
-z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
-str(x.cut)
-length(x.cut) # 1111307
-xy <- paste(x.cut,y.cut,sep="_")
-index30 <- match(xy,z.cut$xy)
-length(index30)
-any(is.na(index30))
-head(log10(z.cut$value[index30]))
-values.present <- vector(mode="numeric", length=length(xy))
-min(log10(z.cut$value[index30]), na.rm=T) #0
-max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
-values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
-                                                          4, by=0.4))
-
-ordiplot(FD.pca1, type="n")
-#text(traitcoord,traitnames)
-points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
-                                                   gamma = 2.2, alpha = NULL))[values.cut])
-
-#arrows(rep(0,45),rep(0,45),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="cadetblue4", length=0.1)
-text(traitcoord,traitnames, col="red")
-# copied as first RDA graph into ppt
-
-env1 <- envfit(FD.pca1,splot.bioclim[index14,c(4:49)],permutations = 0)
-env1
-#with permutations = 999:
-#Error: cannot allocate vector of size 4.1 Gb
-plot(env1)
-png(filename="PCA1.png", width=1000, height=1000, units = "px", bg="white")
-ordiplot(pca1, type="n")
-text(traitcoord,traitnames)
-plot(env1)
-dev.off() # creates a file
-
-### Stepwise RDA ###
-### RDA ###
-FD.rda1 <- rda(FD~.,splot.bioclim[index14,c(4:49)],scale=T)
-FD.rda1
-FD.rda2 <- rda(FD ~ 1,splot.bioclim[index14,c(4:49)],scale=T)
-FD.rda3 <- ordistep(FD.rda2, scope=formula(FD.rda1), direction = "both", Pin = 0.1, 
-            permutations = how(nperm = 10), trace = TRUE)
-'Start: FD ~ 1 
-
-Df     AIC       F  Pr(>F)  
-+ BIO_14  1 3191679 20602.8 0.09091 .
-+ BIO_17  1 3191898 20379.5 0.09091 .
-+ BIO_12  1 3197070 15125.2 0.09091 .
-+ P_MAY   1 3198071 14110.8 0.09091 .
-+ P_APR   1 3198194 13986.4 0.09091 .
-+ BIO_18  1 3198767 13406.3 0.09091 .
-+ P_MAR   1 3199448 12717.1 0.09091 .
-+ P_FEB   1 3199451 12714.5 0.09091 .
-+ P_JUN   1 3199643 12519.9 0.09091 .
-+ BIO_05  1 3199804 12357.7 0.09091 .
-+ BIO_02  1 3200062 12096.5 0.09091 .
-+ P_NOV   1 3200602 11551.1 0.09091 .
-+ P_DEC   1 3200823 11328.1 0.09091 .
-+ P_JAN   1 3201335 10810.2 0.09091 .
-+ BIO_10  1 3202194  9943.1 0.09091 .
-+ T_JUL   1 3202226  9911.4 0.09091 .
-+ T_AUG   1 3202685  9448.0 0.09091 .
-+ GDD5    1 3202914  9217.7 0.09091 .
-+ T_JUN   1 3202934  9196.7 0.09091 .
-+ BIO_16  1 3203459  8668.3 0.09091 .
-+ BIO_19  1 3203685  8440.1 0.09091 .
-+ P_AUG   1 3204079  8043.8 0.09091 .
-+ BIO_13  1 3204383  7737.1 0.09091 .
-+ T_MAY   1 3204450  7669.7 0.09091 .
-+ P_OCT   1 3204755  7362.4 0.09091 .
-+ T_SEP   1 3204986  7130.0 0.09091 .
-+ P_SEP   1 3205174  6941.4 0.09091 .
-+ BIO_15  1 3205378  6735.8 0.09091 .
-+ P_JUL   1 3205600  6512.4 0.09091 .
-+ T_APR   1 3205746  6365.1 0.09091 .
-+ BIO_01  1 3205841  6270.3 0.09091 .
-+ BIO_03  1 3205882  6228.8 0.09091 .
-+ T_OCT   1 3206048  6062.0 0.09091 .
-+ AR      1 3206562  5545.3 0.09091 .
-+ BIO_07  1 3207063  5042.2 0.09091 .
-+ T_NOV   1 3207547  4555.5 0.09091 .
-+ T_MAR   1 3207659  4443.2 0.09091 .
-+ T_FEB   1 3208177  3923.3 0.09091 .
-+ T_DEC   1 3208260  3840.3 0.09091 .
-+ BIO_11  1 3208334  3765.7 0.09091 .
-+ T_JAN   1 3208465  3634.3 0.09091 .
-+ BIO_08  1 3208844  3254.4 0.09091 .
-+ BIO_06  1 3209374  2723.0 0.09091 .
-+ BIO_09  1 3209543  2553.3 0.09091 .
-+ PET     1 3209642  2454.5 0.09091 .
-+ BIO_04  1 3210074  2021.3 0.09091 .
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Step: FD ~ BIO_14 
-
-Df     AIC     F  Pr(>F)  
-- BIO_14  1 3212091 20603 0.09091 .
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Df     AIC       F  Pr(>F)  
-+ BIO_02  1 3184224 7481.80 0.09091 .
-+ BIO_03  1 3184435 7269.37 0.09091 .
-+ GDD5    1 3187255 4435.29 0.09091 .
-+ BIO_05  1 3187772 3915.70 0.09091 .
-+ T_JAN   1 3188026 3661.00 0.09091 .
-+ BIO_01  1 3188040 3647.18 0.09091 .
-+ T_FEB   1 3188048 3638.72 0.09091 .
-+ T_MAR   1 3188075 3612.29 0.09091 .
-+ T_APR   1 3188082 3605.14 0.09091 .
-+ BIO_11  1 3188088 3598.96 0.09091 .
-+ T_DEC   1 3188123 3563.70 0.09091 .
-+ T_NOV   1 3188208 3478.25 0.09091 .
-+ T_OCT   1 3188225 3461.84 0.09091 .
-+ T_MAY   1 3188357 3329.42 0.09091 .
-+ BIO_06  1 3188491 3194.91 0.09091 .
-+ BIO_10  1 3188609 3076.07 0.09091 .
-+ T_SEP   1 3188617 3068.49 0.09091 .
-+ BIO_13  1 3188664 3021.74 0.09091 .
-+ T_JUN   1 3188669 3016.26 0.09091 .
-+ BIO_16  1 3188709 2976.11 0.09091 .
-+ P_SEP   1 3188724 2961.09 0.09091 .
-+ BIO_07  1 3188728 2956.84 0.09091 .
-+ BIO_12  1 3188782 2902.48 0.09091 .
-+ T_AUG   1 3188788 2897.30 0.09091 .
-+ T_JUL   1 3188879 2805.50 0.09091 .
-+ P_AUG   1 3189181 2503.15 0.09091 .
-+ BIO_18  1 3189256 2427.84 0.09091 .
-+ BIO_04  1 3189277 2406.93 0.09091 .
-+ P_JUL   1 3189398 2285.76 0.09091 .
-+ BIO_15  1 3189605 2078.03 0.09091 .
-+ P_APR   1 3189650 2032.61 0.09091 .
-+ P_JUN   1 3189703 1979.86 0.09091 .
-+ BIO_09  1 3189912 1770.71 0.09091 .
-+ P_MAR   1 3189965 1717.19 0.09091 .
-+ PET     1 3189984 1698.34 0.09091 .
-+ AR      1 3190003 1678.99 0.09091 .
-+ P_MAY   1 3190005 1677.55 0.09091 .
-+ P_OCT   1 3190063 1619.65 0.09091 .
-+ P_FEB   1 3190490 1192.28 0.09091 .
-+ BIO_17  1 3190497 1184.39 0.09091 .
-+ BIO_19  1 3190555 1126.59 0.09091 .
-+ P_NOV   1 3190570 1111.68 0.09091 .
-+ BIO_08  1 3190582 1100.05 0.09091 .
-+ P_JAN   1 3190893  787.98 0.09091 .
-+ P_DEC   1 3190955  726.52 0.09091 .
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-
-Step: FD ~ BIO_14 + BIO_02 
-
-Df     AIC       F  Pr(>F)  
-- BIO_02  1 3191679  7481.8 0.09091 .
-- BIO_14  1 3200062 15953.2 0.09091 .
----
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1'
-
-FD.rda4 <- rda(FD ~ BIO_14,splot.bioclim[index14,c(4:49)],scale=T)
-FD.rda4
-'              Inertia Proportion Rank
-Total         18.0000     1.0000     
-Constrained    0.3276     0.0182    1
-Unconstrained 17.6724     0.9818   18
-Inertia is correlations 
-
-Eigenvalues for constrained axes:
-RDA1 
-0.3276 
-
-Eigenvalues for unconstrained axes:
-PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
-8.307 1.850 1.378 0.884 0.789 0.780 0.578 0.551 
-'
-traitnames <- dimnames(summary(FD.rda4)$species)[[1]]
-traitnames <- substr(traitnames,1,nchar(traitnames)-5)
-traitcoord <- summary(FD.rda4)$species[,c(1,2)]
-bioclimcoord <- summary(FD.rda4)$biplot[,c(1,2)]
-plotcoord <- summary(FD.rda4)$sites[,c(1,2)]
-str(plotcoord) #num [1:1111307, 1:2]
-x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
-y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
-z.cut <- table(x.cut,y.cut)
-head(z.cut)
-dim(z.cut) #131 50
-z.cut <- melt(z.cut)
-str(z.cut) # 6550 obs. of  3 variables:
-head(z.cut)
-z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
-str(x.cut)
-length(x.cut) # 1111307
-xy <- paste(x.cut,y.cut,sep="_")
-index30 <- match(xy,z.cut$xy)
-length(index30)
-any(is.na(index30))
-head(log10(z.cut$value[index30]))
-values.present <- vector(mode="numeric", length=length(xy))
-min(log10(z.cut$value[index30]), na.rm=T) #0
-max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
-values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
-                                                          4, by=0.4))
-
-ordiplot(FD.rda4, type="n", xlim=c(-10,10),ylim=c(-10,12))
-points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
-                                                   gamma = 2.2, alpha = NULL))[values.cut])
-arrows(rep(0,1),rep(0,1),bioclimcoord[1]*8,bioclimcoord[2]*8, col="blue", length=0.1)
-text(bioclimcoord[1]*8,bioclimcoord[2]*8+0.5,"BIO_14", col="blue", cex=1)
-text(traitcoord,traitnames, col="red")
-# copied as third RDA graph into ppt
-
-FD.rda5 <- rda(FD ~ BIO_14+BIO_02,splot.bioclim[index14,c(4:49)],scale=T)
-FD.rda5
-'
-Inertia Proportion Rank
-Total         18.00000    1.00000     
-Constrained    0.44582    0.02477    2
-Unconstrained 17.55418    0.97523   18
-Inertia is correlations 
-
-Eigenvalues for constrained axes:
-RDA1   RDA2 
-0.3641 0.0817 
-
-Eigenvalues for unconstrained axes:
-PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
-8.267 1.816 1.377 0.880 0.788 0.780 0.568 0.530 '
-traitnames <- dimnames(summary(FD.rda5)$species)[[1]]
-traitnames <- substr(traitnames,1,nchar(traitnames)-5)
-traitcoord <- summary(FD.rda5)$species[,c(1,2)]
-bioclimcoord <- summary(FD.rda5)$biplot[,c(1,2)]
-
-plotcoord <- summary(FD.rda5)$sites[,c(1,2)]
-str(plotcoord) #num [1:1111307, 1:2]
-x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
-y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
-z.cut <- table(x.cut,y.cut)
-head(z.cut)
-dim(z.cut) #131 50
-z.cut <- melt(z.cut)
-str(z.cut) # 6550 obs. of  3 variables:
-head(z.cut)
-z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
-str(x.cut)
-length(x.cut) # 1111307
-xy <- paste(x.cut,y.cut,sep="_")
-index30 <- match(xy,z.cut$xy)
-length(index30)
-any(is.na(index30))
-head(log10(z.cut$value[index30]))
-values.present <- vector(mode="numeric", length=length(xy))
-min(log10(z.cut$value[index30]), na.rm=T) #0
-max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
-values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
-                                                          4, by=0.4))
-
-ordiplot(FD.rda5, type="n",  xlim=c(-10,10),ylim=c(-10,12))
-points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
-                                                   gamma = 2.2, alpha = NULL))[values.cut])
-arrows(rep(0,2),rep(0,2),bioclimcoord[,1]*4,bioclimcoord[,2]*4, col="blue", length=0.1)
-text(bioclimcoord*4,dimnames(bioclimcoord)[[1]], col="blue", cex=1)
-text(traitcoord,traitnames, col="red")
-# copied as fourth RDA graph into ppt
-
-plot(FD[,"Seed.length.mean"]~splot.bioclim$BIO_14[index14],
-     cex=0.05, xlab="BIO_14", ylab="FD ln(seed length)",cex.lab=1.5, cex.axis=1.5)
-model1 <- lm(FD[,"Seed.length.mean"]~splot.bioclim$BIO_14[index14])
-summary(model1)
-'                               Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                   2.584e-01  3.193e-04   809.1   <2e-16 ***
-splot.bioclim$BIO_14[index14] 1.407e-04  6.813e-07   206.5   <2e-16 ***
-Multiple R-squared:  0.03697,	Adjusted R-squared:  0.03697 
-'
-abline(model1, lwd=3, col="blue")
-# 
-
-plot(FD[,"Seed.num.rep.unit.mean"]~splot.bioclim$BIO_02[index14],
-     cex=0.05, xlab="BIO_02", ylab="FD ln(Seed number of reproductive unit)",cex.lab=1.5, cex.axis=1.5)
-model1 <- lm(FD[,"Seed.num.rep.unit.mean"]~splot.bioclim$BIO_02[index14])
-summary(model1)
-'                                 Estimate Std. Error t value Pr(>|t|)    
-(Intercept)                    1.251e+00  1.799e-03   695.3   <2e-16 ***
-splot.bioclim$BIO_02[index14] -3.751e-03  1.927e-05  -194.6   <2e-16 ***
-Multiple R-squared:  0.03296,	Adjusted R-squared:  0.03296  '
-abline(model1, lwd=3, col="blue")
-
-
-
-
-
-
-
-
-
-
-
-
-# mpd presence-absence
-system.time(
-  mpd.sla.no.abu <-  DT[,list(mpd.sla = mntd.fun.dt(Cover.., SLA.dist.mat, StandSpeciesName, abundance.weighted=FALSE)),by=PlotObservationID]
-)
-
-
-###########
-levels(splot.header2$COUNTRY_CO)
-levels(splot.header$Country)
-CWM2$Country <- splot.header$Country[index12]
-data(countryExData)
-str(countryExData)
-sPDF <- joinCountryData2Map(splot.header, joinCode = "NAME",nameJoinColumn = "Country")
-?mapCountryData
-#mapCountryData(sPDF, nameColumnToPlot="BIODIVERSITY" )
-
-#### Climatic space, from Jonathon Lenoir ####
-library(fBasics)
-# Color palette
-
-# Import bio1 (MAT) of the world
-bio <- read.table(paste("/home/helge/sPlot/Bioclim/", "BIO.txt", sep = ""),sep=";", header=T)
-str(bio) #9033453 obs. of  22 variables
-r <- raster(nrows=33, ncols=33, xmn=min(bio$BIO_01), xmx=max(bio$BIO_01), ymn=min(bio$BIO_12), ymx=max(bio$BIO_12))
-mattar <- data.frame(bio$BIO_01,bio$BIO_12)
-str(mattar)
-names(mattar) <- c("bio1","bio12")
-mattar_r <- rasterize(mattar, r, fun="count")
-str(mattar_r)
-plot(log10(mattar_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
-     main="N 2.5 arc-minutes cells per bin (log10 scale)", cex.main=0.7,
-     xlab="MAT (°C*10)",  ylab="TAR (mm)")
-# Bio1_Bio12_world.png
-
-#plot(mattar_r/max(getValues(mattar_r), na.rm=TRUE), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), main="Relative proportion of 10 arc-minutes cells per bin", xlab="MAR (°C*10)", ylab="TAR (mm)")
-# Plot the sampling effort of sPlot across the MAT-TAR climatic space
-str(splot.bioclim)
-# Make it a spatial data frame
-CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
-splot.bioclim.coords <- cbind(splot.bioclim$X, splot.bioclim$Y)
-splot.bioclim.coords <- SpatialPoints(splot.bioclim.coords, proj4string=CRSlonlat)
-splot.bioclim2 <- data.frame(splot.bioclim$BIO_01,splot.bioclim$BIO_12)
-names(splot.bioclim2) <- c("bio1","bio12")
-#splot.bioclim2 <- SpatialPointsDataFrame(splot.bioclim.coords, splot.bioclim2, proj4string=CRSlonlat)
-#str(splot.bioclim2)
-#sPlot_mattar <- extract(bio1, coordinates(sPlot))
-#sPlot_mattar <- as.data.frame(sPlot_mattar)
-#names(sPlot_mattar) <- c("bio1")
-#sPlot_mattar$bio12 <- extract(bio12, coordinates(sPlot))
-sPlot_mattar_r <- rasterize(splot.bioclim2, r, fun="count")
-str(sPlot_mattar_r)
-plot(mattar_r, asp=0, col="grey", legend=FALSE, main="N vegetation plots per bin (log10 scale)",
-     xlab="MAT (°C)", ylab="TAR (mm)")
-# Bio1_Bio12_world_grey.png
-plot(log10(sPlot_mattar_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
-     add=TRUE)
-# Bio1_Bio12_sPlot_SLA.png
-
-# Build the PC1-PC2 climatic space
-load("/home/helge/sPlot/Bioclim/pca_data.RData")
-str(pca.data)
-r <- raster(nrows=33, ncols=33, xmn=min(pca.data$PC1), xmx=max(pca.data$PC1), ymn=min(pca.data$PC2), ymx=max(pca.data$PC2))
-pca_r <- rasterize(pca.data[, c("PC1", "PC2")], r, fun="count")
-plot(log10(pca_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
-     main="N 2.5 arc-minutes cells per bin (log10 scale)", 
-     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
-plot(pca_r/max(getValues(pca_r), na.rm=TRUE), asp=0, 
-     col=rev(divPalette(n=100, name=c("RdYlBu"))), 
-     main="Relative proportion of 2.5 arc-minutes cells per bin", 
-     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
-# Plot the sampling effort of sPlot across the PC1-PC2 climatic space
-index13 <- match(splot.bioclim$RAST_ID,pca.data$RAST_ID)
-sPlot_pca <- pca.data[index13, c("PC1", "PC2")]
-sPlot_pca_r <- rasterize(sPlot_pca, r, fun="count")
-plot(pca_r, asp=0, col="grey", legend=FALSE, 
-     main="N vegetation plots per bin (log10 scale)", 
-     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
-plot(log10(sPlot_pca_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
-     add=TRUE)
-
-#### For Marten Winter ####
-# species x country Matrix
-species.country <- table(DT$species,splot.header$Country[index30])
-str(species.country) # int [1:60908, 1:130]
-names(dimnames(species.country))
-names(dimnames(species.country)) <- c("species","country")
-
-library(reshape2)
-?melt.table
-species.country2 <- melt(species.country)
-str(species.country2) # 	188470 obs. of  3 variables:
-names(species.country2)
-names(species.country2)[3] <- "p_a"
-table(species.country2$p_a)
-
-species.country2$p_a[species.country2$p_a>0] <- 1
-species.country2 <- species.country2[species.country2$p_a==1,]
-str(species.country2) # 	188470 obs. of  3 variables:
-species.country2 <- species.country2[,-3]
-str(species.country2) # 188470
-write.csv(species.country2,file = 
-            paste("/home/oliver/shared/", "species.country2.csv", sep = ""), row.names = FALSE)
-levels(species.country2$country) # 130
-length(species.country2$country[species.country2$country=="Algeria"])
-# 97
-sum(species.country[,2]) 
-# 799, this is the amount of records for DZA = Algeria
-sum(species.country[,126]) 
-# 1467200, sum of records for United States
-
-TaxaNat <- read.csv(paste("/home/oliver/shared/Glonaf/", "TaxaNat.csv", sep = ""))
-str(TaxaNat) #178779 obs. of  2
-
-GlonafRegion <- read.csv(paste("/home/oliver/shared/Glonaf/", "GlonafRegion.csv", sep = ""))
-str(GlonafRegion) #843 obs. of  10
-trim.trailing <- function (x) sub("\\s+$", "", x)
-# returns string w/o leading or trailing whitespace
-GlonafRegion$Country_sPlot <- as.factor(trim.trailing(as.character(GlonafRegion$Country_sPlot)))
-index14 <- match(levels(species.country2$country),levels(GlonafRegion$Country_sPlot))
-index14
-levels(GlonafRegion$Country)
-
-data.frame(levels(species.country2$country),levels(GlonafRegion$Country_sPlot)[index14])
-## all correct
-
-index15 <- match(levels(GlonafRegion$Country_sPlot),GlonafRegion$Country_sPlot)
-index15
-index16 <- match(species.country2$country,levels(species.country2$country))
-any(is.na(index16)) # F
-length(levels(species.country2$species)) #60908
-'species.country2$species.country <- paste(species.country2$species,
-levels(species.country2$country)[index16],sep="_")
-species.country2$species.country <- paste(species.country2$species,
-levels(GlonafRegion$Country_sPlot)[index14][index16],sep="_")
-species.country2$species.country <- paste(species.country2$species,
-GlonafRegion$Country_sPlot[index15][index14][index16],sep="_")
-'
-species.country2$species.country <- paste(species.country2$species,
-                                          GlonafRegion$ISO.country[index15][index14][index16],sep="_")
-str(species.country2) # 188470 obs. of  3 variables:
-head(species.country2)
-species.country2[c(160000:160100),]
-## fits!
-
-index17 <- match(TaxaNat$Region,GlonafRegion$ID.region)
-any(is.na(index17)) # F
-
-TaxaNat$species.country <-paste(TaxaNat$Taxon2,GlonafRegion$ISO.country[index17], sep="_")
-
-index18 <- match(species.country2$species.country,TaxaNat$species.country)
-length(index18) #188470
-length(index18[!is.na(index18)]) # 9283 out of 188470 species x country occurrences
-# are alien occurrences
-9283/188470*100 # 4.925452 % species x country occurrences
-species.country2$alien <- F
-species.country2$alien[!is.na(index18)] <- T
-species.country2$ISO.country <- GlonafRegion$ISO.country[index15][index14][index16]
-str(species.country2)
-head(species.country2)
-length(species.country2$ISO.country[species.country2$ISO.country=="DZA"])
-# 12939, this cannot be
-#species.country2$ISO.country[species.country2$ISO.country=="DZA"]
-sum(species.country[,2]) 
-# 799, this is the amount of records for DZA = Algeria
-length(species.country2$ISO.country[species.country2$ISO.country=="DZA"
-                                    & !is.na(species.country2$ISO.country)])
-# 97!
-
-#table(species.country2$ISO.country)
-#species.country3 <- acast(species.country2, species~ISO.country, length)
-length(species.country2$alien[species.country2$alien])
-# 9283 alien records in sPlot
-length(species.country2$alien[species.country2$alien & species.country2$ISO.country=="DZA"])
-# 3 alien records in DZA = Algeria
-length(species.country[species.country[,2]>0,2])
-# 97, however there are only 97 records in total
-length(species.country2$alien[species.country2$alien & species.country2$ISO.country=="USA"])
-# 1911 alien species in USA
-length(species.country[species.country[,126]>0,2])
-# 9933 species in total in USA
-alien.richness <- as.matrix(table(species.country2$ISO.country,species.country2$alien))[,2]
-
-species.country3 <- species.country
-species.country3[species.country3>0] <- 1
-country.richness <- colSums(species.country3)
-country.richness
-names(country.richness)
-data.frame(names(country.richness),GlonafRegion$ISO.country[index15][index14])
-country.richness2 <- data.frame(GlonafRegion$ISO.country[index15][index14],as.numeric(country.richness))
-names(country.richness2) <- c("ISO.country","richness")
-country.richness2$ISO.country
-country.richness2
-
-index19 <- match(country.richness2$ISO.country,names(alien.richness))
-index19
-alien.richness
-country.richness2$alien.richness <- alien.richness[index19]
-country.richness2
-country.richness2$alien.prop <- country.richness2$alien.richness/country.richness2$richness
-country.richness2$alien.prop
-
-sPDF <- joinCountryData2Map(country.richness2,joinCode = "ISO3",
-                            nameJoinColumn = "ISO.country")
-'112 codes from your data successfully matched countries in the map
-18 codes from your data failed to match with a country code in the map
-132 codes from the map werent represented in your data
-'
-mapParams <- mapCountryData(sPDF, nameColumnToPlot="richness",addLegend=FALSE)
-do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
-mapParams <- mapCountryData(sPDF, nameColumnToPlot="richness",
-                            catMethod=seq(0,10000,by=1000),addLegend=FALSE)
-do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
-
-mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.richness",
-                            catMethod=seq(0,2000,by=200),addLegend=FALSE)
-do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
-
-max(country.richness2$alien.prop, na.rm=T) #0.2823779
-mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.prop",addLegend=FALSE)
-mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.prop",
-                            catMethod=seq(0,0.3,by=0.02),addLegend=FALSE)
-do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
-
-
-
-
-##################### OLD ###################
-library(spdep)
-library(maptools)
-library(hexbin)
-library(RColorBrewer)
-# Use a lighter version of sPlot focusing on mean annual temperature MAT (BIO1)
-# and total annual rainfall TAR (BIO12)
-sPlot <- splot.header[, c("LONGITUDE", "LATITUDE", "BIO_1", "BIO_12")]
-sPlot$LONGITUDE <- as.numeric(as.character(sPlot$LONGITUDE))
-sPlot$LATITUDE <- as.numeric(as.character(sPlot$LATITUDE))
-sPlot$BIO_1 <- as.numeric(sPlot$BIO_1)/10
-sPlot$BIO_12 <- as.numeric(sPlot$BIO_12)
-str(sPlot)
-# Make it a spatial data frame
-CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
-coords <- cbind(sPlot$LONGITUDE, sPlot$LATITUDE)
-coords <- SpatialPoints(coords, proj4string=CRSlonlat)
-sPlot <- SpatialPointsDataFrame(coords, sPlot, proj4string=CRSlonlat)
-str(sPlot)
-# Plot the data on the world map to check the coordinates and projection
-data(wrld_simpl)
-plot(wrld_simpl)
-points(coordinates(sPlot), cex=0.1, col="red", pch=16)
-# Set a color ramp
-rf <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
-# Plot the plot distribution within the MAT-TAR bidimentionnal climatic space
-hexbinplot(BIO_12~BIO_1, data=sPlot@data, colramp=rf, trans=log, inv=exp,
-           border="white", xlab="MAT", ylab="TAR", xbins=30)
-# Display only bins with a minimum of 10 plots and a maximum of 1000 plots
-hexbinplot(BIO_12~BIO_1, data=sPlot@data, colramp=rf, trans=log, inv=exp,
-           border="white", xlab="MAT", ylab="TAR", xbins=30, mincnt=10, maxcnt=1000)
-#
-
-
-
-
-
-
-
-
-################ OLD ###################
-
-###
-load("C:\\Daten\\iDiv\\sPlot\\pca_data.Rdata")
-str(pca.data)
-any(is.na(pca.data$PC1)) #F
-
-load("C:\\Daten\\iDiv\\sPlot\\sPlot_header_pca.RData")
-str(splot.header2)
-any(is.na(pca.data$PC1)) #F
-
-library(raster)
-r <- 
-  splot_pca_r <- rasterize(splot_pca, r, run="count")
-plot(splot_pca_r, asp=0, col=rev(divPalette(n=100, name="RdY1Bu"))))
-plot(splot_r, asp=0, col=rev(divPalette(n=100, name="RdY1Bu"))))
-
-index <- which(getValues(splot_pca_r)>0)
-which(getValues(pca_r)>0)
-poinra
-getValues(splot_pca_r)[index] # 136 values
-?sample
-splo_pca$binid <- cellFromXY(pca_r, splot_pca)
-
-
-###########################
-load("C:\\Daten\\iDiv\\sPlot\\splot_pca2.Rdata")
-str(splot_pca)
-splot_pca <- data.table(splot_pca)
-setkey(splot_pca,binid)
-inter <- splot_pca[,list(count=length(PCA1)), by=binid]
-splot_pca2 <- splot_pca[,list(PCA1, PCA2,count=length(PCA1)), by=binid]
-
-str(splot_pca2)
-hist(splot_pca2$binid)
-inter2 <- splot_pca2[count>100,list(count=sum(PCA1)), by=binid]
-str(inter2)
-[,list(total.cover=sum(Cover..)), by=PlotObservationID]
-<- DT[,list(total.cover=sum(Cover..)), by=PlotObservationID]
-
-sample(splot_pca$PCA1,100)
-splot_pca2 <-  splot_pca[table(splot_pca$binid)>100,sample(PCA1,100),by=binid]
-str(splot_pca2)
-
-resample <- function(PCA1_,PCA2_,binid_){
-  if (length(PCA1)>100) {
-    
-  }
-  else {
-    1
-  }
-  ]index10 <- match(species,labels(d))
-  d3 <- as.dist(dist[index10,index10])
-  if (sum(d3)==0){
-    NA
-  }
-  divc(as.data.frame(rel.cover),d3)$diversity
-}
-splot_pca2 <-  splot_pca[length(PCA1)<=100,res_ID := 1]
-head(splot_pca2,100)
-table(splot_pca2$binid,splot_pca2$count, exclude=NULL)
-table(splot_pca$binid)
-splot_pca2 <-  splot_pca[,res_ID := 0]
-splot_pca2 <-  splot_pca[table(splot_pca2$binid)<=100,res_ID := 1]
-splot.selection <-  splot_pca[,list(res = resample(PCA1,PCA2,binid)),by=binid]
-#length(splot_pca[binid>100])
-hist(splot_pca$binid)
-splot.selection <-  splot_pca[,list(length(binid)),by=binid]
-splot.selection <-  splot_pca[,list(count=length(PCA1)),by=binid]
-splot.selection <-  splot_pca[,,by=binid]
-hist(splot.selection$count)
-str(splot.selection)
-str(splot_pca2)
-splot.selection <-  splot_pca[,list(nrow()),by=binid]
-splot.selection <-  splot_pca[,count:=length(binid),by=binid]
-length(splot_pca,by=binid)
-########################################################################################
-
-### CWM ###
-#CWM1 <-  DT[!is.na(plot.species.trait.value),list(CWM = weighted.mean(plot.species.trait.value,Relative.cover)),by=PlotObservationID]
-CWM1 <-  DT[,list(CWM = weighted.mean(plot.species.trait.value,Relative.cover)),by=PlotObservationID]
-# see http://stackoverflow.com/questions/3367190/aggregate-and-weighted-mean-in-r
-CWM1[1:100,]
-hist(CWM1$CWM)
-write.csv((x, file = paste(path.sPlot, "CWM_SLA_all.csv", sep = ""), row.names = FALSE))
-
-### FD ###
-library(ade4)    # f?r divc (Rao's Q)
-# this does not work: ~50 GB RAM required for 40k x 40k species matrix:
-'ktab1 <- ktab.list.df(list(as.data.frame(species.trait.value[!is.na(species.trait.value)])))
-d <- dist.ktab(ktab1, type=c("Q"), option = "scaledBYrange")
-#labels(d)
-#str(d)
-#attr(d,"Labels")
-attr(d,"Labels") <- dimnames(species.trait.value)[[1]][!is.na(species.trait.value)]
-labels(d)
-any(is.na(d)) # F
-dist <- as.matrix(d)
-
-#tree.trait.weighted <- t(t(veg.KHG3.prop)  * all.traits.KHG[,j])
-#dist.sqrt <- sqrt(dist)
-d.sqrt <- sqrt(d)
-labels(d.sqrt)
-dist <- as.matrix(d.sqrt)
-'
-'Rao <- function(rel.cover,species){
-index10 <- match(species,labels(d))
-d3 <- as.dist(dist[index10,index10])
-if (sum(d3)==0){
-NA
-}
-divc(as.data.frame(rel.cover),d3)$diversity
-}
-FD.all <-  DT[!is.na(plot.species.trait.value),list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]
-'
-#species.trait.value.scaled <- species.trait.value/(max(species.trait.value)-min(species.trait.value))
-species.trait.value.scaled <- TRY.mean.sd.splot$SLA.mean/(max(TRY.mean.sd.splot$SLA.mean)-min(TRY.mean.sd.splot$SLA.mean))
-
-# scale by range, which saves time for distance calculations
-
-library(reshape2)
-
-package.size = 1000
-#number.plots.total <- length(levels(DT[!is.na(plot.species.trait.value)]$PlotObservationID)) # 705272
-number.plots.total <- length(levels(DT$PlotObservationID)) # 701611 vs. 705272
-FD.all <- array(0,number.plots.total, dimnames=list(levels(DT$PlotObservationID)))
-number.packages <-ceiling(number.plots.total/package.size) #702
-pb <- winProgressBar(title = "progress bar", min = 0, max =number.packages, width = 300)
-for (k in 1:number.packages) {
-  setWinProgressBar(pb, k, title=paste("Row ",k,"of ", number.packages, " is processed"))
-  ifelse (k==number.packages, package<-number.plots.total%%package.size, package <- package.size)
-  #index3 <- match(DT[!is.na(plot.species.trait.value)]$PlotObservationID,levels(DT[!is.na(plot.species.trait.value)]$PlotObservationID)[seq((k-1)*package.size+1,k*package.size,1)])
-  index3 <- match(DT$PlotObservationID,levels(DT$PlotObservationID)[seq((k-1)*package.size+1,k*package.size,1)])
-  #DT2 <- DT[!is.na(plot.species.trait.value) & !is.na(index3)]
-  #DT2 <- DT2[!is.na(plot.species.trait.value)]
-  #length(index3[!is.na(index3)])
-  DT2 <- DT[!is.na(index3)]
-  #str(DT2)
-  #veg.matrix1 <- acast(DT2, Matched.concept ~ PlotObservationID, value.var="Relative.cover", fill=0)  
-  veg.matrix1 <- acast(DT2, names.stand ~ PlotObservationID, value.var="Relative.cover", fun.aggregate=sum, fill=0)  
-  # fun.aggregate = sum -> sums up multiple entries per species!
-  # this can happen with the unified species nomenclature!
-  #length(unique(dimnames(veg.matrix1)[[2]]))# 1000
-  #length(unique(DT2$PlotObservationID)) # 1000
-  #veg.matrix1[veg.matrix1 > 0] <- 1
-  #any(colSums(veg.matrix1)!=1) #F
-  index4 <- match(dimnames(veg.matrix1)[[1]],species.list)
-  species.trait.value.scaled2 <- species.trait.value.scaled[index4]
-  species.list2 <- species.list[index4]
-  ktab1 <- ktab.list.df(list(as.data.frame(species.trait.value.scaled2)))
-  d <- dist.ktab(ktab1, type=c("Q"), option = "noscale")
-  #labels(d)
-  #str(d)
-  #attr(d,"Labels")
-  attr(d,"Labels") <- species.list2
-  #labels(d)
-  #any(is.na(d)) # F
-  #dist <- as.matrix(d)
-  #as.matrix(d)[1:8,1:8]
-  d <- sqrt(d)
-  #dist <- as.matrix(d)
-  #rm(d)
-  if (k<number.packages){
-    FD.all[seq((k-1)*package.size+1,k*package.size,1)] <- 
-      divc(as.data.frame(veg.matrix1),d)$diversity
-    #DT2[,list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]$Rao.Q
-    any(colSums(veg.matrix1)==0) # F o.K.
-    any(colSums(veg.matrix1)!=1) # T ups....
-    hist(colSums(veg.matrix1))
-    any(rowSums(veg.matrix1)==0) # F o.K.
-  } else {
-    FD.all[seq((k-1)*package.size+1,number.plots.total,1)] <- 
-      divc(as.data.frame(veg.matrix1),d)$diversity
-    #DT2[,list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]
-  }
-}
-close(pb)
-#FD.all
-write.csv(FD.all, file = paste(path.sPlot, "FDall.csv", sep = ""), row.names = FALSE)
-
-- 
GitLab