Trait-Environment-Relationships1.R

getwd()
###### Traits #####
load("data/TRY.all.mean.sd.2.Rdata")
str(TRY.all.mean.sd.2)
# 40791 obs. of  38 variables
# tests

plot(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2)
#summary(lm(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2))
#abline(lm(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2),col="blue", lwd=2)
#sqrt(0.3415)

###### Backbone #####
load("data/backbone.v.2.splot.try3.Rdata")
# alternatively:
#sPlot3 <- read.csv(paste("data/", "backbone.v.2.splot.try3.csv", 
#                         sep = ""), sep=",", quote = "\"'")
str(backbone.splot.try3) 
# data.frame':	122901 obs. of  33 variables:

###### Harmonized plot data ######
DT <- read.csv("data/plots_without_unified_cover_scales_20160120a.csv")
library(data.table)
DT <- data.table(DT)
colnames(DT)
colnames(DT)[9] <- "Cover"
setkey(DT,PlotObservationID)

###### Header data #######
splot.header <- read.csv("data/sPlot_14_04_2015_coord.csv")
str(splot.header)
'data.frame:	1117940 obs. of  3 variables:
$ PlotObservationID: int  1 2 3 4 5 6 7 8 9 10 ...
$ Longitude        : num  -154 -154 -154 -154 -154 ...
$ Latitude         : num  62.4 62.4 62.4 62.4 62.4 ...'

##### Bioclim data ####
splot.bioclim <- read.csv(paste("data/","sPlot.csv", sep = ""), sep = ";", na.strings=c("","NA"))
str(splot.bioclim)
#data.frame':	1117382 obs. of  52 variables:

####################
##### Matching #####
index6 <- match(TRY.all.mean.sd.2$StandSpeciesName,backbone.splot.try3$names.sPlot.TRY)
any(is.na(index6)) #T
# not all species in the trait list are also in the backbone
TRY.all.mean.sd.2$StandSpeciesName[is.na(index6)]
# 3959 species in TRY are not in the backbone

### data cleaning ###
length(TRY.all.mean.sd.2$StandSpeciesName[is.na(TRY.all.mean.sd.2$StandSpeciesName)])
# 1 record is NA
TRY.all.mean.sd.2[which(is.na(TRY.all.mean.sd.2$StandSpeciesName)),]
# record no. 7947 has StandSpeciesName = NA
TRY.all.mean.sd.2 <- TRY.all.mean.sd.2[-which(is.na(TRY.all.mean.sd.2$StandSpeciesName)),]
str(TRY.all.mean.sd.2) #40790 obs. of  38 variables
# 1 record less
'data.frame:	40790 obs. of  38 variables:
$ StandSpeciesName       : Factor w/ 40790 levels "Abarema adenophora",..: 965 3066 3128 3445 4239 7449 7467 7829 8665 12419 ...
$ n                      : int  23 32 15 8 21 32 20 31 7 8 ...
$ SLA.mean               : num  1.42 1.83 1.92 2.7 2.31 ...
$ PlantHeight.mean       : num  1.282 2.92 0.585 0.488 1.1 ...
$ SeedMass.mean          : num  0.0656 2.7757 2.6427 -2.6794 -2.4111 ...
$ LDMC.mean              : num  -0.765 -1.367 -1.009 -1.228 -0.963 ...
$ StemDens.mean          : num  -0.298 -0.524 -0.322 -0.69 -0.592 ...
$ LeafArea.mean          : num  3.39 8.36 5.26 3.28 5.16 ...
$ LeafN.mean             : num  2.33 2.5 2.21 2.95 3.17 ...
$ LeafP.mean             : num  0.118 0.074 -0.127 0.792 0.421 ...
$ LeafNperArea.mean      : num  0.692 0.485 0.381 0.452 0.709 ...
$ Leaffreshmass.mean     : num  -2.325 0.112 -1.916 -3.777 -2.714 ...
$ LeafNPratio.mean       : num  2.23 2.32 2.24 2.14 2.81 ...
$ LeafC.perdrymass.mean  : num  6.14 6.25 6.31 6.1 6.12 ...
$ Leaf.delta.15N.mean    : num  2.68 2.44 2.32 2.78 2.72 ...
$ Stem.cond.dens.mean    : num  5.44 4.82 5.44 6.41 6.69 ...
$ Seed.num.rep.unit.mean : num  4.57 4.34 1.48 9.87 8.48 ...
$ Wood.vessel.length.mean: num  6.32 6.17 6.18 6.13 6.16 ...
$ Seed.length.mean       : num  1.18 1.45 1.78 0.16 1.21 ...
$ Disp.unit.leng.mean    : num  1.173 2.119 2.064 0.039 1.287 ...
$ SLA.sd                 : num  0.1105 0.1754 0.3161 0.0694 0.0596 ...
$ PlantHeight.sd         : num  0.185 0.347 1.296 0.274 0.137 ...
$ SeedMass.sd            : num  0.288 1.016 0.234 0.201 0.317 ...
$ LDMC.sd                : num  0.038 0.0469 0.0449 0.0433 0.0321 ...
$ StemDens.sd            : num  0.0629 0.0582 0.0511 0.0787 0.0635 ...
$ LeafArea.sd            : num  0.876 0.235 1.89 0.135 0.196 ...
$ LeafN.sd               : num  0.0857 0.0902 0.1229 0.0493 0.0443 ...
$ LeafP.sd               : num  0.2173 0.0974 0.142 0.0356 0.0611 ...
$ LeafNperArea.sd        : num  0.0983 0.0916 0.065 0.0425 0.053 ...
$ Leaffreshmass.sd       : num  0.454 0.189 0.776 0.116 0.144 ...
$ LeafNPratio.sd         : num  0.1921 0.0522 0.1184 0.032 0.0477 ...
$ LeafC.perdrymass.sd    : num  0.00741 0.00767 0.00706 0.00634 0.00469 ...
$ Leaf.delta.15N.sd      : num  0.0363 0.0376 0.0382 0.0296 0.0198 ...
$ Stem.cond.dens.sd      : num  0.0996 0.3641 0.2232 0.1064 0.1064 ...
$ Seed.num.rep.unit.sd   : num  0.319 0.51 0.684 0.295 0.408 ...
$ Wood.vessel.length.sd  : num  0.0435 0.0518 0.0673 0.0392 0.0291 ...
$ Seed.length.sd         : num  0.0508 0.0744 0.0992 0.0817 0.0651 ...
$ Disp.unit.leng.sd      : num  0.0721 0.1367 0.1409 0.0423 0.0754 ...
'

index6 <- match(TRY.all.mean.sd.2$StandSpeciesName,backbone.splot.try3$names.sPlot.TRY)
any(is.na(index6)) #T
TRY.all.mean.sd.2$StandSpeciesName[is.na(index6)]
# 3958 species
TRY.all.mean.sd.2$species <- backbone.splot.try3$name.short.correct[index6]
dim(TRY.all.mean.sd.2) # 40790    39


index1 <- match(DT$PlotObservationID, splot.header$PlotObservationID)
any(is.na(index1)) #F
length(index1) #  24241941
length(unique(index1[!is.na(index1)])) #1117940

index2 <- match(splot.header$PlotObservationID,DT$PlotObservationID)
any(is.na(index2)) #F
length(index2) #   1117940
length(unique(index2[!is.na(index2)])) #  1117940
dim(splot.header)
# 1117940      3
identical(as.character(splot.header$PlotObservationID),as.character(DT$PlotObservationID[index2]))
# T

any(is.na(DT$species)) #T
# that should not be the case!
length(DT$species[!is.na(DT$species)]) #24221565
length(DT$species) #24241941
24241941-24221565 # 20376 NA names!!!
# it gives nonsense to match them with traits
index7 <- match(DT$species,TRY.all.mean.sd.2$species)
length(index7) #24241941
length(index7[!is.na(index7)]) #21040927, with previous TRY version that was: 19841429
24241941 - 21040927 # 3201014 entries have no traits
(24241941 - 3201014)/24241941*100 
# which are   86.79555% of plots that have traits,
# previously: 18.15247 %

# reduce DT, and splot.header
#splot.header <- splot.header[!is.na(DT$species[index2]),]
#dim(splot.header) #1117476       3
DT <- DT[!is.na(DT$species),]
index7 <- match(DT$species,TRY.all.mean.sd.2$species)
length(index7) #24221565


### CWM ###
mean(TRY.all.mean.sd.2$SLA.mean) # 2.611657
min(TRY.all.mean.sd.2$SLA.mean) # -1.297031
max(TRY.all.mean.sd.2$SLA.mean) # 5.893029
# example
DT$trait <- NA
DT$trait <- TRY.all.mean.sd.2$SLA.mean[index7]
length(DT$trait[!is.na(DT$trait)]) # 21020551
length(DT$trait[is.na(DT$trait)]) #   3201014
str(DT)

colnames(TRY.all.mean.sd.2)
CWM2 <-  DT[,list(CWM.SLA = weighted.mean(trait,Cover,na.rm = T)),by=PlotObservationID]
# I have checked that we do not need to prefilter only those entries that have a trait value
# this works fine
str(CWM2)
dim(CWM2) #1117898
which(colnames(TRY.all.mean.sd.2)=="SLA.mean") # 3
which(colnames(TRY.all.mean.sd.2)=="Disp.unit.leng.mean") # 20
CWM <- array(NA,c(dim(CWM2)[1],18),dimnames=list(CWM2$PlotObservationID,
                                              colnames(TRY.all.mean.sd.2)[3:20]))
rm(CWM2)
for (i in 1:18){
  DT$trait <- NA
  DT$trait <- TRY.all.mean.sd.2[index7,i+2]
  CWM[,i] <-  DT[,list(CWM.trait= weighted.mean(trait,Relative.cover,na.rm = T)),by=PlotObservationID]$CWM.trait
  
}
str(CWM) #num [1:1117898, 1:18]
CWM[1:20,]
tail(CWM)
write.csv(CWM,file = "CWM.csv", row.names = T)

CWM <- read.csv("C://Daten//iDiv2//splot2//CWM.csv")
str(CWM) #1117898 obs. of  19 variables
dimnames(CWM)[[1]] <- CWM$X
CWM <- CWM[,-1]
tail(CWM)

##### MAP ####
index3 <- match(dimnames(CWM)[[1]],splot.header$PlotObservationID)
length(index3) #1117898
any(is.na(index3)) # F
#length(index3[is.na(index3)]) #422
splot.header2 <- splot.header[index3,]
'CWM1$Longitude <- splot.header$Longitude[index10]
CWM1$Latitude <- splot.header$Latitude[index10]
'
str(CWM) # num [1:1117898, 1:18]
CWM <- CWM[!(is.na(splot.header2$Longitude)|is.na(splot.header2$Latitude)|is.na(CWM[,"SLA.mean"])),]
str(CWM) # num [1:1111307, 1:18]
1111307/1117898*100 # 99.41041% of plots have traits, Long and Lat
any(is.na(CWM)) #F
# match header again
index4 <- match(dimnames(CWM)[[1]],splot.header$PlotObservationID)
length(index4) #1111307
any(is.na(index4)) # F
splot.header2 <- splot.header[index4,]
# now in the same sequence as CWM

library(raster)
CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
coords <- cbind(splot.header2$Longitude, splot.header2$Latitude)
coords <- SpatialPoints(coords, proj4string=CRSlonlat)
map.CWM <- SpatialPointsDataFrame(coords, as.data.frame(CWM), proj4string=CRSlonlat)
str(map.CWM)

library(maptools)
data(wrld_simpl)
plot(wrld_simpl)
points(coordinates(map.CWM), cex=0.1, col="red", pch=16)
# sPlot2withSLA.png
max(coordinates(map.CWM)[,1]) # 179.5901
max(coordinates(map.CWM)[,2]) # 80.14912
min(coordinates(map.CWM)[,1]) # -162.7414
min(coordinates(map.CWM)[,2]) # -64.78


r <- raster(nrows=180, ncols=360, xmn=-180, xmx=180, ymn=-90, ymx=90)
str(r)
raster.map.SLA <- rasterize(map.CWM,r,map.CWM$SLA.mean, run="mean")
str(raster.map.SLA)
plot(wrld_simpl)
plot(raster.map.SLA, add=T)
# SLAgridded1.png

library(rworldmap)
raster.map.SLA.grid <- as(raster.map.SLA, 'SpatialGridDataFrame')
library(RColorBrewer)
# Set a color ramp
rf <- brewer.pal(8, 'Spectral')
mapParams <- mapGriddedData(raster.map.SLA.grid,addLegend=F, 
                            numCats = 8, colourPalette=rf)
str(mapParams)
mapParams$cutVector
# addMapLegend(plottedData=raster.map.SLA.grid,legendShrink=0.5)
#do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.5,
#                legendIntervals="data", legendMar = 2 ) )
do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.3,
                         legendIntervals="page", legendMar = 4, legendShrink=1.7,horizontal=F))
#round(as.numeric(exp(mapParams$cutVector)),1)
mtext(round(as.numeric(exp(mapParams$cutVector)),1),side=4,line=-2, 
      at=seq(-150,170, by=39), las=2)
#mtext("ln(SLA)",side=1,line=0, at=230, las=1)
#mtext("ln(SLA)",side=4,line=-2,at=-170, las=2)
# SLAgridded2.png

############################################################
# Regression with Bioclim
############################################################
'BIO1 = Annual Mean Temperature
BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp))
BIO3 = Isothermality (BIO2/BIO7) (* 100)
BIO4 = Temperature Seasonality (standard deviation *100)
BIO5 = Max Temperature of Warmest Month
BIO6 = Min Temperature of Coldest Month
BIO7 = Temperature Annual Range (BIO5-BIO6)
BIO8 = Mean Temperature of Wettest Quarter
BIO9 = Mean Temperature of Driest Quarter
BIO10 = Mean Temperature of Warmest Quarter
BIO11 = Mean Temperature of Coldest Quarter
BIO12 = Annual Precipitation
BIO13 = Precipitation of Wettest Month
BIO14 = Precipitation of Driest Month
BIO15 = Precipitation Seasonality (Coefficient of Variation)
BIO16 = Precipitation of Wettest Quarter
BIO17 = Precipitation of Driest Quarter
BIO18 = Precipitation of Warmest Quarter
BIO19 = Precipitation of Coldest Quarter'

index11 <- match(splot.header2$PlotObservationID,splot.bioclim$PLOT_ID)
length(index11) # 1111307
length(index11[!is.na(index11)]) # 1111307

#index12 <- match(CWM2$PlotObservationID, splot.header$PlotObservationID)
#any(is.na(index12)) #F
str(splot.bioclim$BIO_01)
plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11])
model2 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11]+I(splot.bioclim$BIO_01[index11]^2))
summary(model1)
'Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    2.945e+00  9.167e-04  3212.9   <2e-16 ***
splot.bioclim$BIO_01[index11] -1.204e-03  9.067e-06  -132.8   <2e-16 ***
Multiple R-squared:  0.01562,	Adjusted R-squared:  0.01562 
F-statistic: 1.764e+04 on 1 and 1111305 DF,  p-value: < 2.2e-16
'
summary(model2)
'                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                         2.806e+00  1.113e-03  2520.3   <2e-16 ***
splot.bioclim$BIO_01[index11]       2.188e-03  1.857e-05   117.8   <2e-16 ***
I(splot.bioclim$BIO_01[index11]^2) -1.657e-05  7.985e-08  -207.6   <2e-16 ***
Multiple R-squared:  0.05241
'
abline(model1, lwd=3, col="blue")
model2$coefficients
x.new <- seq(-100,300,1)
y.new <- model2$coefficients[1]+model2$coefficients[2]*x.new+model2$coefficients[3]*x.new^2
lines(x.new,y.new, lwd=3, col="blue")
# SLA_BIO1.png

# devide in quantiles
quantile(splot.bioclim$BIO_01[index11],probs = seq(0, 1, 1/3))
'       0% 33.33333% 66.66667%      100% 
     -227        79        99       303 '
# -> most points (i.e. one third) are in the temperate zone between 7.9 and 9.9°C
x1 <- splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=79]
model1a <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]<=79]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=79])
summary(model1a)
'Coefficients:
                                                                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                                        2.697e+00  1.244e-03  2167.2   <2e-16 ***
splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] <= 79] 2.466e-03  2.117e-05   116.5   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4027 on 371873 degrees of freedom
Multiple R-squared:  0.03519,	Adjusted R-squared:  0.03519 
F-statistic: 1.356e+04 on 1 and 371873 DF,  p-value: < 2.2e-16'
model1b <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]<=99]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=99])
summary(model1b)
'                                                                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                                        2.692e+00  1.161e-03  2319.8   <2e-16 ***
splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] <= 99] 2.777e-03  1.527e-05   181.9   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3954 on 750010 degrees of freedom
Multiple R-squared:  0.04224,	Adjusted R-squared:  0.04224 
F-statistic: 3.308e+04 on 1 and 750010 DF,  p-value: < 2.2e-16'
model1c <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]>99]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]>99])
summary(model1c)
'                                                                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                                        3.236e+00  2.587e-03  1251.1   <2e-16 ***
splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] > 99] -3.774e-03  1.855e-05  -203.4   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.443 on 361293 degrees of freedom
Multiple R-squared:  0.1028,	Adjusted R-squared:  0.1028 
F-statistic: 4.138e+04 on 1 and 361293 DF,  p-value: < 2.2e-16
'

plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
x.new <- seq(-100,79,1)
y.new <- model1a$coefficients[1]+model1a$coefficients[2]*x.new
lines(x.new,y.new, lwd=3, col="blue")
x.new <- seq(79,99,1)
y.new <- model1b$coefficients[1]+model1b$coefficients[2]*x.new
lines(x.new,y.new, lwd=3, col="green")
x.new <- seq(99,300,1)
y.new <- model1c$coefficients[1]+model1c$coefficients[2]*x.new
lines(x.new,y.new, lwd=3, col="red")

### calculate a GAM
library(gam)
x <- splot.bioclim$BIO_01[index11]
model3 <- gam(CWM[,"SLA.mean"]~lo(x, span=0.5))
summary(model3)
'    Null Deviance: 207269.5 on 1111306 degrees of freedom
Residual Deviance: 187452.7 on 1111302 degrees of freedom
AIC: 1175898
Anova for Parametric Effects
                       Df Sum Sq Mean Sq F value    Pr(>F)    
lo(x, span = 0.5)       1   3238  3238.0   19197 < 2.2e-16 ***
Residuals         1111302 187453     0.2
Anova for Nonparametric Effects
                  Npar Df Npar F     Pr(F)    
(Intercept)                                   
lo(x, span = 0.5)     2.7  37028 < 2.2e-16 ***
'

x.new <- seq(-100,300,1)
y.new <- predict(model3, data.frame(x=x.new), se = F)
# se=T does not work
plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
lines(x.new,y.new, lwd=3, col="blue")

### N to P ratio
model4 <- lm(CWM[,"LeafNPratio.mean"]~splot.bioclim$BIO_01[index11])
summary(model4)
'                               Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   2.290e+00  4.334e-04  5284.0   <2e-16 ***
splot.bioclim$BIO_01[index11] 1.531e-03  4.287e-06   357.2   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2026 on 1111305 degrees of freedom
Multiple R-squared:  0.103,	Adjusted R-squared:  0.103  
'
plot(CWM[,"LeafNPratio.mean"]~splot.bioclim$BIO_01[index11],
     cex=0.05, xlab="BIO_01", ylab="CWM ln(Leaf N to P ratio)",cex.lab=1.5, cex.axis=1.5)
abline(model4, lwd=3, col="blue")

### turn the regression around
# does not make sense
model5 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"SLA.mean"])
summary(model5)
'                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)       127.41676    0.28026   454.6   <2e-16 ***
CWM[, "SLA.mean"] -12.97407    0.09769  -132.8   <2e-16 ***
Multiple R-squared:  0.01562,	Adjusted R-squared:  0.01562 '

model6 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"LeafNPratio.mean"])
summary(model6)
'Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               -72.7479     0.4591  -158.5   <2e-16 ***
CWM[, "LeafNPratio.mean"]  67.2535     0.1883   357.2   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 42.46 on 1111305 degrees of freedom
Multiple R-squared:  0.103,	Adjusted R-squared:  0.103 '

model7 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"SLA.mean"]+CWM[,"LeafNPratio.mean"])
summary(model7)
'                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               -41.59378    0.55839  -74.49   <2e-16 ***
CWM[, "SLA.mean"]          -9.08978    0.09355  -97.17   <2e-16 ***
CWM[, "LeafNPratio.mean"]  65.04103    0.18885  344.40   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 42.28 on 1111304 degrees of freedom
Multiple R-squared:  0.1106,	Adjusted R-squared:  0.1106 '
x <- splot.bioclim$BIO_01[index11]
model8 <- lm(x~CWM[,"LeafNPratio.mean"]*CWM[,"SLA.mean"])
summary(model8)
'Coefficients:
                                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                 -432.2500     2.2645  -190.9   <2e-16 ***
CWM[, "LeafNPratio.mean"]                    227.1681     0.9304   244.2   <2e-16 ***
CWM[, "SLA.mean"]                            135.8572     0.8202   165.6   <2e-16 ***
CWM[, "LeafNPratio.mean"]:CWM[, "SLA.mean"]  -60.2280     0.3386  -177.9   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 41.69 on 1111303 degrees of freedom
Multiple R-squared:  0.1352,	Adjusted R-squared:  0.1352 '

library(scatterplot3d) # für 3D-Plots
s3d1 <- scatterplot3d(CWM[,"SLA.mean"],CWM[,"LeafNPratio.mean"], x, 
                      xlab="CWM ln(Leaf N to P ratio)", ylab="CWM ln(SLA)", zlab="BIO_1",
                              pch=1,cex.symbols=0.05,highlight.3d=T,type="p")
#fit <- lm(AnzF ~ Prop.n+Prop.ba,data=data6[data6$Sp=="Qupe",])
s3d1$plane3d(model8)
# does not work
### producing the plane with points
min(CWM[,"SLA.mean"])
max(CWM[,"SLA.mean"])
x2 <- sort(rep(seq(0.1, 5, by=0.1),35))
min(CWM[,"LeafNPratio.mean"])
max(CWM[,"LeafNPratio.mean"])
y2 <- rep(seq(1, 4.4, by=0.1),50)
func <- function(x,y) { -432.2500 + 227.1681 * x + 135.8572*y -60.2280*x*y}
z2 <- func(x2,y2)
s3d1 <- scatterplot3d(CWM[,"SLA.mean"],CWM[,"LeafNPratio.mean"], x, 
                      xlab="CWM ln(Leaf N to P ratio)", ylab="CWM ln(SLA)", zlab="BIO_1",
                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=70,scale.y=0.5)
s3d1$points3d(x2,y2,z2,col="blue", type="p", pch=20, cex=0.5)
# 
x <- splot.bioclim$BIO_01[index11]
model7 <- lm(CWM[,"SLA.mean"]~x*CWM[,"LeafNPratio.mean"])
summary(model7)
'                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  1.948e+00  9.377e-03   207.8   <2e-16 ***
x                            1.278e-02  7.985e-05   160.0   <2e-16 ***
CWM[, "LeafNPratio.mean"]    3.916e-01  3.856e-03   101.6   <2e-16 ***
x:CWM[, "LeafNPratio.mean"] -5.472e-03  3.166e-05  -172.8   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4213 on 1111303 degrees of freedom
Multiple R-squared:  0.04842,	Adjusted R-squared:  0.04842 '
min(x)
max(x)
x2 <- sort(rep(seq(-100, 290, by=10),35))
min(CWM[,"LeafNPratio.mean"])
max(CWM[,"LeafNPratio.mean"])
y2 <- rep(seq(1, 4.4, by=0.1),40)
func <- function(x,y) {  1.948e+00  +  1.278e-02 * x + 3.916e-01*y -5.472e-03*x*y}
z2 <- func(x2,y2)
s3d1 <- scatterplot3d(x,CWM[,"LeafNPratio.mean"], CWM[,"SLA.mean"],
                      xlab="BIO_1", ylab="CWM ln(Leaf N to P ratio)", zlab="CWM ln(SLA)", 
                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=50,scale.y=1)
s3d1$points3d(x2,y2,z2,col="blue", type="p", pch=20, cex=0.5)
# final plot
# some BIO_01 and LeafNPratio combinations do not exist
?cut
x.cut <- cut(x, breaks=seq(-110, 290, by=10))
y.cut <- cut(CWM[,"LeafNPratio.mean"],breaks=seq(0.9, 4.4, by=0.1))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #40 35
z2.cut <- table(x2,y2)
head(z2.cut)
dim(z2.cut) #40 35

#library(reshape2)
z.cut <- melt(z.cut)
str(z.cut) # 1400 obs. of  3 variables:
head(z.cut)
z2.cut <- melt(z2.cut)
str(z2.cut) #1400 obs. of  3 variables:
head(z2.cut)
z2.cut$value <- 0
z2.cut$value <- z.cut$value
z2.cut$xy <- paste(z2.cut$x2,z2.cut$y2,sep="_")
#z3.cut <- acast(z2.cut, x2~y2)
#head(z3.cut)
xy <- paste(x2,y2,sep="_")
index20 <- match(xy,z2.cut$xy)
values.present <- vector(mode="logical", length=length(x2))
values.present <- z2.cut$value[index20] > 0
table(values.present)
'z.cut$value[z.cut$x.cut==x.cut[1] & z.cut$y.cut==y.cut[1]]
values.present <- function(z.cut,x,y){
  z.cut$value[z.cut$x.cut==x & z.cut$y.cut==y]
}
x1 <- data.frame(x,x.cut, y=CWM[,"LeafNPratio.mean"],y.cut)
head(x1)
z1 <- apply(x1,1,FUN=values.present(z.cut, x1$x.cut, x1$y.cut))
values.present(z.cut, x1$x.cut[1], x1$y.cut[1])
'

s3d1 <- scatterplot3d(x,CWM[,"LeafNPratio.mean"], CWM[,"SLA.mean"], 
                      xlab="BIO_1", ylab="CWM ln(Leaf N to P ratio)", zlab="CWM ln(SLA)", 
                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=-10,scale.y=0.8)
s3d1$points3d(x2[values.present],y2[values.present],z2[values.present],col="blue", type="p", pch=20, cex=0.5)


#### Collect all CWM regressions ####
names(splot.bioclim) # from 4 (BIO_01 to 49 T_MAR
CWM.bioclim <- array(NA,c(46,18,5),dimnames=list(names(splot.bioclim)[4:49],dimnames(CWM)[[2]],
                      c("p_lin","r2_lin","p_qua1", "p_qua2","r2_qua")))
for (i in 1:46){
  for (j in 1:18){
    model1 <- lm(CWM[,j]~splot.bioclim[index11,i+3])
    CWM.bioclim[i,j,1] <- summary(model1)$coefficients[2,4]
    CWM.bioclim[i,j,2] <- summary(model1)$r.squared
    model2 <- lm(CWM[,j]~splot.bioclim[index11,i+3]+I(splot.bioclim[index11,i+3]^2))
    CWM.bioclim[i,j,3] <- summary(model2)$coefficients[2,4]
    CWM.bioclim[i,j,4] <- summary(model2)$coefficients[3,4]
    CWM.bioclim[i,j,5] <- summary(model2)$r.squared
  }
}
str(CWM.bioclim) #num [1:46, 1:18, 1:5]
write.csv(data.frame(names(splot.bioclim)[4:49],CWM.bioclim[,,2]),file = "CWM_bioclim_r2_lin.csv", row.names = FALSE)
write.csv(data.frame(names(splot.bioclim)[4:49],CWM.bioclim[,,5]),file = "CWM_bioclim_r2_qua.csv", row.names = FALSE)


which(CWM.bioclim[,1,2]==max(CWM.bioclim[,1,2])) # BIO_02 
which(CWM.bioclim[,1,5]==max(CWM.bioclim[,1,5])) # BIO_02
which(CWM.bioclim[,,2]==max(CWM.bioclim[,,2]),arr.ind = TRUE) 
#       row col
#BIO_02   2   9
CWM.bioclim[2,9,2] #0.1297723
which(CWM.bioclim[,,5]==max(CWM.bioclim[,,5]),arr.ind = TRUE) 
#       row col
#BIO_02   2   9
CWM.bioclim[2,9,5] # 0.1563736
names(splot.bioclim)[2+3] #"BIO_02"
dimnames(CWM)[[2]][9] #"LeafNperArea.mean"
plot(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11],
     cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per Area)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,9]~splot.bioclim[index11,2+3])
abline(model1, lwd=3, col="blue")
# MeanNperArea_BIO_02.png
png(filename="MeanNperArea_BIO_02.png", width=800, height=800, units = "px", bg="white")
  plot(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11],
       cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per Area)",cex.lab=1.5, cex.axis=1.5)
  #model1 <- lm(CWM[,9]~splot.bioclim[index11,2+3])
  abline(model1, lwd=3, col="blue")
dev.off() # creates a file
png(filename="SLA_BIO_02.png", width=800, height=800, units = "px", bg="white")
  plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_02[index11],
    cex=0.05, xlab="BIO_02", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
  model1 <- lm(CWM[,1]~splot.bioclim[index11,2+3])
abline(model1, lwd=3, col="blue")
dev.off() # creates a file

#### PCA of all CWM and all Bioclim variables ####
library(vegan)
pca1 <- rda(CWM,scale=T)
plot(pca1)
str(summary(pca1))
#text(summary(pca1)$species[,c(1,2)],dimnames(summary(pca1)$species)[[1]])
traitnames <- dimnames(summary(pca1)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(pca1)$species[,c(1,2)]
which(traitnames=="SeedMass") #3
traitcoord[3,2] <- traitcoord[3,2]-0.5
ordiplot(pca1, type="n", ylim=c(-10,10))
text(traitcoord,traitnames, col="red", cex=1)
points(plotcoord*30, cex=0.05)
#env1 <- envfit(pca1,splot.bioclim[index11,c(4:49)],permutations = 99)
#with permutations = 999:
#Error: cannot allocate vector of size 4.1 Gb
plot(env1)
png(filename="PCA1.png", width=1000, height=1000, units = "px", bg="white")
  ordiplot(pca1, type="n")
  text(traitcoord,traitnames)
  plot(env1)
dev.off() # creates a file
'rm(env1)
rm(backbone.splot.try3)
rm(model1)
rm(model2)
rm(pca1)
rm(splot.header)
rm(splot.header2)
rm(traitdist)
rm(coords)
rm(TRY.all.mean.sd.2)
rm(index7)
rm(raster.map.SLA)
rm(trait.vec)
rm(FD)
rm(splot.bioclim)
'
gc()

### produce a PCA with kernel density
# From Díaz et al. 2016, Nature
library(ks)
plotcoord <- summary(pca1)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
H <- Hpi(x=plotcoord)      # optimal bandwidth estimation
# does not work
est<- kde(x=plotcoord, H=H, compute.cont=TRUE)     # kernel density estimation

library(raster)
library(fBasics)
# Color palette
#plotcoord <- plotcoord*30
min(plotcoord[,1]) #-0.2639705
max(plotcoord[,1]) # 0.2415964
min(plotcoord[,2]) #-0.2595053
max(plotcoord[,2]) # 0.3560549
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #50 61
z.cut <- melt(z.cut)
str(z.cut) # 3050 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.9878
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                        4, by=0.4))
#values.present <- z.cut$value[index30]
ordiplot(pca1, type="n")
#points(plotcoord*30, cex=0.05, col=rainbowPalette(n=10,  
#        s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1)[values.cut])
points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                    gamma = 2.2, alpha = NULL))[values.cut])
text(traitcoord,traitnames, col="red", cex=1)


ext.factor <- 30
r.pca <- raster(nrows=300, ncols=300, xmn=min(plotcoord[,1])*ext.factor, xmx=max(plotcoord[,2])*ext.factor, 
                ymn=min(plotcoord[,1])*ext.factor, ymx=max(plotcoord[,2]*ext.factor))
# attention, xmn and ymn as well as xmx and ymx have to be the same, otherwise the raster is shifted
r.pca.raster <- rasterize(plotcoord*ext.factor, r.pca, fun="count")
str(r.pca.raster)
plot(log10(r.pca.raster), asp=0, col=seqPalette(n=10, name=c("Blues")), 
     main="", cex.main=0.7, xlab="",  ylab="", add=T)
plot(log10(r.pca.raster), asp=0, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
    gamma = 2.2, alpha = NULL)), 
     main="", cex.main=0.7, xlab="",  ylab="", add=T)
plot(log10(r.pca.raster), asp=0, col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1), 
     main="", cex.main=0.7, xlab="",  ylab="", add=T, legend=F)

'## graph for the PCA ##
ordiplot(pca1, type="n", xlim=c(-10,13), ylim=c(-10,10))
rasterImage(as.raster(r.pca.raster), -10, -10, 20, 10, interpolate = F,
            col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1))
plot(log10(r.pca.raster), asp=0, col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1), 
     main="", cex.main=0.7, xlab="",  ylab="", add=T, legend=F)
text(traitcoord,traitnames, col="red", cex=1)
'
rda1 <- rda(CWM~.,splot.bioclim[index11,c(4:49)],scale=T)
plot(rda1)
summary(rda1)
rda1
'              Inertia Proportion Rank
Total         18.0000     1.0000     
Constrained    2.4811     0.1378   15
Unconstrained 15.5189     0.8622   15
Inertia is correlations 
Eigenvalues for constrained axes:
  RDA1   RDA2   RDA3   RDA4   RDA5   RDA6   RDA7   RDA8   RDA9  RDA10  RDA11  RDA12  RDA13  RDA14  RDA15 
1.2230 0.6020 0.3275 0.1972 0.0653 0.0226 0.0156 0.0107 0.0059 0.0050 0.0031 0.0012 0.0010 0.0007 0.0003 

Eigenvalues for unconstrained axes:
PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10  PC11  PC12  PC13  PC14  PC15 
4.594 3.453 1.486 1.426 1.095 0.798 0.648 0.603 0.483 0.343 0.261 0.122 0.084 0.074 0.047 
'
#anova(rda1, perm=99)
#anova(rda1, by="terms", permutations = how(nperm=9))
library(parallel) 
detectCores() #32
# Set up the cluster
#clusterType <- if(length(find.package("snow", quiet = TRUE))) "SOCK" else "PSOCK"
#clust <- try(makeCluster(getOption("cl.cores", 2), type = clusterType))
cl <- makeCluster(10)
#registerDoParallel(cl) 
options(na.action = "na.pass")
#clusterExport(cl, "results") 

#print(system.time(anova(rda1, by="terms", perm=99, parallel = getOption("mc.cores"))))
print(system.time(out1 <- anova(rda1, by="margin", permutations =99, parallel = cl)))
'user    system   elapsed 
5304.802  1311.088 14522.410
'
out1

traitnames <- dimnames(summary(rda1)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(rda1)$species[,c(1,2)]
bioclimcoord <- summary(rda1)$biplot[,c(1,2)]
which(traitnames=="LeafNperArea") #9
traitcoord[9,1] <- traitcoord[9,1]+0.5
which(traitnames=="StemDens") #5
traitcoord[5,2] <- traitcoord[5,2]+0.3
which(traitnames=="Disp.unit.leng") #18
traitcoord[18,2] <- traitcoord[18,2]-0.3
ordiplot(rda1, type="n", xlim=c(-7,7))
arrows(rep(0,45),rep(0,45),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="cadetblue4", length=0.1)
text(bioclimcoord*8,dimnames(bioclimcoord)[[1]], col="cadetblue4", cex=0.8)
text(traitcoord,traitnames, col="red")
# copied as first RDA graph into ppt

#rda1 <- rda(CWM ~ ., splot.bioclim[index11,c(4:49)],scale=T)

### Stepwise RDA ###
rda2 <- rda(CWM ~ 1,splot.bioclim[index11,c(4:49)],scale=T)
#rda3 <- step(rda2, scope = formula(rda1), test = "perm")
rda3 <- ordiR2step(rda2, scope=formula(rda1), direction = "both", Pin = 0.05, R2scope = F, permutations = how(nperm = 99), trace = TRUE)
#CWM ~ GDD5 + BIO_02 + BIO_12 + T_MAY + P_MAY + T_AUG ....
#rm(rda1)

# now rebuild the RDA sequentially
pca1 <- rda(CWM,scale=T)
pca1
5.527+ 3.906+ 1.953+ 1.507+ 1.375+ 0.886+ 0.726+ 0.623+ 0.499 +0.360+ 0.295+ 0.131+ 0.087+ 0.077+ 0.048 
(5.527+ 3.906)/18 # 52.40556%

traitnames <- dimnames(summary(pca1)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(pca1)$species[,c(1,2)]
which(traitnames=="SeedMass") #3
traitcoord[3,2] <- traitcoord[3,2]-0.5
ordiplot(pca1, type="n")
text(traitcoord,traitnames, col="red")
# copied as second RDA graph into ppt


rda4 <- rda(CWM ~ GDD5,splot.bioclim[index11,c(4:49)],scale=T)
rda4
traitnames <- dimnames(summary(rda4)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(rda4)$species[,c(1,2)]
plotcoord <- summary(rda4)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
bioclimcoord <- summary(rda4)$biplot[,c(1,2)]

min(plotcoord[,1]) #-0.5340595
max(plotcoord[,1]) # 0.7760822
min(plotcoord[,2]) #-0.2688765
max(plotcoord[,2]) # 0.2326464
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #131 50
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.956168
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                         4, by=0.4))
     

ordiplot(rda4, type="n")
points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])

arrows(rep(0,1),rep(0,1),bioclimcoord[1]*8,bioclimcoord[2]*8, col="blue", length=0.1)
text(bioclimcoord[1]*8+0.8,bioclimcoord[2]*8-0.2,"GDD5", col="blue", cex=1)
text(traitcoord,traitnames, col="red")
# copied as third RDA graph into ppt

rda5 <- rda(CWM ~ GDD5+BIO_02,splot.bioclim[index11,c(4:49)],scale=T)
rda5
traitnames <- dimnames(summary(rda5)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(rda5)$species[,c(1,2)]
bioclimcoord <- summary(rda5)$biplot[,c(1,2)]
plotcoord <- summary(rda5)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #131 50
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                          4, by=0.4))


ordiplot(rda5, type="n", xlim=c(-10,10), ylim=c(-10,10))
points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])
arrows(rep(0,2),rep(0,2),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="blue", length=0.1)
text(bioclimcoord*8,dimnames(bioclimcoord)[[1]], col="blue", cex=1)
text(traitcoord,traitnames, col="red")
# copied as fourth RDA graph into ppt


plot(CWM[,"LeafNPratio.mean"]~splot.bioclim$GDD5[index11],
     cex=0.05, xlab="GDD5", ylab="CWM ln(leaf N to P ratio)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,"LeafNPratio.mean"]~splot.bioclim$GDD5[index11])
summary(model1)
'                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                 2.297e+00  3.891e-04  5905.2   <2e-16 ***
splot.bioclim$GDD5[index11] 6.349e-06  1.636e-08   388.1   <2e-16 ***
Multiple R-squared:  0.1193,	Adjusted R-squared:  0.1193 
'
abline(model1, lwd=3, col="blue")
# LeafNPRatio_GDD5

plot(CWM[,"SLA.mean"]~jitter(splot.bioclim$BIO_02[index11]),
     cex=0.05, xlab="BIO_02", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_02[index11])
summary(model1)
'                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    3.399e+00  1.448e-03  2348.2   <2e-16 ***
splot.bioclim$BIO_02[index11] -6.253e-03  1.551e-05  -403.3   <2e-16 ***

Residual standard error: 0.4034 on 1111305 degrees of freedom
Multiple R-squared:  0.1277,	Adjusted R-squared:  0.1277 
'
abline(model1, lwd=3, col="blue")
# SLA_BIO_02

plot(CWM[,"LeafNperArea.mean"]~jitter(splot.bioclim$BIO_02[index11]),
     cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per area)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11])
summary(model1)
'                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   -5.989e-02  8.475e-04  -70.67   <2e-16 ***
splot.bioclim$BIO_02[index11]  3.696e-03  9.079e-06  407.09   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2362 on 1111305 degrees of freedom
Multiple R-squared:  0.1298,	Adjusted R-squared:  0.1298 
'
abline(model1, lwd=3, col="blue")
# LeafNper_BIO_02

##### FD calculations ####
j <- 3
nam <- DT$species[DT$PlotObservationID==j]
abu <- DT$Relative.cover[DT$PlotObservationID==j]
trait <- DT$trait[DT$PlotObservationID==j]
abundance.weighted=T

FD.fun <- function(trait, abu){
  res <- as.double(NA)
  if (length(trait[!is.na(trait)])>0){
    res <- 0
    #nam <- as.character(nam[!is.na(trait)])
    abu <- as.data.frame(abu[!is.na(trait)])
    #rownames(abu) <- nam
    trait <- as.data.frame(trait[!is.na(trait)])
    #rownames(trait) <- nam
    dis <- dist(trait,method="euclidean")
    if (sum(dis)>1){
      sqrt.dis <- sqrt(dis)
      # taking the square root is required to obtain the same result from the divc function
      # as divc takes the square of distances values, as suggested by
      # Champely & Chessel (2002, Env. Ecol. Stat. 9: 167-177)
      #abu <- as.data.frame(abu)
      res <- unlist(divc(abu, sqrt.dis))
      #Rao's Q: sumi sumj (dist ij prop i prop j)'
    } 
  }
  res
}

mpd.fun <- function(nam, trait, abu,abundance.weighted){
  #res <- numeric(1) # carries result from the function
  res <- as.double(NA)
  if (length(trait[!is.na(trait)])>0){
  #  dis <- as.matrix(dist(t(t1),method="euclidean"))   
    abu <- t(abu)
    colnames(abu) <- nam
    trait <- t(trait)
    colnames(trait) <- nam
    #dis <- as.matrix(vegdist(t(t1),method="euclidean", na.rm=T))
    dis <- as.matrix(dist(t(trait),method="euclidean"))
    res <- mpd(abu, dis, abundance.weighted=abundance.weighted)
  }
  res
}

# from mpd source code
'N <- dim(samp)[1]
mpd <- numeric(N)
for (i in 1:N) {
  sppInSample <- names(samp[i, samp[i, ] > 0])
  if (length(sppInSample) > 1) {
    sample.dis <- dis[sppInSample, sppInSample]
    if (abundance.weighted) {
      sample.weights <- t(as.matrix(samp[i, sppInSample, 
                                         drop = FALSE])) %*% as.matrix(samp[i, sppInSample, 
                                                                            drop = FALSE])
      mpd[i] <- weighted.mean(sample.dis, sample.weights)
    }
    else {
      mpd[i] <- mean(sample.dis[lower.tri(sample.dis)])
    }
  }
  else {
    mpd[i] <- NA
  }
}
mpd
'
# from Oliver, who used picante
# does not accept missing values in the function
library(picante)
# define function for mean pairwise distance
'mpd.fun.dt <- function(abu, dis, nam, abundance.weighted){
  abu <- t(abu)
  colnames(abu) <- nam
  res <- mpd(abu, dis, abundance.weighted=abundance.weighted)
}
# define funtion of mean nearest neighbor distance
mntd.fun.dt <- function(abu, dis, nam, abundance.weighted){
  abu <- t(abu)
  colnames(abu) <- nam
  res <- mntd(abu, dis, abundance.weighted=abundance.weighted)
  res
}
species.list <- unique(DT$species)
any(is.na(species.list)) #T, there is one NA
length(species.list) #60909
species.list <- species.list[!is.na(species.list)]
length(species.list) #60908
index8 <- match(species.list,TRY.all.mean.sd.2$species)
str(index8) # 60908
length(index8[!is.na(index8)]) #21001
#species.list2 <- TRY.all.mean.sd.2$species[index8]
#species.list2 <- species.list2[!is.na(species.list2)]
#length(species.list2) # 21001
TRY.all.mean.sd.2.na <- TRY.all.mean.sd.2[index8,]
str(TRY.all.mean.sd.2.na)
TRY.all.mean.sd.2.na$species2 <- species.list
rm(index9)
'
FD <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]],
                                                 colnames(TRY.all.mean.sd.2)[3:20]))
str(FD) #logi [1:1117898, 1:18] 
'for (i in 1:18){
  # create distance matrix
  #trait.vec <- TRY.all.mean.sd.2.na[,i+2]
  #names(trait.vec) <- TRY.all.mean.sd.2.na$species2
  #trait.dist.mat <- as.matrix(dist(trait.vec))
  # too large for my notebook
  #mpd[,i] <-  DT[,list(mpd.trait= mpd.fun.dt(Relative.cover, trait.dist.mat, species, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
  print(i)  
  DT$trait <- NA
  DT$trait <- TRY.all.mean.sd.2[index7,i+2]
  FD[,i] <-  DT[,list(mpd.trait= mpd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
}
write.csv(FD,file = "FD2.csv", row.names = T)
'
for (i in 1:18){
  # create distance matrix
  #trait.vec <- TRY.all.mean.sd.2.na[,i+2]
  #names(trait.vec) <- TRY.all.mean.sd.2.na$species2
  #trait.dist.mat <- as.matrix(dist(trait.vec))
  # too large for my notebook
  #mpd[,i] <-  DT[,list(mpd.trait= mpd.fun.dt(Relative.cover, trait.dist.mat, species, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
  print(i)  
  DT$trait <- NA
  DT$trait <- TRY.all.mean.sd.2[index7,i+2]
  # recalculate relative cover for this trait
  FD[,i] <-  DT[,list(FD.trait= FD.fun(trait, Relative.cover)),by=PlotObservationID]$FD.trait
}
write.csv(FD,file = "FD3.csv", row.names = T)

FD <- read.csv("C://Daten//iDiv2//splot2//FD3.csv")
str(FD)
dimnames(FD)[[1]] <- FD$X
FD <- FD[,-1]
tail(FD)

#### match FD to splot.header ####
index12 <- match(dimnames(FD)[[1]],splot.header$PlotObservationID)
length(index12) #1117898
any(is.na(index12)) # F
splot.header3 <- splot.header[index12,]
str(FD) # num [1:1117898, 1:18]
FD <- FD[!(is.na(splot.header3$Longitude)|is.na(splot.header3$Latitude)|is.na(FD[,"SLA.mean"])),]
str(FD) # num [1:1111307 , 1:18]
1111307 /1117898*100 # 99.41041 % of plots have traits, Long and Lat
any(is.na(FD)) #F
# match header again
index13 <- match(dimnames(FD)[[1]],splot.header$PlotObservationID)
length(index13) #1111307
any(is.na(index13)) # F
splot.header3 <- splot.header[index13,]
# now in the same sequence as FD


#### Collect all FD regressions ####
names(splot.bioclim) # from 4 BIO_01 to 49 T_MAR
index14 <- match(splot.header3$PlotObservationID,splot.bioclim$PLOT_ID)
length(index14) # 1111307
length(index14[!is.na(index14)]) # 1111307

FD.bioclim <- array(NA,c(46,18,5),dimnames=list(names(splot.bioclim)[4:49],dimnames(FD)[[2]],
                                      c("p_lin","r2_lin","p_qua1", "p_qua2","r2_qua")))
for (i in 1:46){
  for (j in 1:18){
    model1 <- lm(FD[,j]~splot.bioclim[index14,i+3])
    FD.bioclim[i,j,1] <- summary(model1)$coefficients[2,4]
    FD.bioclim[i,j,2] <- summary(model1)$r.squared
    model2 <- lm(FD[,j]~splot.bioclim[index14,i+3]+I(splot.bioclim[index14,i+3]^2))
    FD.bioclim[i,j,3] <- summary(model2)$coefficients[2,4]
    FD.bioclim[i,j,4] <- summary(model2)$coefficients[3,4]
    FD.bioclim[i,j,5] <- summary(model2)$r.squared
  }
}
str(FD.bioclim) #num [1:46, 1:18, 1:5]
write.csv(data.frame(names(splot.bioclim)[4:49],FD.bioclim[,,2]),file = "FD2_bioclim_r2_lin.csv", row.names = FALSE)
write.csv(data.frame(names(splot.bioclim)[4:49],FD.bioclim[,,5]),file = "FD2_bioclim_r2_qua.csv", row.names = FALSE)

library(vegan)
FD.pca1 <- rda(FD,scale=T)
FD.pca1
'Eigenvalues for unconstrained axes:
PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
8.597 1.876 1.378 0.886 0.790 0.780 0.583 0.551 
(Showed only 8 of all 18 unconstrained eigenvalues)
'
plot(FD.pca1)
summary(FD.pca1)
#text(summary(FD.pca1)$species[,c(1,2)],dimnames(summary(FD.pca1)$species)[[1]])
traitnames <- dimnames(summary(FD.pca1)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(FD.pca1)$species[,c(1,2)]
which(traitnames=="Seed.length") #17
traitcoord[17,2] <- traitcoord[17,2]+0.3
which(traitnames=="LeafC.perdrymass") #12
traitcoord[12,2] <- traitcoord[12,2]-0.4
which(traitnames=="Leaf.delta.15N") #13
traitcoord[13,2] <- traitcoord[13,2]+0.4
which(traitnames=="LeafNPratio") #11
traitcoord[11,2] <- traitcoord[11,2]-0.3

plotcoord <- summary(FD.pca1)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #131 50
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                          4, by=0.4))

ordiplot(FD.pca1, type="n")
#text(traitcoord,traitnames)
points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])

#arrows(rep(0,45),rep(0,45),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="cadetblue4", length=0.1)
text(traitcoord,traitnames, col="red")
# copied as first RDA graph into ppt

env1 <- envfit(FD.pca1,splot.bioclim[index14,c(4:49)],permutations = 0)
env1
#with permutations = 999:
#Error: cannot allocate vector of size 4.1 Gb
plot(env1)
png(filename="PCA1.png", width=1000, height=1000, units = "px", bg="white")
ordiplot(pca1, type="n")
text(traitcoord,traitnames)
plot(env1)
dev.off() # creates a file

### Stepwise RDA ###
### RDA ###
FD.rda1 <- rda(FD~.,splot.bioclim[index14,c(4:49)],scale=T)
FD.rda1
FD.rda2 <- rda(FD ~ 1,splot.bioclim[index14,c(4:49)],scale=T)
FD.rda3 <- ordistep(FD.rda2, scope=formula(FD.rda1), direction = "both", Pin = 0.1, 
            permutations = how(nperm = 10), trace = TRUE)
'Start: FD ~ 1 

Df     AIC       F  Pr(>F)  
+ BIO_14  1 3191679 20602.8 0.09091 .
+ BIO_17  1 3191898 20379.5 0.09091 .
+ BIO_12  1 3197070 15125.2 0.09091 .
+ P_MAY   1 3198071 14110.8 0.09091 .
+ P_APR   1 3198194 13986.4 0.09091 .
+ BIO_18  1 3198767 13406.3 0.09091 .
+ P_MAR   1 3199448 12717.1 0.09091 .
+ P_FEB   1 3199451 12714.5 0.09091 .
+ P_JUN   1 3199643 12519.9 0.09091 .
+ BIO_05  1 3199804 12357.7 0.09091 .
+ BIO_02  1 3200062 12096.5 0.09091 .
+ P_NOV   1 3200602 11551.1 0.09091 .
+ P_DEC   1 3200823 11328.1 0.09091 .
+ P_JAN   1 3201335 10810.2 0.09091 .
+ BIO_10  1 3202194  9943.1 0.09091 .
+ T_JUL   1 3202226  9911.4 0.09091 .
+ T_AUG   1 3202685  9448.0 0.09091 .
+ GDD5    1 3202914  9217.7 0.09091 .
+ T_JUN   1 3202934  9196.7 0.09091 .
+ BIO_16  1 3203459  8668.3 0.09091 .
+ BIO_19  1 3203685  8440.1 0.09091 .
+ P_AUG   1 3204079  8043.8 0.09091 .
+ BIO_13  1 3204383  7737.1 0.09091 .
+ T_MAY   1 3204450  7669.7 0.09091 .
+ P_OCT   1 3204755  7362.4 0.09091 .
+ T_SEP   1 3204986  7130.0 0.09091 .
+ P_SEP   1 3205174  6941.4 0.09091 .
+ BIO_15  1 3205378  6735.8 0.09091 .
+ P_JUL   1 3205600  6512.4 0.09091 .
+ T_APR   1 3205746  6365.1 0.09091 .
+ BIO_01  1 3205841  6270.3 0.09091 .
+ BIO_03  1 3205882  6228.8 0.09091 .
+ T_OCT   1 3206048  6062.0 0.09091 .
+ AR      1 3206562  5545.3 0.09091 .
+ BIO_07  1 3207063  5042.2 0.09091 .
+ T_NOV   1 3207547  4555.5 0.09091 .
+ T_MAR   1 3207659  4443.2 0.09091 .
+ T_FEB   1 3208177  3923.3 0.09091 .
+ T_DEC   1 3208260  3840.3 0.09091 .
+ BIO_11  1 3208334  3765.7 0.09091 .
+ T_JAN   1 3208465  3634.3 0.09091 .
+ BIO_08  1 3208844  3254.4 0.09091 .
+ BIO_06  1 3209374  2723.0 0.09091 .
+ BIO_09  1 3209543  2553.3 0.09091 .
+ PET     1 3209642  2454.5 0.09091 .
+ BIO_04  1 3210074  2021.3 0.09091 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: FD ~ BIO_14 

Df     AIC     F  Pr(>F)  
- BIO_14  1 3212091 20603 0.09091 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Df     AIC       F  Pr(>F)  
+ BIO_02  1 3184224 7481.80 0.09091 .
+ BIO_03  1 3184435 7269.37 0.09091 .
+ GDD5    1 3187255 4435.29 0.09091 .
+ BIO_05  1 3187772 3915.70 0.09091 .
+ T_JAN   1 3188026 3661.00 0.09091 .
+ BIO_01  1 3188040 3647.18 0.09091 .
+ T_FEB   1 3188048 3638.72 0.09091 .
+ T_MAR   1 3188075 3612.29 0.09091 .
+ T_APR   1 3188082 3605.14 0.09091 .
+ BIO_11  1 3188088 3598.96 0.09091 .
+ T_DEC   1 3188123 3563.70 0.09091 .
+ T_NOV   1 3188208 3478.25 0.09091 .
+ T_OCT   1 3188225 3461.84 0.09091 .
+ T_MAY   1 3188357 3329.42 0.09091 .
+ BIO_06  1 3188491 3194.91 0.09091 .
+ BIO_10  1 3188609 3076.07 0.09091 .
+ T_SEP   1 3188617 3068.49 0.09091 .
+ BIO_13  1 3188664 3021.74 0.09091 .
+ T_JUN   1 3188669 3016.26 0.09091 .
+ BIO_16  1 3188709 2976.11 0.09091 .
+ P_SEP   1 3188724 2961.09 0.09091 .
+ BIO_07  1 3188728 2956.84 0.09091 .
+ BIO_12  1 3188782 2902.48 0.09091 .
+ T_AUG   1 3188788 2897.30 0.09091 .
+ T_JUL   1 3188879 2805.50 0.09091 .
+ P_AUG   1 3189181 2503.15 0.09091 .
+ BIO_18  1 3189256 2427.84 0.09091 .
+ BIO_04  1 3189277 2406.93 0.09091 .
+ P_JUL   1 3189398 2285.76 0.09091 .
+ BIO_15  1 3189605 2078.03 0.09091 .
+ P_APR   1 3189650 2032.61 0.09091 .
+ P_JUN   1 3189703 1979.86 0.09091 .
+ BIO_09  1 3189912 1770.71 0.09091 .
+ P_MAR   1 3189965 1717.19 0.09091 .
+ PET     1 3189984 1698.34 0.09091 .
+ AR      1 3190003 1678.99 0.09091 .
+ P_MAY   1 3190005 1677.55 0.09091 .
+ P_OCT   1 3190063 1619.65 0.09091 .
+ P_FEB   1 3190490 1192.28 0.09091 .
+ BIO_17  1 3190497 1184.39 0.09091 .
+ BIO_19  1 3190555 1126.59 0.09091 .
+ P_NOV   1 3190570 1111.68 0.09091 .
+ BIO_08  1 3190582 1100.05 0.09091 .
+ P_JAN   1 3190893  787.98 0.09091 .
+ P_DEC   1 3190955  726.52 0.09091 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: FD ~ BIO_14 + BIO_02 

Df     AIC       F  Pr(>F)  
- BIO_02  1 3191679  7481.8 0.09091 .
- BIO_14  1 3200062 15953.2 0.09091 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1'

FD.rda4 <- rda(FD ~ BIO_14,splot.bioclim[index14,c(4:49)],scale=T)
FD.rda4
'              Inertia Proportion Rank
Total         18.0000     1.0000     
Constrained    0.3276     0.0182    1
Unconstrained 17.6724     0.9818   18
Inertia is correlations 

Eigenvalues for constrained axes:
RDA1 
0.3276 

Eigenvalues for unconstrained axes:
PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
8.307 1.850 1.378 0.884 0.789 0.780 0.578 0.551 
'
traitnames <- dimnames(summary(FD.rda4)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(FD.rda4)$species[,c(1,2)]
bioclimcoord <- summary(FD.rda4)$biplot[,c(1,2)]
plotcoord <- summary(FD.rda4)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #131 50
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                          4, by=0.4))

ordiplot(FD.rda4, type="n", xlim=c(-10,10),ylim=c(-10,12))
points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])
arrows(rep(0,1),rep(0,1),bioclimcoord[1]*8,bioclimcoord[2]*8, col="blue", length=0.1)
text(bioclimcoord[1]*8,bioclimcoord[2]*8+0.5,"BIO_14", col="blue", cex=1)
text(traitcoord,traitnames, col="red")
# copied as third RDA graph into ppt

FD.rda5 <- rda(FD ~ BIO_14+BIO_02,splot.bioclim[index14,c(4:49)],scale=T)
FD.rda5
'
Inertia Proportion Rank
Total         18.00000    1.00000     
Constrained    0.44582    0.02477    2
Unconstrained 17.55418    0.97523   18
Inertia is correlations 

Eigenvalues for constrained axes:
RDA1   RDA2 
0.3641 0.0817 

Eigenvalues for unconstrained axes:
PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
8.267 1.816 1.377 0.880 0.788 0.780 0.568 0.530 '
traitnames <- dimnames(summary(FD.rda5)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(FD.rda5)$species[,c(1,2)]
bioclimcoord <- summary(FD.rda5)$biplot[,c(1,2)]

plotcoord <- summary(FD.rda5)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #131 50
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                          4, by=0.4))

ordiplot(FD.rda5, type="n",  xlim=c(-10,10),ylim=c(-10,12))
points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])
arrows(rep(0,2),rep(0,2),bioclimcoord[,1]*4,bioclimcoord[,2]*4, col="blue", length=0.1)
text(bioclimcoord*4,dimnames(bioclimcoord)[[1]], col="blue", cex=1)
text(traitcoord,traitnames, col="red")
# copied as fourth RDA graph into ppt

plot(FD[,"Seed.length.mean"]~splot.bioclim$BIO_14[index14],
     cex=0.05, xlab="BIO_14", ylab="FD ln(seed length)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(FD[,"Seed.length.mean"]~splot.bioclim$BIO_14[index14])
summary(model1)
'                               Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   2.584e-01  3.193e-04   809.1   <2e-16 ***
splot.bioclim$BIO_14[index14] 1.407e-04  6.813e-07   206.5   <2e-16 ***
Multiple R-squared:  0.03697,	Adjusted R-squared:  0.03697 
'
abline(model1, lwd=3, col="blue")
# 

plot(FD[,"Seed.num.rep.unit.mean"]~splot.bioclim$BIO_02[index14],
     cex=0.05, xlab="BIO_02", ylab="FD ln(Seed number of reproductive unit)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(FD[,"Seed.num.rep.unit.mean"]~splot.bioclim$BIO_02[index14])
summary(model1)
'                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    1.251e+00  1.799e-03   695.3   <2e-16 ***
splot.bioclim$BIO_02[index14] -3.751e-03  1.927e-05  -194.6   <2e-16 ***
Multiple R-squared:  0.03296,	Adjusted R-squared:  0.03296  '
abline(model1, lwd=3, col="blue")


# mpd presence-absence
system.time(
  mpd.sla.no.abu <-  DT[,list(mpd.sla = mntd.fun.dt(Cover.., SLA.dist.mat, StandSpeciesName, abundance.weighted=FALSE)),by=PlotObservationID]
)


###########
levels(splot.header2$COUNTRY_CO)
levels(splot.header$Country)
CWM2$Country <- splot.header$Country[index12]
data(countryExData)
str(countryExData)
sPDF <- joinCountryData2Map(splot.header, joinCode = "NAME",nameJoinColumn = "Country")
?mapCountryData
#mapCountryData(sPDF, nameColumnToPlot="BIODIVERSITY" )

#### Climatic space, from Jonathon Lenoir ####
library(fBasics)
# Color palette

# Import bio1 (MAT) of the world
bio <- read.table(paste("/home/helge/sPlot/Bioclim/", "BIO.txt", sep = ""),sep=";", header=T)
str(bio) #9033453 obs. of  22 variables
r <- raster(nrows=33, ncols=33, xmn=min(bio$BIO_01), xmx=max(bio$BIO_01), ymn=min(bio$BIO_12), ymx=max(bio$BIO_12))
mattar <- data.frame(bio$BIO_01,bio$BIO_12)
str(mattar)
names(mattar) <- c("bio1","bio12")
mattar_r <- rasterize(mattar, r, fun="count")
str(mattar_r)
plot(log10(mattar_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     main="N 2.5 arc-minutes cells per bin (log10 scale)", cex.main=0.7,
     xlab="MAT (°C*10)",  ylab="TAR (mm)")
# Bio1_Bio12_world.png

#plot(mattar_r/max(getValues(mattar_r), na.rm=TRUE), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), main="Relative proportion of 10 arc-minutes cells per bin", xlab="MAR (°C*10)", ylab="TAR (mm)")
# Plot the sampling effort of sPlot across the MAT-TAR climatic space
str(splot.bioclim)
# Make it a spatial data frame
CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
splot.bioclim.coords <- cbind(splot.bioclim$X, splot.bioclim$Y)
splot.bioclim.coords <- SpatialPoints(splot.bioclim.coords, proj4string=CRSlonlat)
splot.bioclim2 <- data.frame(splot.bioclim$BIO_01,splot.bioclim$BIO_12)
names(splot.bioclim2) <- c("bio1","bio12")
#splot.bioclim2 <- SpatialPointsDataFrame(splot.bioclim.coords, splot.bioclim2, proj4string=CRSlonlat)
#str(splot.bioclim2)
#sPlot_mattar <- extract(bio1, coordinates(sPlot))
#sPlot_mattar <- as.data.frame(sPlot_mattar)
#names(sPlot_mattar) <- c("bio1")
#sPlot_mattar$bio12 <- extract(bio12, coordinates(sPlot))
sPlot_mattar_r <- rasterize(splot.bioclim2, r, fun="count")
str(sPlot_mattar_r)
plot(mattar_r, asp=0, col="grey", legend=FALSE, main="N vegetation plots per bin (log10 scale)",
     xlab="MAT (°C)", ylab="TAR (mm)")
# Bio1_Bio12_world_grey.png
plot(log10(sPlot_mattar_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     add=TRUE)
# Bio1_Bio12_sPlot_SLA.png

# Build the PC1-PC2 climatic space
load("/home/helge/sPlot/Bioclim/pca_data.RData")
str(pca.data)
r <- raster(nrows=33, ncols=33, xmn=min(pca.data$PC1), xmx=max(pca.data$PC1), ymn=min(pca.data$PC2), ymx=max(pca.data$PC2))
pca_r <- rasterize(pca.data[, c("PC1", "PC2")], r, fun="count")
plot(log10(pca_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     main="N 2.5 arc-minutes cells per bin (log10 scale)", 
     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
plot(pca_r/max(getValues(pca_r), na.rm=TRUE), asp=0, 
     col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     main="Relative proportion of 2.5 arc-minutes cells per bin", 
     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
# Plot the sampling effort of sPlot across the PC1-PC2 climatic space
index13 <- match(splot.bioclim$RAST_ID,pca.data$RAST_ID)
sPlot_pca <- pca.data[index13, c("PC1", "PC2")]
sPlot_pca_r <- rasterize(sPlot_pca, r, fun="count")
plot(pca_r, asp=0, col="grey", legend=FALSE, 
     main="N vegetation plots per bin (log10 scale)", 
     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
plot(log10(sPlot_pca_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     add=TRUE)

#### For Marten Winter ####
# species x country Matrix
species.country <- table(DT$species,splot.header$Country[index30])
str(species.country) # int [1:60908, 1:130]
names(dimnames(species.country))
names(dimnames(species.country)) <- c("species","country")

library(reshape2)
?melt.table
species.country2 <- melt(species.country)
str(species.country2) # 	188470 obs. of  3 variables:
names(species.country2)
names(species.country2)[3] <- "p_a"
table(species.country2$p_a)

species.country2$p_a[species.country2$p_a>0] <- 1
species.country2 <- species.country2[species.country2$p_a==1,]
str(species.country2) # 	188470 obs. of  3 variables:
species.country2 <- species.country2[,-3]
str(species.country2) # 188470
write.csv(species.country2,file = 
            paste("/home/oliver/shared/", "species.country2.csv", sep = ""), row.names = FALSE)
levels(species.country2$country) # 130
length(species.country2$country[species.country2$country=="Algeria"])
# 97
sum(species.country[,2]) 
# 799, this is the amount of records for DZA = Algeria
sum(species.country[,126]) 
# 1467200, sum of records for United States

TaxaNat <- read.csv(paste("/home/oliver/shared/Glonaf/", "TaxaNat.csv", sep = ""))
str(TaxaNat) #178779 obs. of  2

GlonafRegion <- read.csv(paste("/home/oliver/shared/Glonaf/", "GlonafRegion.csv", sep = ""))
str(GlonafRegion) #843 obs. of  10
trim.trailing <- function (x) sub("\\s+$", "", x)
# returns string w/o leading or trailing whitespace
GlonafRegion$Country_sPlot <- as.factor(trim.trailing(as.character(GlonafRegion$Country_sPlot)))
index14 <- match(levels(species.country2$country),levels(GlonafRegion$Country_sPlot))
index14
levels(GlonafRegion$Country)

data.frame(levels(species.country2$country),levels(GlonafRegion$Country_sPlot)[index14])
## all correct

index15 <- match(levels(GlonafRegion$Country_sPlot),GlonafRegion$Country_sPlot)
index15
index16 <- match(species.country2$country,levels(species.country2$country))
any(is.na(index16)) # F
length(levels(species.country2$species)) #60908
'species.country2$species.country <- paste(species.country2$species,
levels(species.country2$country)[index16],sep="_")
species.country2$species.country <- paste(species.country2$species,
levels(GlonafRegion$Country_sPlot)[index14][index16],sep="_")
species.country2$species.country <- paste(species.country2$species,
GlonafRegion$Country_sPlot[index15][index14][index16],sep="_")
'
species.country2$species.country <- paste(species.country2$species,
                                          GlonafRegion$ISO.country[index15][index14][index16],sep="_")
str(species.country2) # 188470 obs. of  3 variables:
head(species.country2)
species.country2[c(160000:160100),]
## fits!

index17 <- match(TaxaNat$Region,GlonafRegion$ID.region)
any(is.na(index17)) # F

TaxaNat$species.country <-paste(TaxaNat$Taxon2,GlonafRegion$ISO.country[index17], sep="_")

index18 <- match(species.country2$species.country,TaxaNat$species.country)
length(index18) #188470
length(index18[!is.na(index18)]) # 9283 out of 188470 species x country occurrences
# are alien occurrences
9283/188470*100 # 4.925452 % species x country occurrences
species.country2$alien <- F
species.country2$alien[!is.na(index18)] <- T
species.country2$ISO.country <- GlonafRegion$ISO.country[index15][index14][index16]
str(species.country2)
head(species.country2)
length(species.country2$ISO.country[species.country2$ISO.country=="DZA"])
# 12939, this cannot be
#species.country2$ISO.country[species.country2$ISO.country=="DZA"]
sum(species.country[,2]) 
# 799, this is the amount of records for DZA = Algeria
length(species.country2$ISO.country[species.country2$ISO.country=="DZA"
                                    & !is.na(species.country2$ISO.country)])
# 97!

#table(species.country2$ISO.country)
#species.country3 <- acast(species.country2, species~ISO.country, length)
length(species.country2$alien[species.country2$alien])
# 9283 alien records in sPlot
length(species.country2$alien[species.country2$alien & species.country2$ISO.country=="DZA"])
# 3 alien records in DZA = Algeria
length(species.country[species.country[,2]>0,2])
# 97, however there are only 97 records in total
length(species.country2$alien[species.country2$alien & species.country2$ISO.country=="USA"])
# 1911 alien species in USA
length(species.country[species.country[,126]>0,2])
# 9933 species in total in USA
alien.richness <- as.matrix(table(species.country2$ISO.country,species.country2$alien))[,2]

species.country3 <- species.country
species.country3[species.country3>0] <- 1
country.richness <- colSums(species.country3)
country.richness
names(country.richness)
data.frame(names(country.richness),GlonafRegion$ISO.country[index15][index14])
country.richness2 <- data.frame(GlonafRegion$ISO.country[index15][index14],as.numeric(country.richness))
names(country.richness2) <- c("ISO.country","richness")
country.richness2$ISO.country
country.richness2

index19 <- match(country.richness2$ISO.country,names(alien.richness))
index19
alien.richness
country.richness2$alien.richness <- alien.richness[index19]
country.richness2
country.richness2$alien.prop <- country.richness2$alien.richness/country.richness2$richness
country.richness2$alien.prop

sPDF <- joinCountryData2Map(country.richness2,joinCode = "ISO3",
                            nameJoinColumn = "ISO.country")
'112 codes from your data successfully matched countries in the map
18 codes from your data failed to match with a country code in the map
132 codes from the map werent represented in your data
'
mapParams <- mapCountryData(sPDF, nameColumnToPlot="richness",addLegend=FALSE)
do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
mapParams <- mapCountryData(sPDF, nameColumnToPlot="richness",
                            catMethod=seq(0,10000,by=1000),addLegend=FALSE)
do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))

mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.richness",
                            catMethod=seq(0,2000,by=200),addLegend=FALSE)
do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))

max(country.richness2$alien.prop, na.rm=T) #0.2823779
mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.prop",addLegend=FALSE)
mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.prop",
                            catMethod=seq(0,0.3,by=0.02),addLegend=FALSE)
do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))


##################### OLD ###################
library(spdep)
library(maptools)
library(hexbin)
library(RColorBrewer)
# Use a lighter version of sPlot focusing on mean annual temperature MAT (BIO1)
# and total annual rainfall TAR (BIO12)
sPlot <- splot.header[, c("LONGITUDE", "LATITUDE", "BIO_1", "BIO_12")]
sPlot$LONGITUDE <- as.numeric(as.character(sPlot$LONGITUDE))
sPlot$LATITUDE <- as.numeric(as.character(sPlot$LATITUDE))
sPlot$BIO_1 <- as.numeric(sPlot$BIO_1)/10
sPlot$BIO_12 <- as.numeric(sPlot$BIO_12)
str(sPlot)
# Make it a spatial data frame
CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
coords <- cbind(sPlot$LONGITUDE, sPlot$LATITUDE)
coords <- SpatialPoints(coords, proj4string=CRSlonlat)
sPlot <- SpatialPointsDataFrame(coords, sPlot, proj4string=CRSlonlat)
str(sPlot)
# Plot the data on the world map to check the coordinates and projection
data(wrld_simpl)
plot(wrld_simpl)
points(coordinates(sPlot), cex=0.1, col="red", pch=16)
# Set a color ramp
rf <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
# Plot the plot distribution within the MAT-TAR bidimentionnal climatic space
hexbinplot(BIO_12~BIO_1, data=sPlot@data, colramp=rf, trans=log, inv=exp,
           border="white", xlab="MAT", ylab="TAR", xbins=30)
# Display only bins with a minimum of 10 plots and a maximum of 1000 plots
hexbinplot(BIO_12~BIO_1, data=sPlot@data, colramp=rf, trans=log, inv=exp,
           border="white", xlab="MAT", ylab="TAR", xbins=30, mincnt=10, maxcnt=1000)
#


################ OLD ###################

###
load("C:\\Daten\\iDiv\\sPlot\\pca_data.Rdata")
str(pca.data)
any(is.na(pca.data$PC1)) #F

load("C:\\Daten\\iDiv\\sPlot\\sPlot_header_pca.RData")
str(splot.header2)
any(is.na(pca.data$PC1)) #F

library(raster)
r <- 
  splot_pca_r <- rasterize(splot_pca, r, run="count")
plot(splot_pca_r, asp=0, col=rev(divPalette(n=100, name="RdY1Bu"))))
plot(splot_r, asp=0, col=rev(divPalette(n=100, name="RdY1Bu"))))

index <- which(getValues(splot_pca_r)>0)
which(getValues(pca_r)>0)
poinra
getValues(splot_pca_r)[index] # 136 values
?sample
splo_pca$binid <- cellFromXY(pca_r, splot_pca)


###########################
load("C:\\Daten\\iDiv\\sPlot\\splot_pca2.Rdata")
str(splot_pca)
splot_pca <- data.table(splot_pca)
setkey(splot_pca,binid)
inter <- splot_pca[,list(count=length(PCA1)), by=binid]
splot_pca2 <- splot_pca[,list(PCA1, PCA2,count=length(PCA1)), by=binid]

str(splot_pca2)
hist(splot_pca2$binid)
inter2 <- splot_pca2[count>100,list(count=sum(PCA1)), by=binid]
str(inter2)
[,list(total.cover=sum(Cover..)), by=PlotObservationID]
<- DT[,list(total.cover=sum(Cover..)), by=PlotObservationID]

sample(splot_pca$PCA1,100)
splot_pca2 <-  splot_pca[table(splot_pca$binid)>100,sample(PCA1,100),by=binid]
str(splot_pca2)

resample <- function(PCA1_,PCA2_,binid_){
  if (length(PCA1)>100) {
    
  }
  else {
    1
  }
  ]index10 <- match(species,labels(d))
  d3 <- as.dist(dist[index10,index10])
  if (sum(d3)==0){
    NA
  }
  divc(as.data.frame(rel.cover),d3)$diversity
}
splot_pca2 <-  splot_pca[length(PCA1)<=100,res_ID := 1]
head(splot_pca2,100)
table(splot_pca2$binid,splot_pca2$count, exclude=NULL)
table(splot_pca$binid)
splot_pca2 <-  splot_pca[,res_ID := 0]
splot_pca2 <-  splot_pca[table(splot_pca2$binid)<=100,res_ID := 1]
splot.selection <-  splot_pca[,list(res = resample(PCA1,PCA2,binid)),by=binid]
#length(splot_pca[binid>100])
hist(splot_pca$binid)
splot.selection <-  splot_pca[,list(length(binid)),by=binid]
splot.selection <-  splot_pca[,list(count=length(PCA1)),by=binid]
splot.selection <-  splot_pca[,,by=binid]
hist(splot.selection$count)
str(splot.selection)
str(splot_pca2)
splot.selection <-  splot_pca[,list(nrow()),by=binid]
splot.selection <-  splot_pca[,count:=length(binid),by=binid]
length(splot_pca,by=binid)
########################################################################################

### CWM ###
#CWM1 <-  DT[!is.na(plot.species.trait.value),list(CWM = weighted.mean(plot.species.trait.value,Relative.cover)),by=PlotObservationID]
CWM1 <-  DT[,list(CWM = weighted.mean(plot.species.trait.value,Relative.cover)),by=PlotObservationID]
# see http://stackoverflow.com/questions/3367190/aggregate-and-weighted-mean-in-r
CWM1[1:100,]
hist(CWM1$CWM)
write.csv((x, file = paste(path.sPlot, "CWM_SLA_all.csv", sep = ""), row.names = FALSE))

### FD ###
library(ade4)    # f?r divc (Rao's Q)
# this does not work: ~50 GB RAM required for 40k x 40k species matrix:
'ktab1 <- ktab.list.df(list(as.data.frame(species.trait.value[!is.na(species.trait.value)])))
d <- dist.ktab(ktab1, type=c("Q"), option = "scaledBYrange")
#labels(d)
#str(d)
#attr(d,"Labels")
attr(d,"Labels") <- dimnames(species.trait.value)[[1]][!is.na(species.trait.value)]
labels(d)
any(is.na(d)) # F
dist <- as.matrix(d)

#tree.trait.weighted <- t(t(veg.KHG3.prop)  * all.traits.KHG[,j])
#dist.sqrt <- sqrt(dist)
d.sqrt <- sqrt(d)
labels(d.sqrt)
dist <- as.matrix(d.sqrt)
'
'Rao <- function(rel.cover,species){
index10 <- match(species,labels(d))
d3 <- as.dist(dist[index10,index10])
if (sum(d3)==0){
NA
}
divc(as.data.frame(rel.cover),d3)$diversity
}
FD.all <-  DT[!is.na(plot.species.trait.value),list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]
'
#species.trait.value.scaled <- species.trait.value/(max(species.trait.value)-min(species.trait.value))
species.trait.value.scaled <- TRY.mean.sd.splot$SLA.mean/(max(TRY.mean.sd.splot$SLA.mean)-min(TRY.mean.sd.splot$SLA.mean))

# scale by range, which saves time for distance calculations

library(reshape2)

package.size = 1000
#number.plots.total <- length(levels(DT[!is.na(plot.species.trait.value)]$PlotObservationID)) # 705272
number.plots.total <- length(levels(DT$PlotObservationID)) # 701611 vs. 705272
FD.all <- array(0,number.plots.total, dimnames=list(levels(DT$PlotObservationID)))
number.packages <-ceiling(number.plots.total/package.size) #702
pb <- winProgressBar(title = "progress bar", min = 0, max =number.packages, width = 300)
for (k in 1:number.packages) {
  setWinProgressBar(pb, k, title=paste("Row ",k,"of ", number.packages, " is processed"))
  ifelse (k==number.packages, package<-number.plots.total%%package.size, package <- package.size)
  #index3 <- match(DT[!is.na(plot.species.trait.value)]$PlotObservationID,levels(DT[!is.na(plot.species.trait.value)]$PlotObservationID)[seq((k-1)*package.size+1,k*package.size,1)])
  index3 <- match(DT$PlotObservationID,levels(DT$PlotObservationID)[seq((k-1)*package.size+1,k*package.size,1)])
  #DT2 <- DT[!is.na(plot.species.trait.value) & !is.na(index3)]
  #DT2 <- DT2[!is.na(plot.species.trait.value)]
  #length(index3[!is.na(index3)])
  DT2 <- DT[!is.na(index3)]
  #str(DT2)
  #veg.matrix1 <- acast(DT2, Matched.concept ~ PlotObservationID, value.var="Relative.cover", fill=0)  
  veg.matrix1 <- acast(DT2, names.stand ~ PlotObservationID, value.var="Relative.cover", fun.aggregate=sum, fill=0)  
  # fun.aggregate = sum -> sums up multiple entries per species!
  # this can happen with the unified species nomenclature!
  #length(unique(dimnames(veg.matrix1)[[2]]))# 1000
  #length(unique(DT2$PlotObservationID)) # 1000
  #veg.matrix1[veg.matrix1 > 0] <- 1
  #any(colSums(veg.matrix1)!=1) #F
  index4 <- match(dimnames(veg.matrix1)[[1]],species.list)
  species.trait.value.scaled2 <- species.trait.value.scaled[index4]
  species.list2 <- species.list[index4]
  ktab1 <- ktab.list.df(list(as.data.frame(species.trait.value.scaled2)))
  d <- dist.ktab(ktab1, type=c("Q"), option = "noscale")
  #labels(d)
  #str(d)
  #attr(d,"Labels")
  attr(d,"Labels") <- species.list2
  #labels(d)
  #any(is.na(d)) # F
  #dist <- as.matrix(d)
  #as.matrix(d)[1:8,1:8]
  d <- sqrt(d)
  #dist <- as.matrix(d)
  #rm(d)
  if (k<number.packages){
    FD.all[seq((k-1)*package.size+1,k*package.size,1)] <- 
      divc(as.data.frame(veg.matrix1),d)$diversity
    #DT2[,list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]$Rao.Q
    any(colSums(veg.matrix1)==0) # F o.K.
    any(colSums(veg.matrix1)!=1) # T ups....
    hist(colSums(veg.matrix1))
    any(rowSums(veg.matrix1)==0) # F o.K.
  } else {
    FD.all[seq((k-1)*package.size+1,number.plots.total,1)] <- 
      divc(as.data.frame(veg.matrix1),d)$diversity
    #DT2[,list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]
  }
}
close(pb)
#FD.all
write.csv(FD.all, file = paste(path.sPlot, "FDall.csv", sep = ""), row.names = FALSE)