Trait-Environment-Relationships2.R

getwd()
###### Traits #####
# TRY 2.0
load("data/TRY.all.mean.sd.2.Rdata")
str(TRY.all.mean.sd.2)
dim(TRY.all.mean.sd.2)
# 40791 obs. of  38 variables
# tests
any(is.na(TRY.all.mean.sd.2$SLA.sd)) #T

# TRY 3.0
TRY3.0 <- read.csv("data/Export_sPlot_2016_09_12.csv")
str(TRY3.0)
levels(TRY3.0$Species)[100] # "Abronia villosa", just an example
which(TRY3.0$Species==levels(TRY3.0$Species)[100]) # an example of one species with several (7) observations
TRY3.0[which(TRY3.0$Species==levels(TRY3.0$Species)[100]),c(1:19)] # trait values differ between observations
any(is.na(TRY3.0[,c(2:19)])) #F
any(TRY3.0[,c(2:19)]==0) #F

for (i in 2:19){
  TRY3.0[,i] <- log(TRY3.0[,i])
}
any(is.na(TRY3.0[,c(2:19)])) #F

TRY.all.n.3 <- aggregate(TRY3.0[,1], by=list(TRY3.0$Species), FUN=length)
head(TRY.all.n.3)
str(TRY.all.n.3) #59319 obs. of  2 variables:
names(TRY.all.n.3)
names(TRY.all.n.3) <- c("StandSpeciesName","n")

TRY.all.mean.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$Species), FUN=mean)
str(TRY.all.mean.3) #59319 obs. of  19 variables:
names(TRY.all.mean.3)
' [1] "Group.1" "X1"      "X4"      "X11"     "X13"     "X14"     "X15"     "X18"     "X26"     "X27"     "X47"    
[12] "X50"     "X56"     "X78"     "X138"    "X163"    "X169"    "X237"    "X282"  '
names(TRY.all.mean.3) <- c("StandSpeciesName","LeafArea.mean", "StemDens.mean", "SLA.mean", "LeafC.perdrymass.mean",
                           "LeafN.mean","LeafP.mean", "PlantHeight.mean", "SeedMass.mean", "Seed.length.mean",
                           "LDMC.mean","LeafNperArea.mean", "LeafNPratio.mean", "Leaf.delta.15N.mean", 
                           "Seed.num.rep.unit.mean", "Leaffreshmass.mean", "Stem.cond.dens.mean", 
                           "Disp.unit.leng.mean", "Wood.vessel.length.mean")
any(is.na(TRY.all.mean.3$SLA.mean)) #F

TRY.all.sd.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$Species), FUN=sd)
str(TRY.all.sd.3) #59319 obs. of  19 variables:
names(TRY.all.sd.3)
names(TRY.all.sd.3) <- c("StandSpeciesName","LeafArea.sd", "StemDens.sd", "SLA.sd", "LeafC.perdrymass.sd",
                           "LeafN.sd","LeafP.sd", "PlantHeight.sd", "SeedMass.sd", "Seed.length.sd",
                           "LDMC.sd","LeafNperArea.sd", "LeafNPratio.sd", "Leaf.delta.15N.sd", 
                           "Seed.num.rep.unit.sd", "Leaffreshmass.sd", "Stem.cond.dens.sd", 
                           "Disp.unit.leng.sd", "Wood.vessel.length.sd")
any(is.na(TRY.all.sd.3$SLA.sd)) #T
TRY.all.mean.sd.3 <- data.frame(TRY.all.n.3,TRY.all.mean.3[,c(2:19)],TRY.all.sd.3[,c(2:19)])
str(TRY.all.mean.sd.3) #59319 obs. of  38 variables
'data.frame:	59319 obs. of  38 variables:
$ StandSpeciesName       : Factor w/ 59319 levels "Aa sp","Abarema adenophora",..: 1 2 3 4 5 6 7 8 9 10 ...
$ n                      : int  1 4 1 1 38 4 86 7 5 95 ...
$ LeafArea.mean          : num  6.61 6.96 7.04 7.11 6.75 ...
$ StemDens.mean          : num  -0.822 -0.427 -0.57 -0.619 -0.543 ...
$ SLA.mean               : num  2.24 2.38 2.41 2.56 2.65 ...
$ LeafC.perdrymass.mean  : num  6.17 6.26 6.19 6.17 6.18 ...
$ LeafN.mean             : num  2.92 3.19 3.25 3.41 3.23 ...
$ LeafP.mean             : num  -0.0143 0.3403 -0.1557 -0.0999 -0.217 ...
$ PlantHeight.mean       : num  -0.498 2.874 2.544 2.293 2.394 ...
$ SeedMass.mean          : num  -4.08 4.49 4.27 4.23 4.13 ...
$ Seed.length.mean       : num  -0.317 2.021 1.88 1.92 1.912 ...
$ LDMC.mean              : num  -1.471 -0.798 -0.938 -1.087 -1.111 ...
$ LeafNperArea.mean      : num  0.896 0.834 0.838 0.804 0.617 ...
$ LeafNPratio.mean       : num  2.54 3.26 3.49 3.55 3.57 ...
$ Leaf.delta.15N.mean    : num  0.38 1.258 0.925 -0.169 0.719 ...
$ Seed.num.rep.unit.mean : num  9.56 1.64 1.39 1.18 1.32 ...
$ Leaffreshmass.mean     : num  -1.3303 -0.2516 -0.0799 -0.0889 -0.4985 ...
$ Stem.cond.dens.mean    : num  3.67 2.14 1.94 2.08 2.06 ...
$ Disp.unit.leng.mean    : num  -0.555 2.916 2.676 2.689 2.842 ...
$ Wood.vessel.length.mean: num  6.13 5.73 6 6.06 6.21 ...
$ LeafArea.sd            : num  NA 0.0303 NA NA 0.0556 ...
$ StemDens.sd            : num  NA 0.1101 NA NA 0.0123 ...
$ SLA.sd                 : num  NA 0.1063 NA NA 0.0695 ...
$ LeafC.perdrymass.sd    : num  NA 0.0086 NA NA 0.00511 ...
$ LeafN.sd               : num  NA 0.0638 NA NA 0.0113 ...
$ LeafP.sd               : num  NA 0.3936 NA NA 0.0232 ...
$ PlantHeight.sd         : num  NA 0.0626 NA NA 0.1806 ...
$ SeedMass.sd            : num  NA 0.0315 NA NA 0.0787 ...
$ Seed.length.sd         : num  NA 0.0457 NA NA 0.0193 ...
$ LDMC.sd                : num  NA 0.0553 NA NA 0.0573 ...
$ LeafNperArea.sd        : num  NA 0.098 NA NA 0.068 ...
$ LeafNPratio.sd         : num  NA 0.291 NA NA 0.0304 ...
$ Leaf.delta.15N.sd      : num  NA 0.1651 NA NA 0.0398 ...
$ Seed.num.rep.unit.sd   : num  NA 0.211 NA NA 0.259 ...
$ Leaffreshmass.sd       : num  NA 0.0748 NA NA 0.0571 ...
$ Stem.cond.dens.sd      : num  NA 0.1389 NA NA 0.0349 ...
$ Disp.unit.leng.sd      : num  NA 0.0927 NA NA 0.0293 ...
$ Wood.vessel.length.sd  : num  NA 0.1466 NA NA 0.0271 ...'

plot(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.3)
#summary(lm(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2))
#abline(lm(SLA.mean~LDMC.mean, data=TRY.all.mean.sd.2),col="blue", lwd=2)
#sqrt(0.3415)

#TRY.all.mean.sd.3 <- TRY.all.mean.sd.3[,-39]
save(TRY.all.mean.sd.3, file="TRY.all.mean.sd.3.Rdata")

###### Backbone #####
load("data/backbone.v.2.splot.try3.Rdata")
# alternatively:
#sPlot3 <- read.csv(paste("data/", "backbone.v.2.splot.try3.csv", 
#                         sep = ""), sep=",", quote = "\"'")
str(backbone.splot.try3) 
# data.frame':	122901 obs. of  33 variables:

###### Harmonized plot data ######
DT <- read.csv("data/plots_without_unified_cover_scales_20160120a.csv")
DT <- read.csv("data/plots_without_unified_cover_scales_20160120a.csv")
library(data.table)
DT <- data.table(DT)
colnames(DT)
colnames(DT)[9] <- "Cover"
setkey(DT,PlotObservationID)

###### Header data #######
splot.header <- read.csv("data/sPlot_14_04_2015_coord.csv")
str(splot.header)
'data.frame:	1117940 obs. of  3 variables:
$ PlotObservationID: int  1 2 3 4 5 6 7 8 9 10 ...
$ Longitude        : num  -154 -154 -154 -154 -154 ...
$ Latitude         : num  62.4 62.4 62.4 62.4 62.4 ...'

##### Bioclim data ####
splot.bioclim <- read.csv(paste("data/","sPlot.csv", sep = ""), sep = ";", na.strings=c("","NA"))
str(splot.bioclim)
#data.frame':	1117382 obs. of  52 variables:

####################
##### Matching #####
#index6 <- match(TRY.all.mean.sd.2$StandSpeciesName,backbone.splot.try3$names.sPlot.TRY)
index6 <- match(TRY.all.mean.sd.3$StandSpeciesName,backbone.splot.try3$names.sPlot.TRY)
any(is.na(index6)) #T
# not all species in the trait list are also in the backbone
TRY.all.mean.sd.3$StandSpeciesName[is.na(index6)]
# 16 species in TRY are not in the backbone
'[1] Albizia greveanaÊ              AlnusÊgymnothyrsus             Carapanaœba sp
[4] ChionanthusÊsp                 Clerodendron nudiflorumÊ       Copa’ba sp                    
[7] Copa’fera sp                   Corynocarpus laevigatusÊ       Enterospermum madagascarienseÊ
[10] Hierochlo‘ odorata             Ing‡ sp                        Norrisia maiorÊ               
[13] NothofagusÊbrassospora         QuercusÊsp                     ShoreaÊanthoshorea            
[16] ShoreaÊsp  '
TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Albizia greveanaÊ",] # n=10
TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Albizia greveana",] # 0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Albizia greveanaÊ"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Albizia greveana"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="AlnusÊgymnothyrsus"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Alnus gymnothyrsus"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Carapanaœba sp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Carapanauba sp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Carapanaúba sp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="ChionanthusÊsp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Chionanthus sp"] #1
TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="ChionanthusÊsp",] # n=10
TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Chionanthus sp",] # n=15
DT$species[DT$species=="Chionanthus sp" & !is.na(DT$species)] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Clerodendron nudiflorumÊ"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Clerodendron nudiflorum"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Copa’ba sp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Copaiba sp"] #1
TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Copa’ba sp",] # n=0, strange, there should be at least one
TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Copaiba sp",] # n=1
DT$species[DT$species=="Copaiba sp" & !is.na(DT$species)] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Copa’fera sp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Copaifera sp"] #1
DT$species[DT$species=="Copaifera sp" & !is.na(DT$species)] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Corynocarpus laevigatusÊ"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Corynocarpus laevigatus"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Enterospermum madagascarienseÊ"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Enterospermum madagascariense"] #1
DT$species[DT$species=="Enterospermum madagascariense" & !is.na(DT$species)] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Hierochlo‘ odorata"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Hierochloe odorata"] #1
TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Hierochlo‘ odorata",] # n=0, strange, there should be at least one
TRY.all.mean.sd.3[TRY.all.mean.sd.3$StandSpeciesName=="Hierochloe odorata",] # n=63
DT$species[DT$species=="Hierochloe odorata" & !is.na(DT$species)] #545
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Ing‡ sp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Inga sp"] #1
DT$species[DT$species=="Inga sp" & !is.na(DT$species)] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Norrisia maiorÊ"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Norrisia maior"] #1
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="NothofagusÊbrassospora"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Nothofagus brassospora"] #1
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="QuercusÊsp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Quercus sp"] #1
DT$species[DT$species=="Quercus sp" & !is.na(DT$species)] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="ShoreaÊanthoshorea"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Shorea anthoshorea"] #1
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="ShoreaÊsp"] #0
backbone.splot.try3$names.sPlot.TRY[backbone.splot.try3$names.sPlot.TRY=="Shorea sp"] #1
DT$species[DT$species=="Shorea sp" & !is.na(DT$species)] #0
# none of the species is in sPlot, thus we can take them out

### data cleaning ###
length(TRY.all.mean.sd.3$StandSpeciesName[is.na(TRY.all.mean.sd.3$StandSpeciesName)])
# No record is NA!
#TRY.all.mean.sd.2[which(is.na(TRY.all.mean.sd.2$StandSpeciesName)),]
# record no. 7947 has StandSpeciesName = NA
#TRY.all.mean.sd.2 <- TRY.all.mean.sd.2[-which(is.na(TRY.all.mean.sd.2$StandSpeciesName)),]

TRY.all.mean.sd.3$species <- backbone.splot.try3$name.short.correct[index6]
dim(TRY.all.mean.sd.3) # 59319    39


index1 <- match(DT$PlotObservationID, splot.header$PlotObservationID)
any(is.na(index1)) #F
length(index1) #  24241941
length(unique(index1[!is.na(index1)])) #1117940

index2 <- match(splot.header$PlotObservationID,DT$PlotObservationID)
any(is.na(index2)) #F
length(index2) #   1117940
length(unique(index2[!is.na(index2)])) #  1117940
dim(splot.header)
# 1117940      3
identical(as.character(splot.header$PlotObservationID),as.character(DT$PlotObservationID[index2]))
# T

any(is.na(DT$species)) #T
# that should not be the case!
length(DT$species[!is.na(DT$species)]) #24221565
length(DT$species) #24241941
24241941-24221565 # 20376 NA names!!!
# it gives nonsense to match them with traits
index7 <- match(DT$species,TRY.all.mean.sd.3$species)
length(index7) #24241941
length(index7[!is.na(index7)]) 
# 21839463, with TRY3.0
# 21040927, with TRY2.0
# 19841429, with first TRY version
24241941 - 21839463 # 2402478 entries have no traits
(24241941 - 2402478)/24241941*100 
# which are 90.08958% of all entries
# previously: 86.79555% with TRY2.0
# previously: 18.15247 % and with first TRY version

# reduce DT, and splot.header
#splot.header <- splot.header[!is.na(DT$species[index2]),]
#dim(splot.header) #1117476       3
DT <- DT[!is.na(DT$species),]
index7 <- match(DT$species,TRY.all.mean.sd.3$species)
length(index7) #24221565


### CWM ###
mean(TRY.all.mean.sd.3$SLA.mean) # 2.634988
min(TRY.all.mean.sd.3$SLA.mean) # 0.03072317
max(TRY.all.mean.sd.3$SLA.mean) # 5.182365
# example
DT$trait <- NA
DT$trait <- TRY.all.mean.sd.3$SLA.mean[index7]
length(DT$trait[!is.na(DT$trait)]) # 21819087
length(DT$trait[is.na(DT$trait)]) #   2402478
str(DT)

colnames(TRY.all.mean.sd.3)
CWM2 <-  DT[,list(CWM.SLA = weighted.mean(trait,Cover,na.rm = T)),by=PlotObservationID]
# I have checked that we do not need to prefilter only those entries that have a trait value
# this works fine
str(CWM2)
dim(CWM2) #1117898
which(colnames(TRY.all.mean.sd.3)=="LeafArea.mean") # 3
which(colnames(TRY.all.mean.sd.3)=="Wood.vessel.length.mean") # 20
CWM <- array(NA,c(dim(CWM2)[1],18),dimnames=list(CWM2$PlotObservationID,
                                              colnames(TRY.all.mean.sd.3)[3:20]))
rm(CWM2)
for (i in 1:18){
  DT$trait <- NA
  DT$trait <- TRY.all.mean.sd.3[index7,i+2]
  CWM[,i] <-  DT[,list(CWM.trait= weighted.mean(trait,Relative.cover,na.rm = T)),by=PlotObservationID]$CWM.trait
  
}
str(CWM) #num [1:1117898, 1:18]
CWM[1:20,]
tail(CWM)
write.csv(CWM,file = "CWM_TRY3.csv", row.names = T)
CWM <- read.csv("C://Daten//iDiv2//splot2//CWM_TRY3.csv")
str(CWM) #1117898 obs. of  19 variables
dimnames(CWM)[[1]] <- CWM$X
CWM <- CWM[,-1]
tail(CWM)

##### MAP ####
index3 <- match(dimnames(CWM)[[1]],splot.header$PlotObservationID)
length(index3) #1117898
any(is.na(index3)) # F
#length(index3[is.na(index3)]) #422
splot.header2 <- splot.header[index3,]
'CWM1$Longitude <- splot.header$Longitude[index10]
CWM1$Latitude <- splot.header$Latitude[index10]
'
str(CWM) # num [1:1117898, 1:18]
CWM <- CWM[!(is.na(splot.header2$Longitude)|is.na(splot.header2$Latitude)|is.na(CWM[,"SLA.mean"])),]
str(CWM) # num [1:1112287, 1:18]
1112287/1117898*100 # 99.49808% of plots have traits, Long and Lat
any(is.na(CWM)) #F
# match header again
index4 <- match(dimnames(CWM)[[1]],splot.header$PlotObservationID)
length(index4) #1112287
any(is.na(index4)) # F
splot.header2 <- splot.header[index4,]
# now in the same sequence as CWM

library(raster)
CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
coords <- cbind(splot.header2$Longitude, splot.header2$Latitude)
coords <- SpatialPoints(coords, proj4string=CRSlonlat)
map.CWM <- SpatialPointsDataFrame(coords, as.data.frame(CWM), proj4string=CRSlonlat)
str(map.CWM)

library(maptools)
data(wrld_simpl)
plot(wrld_simpl)
points(coordinates(map.CWM), cex=0.1, col="red", pch=16)
# sPlot2withSLA.png
max(coordinates(map.CWM)[,1]) # 179.5901
max(coordinates(map.CWM)[,2]) # 80.14912
min(coordinates(map.CWM)[,1]) # -162.7414
min(coordinates(map.CWM)[,2]) # -64.78


r <- raster(nrows=180, ncols=360, xmn=-180, xmx=180, ymn=-90, ymx=90)
str(r)
raster.map.SLA <- rasterize(map.CWM,r,map.CWM$SLA.mean, run="mean")
str(raster.map.SLA)
plot(wrld_simpl)
plot(raster.map.SLA, add=T)
# SLAgridded1.png

library(rworldmap)
raster.map.SLA.grid <- as(raster.map.SLA, 'SpatialGridDataFrame')
library(RColorBrewer)
# Set a color ramp
rf <- brewer.pal(8, 'Spectral')
par(mar=c(0, 0, 0, 8) + 0.1)
mapParams <- mapGriddedData(raster.map.SLA.grid,addLegend=F, 
                            numCats = 8, colourPalette=rf)
str(mapParams)
mapParams$cutVector
# addMapLegend(plottedData=raster.map.SLA.grid,legendShrink=0.5)
#do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.5,
#                legendIntervals="data", legendMar = 2 ) )
#do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.3,
#                         legendIntervals="page", legendMar = 4, legendShrink=1.7,horizontal=F))
do.call(addMapLegend, c( mapParams, legendLabels="all", legendWidth=0.3,
                         legendIntervals="page", legendMar =2, horizontal=F))

#round(as.numeric(exp(mapParams$cutVector)),1)
mtext(round(as.numeric(exp(mapParams$cutVector)),1),side=4,line=-1.5, 
      at=seq(-200,230, by=51.3), las=2)
#mtext("ln(SLA)",side=1,line=0, at=230, las=1)
#mtext("ln(SLA)",side=4,line=-2,at=-170, las=2)
# SLAgridded2.png
par(mar=c(5, 4, 4, 2) + 0.1)

############################################################
# Regression with Bioclim
############################################################
'BIO1 = Annual Mean Temperature
BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp))
BIO3 = Isothermality (BIO2/BIO7) (* 100)
BIO4 = Temperature Seasonality (standard deviation *100)
BIO5 = Max Temperature of Warmest Month
BIO6 = Min Temperature of Coldest Month
BIO7 = Temperature Annual Range (BIO5-BIO6)
BIO8 = Mean Temperature of Wettest Quarter
BIO9 = Mean Temperature of Driest Quarter
BIO10 = Mean Temperature of Warmest Quarter
BIO11 = Mean Temperature of Coldest Quarter
BIO12 = Annual Precipitation
BIO13 = Precipitation of Wettest Month
BIO14 = Precipitation of Driest Month
BIO15 = Precipitation Seasonality (Coefficient of Variation)
BIO16 = Precipitation of Wettest Quarter
BIO17 = Precipitation of Driest Quarter
BIO18 = Precipitation of Warmest Quarter
BIO19 = Precipitation of Coldest Quarter'

index11 <- match(splot.header2$PlotObservationID,splot.bioclim$PLOT_ID)
length(index11) # 1112287
length(index11[!is.na(index11)]) # 1112287

#index12 <- match(CWM2$PlotObservationID, splot.header$PlotObservationID)
#any(is.na(index12)) #F
str(splot.bioclim$BIO_01)
plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11])
model2 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11]+I(splot.bioclim$BIO_01[index11]^2))
summary(model1)
'Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    2.929e+00  8.700e-04    3367   <2e-16 ***
splot.bioclim$BIO_01[index11] -9.720e-04  8.603e-06    -113   <2e-16 ***
Multiple R-squared:  0.01135,	Adjusted R-squared:  0.01135 
F-statistic: 1.277e+04 on 1 and 1112285 DF,  p-value: < 2.2e-16
'
summary(model2)
'Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                         2.791e+00  1.059e-03  2635.2   <2e-16 ***
splot.bioclim$BIO_01[index11]       2.446e-03  1.765e-05   138.6   <2e-16 ***
I(splot.bioclim$BIO_01[index11]^2) -1.673e-05  7.593e-08  -220.3   <2e-16 ***
Multiple R-squared:  0.05269
'
abline(model1, lwd=3, col="blue")
model2$coefficients
x.new <- seq(-100,300,1)
y.new <- model2$coefficients[1]+model2$coefficients[2]*x.new+model2$coefficients[3]*x.new^2
lines(x.new,y.new, lwd=3, col="blue")
# SLA_BIO1.png

# devide in quantiles
quantile(splot.bioclim$BIO_01[index11],probs = seq(0, 1, 1/3))
'       0% 33.33333% 66.66667%      100% 
     -227        79        99       303 '
# -> most points (i.e. one third) are in the temperate zone between 7.9 and 9.9°C
x1 <- splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=79]
model1a <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]<=79]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=79])
summary(model1a)
'Coefficients:
                                                                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                                        2.688e+00  1.178e-03    2282   <2e-16 ***
splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] <= 79] 2.826e-03  2.004e-05     141   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.382 on 372215 degrees of freedom
Multiple R-squared:  0.05069,	Adjusted R-squared:  0.05068 
F-statistic: 1.987e+04 on 1 and 372215 DF,  p-value: < 2.2e-16'
model1b <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]<=99]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]<=99])
summary(model1b)
'Coefficients:
                                                                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                                        2.695e+00  1.112e-03  2422.8   <2e-16 ***
splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] <= 99] 2.709e-03  1.463e-05   185.1   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3796 on 750485 degrees of freedom
Multiple R-squared:  0.04367,	Adjusted R-squared:  0.04367 
F-statistic: 3.427e+04 on 1 and 750485 DF,  p-value: < 2.2e-16'
model1c <- lm(CWM[,"SLA.mean"][splot.bioclim$BIO_01[index11]>99]~splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11]>99])
summary(model1c)
'Coefficients:
                                                                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                                        3.210e+00  2.425e-03  1323.3   <2e-16 ***
splot.bioclim$BIO_01[index11][splot.bioclim$BIO_01[index11] > 99] -3.417e-03  1.739e-05  -196.5   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4157 on 361798 degrees of freedom
Multiple R-squared:  0.09646,	Adjusted R-squared:  0.09646 
F-statistic: 3.862e+04 on 1 and 361798 DF,  p-value: < 2.2e-16'

plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
x.new <- seq(-100,79,1)
y.new <- model1a$coefficients[1]+model1a$coefficients[2]*x.new
lines(x.new,y.new, lwd=3, col="blue")
x.new <- seq(79,99,1)
y.new <- model1b$coefficients[1]+model1b$coefficients[2]*x.new
lines(x.new,y.new, lwd=3, col="green")
x.new <- seq(99,300,1)
y.new <- model1c$coefficients[1]+model1c$coefficients[2]*x.new
lines(x.new,y.new, lwd=3, col="red")

### calculate a GAM
library(gam)
x <- splot.bioclim$BIO_01[index11]
model3 <- gam(CWM[,"SLA.mean"]~lo(x, span=0.5))
summary(model3)
'    Null Deviance: 207269.5 on 1111306 degrees of freedom
Residual Deviance: 187452.7 on 1111302 degrees of freedom
AIC: 1175898
Anova for Parametric Effects
                       Df Sum Sq Mean Sq F value    Pr(>F)    
lo(x, span = 0.5)       1   3238  3238.0   19197 < 2.2e-16 ***
Residuals         1111302 187453     0.2
Anova for Nonparametric Effects
                  Npar Df Npar F     Pr(F)    
(Intercept)                                   
lo(x, span = 0.5)     2.7  37028 < 2.2e-16 ***
'

x.new <- seq(-100,300,1)
y.new <- predict(model3, data.frame(x=x.new), se = F)
# se=T does not work
plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_01[index11],
     cex=0.05, xlab="BIO_1", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
lines(x.new,y.new, lwd=3, col="blue")

### N to P ratio
model4 <- lm(CWM[,"LeafNPratio.mean"]~splot.bioclim$BIO_01[index11])
summary(model4)
'Coefficients:
                               Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   2.334e+00  4.243e-04    5501   <2e-16 ***
splot.bioclim$BIO_01[index11] 1.313e-03  4.196e-06     313   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1986 on 1112285 degrees of freedom
Multiple R-squared:  0.08096,	Adjusted R-squared:  0.08096 
F-statistic: 9.799e+04 on 1 and 1112285 DF,  p-value: < 2.2e-16
'
plot(CWM[,"LeafNPratio.mean"]~splot.bioclim$BIO_01[index11],
     cex=0.05, xlab="BIO_01", ylab="CWM ln(Leaf N to P ratio)",cex.lab=1.5, cex.axis=1.5)
abline(model4, lwd=3, col="blue")

### turn the regression around
# does not make sense
model5 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"SLA.mean"])
summary(model5)
'                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)       127.41676    0.28026   454.6   <2e-16 ***
CWM[, "SLA.mean"] -12.97407    0.09769  -132.8   <2e-16 ***
Multiple R-squared:  0.01562,	Adjusted R-squared:  0.01562 '

model6 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"LeafNPratio.mean"])
summary(model6)
'Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               -72.7479     0.4591  -158.5   <2e-16 ***
CWM[, "LeafNPratio.mean"]  67.2535     0.1883   357.2   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 42.46 on 1111305 degrees of freedom
Multiple R-squared:  0.103,	Adjusted R-squared:  0.103 '

model7 <- lm(splot.bioclim$BIO_01[index11]~CWM[,"SLA.mean"]+CWM[,"LeafNPratio.mean"])
summary(model7)
'                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               -41.59378    0.55839  -74.49   <2e-16 ***
CWM[, "SLA.mean"]          -9.08978    0.09355  -97.17   <2e-16 ***
CWM[, "LeafNPratio.mean"]  65.04103    0.18885  344.40   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 42.28 on 1111304 degrees of freedom
Multiple R-squared:  0.1106,	Adjusted R-squared:  0.1106 '
x <- splot.bioclim$BIO_01[index11]
model8 <- lm(x~CWM[,"LeafNPratio.mean"]*CWM[,"SLA.mean"])
summary(model8)
'Coefficients:
                                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                 -432.2500     2.2645  -190.9   <2e-16 ***
CWM[, "LeafNPratio.mean"]                    227.1681     0.9304   244.2   <2e-16 ***
CWM[, "SLA.mean"]                            135.8572     0.8202   165.6   <2e-16 ***
CWM[, "LeafNPratio.mean"]:CWM[, "SLA.mean"]  -60.2280     0.3386  -177.9   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 41.69 on 1111303 degrees of freedom
Multiple R-squared:  0.1352,	Adjusted R-squared:  0.1352 '

library(scatterplot3d) # für 3D-Plots
s3d1 <- scatterplot3d(CWM[,"SLA.mean"],CWM[,"LeafNPratio.mean"], x, 
                      xlab="CWM ln(Leaf N to P ratio)", ylab="CWM ln(SLA)", zlab="BIO_1",
                              pch=1,cex.symbols=0.05,highlight.3d=T,type="p")
#fit <- lm(AnzF ~ Prop.n+Prop.ba,data=data6[data6$Sp=="Qupe",])
s3d1$plane3d(model8)
# does not work
### producing the plane with points
min(CWM[,"SLA.mean"])
max(CWM[,"SLA.mean"])
x2 <- sort(rep(seq(0.1, 5, by=0.1),35))
min(CWM[,"LeafNPratio.mean"])
max(CWM[,"LeafNPratio.mean"])
y2 <- rep(seq(1, 4.4, by=0.1),50)
func <- function(x,y) { -432.2500 + 227.1681 * x + 135.8572*y -60.2280*x*y}
z2 <- func(x2,y2)
s3d1 <- scatterplot3d(CWM[,"SLA.mean"],CWM[,"LeafNPratio.mean"], x, 
                      xlab="CWM ln(Leaf N to P ratio)", ylab="CWM ln(SLA)", zlab="BIO_1",
                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=70,scale.y=0.5)
s3d1$points3d(x2,y2,z2,col="blue", type="p", pch=20, cex=0.5)
# 
x <- splot.bioclim$BIO_01[index11]
model7 <- lm(CWM[,"SLA.mean"]~x*CWM[,"LeafNPratio.mean"])
summary(model7)
'                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  1.948e+00  9.377e-03   207.8   <2e-16 ***
x                            1.278e-02  7.985e-05   160.0   <2e-16 ***
CWM[, "LeafNPratio.mean"]    3.916e-01  3.856e-03   101.6   <2e-16 ***
x:CWM[, "LeafNPratio.mean"] -5.472e-03  3.166e-05  -172.8   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4213 on 1111303 degrees of freedom
Multiple R-squared:  0.04842,	Adjusted R-squared:  0.04842 '
min(x)
max(x)
x2 <- sort(rep(seq(-100, 290, by=10),35))
min(CWM[,"LeafNPratio.mean"])
max(CWM[,"LeafNPratio.mean"])
y2 <- rep(seq(1, 4.4, by=0.1),40)
func <- function(x,y) {  1.948e+00  +  1.278e-02 * x + 3.916e-01*y -5.472e-03*x*y}
z2 <- func(x2,y2)
s3d1 <- scatterplot3d(x,CWM[,"LeafNPratio.mean"], CWM[,"SLA.mean"], 
                      xlab="BIO_1", ylab="CWM ln(Leaf N to P ratio)", zlab="CWM ln(SLA)", 
                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=50,scale.y=1)
s3d1$points3d(x2,y2,z2,col="blue", type="p", pch=20, cex=0.5)
# final plot
# some BIO_01 and LeafNPratio combinations do not exist
?cut
x.cut <- cut(x, breaks=seq(-110, 290, by=10))
y.cut <- cut(CWM[,"LeafNPratio.mean"],breaks=seq(0.9, 4.4, by=0.1))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #40 35
z2.cut <- table(x2,y2)
head(z2.cut)
dim(z2.cut) #40 35

#library(reshape2)
z.cut <- melt(z.cut)
str(z.cut) # 1400 obs. of  3 variables:
head(z.cut)
z2.cut <- melt(z2.cut)
str(z2.cut) #1400 obs. of  3 variables:
head(z2.cut)
z2.cut$value <- 0
z2.cut$value <- z.cut$value
z2.cut$xy <- paste(z2.cut$x2,z2.cut$y2,sep="_")
#z3.cut <- acast(z2.cut, x2~y2)
#head(z3.cut)
xy <- paste(x2,y2,sep="_")
index20 <- match(xy,z2.cut$xy)
values.present <- vector(mode="logical", length=length(x2))
values.present <- z2.cut$value[index20] > 0
table(values.present)
'z.cut$value[z.cut$x.cut==x.cut[1] & z.cut$y.cut==y.cut[1]]
values.present <- function(z.cut,x,y){
  z.cut$value[z.cut$x.cut==x & z.cut$y.cut==y]
}
x1 <- data.frame(x,x.cut, y=CWM[,"LeafNPratio.mean"],y.cut)
head(x1)
z1 <- apply(x1,1,FUN=values.present(z.cut, x1$x.cut, x1$y.cut))
values.present(z.cut, x1$x.cut[1], x1$y.cut[1])
'

s3d1 <- scatterplot3d(x,CWM[,"LeafNPratio.mean"], CWM[,"SLA.mean"], 
                      xlab="BIO_1", ylab="CWM ln(Leaf N to P ratio)", zlab="CWM ln(SLA)", 
                      pch=1,cex.symbols=0.05,highlight.3d=T,type="p",angle=-10,scale.y=0.8)
s3d1$points3d(x2[values.present],y2[values.present],z2[values.present],col="blue", type="p", pch=20, cex=0.5)


#### Collect all CWM regressions ####
names(splot.bioclim) # from 4 (BIO_01 to 49 T_DEC
CWM.bioclim <- array(NA,c(46,18,5),dimnames=list(names(splot.bioclim)[4:49],dimnames(CWM)[[2]],
                      c("p_lin","r2_lin","p_qua1", "p_qua2","r2_qua")))
for (i in 1:46){
  for (j in 1:18){
    model1 <- lm(CWM[,j]~splot.bioclim[index11,i+3])
    CWM.bioclim[i,j,1] <- summary(model1)$coefficients[2,4]
    CWM.bioclim[i,j,2] <- summary(model1)$r.squared
    model2 <- lm(CWM[,j]~splot.bioclim[index11,i+3]+I(splot.bioclim[index11,i+3]^2))
    CWM.bioclim[i,j,3] <- summary(model2)$coefficients[2,4]
    CWM.bioclim[i,j,4] <- summary(model2)$coefficients[3,4]
    CWM.bioclim[i,j,5] <- summary(model2)$r.squared
  }
}
str(CWM.bioclim) #num [1:46, 1:18, 1:5]
write.csv(data.frame(names(splot.bioclim)[4:49],CWM.bioclim[,,2]),file = "CWM_bioclim_r2_lin2.csv", row.names = FALSE)
write.csv(data.frame(names(splot.bioclim)[4:49],CWM.bioclim[,,5]),file = "CWM_bioclim_r2_qua2.csv", row.names = FALSE)

# for SLA
which(CWM.bioclim[,3,2]==max(CWM.bioclim[,3,2])) # BIO_02 
which(CWM.bioclim[,3,5]==max(CWM.bioclim[,3,5])) # BIO_02 
which(CWM.bioclim[,,2]==max(CWM.bioclim[,,2]),arr.ind = TRUE) 
#       row col
#BIO_02   2   3
CWM.bioclim[2,3,2] #0.1113908
which(CWM.bioclim[,,5]==max(CWM.bioclim[,,5]),arr.ind = TRUE) 
#    row col
#PET  22   2
CWM.bioclim[22,2,5] # 0.1407318
names(splot.bioclim)[2+3] #"BIO_02"
dimnames(CWM)[[2]][9] #"LeafNperArea.mean"
plot(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11],
     cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per Area)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,9]~splot.bioclim[index11,2+3])
abline(model1, lwd=3, col="blue")
# MeanNperArea_BIO_02.png
png(filename="MeanNperArea_BIO_02.png", width=800, height=800, units = "px", bg="white")
  plot(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11],
       cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per Area)",cex.lab=1.5, cex.axis=1.5)
  #model1 <- lm(CWM[,9]~splot.bioclim[index11,2+3])
  abline(model1, lwd=3, col="blue")
dev.off() # creates a file
png(filename="SLA_BIO_02.png", width=800, height=800, units = "px", bg="white")
  plot(CWM[,"SLA.mean"]~splot.bioclim$BIO_02[index11],
    cex=0.05, xlab="BIO_02", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
  model1 <- lm(CWM[,1]~splot.bioclim[index11,2+3])
abline(model1, lwd=3, col="blue")
dev.off() # creates a file

#### PCA of all CWM and all Bioclim variables ####
library(vegan)
pca1 <- rda(CWM,scale=T)
plot(pca1)
head(summary(pca1),100)
'Partitioning of correlations:
              Inertia Proportion
Total              18          1
Unconstrained      18          1

Eigenvalues, and their contribution to the correlations 

Importance of components:
                         PC1    PC2    PC3     PC4     PC5     PC6     PC7     PC8     PC9    PC10    PC11    PC12    PC13
Eigenvalue            5.3194 3.5487 2.0227 1.37785 1.27883 1.00296 0.71963 0.66827 0.56198 0.49244 0.34211 0.26158 0.19518
Proportion Explained  0.2955 0.1971 0.1124 0.07655 0.07105 0.05572 0.03998 0.03713 0.03122 0.02736 0.01901 0.01453 0.01084
Cumulative Proportion 0.2955 0.4927 0.6050 0.68159 0.75264 0.80836 0.84834 0.88546 0.91668 0.94404 0.96305 0.97758 0.98842
                         PC14    PC15    PC16    PC17     PC18
Eigenvalue            0.07591 0.05484 0.05067 0.01766 0.009314
Proportion Explained  0.00422 0.00305 0.00281 0.00098 0.000520
Cumulative Proportion 0.99264 0.99569 0.99850 0.99948 1.000000'
str(summary(pca1))
#text(summary(pca1)$species[,c(1,2)],dimnames(summary(pca1)$species)[[1]])
traitnames <- dimnames(summary(pca1)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(pca1)$species[,c(1,2)]
plotcoord <- summary(pca1)$sites[,c(1,2)]
which(traitnames=="Seed.length") #9
traitcoord[9,2] <- traitcoord[9,2]+0.3
which(traitnames=="LDMC") #10
traitcoord[10,2] <- traitcoord[10,2]-0.3
ordiplot(pca1, type="n", ylim=c(-10,10))
text(traitcoord,traitnames, col="red", cex=1)
points(plotcoord*30, cex=0.05)
#env1 <- envfit(pca1,splot.bioclim[index11,c(4:49)],permutations = 99)
#with permutations = 999:
#Error: cannot allocate vector of size 4.1 Gb
plot(env1)
png(filename="PCA1.png", width=1000, height=1000, units = "px", bg="white")
  ordiplot(pca1, type="n")
  text(traitcoord,traitnames)
  plot(env1)
dev.off() # creates a file
'rm(env1)
rm(backbone.splot.try3)
rm(model1)
rm(model2)
rm(pca1)
rm(splot.header)
rm(splot.header2)
rm(traitdist)
rm(coords)
rm(TRY.all.mean.sd.2)
rm(index7)
rm(raster.map.SLA)
rm(trait.vec)
rm(FD)
rm(splot.bioclim)
'
gc()

### produce a PCA with kernel density
# From Díaz et al. 2016, Nature
library(ks)
str(plotcoord) #num [1:1111307, 1:2]
H <- Hpi(x=plotcoord)      # optimal bandwidth estimation
# does not work
est<- kde(x=plotcoord, H=H, compute.cont=TRUE)     # kernel density estimation

library(raster)
library(fBasics)
# Color palette
#plotcoord <- plotcoord*30
min(plotcoord[,1]) #-0.3202043
max(plotcoord[,1]) # 0.3078603
min(plotcoord[,2]) #-0.5899272
max(plotcoord[,2]) # 0.2530614
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #62 84
z.cut <- melt(z.cut)
str(z.cut) # 5208 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1112287
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30)) # T
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.992156
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                        4, by=0.4))
#values.present <- z.cut$value[index30]
ordiplot(pca1, type="n")
#points(plotcoord*30, cex=0.05, col=rainbowPalette(n=10,  
#        s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1)[values.cut])
points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                    gamma = 2.2, alpha = NULL))[values.cut])
text(traitcoord,traitnames, col="red", cex=1)


ext.factor <- 30
r.pca <- raster(nrows=300, ncols=300, xmn=min(plotcoord[,1])*ext.factor, xmx=max(plotcoord[,2])*ext.factor, 
                ymn=min(plotcoord[,1])*ext.factor, ymx=max(plotcoord[,2]*ext.factor))
# attention, xmn and ymn as well as xmx and ymx have to be the same, otherwise the raster is shifted
r.pca.raster <- rasterize(plotcoord*ext.factor, r.pca, fun="count")
str(r.pca.raster)
plot(log10(r.pca.raster), asp=0, col=seqPalette(n=10, name=c("Blues")), 
     main="", cex.main=0.7, xlab="",  ylab="", add=T)
plot(log10(r.pca.raster), asp=0, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
    gamma = 2.2, alpha = NULL)), 
     main="", cex.main=0.7, xlab="",  ylab="", add=T)
plot(log10(r.pca.raster), asp=0, col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1), 
     main="", cex.main=0.7, xlab="",  ylab="", add=T, legend=F)

'## graph for the PCA ##
ordiplot(pca1, type="n", xlim=c(-10,13), ylim=c(-10,10))
rasterImage(as.raster(r.pca.raster), -10, -10, 20, 10, interpolate = F,
            col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1))
plot(log10(r.pca.raster), asp=0, col=rainbowPalette(n=10,   s = 1, v = 1, start = 3/6, end = 5/6, alpha = 1), 
     main="", cex.main=0.7, xlab="",  ylab="", add=T, legend=F)
text(traitcoord,traitnames, col="red", cex=1)
'


rda1 <- rda(CWM~.,splot.bioclim[index11,c(4:49)],scale=T)
plot(rda1)
head(summary(rda1), 100)
rda1
'Partitioning of correlations:
              Inertia Proportion
Total          18.000     1.0000
Constrained     2.321     0.1289
Unconstrained  15.679     0.8711

Eigenvalues, and their contribution to the correlations 

Importance of components:
RDA1    RDA2    RDA3    RDA4    RDA5    RDA6    RDA7    RDA8    RDA9    RDA10    RDA11    RDA12
Eigenvalue            1.20610 0.57369 0.23626 0.15021 0.06339 0.02077 0.02014 0.01570 0.01164 0.008565 0.007286 0.002948
Proportion Explained  0.06701 0.03187 0.01313 0.00835 0.00352 0.00115 0.00112 0.00087 0.00065 0.000480 0.000400 0.000160
Cumulative Proportion 0.06701 0.09888 0.11200 0.12035 0.12387 0.12502 0.12614 0.12701 0.12766 0.128140 0.128540 0.128710
RDA13   RDA14     RDA15     RDA16     RDA17     RDA18    PC1    PC2    PC3    PC4     PC5     PC6
Eigenvalue            0.002155 0.00111 0.0002949 0.0001558 0.0000665 3.596e-05 4.2977 3.0445 1.8700 1.1826 1.10560 0.94049
Proportion Explained  0.000120 0.00006 0.0000200 0.0000100 0.0000000 0.000e+00 0.2388 0.1691 0.1039 0.0657 0.06142 0.05225
Cumulative Proportion 0.128830 0.12889 0.1289000 0.1289100 0.1289200 1.289e-01 0.3677 0.5368 0.6407 0.7064 0.76783 0.82008
PC7     PC8     PC9    PC10    PC11    PC12   PC13    PC14    PC15    PC16    PC17     PC18
Eigenvalue            0.70216 0.61116 0.50930 0.46537 0.32289 0.24648 0.1782 0.07293 0.05366 0.04986 0.01738 0.009213
Proportion Explained  0.03901 0.03395 0.02829 0.02585 0.01794 0.01369 0.0099 0.00405 0.00298 0.00277 0.00097 0.000510
Cumulative Proportion 0.85909 0.89304 0.92134 0.94719 0.96513 0.97882 0.9887 0.99277 0.99575 0.99852 0.99949 1.000000'
#anova(rda1, perm=99)
#anova(rda1, by="terms", permutations = how(nperm=9))
library(parallel) 
detectCores() #32
# Set up the cluster
#clusterType <- if(length(find.package("snow", quiet = TRUE))) "SOCK" else "PSOCK"
#clust <- try(makeCluster(getOption("cl.cores", 2), type = clusterType))
cl <- makeCluster(10)
#registerDoParallel(cl) 
options(na.action = "na.pass")
#clusterExport(cl, "results") 

#print(system.time(anova(rda1, by="terms", perm=99, parallel = getOption("mc.cores"))))
print(system.time(out1 <- anova(rda1, by="margin", permutations =99, parallel = cl)))
'user    system   elapsed 
5304.802  1311.088 14522.410
'
out1

traitnames <- dimnames(summary(rda1)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(rda1)$species[,c(1,2)]
bioclimcoord <- summary(rda1)$biplot[,c(1,2)]
which(traitnames=="LeafNperArea") #9
traitcoord[9,1] <- traitcoord[9,1]+0.5
which(traitnames=="StemDens") #5
traitcoord[5,2] <- traitcoord[5,2]+0.3
which(traitnames=="Disp.unit.leng") #18
traitcoord[18,2] <- traitcoord[18,2]-0.3
ordiplot(rda1, type="n", xlim=c(-7,7))
arrows(rep(0,45),rep(0,45),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="cadetblue4", length=0.1)
text(bioclimcoord*8,dimnames(bioclimcoord)[[1]], col="cadetblue4", cex=0.8)
text(traitcoord,traitnames, col="red")
# copied as first RDA graph into ppt

#rda1 <- rda(CWM ~ ., splot.bioclim[index11,c(4:49)],scale=T)

### Stepwise RDA ###
rda2 <- rda(CWM ~ 1,splot.bioclim[index11,c(4:49)],scale=T)
#rda3 <- step(rda2, scope = formula(rda1), test = "perm")
rda3 <- ordiR2step(rda2, scope=formula(rda1), direction = "both", Pin = 0.05, R2scope = F, permutations = how(nperm = 99), trace = TRUE)
#TRY2.0: CWM ~ GDD5 + BIO_02 + BIO_12 + T_MAY + P_MAY + T_AUG ....
#TRY3.0: CWM ~ GDD5 + BIO_02 + BIO_12 + T_MAY + P_MAY + T_SEP + P_DEC +      P_FEB 
#rm(rda1)

# now rebuild the RDA sequentially
pca1 <- rda(CWM,scale=T)
pca1
5.527+ 3.906+ 1.953+ 1.507+ 1.375+ 0.886+ 0.726+ 0.623+ 0.499 +0.360+ 0.295+ 0.131+ 0.087+ 0.077+ 0.048 
(5.527+ 3.906)/18 # 52.40556%

traitnames <- dimnames(summary(pca1)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(pca1)$species[,c(1,2)]
which(traitnames=="SeedMass") #3
traitcoord[3,2] <- traitcoord[3,2]-0.5
ordiplot(pca1, type="n")
text(traitcoord,traitnames, col="red")
# copied as second RDA graph into ppt


rda4 <- rda(CWM ~ GDD5,splot.bioclim[index11,c(4:49)],scale=T)
rda4
traitnames <- dimnames(summary(rda4)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(rda4)$species[,c(1,2)]
plotcoord <- summary(rda4)$sites[,c(1,2)]
str(plotcoord) #num [1:1112287, 1:2]
bioclimcoord <- summary(rda4)$biplot[,c(1,2)]

min(plotcoord[,1]) #-0.683271
max(plotcoord[,1]) # 0.8519885
min(plotcoord[,2]) #-0.3398233
max(plotcoord[,2]) # 0.3047364
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #153  64
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1112287
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30)) #T
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.947336
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                         4, by=0.4))
     

ordiplot(rda4, type="n")
points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])

arrows(rep(0,1),rep(0,1),bioclimcoord[1]*8,bioclimcoord[2]*8, col="blue", length=0.1)
text(bioclimcoord[1]*8+0.8,bioclimcoord[2]*8-0.2,"GDD5", col="blue", cex=1)
text(traitcoord,traitnames, col="red")
# copied as third RDA graph into ppt

rda5 <- rda(CWM ~ GDD5+BIO_02,splot.bioclim[index11,c(4:49)],scale=T)
rda5
traitnames <- dimnames(summary(rda5)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(rda5)$species[,c(1,2)]
bioclimcoord <- summary(rda5)$biplot[,c(1,2)]
plotcoord <- summary(rda5)$sites[,c(1,2)]
str(plotcoord) #num [1:1112287, 1:2]
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #141 205
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.228913
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                          4, by=0.4))


ordiplot(rda5, type="n", xlim=c(-10,10), ylim=c(-10,10))
points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])
arrows(rep(0,2),rep(0,2),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="blue", length=0.1)
text(bioclimcoord*8,dimnames(bioclimcoord)[[1]], col="blue", cex=1)
text(traitcoord,traitnames, col="red")
# copied as fourth RDA graph into ppt


plot(CWM[,"LeafNPratio.mean"]~splot.bioclim$GDD5[index11],
     cex=0.05, xlab="GDD5", ylab="CWM ln(leaf N to P ratio)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,"LeafNPratio.mean"]~splot.bioclim$GDD5[index11])
summary(model1)
'                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                 2.334e+00  3.795e-04  6149.5   <2e-16 ***
splot.bioclim$GDD5[index11] 5.757e-06  1.595e-08   360.9   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.196 on 1112285 degrees of freedom
Multiple R-squared:  0.1048,	Adjusted R-squared:  0.1048 
F-statistic: 1.303e+05 on 1 and 1112285 DF,  p-value: < 2.2e-16
'
abline(model1, lwd=3, col="blue")
# LeafNPRatio_GDD5

plot(CWM[,"SLA.mean"]~jitter(splot.bioclim$BIO_02[index11]),
     cex=0.05, xlab="BIO_02", ylab="CWM ln(SLA)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,"SLA.mean"]~splot.bioclim$BIO_02[index11])
summary(model1)
'                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    3.399e+00  1.448e-03  2348.2   <2e-16 ***
splot.bioclim$BIO_02[index11] -6.253e-03  1.551e-05  -403.3   <2e-16 ***

Residual standard error: 0.4034 on 1111305 degrees of freedom
Multiple R-squared:  0.1277,	Adjusted R-squared:  0.1277 
'
abline(model1, lwd=3, col="blue")
# SLA_BIO_02

plot(CWM[,"LeafNperArea.mean"]~jitter(splot.bioclim$BIO_02[index11]),
     cex=0.05, xlab="BIO_02", ylab="CWM ln(Leaf N per area)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(CWM[,"LeafNperArea.mean"]~splot.bioclim$BIO_02[index11])
summary(model1)
'                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   -5.989e-02  8.475e-04  -70.67   <2e-16 ***
splot.bioclim$BIO_02[index11]  3.696e-03  9.079e-06  407.09   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2362 on 1111305 degrees of freedom
Multiple R-squared:  0.1298,	Adjusted R-squared:  0.1298 
'
abline(model1, lwd=3, col="blue")
# LeafNper_BIO_02

##### FD calculations ####
j <- 3
nam <- DT$species[DT$PlotObservationID==j]
abu <- DT$Relative.cover[DT$PlotObservationID==j]
trait <- DT$trait[DT$PlotObservationID==j]
abundance.weighted=T

FD.fun <- function(trait, abu){
  res <- as.double(NA)
  if (length(trait[!is.na(trait)])>0){
    res <- 0
    #nam <- as.character(nam[!is.na(trait)])
    abu <- as.data.frame(abu[!is.na(trait)])
    #rownames(abu) <- nam
    trait <- as.data.frame(trait[!is.na(trait)])
    #rownames(trait) <- nam
    dis <- dist(trait,method="euclidean")
    if (sum(dis)>1){
      sqrt.dis <- sqrt(dis)
      # taking the square root is required to obtain the same result from the divc function
      # as divc takes the square of distances values, as suggested by
      # Champely & Chessel (2002, Env. Ecol. Stat. 9: 167-177)
      #abu <- as.data.frame(abu)
      res <- unlist(divc(abu, sqrt.dis))
      #Rao's Q: sumi sumj (dist ij prop i prop j)'
    } 
  }
  res
}

mpd.fun <- function(nam, trait, abu,abundance.weighted){
  #res <- numeric(1) # carries result from the function
  res <- as.double(NA)
  if (length(trait[!is.na(trait)])>0){
  #  dis <- as.matrix(dist(t(t1),method="euclidean"))   
    abu <- t(abu)
    colnames(abu) <- nam
    trait <- t(trait)
    colnames(trait) <- nam
    #dis <- as.matrix(vegdist(t(t1),method="euclidean", na.rm=T))
    dis <- as.matrix(dist(t(trait),method="euclidean"))
    res <- mpd(abu, dis, abundance.weighted=abundance.weighted)
  }
  res
}

# from mpd source code
'N <- dim(samp)[1]
mpd <- numeric(N)
for (i in 1:N) {
  sppInSample <- names(samp[i, samp[i, ] > 0])
  if (length(sppInSample) > 1) {
    sample.dis <- dis[sppInSample, sppInSample]
    if (abundance.weighted) {
      sample.weights <- t(as.matrix(samp[i, sppInSample, 
                                         drop = FALSE])) %*% as.matrix(samp[i, sppInSample, 
                                                                            drop = FALSE])
      mpd[i] <- weighted.mean(sample.dis, sample.weights)
    }
    else {
      mpd[i] <- mean(sample.dis[lower.tri(sample.dis)])
    }
  }
  else {
    mpd[i] <- NA
  }
}
mpd
'
# from Oliver, who used picante
# does not accept missing values in the function
library(picante)
# define function for mean pairwise distance
'mpd.fun.dt <- function(abu, dis, nam, abundance.weighted){
  abu <- t(abu)
  colnames(abu) <- nam
  res <- mpd(abu, dis, abundance.weighted=abundance.weighted)
}
# define funtion of mean nearest neighbor distance
mntd.fun.dt <- function(abu, dis, nam, abundance.weighted){
  abu <- t(abu)
  colnames(abu) <- nam
  res <- mntd(abu, dis, abundance.weighted=abundance.weighted)
  res
}
species.list <- unique(DT$species)
any(is.na(species.list)) #T, there is one NA
length(species.list) #60909
species.list <- species.list[!is.na(species.list)]
length(species.list) #60908
index8 <- match(species.list,TRY.all.mean.sd.2$species)
str(index8) # 60908
length(index8[!is.na(index8)]) #21001
#species.list2 <- TRY.all.mean.sd.2$species[index8]
#species.list2 <- species.list2[!is.na(species.list2)]
#length(species.list2) # 21001
TRY.all.mean.sd.2.na <- TRY.all.mean.sd.2[index8,]
str(TRY.all.mean.sd.2.na)
TRY.all.mean.sd.2.na$species2 <- species.list
rm(index9)
'
FD <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]],
                                                 colnames(TRY.all.mean.sd.2)[3:20]))
str(FD) #logi [1:1117898, 1:18] 
'for (i in 1:18){
  # create distance matrix
  #trait.vec <- TRY.all.mean.sd.2.na[,i+2]
  #names(trait.vec) <- TRY.all.mean.sd.2.na$species2
  #trait.dist.mat <- as.matrix(dist(trait.vec))
  # too large for my notebook
  #mpd[,i] <-  DT[,list(mpd.trait= mpd.fun.dt(Relative.cover, trait.dist.mat, species, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
  print(i)  
  DT$trait <- NA
  DT$trait <- TRY.all.mean.sd.2[index7,i+2]
  FD[,i] <-  DT[,list(mpd.trait= mpd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
}
write.csv(FD,file = "FD2.csv", row.names = T)
'
for (i in 1:18){
  # create distance matrix
  #trait.vec <- TRY.all.mean.sd.2.na[,i+2]
  #names(trait.vec) <- TRY.all.mean.sd.2.na$species2
  #trait.dist.mat <- as.matrix(dist(trait.vec))
  # too large for my notebook
  #mpd[,i] <-  DT[,list(mpd.trait= mpd.fun.dt(Relative.cover, trait.dist.mat, species, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait
  print(i)  
  DT$trait <- NA
  DT$trait <- TRY.all.mean.sd.2[index7,i+2]
  # recalculate relative cover for this trait
  FD[,i] <-  DT[,list(FD.trait= FD.fun(trait, Relative.cover)),by=PlotObservationID]$FD.trait
}
write.csv(FD,file = "FD3.csv", row.names = T)

#FD <- read.csv("C://Daten//iDiv2//splot2//FD3.csv")
FD <- read.csv("C://Daten//iDiv2//splot2//splot2.0//FD_TRY3.csv")
str(FD)
dimnames(FD)[[1]] <- FD$X
FD <- FD[,-1]
tail(FD)

#### match FD to splot.header ####
index12 <- match(dimnames(FD)[[1]],splot.header$PlotObservationID)
length(index12) #1117898
any(is.na(index12)) # F
splot.header3 <- splot.header[index12,]
str(FD) # num [1:1117898, 1:18]
FD <- FD[!(is.na(splot.header3$Longitude)|is.na(splot.header3$Latitude)|is.na(FD[,"SLA.mean"])),]
str(FD) # num [1:1112287 , 1:18]
1112287 /1117898*100 # 99.49808 % of plots have traits, Long and Lat
any(is.na(FD)) #F
# match header again
index13 <- match(dimnames(FD)[[1]],splot.header$PlotObservationID)
length(index13) #1112287
any(is.na(index13)) # F
splot.header3 <- splot.header[index13,]
# now in the same sequence as FD


#### Collect all FD regressions ####
names(splot.bioclim) # from 4 BIO_01 to 49 T_MAR
index14 <- match(splot.header3$PlotObservationID,splot.bioclim$PLOT_ID)
length(index14) # 1111307
length(index14[!is.na(index14)]) # 1111307

FD.bioclim <- array(NA,c(46,18,5),dimnames=list(names(splot.bioclim)[4:49],dimnames(FD)[[2]],
                                      c("p_lin","r2_lin","p_qua1", "p_qua2","r2_qua")))
for (i in 1:46){
  for (j in 1:18){
    model1 <- lm(FD[,j]~splot.bioclim[index14,i+3])
    FD.bioclim[i,j,1] <- summary(model1)$coefficients[2,4]
    FD.bioclim[i,j,2] <- summary(model1)$r.squared
    model2 <- lm(FD[,j]~splot.bioclim[index14,i+3]+I(splot.bioclim[index14,i+3]^2))
    FD.bioclim[i,j,3] <- summary(model2)$coefficients[2,4]
    FD.bioclim[i,j,4] <- summary(model2)$coefficients[3,4]
    FD.bioclim[i,j,5] <- summary(model2)$r.squared
  }
}
str(FD.bioclim) #num [1:46, 1:18, 1:5]
write.csv(data.frame(names(splot.bioclim)[4:49],FD.bioclim[,,2]),file = "FD2_bioclim_r2_lin.csv", row.names = FALSE)
write.csv(data.frame(names(splot.bioclim)[4:49],FD.bioclim[,,5]),file = "FD2_bioclim_r2_qua.csv", row.names = FALSE)

library(vegan)
FD.pca1 <- rda(FD,scale=T)
FD.pca1
'
Eigenvalues for unconstrained axes:
  PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
8.703 1.758 1.122 0.974 0.732 0.643 0.590 0.540 
(Showed only 8 of all 18 unconstrained eigenvalues)
'
plot(FD.pca1)
head(summary(FD.pca1))
#text(summary(FD.pca1)$species[,c(1,2)],dimnames(summary(FD.pca1)$species)[[1]])
traitnames <- dimnames(summary(FD.pca1)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(FD.pca1)$species[,c(1,2)]
which(traitnames=="Seed.length") #17
traitcoord[17,2] <- traitcoord[17,2]+0.3
which(traitnames=="LeafC.perdrymass") #12
traitcoord[12,2] <- traitcoord[12,2]-0.4
which(traitnames=="Leaf.delta.15N") #13
traitcoord[13,2] <- traitcoord[13,2]+0.4
which(traitnames=="LeafNPratio") #11
traitcoord[11,2] <- traitcoord[11,2]-0.3

plotcoord <- summary(FD.pca1)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #131 50
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                          4, by=0.4))

ordiplot(FD.pca1, type="n")
#text(traitcoord,traitnames)
points(plotcoord*30, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])

#arrows(rep(0,45),rep(0,45),bioclimcoord[,1]*8,bioclimcoord[,2]*8, col="cadetblue4", length=0.1)
text(traitcoord,traitnames, col="red")
# copied as first RDA graph into ppt

env1 <- envfit(FD.pca1,splot.bioclim[index14,c(4:49)],permutations = 0)
env1
#with permutations = 999:
#Error: cannot allocate vector of size 4.1 Gb
plot(env1)
png(filename="PCA1.png", width=1000, height=1000, units = "px", bg="white")
ordiplot(pca1, type="n")
text(traitcoord,traitnames)
plot(env1)
dev.off() # creates a file

### Stepwise RDA ###
### RDA ###
FD.rda1 <- rda(FD~.,splot.bioclim[index14,c(4:49)],scale=T)
FD.rda1
FD.rda2 <- rda(FD ~ 1,splot.bioclim[index14,c(4:49)],scale=T)
FD.rda3 <- ordistep(FD.rda2, scope=formula(FD.rda1), direction = "both", Pin = 0.1, 
            permutations = how(nperm = 10), trace = TRUE)
'Start: FD ~ 1 

Df     AIC       F  Pr(>F)  
+ BIO_14  1 3191679 20602.8 0.09091 .
+ BIO_17  1 3191898 20379.5 0.09091 .
+ BIO_12  1 3197070 15125.2 0.09091 .
+ P_MAY   1 3198071 14110.8 0.09091 .
+ P_APR   1 3198194 13986.4 0.09091 .
+ BIO_18  1 3198767 13406.3 0.09091 .
+ P_MAR   1 3199448 12717.1 0.09091 .
+ P_FEB   1 3199451 12714.5 0.09091 .
+ P_JUN   1 3199643 12519.9 0.09091 .
+ BIO_05  1 3199804 12357.7 0.09091 .
+ BIO_02  1 3200062 12096.5 0.09091 .
+ P_NOV   1 3200602 11551.1 0.09091 .
+ P_DEC   1 3200823 11328.1 0.09091 .
+ P_JAN   1 3201335 10810.2 0.09091 .
+ BIO_10  1 3202194  9943.1 0.09091 .
+ T_JUL   1 3202226  9911.4 0.09091 .
+ T_AUG   1 3202685  9448.0 0.09091 .
+ GDD5    1 3202914  9217.7 0.09091 .
+ T_JUN   1 3202934  9196.7 0.09091 .
+ BIO_16  1 3203459  8668.3 0.09091 .
+ BIO_19  1 3203685  8440.1 0.09091 .
+ P_AUG   1 3204079  8043.8 0.09091 .
+ BIO_13  1 3204383  7737.1 0.09091 .
+ T_MAY   1 3204450  7669.7 0.09091 .
+ P_OCT   1 3204755  7362.4 0.09091 .
+ T_SEP   1 3204986  7130.0 0.09091 .
+ P_SEP   1 3205174  6941.4 0.09091 .
+ BIO_15  1 3205378  6735.8 0.09091 .
+ P_JUL   1 3205600  6512.4 0.09091 .
+ T_APR   1 3205746  6365.1 0.09091 .
+ BIO_01  1 3205841  6270.3 0.09091 .
+ BIO_03  1 3205882  6228.8 0.09091 .
+ T_OCT   1 3206048  6062.0 0.09091 .
+ AR      1 3206562  5545.3 0.09091 .
+ BIO_07  1 3207063  5042.2 0.09091 .
+ T_NOV   1 3207547  4555.5 0.09091 .
+ T_MAR   1 3207659  4443.2 0.09091 .
+ T_FEB   1 3208177  3923.3 0.09091 .
+ T_DEC   1 3208260  3840.3 0.09091 .
+ BIO_11  1 3208334  3765.7 0.09091 .
+ T_JAN   1 3208465  3634.3 0.09091 .
+ BIO_08  1 3208844  3254.4 0.09091 .
+ BIO_06  1 3209374  2723.0 0.09091 .
+ BIO_09  1 3209543  2553.3 0.09091 .
+ PET     1 3209642  2454.5 0.09091 .
+ BIO_04  1 3210074  2021.3 0.09091 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: FD ~ BIO_14 

Df     AIC     F  Pr(>F)  
- BIO_14  1 3212091 20603 0.09091 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Df     AIC       F  Pr(>F)  
+ BIO_02  1 3184224 7481.80 0.09091 .
+ BIO_03  1 3184435 7269.37 0.09091 .
+ GDD5    1 3187255 4435.29 0.09091 .
+ BIO_05  1 3187772 3915.70 0.09091 .
+ T_JAN   1 3188026 3661.00 0.09091 .
+ BIO_01  1 3188040 3647.18 0.09091 .
+ T_FEB   1 3188048 3638.72 0.09091 .
+ T_MAR   1 3188075 3612.29 0.09091 .
+ T_APR   1 3188082 3605.14 0.09091 .
+ BIO_11  1 3188088 3598.96 0.09091 .
+ T_DEC   1 3188123 3563.70 0.09091 .
+ T_NOV   1 3188208 3478.25 0.09091 .
+ T_OCT   1 3188225 3461.84 0.09091 .
+ T_MAY   1 3188357 3329.42 0.09091 .
+ BIO_06  1 3188491 3194.91 0.09091 .
+ BIO_10  1 3188609 3076.07 0.09091 .
+ T_SEP   1 3188617 3068.49 0.09091 .
+ BIO_13  1 3188664 3021.74 0.09091 .
+ T_JUN   1 3188669 3016.26 0.09091 .
+ BIO_16  1 3188709 2976.11 0.09091 .
+ P_SEP   1 3188724 2961.09 0.09091 .
+ BIO_07  1 3188728 2956.84 0.09091 .
+ BIO_12  1 3188782 2902.48 0.09091 .
+ T_AUG   1 3188788 2897.30 0.09091 .
+ T_JUL   1 3188879 2805.50 0.09091 .
+ P_AUG   1 3189181 2503.15 0.09091 .
+ BIO_18  1 3189256 2427.84 0.09091 .
+ BIO_04  1 3189277 2406.93 0.09091 .
+ P_JUL   1 3189398 2285.76 0.09091 .
+ BIO_15  1 3189605 2078.03 0.09091 .
+ P_APR   1 3189650 2032.61 0.09091 .
+ P_JUN   1 3189703 1979.86 0.09091 .
+ BIO_09  1 3189912 1770.71 0.09091 .
+ P_MAR   1 3189965 1717.19 0.09091 .
+ PET     1 3189984 1698.34 0.09091 .
+ AR      1 3190003 1678.99 0.09091 .
+ P_MAY   1 3190005 1677.55 0.09091 .
+ P_OCT   1 3190063 1619.65 0.09091 .
+ P_FEB   1 3190490 1192.28 0.09091 .
+ BIO_17  1 3190497 1184.39 0.09091 .
+ BIO_19  1 3190555 1126.59 0.09091 .
+ P_NOV   1 3190570 1111.68 0.09091 .
+ BIO_08  1 3190582 1100.05 0.09091 .
+ P_JAN   1 3190893  787.98 0.09091 .
+ P_DEC   1 3190955  726.52 0.09091 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: FD ~ BIO_14 + BIO_02 

Df     AIC       F  Pr(>F)  
- BIO_02  1 3191679  7481.8 0.09091 .
- BIO_14  1 3200062 15953.2 0.09091 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1'

FD.rda4 <- rda(FD ~ BIO_14,splot.bioclim[index14,c(4:49)],scale=T)
FD.rda4
'              Inertia Proportion Rank
Total         18.0000     1.0000     
Constrained    0.3276     0.0182    1
Unconstrained 17.6724     0.9818   18
Inertia is correlations 

Eigenvalues for constrained axes:
RDA1 
0.3276 

Eigenvalues for unconstrained axes:
PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
8.307 1.850 1.378 0.884 0.789 0.780 0.578 0.551 
'
traitnames <- dimnames(summary(FD.rda4)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(FD.rda4)$species[,c(1,2)]
bioclimcoord <- summary(FD.rda4)$biplot[,c(1,2)]
plotcoord <- summary(FD.rda4)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #131 50
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                          4, by=0.4))

ordiplot(FD.rda4, type="n", xlim=c(-10,10),ylim=c(-10,12))
points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8,
                                                   gamma = 2.2, alpha = NULL))[values.cut])
arrows(rep(0,1),rep(0,1),bioclimcoord[1]*8,bioclimcoord[2]*8, col="blue", length=0.1)
text(bioclimcoord[1]*8,bioclimcoord[2]*8+0.5,"BIO_14", col="blue", cex=1)
text(traitcoord,traitnames, col="red")
# copied as third RDA graph into ppt

FD.rda5 <- rda(FD ~ BIO_14+BIO_02,splot.bioclim[index14,c(4:49)],scale=T)
FD.rda5
'
Inertia Proportion Rank
Total         18.00000    1.00000     
Constrained    0.44582    0.02477    2
Unconstrained 17.55418    0.97523   18
Inertia is correlations 

Eigenvalues for constrained axes:
RDA1   RDA2 
0.3641 0.0817 

Eigenvalues for unconstrained axes:
PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8 
8.267 1.816 1.377 0.880 0.788 0.780 0.568 0.530 '
traitnames <- dimnames(summary(FD.rda5)$species)[[1]]
traitnames <- substr(traitnames,1,nchar(traitnames)-5)
traitcoord <- summary(FD.rda5)$species[,c(1,2)]
bioclimcoord <- summary(FD.rda5)$biplot[,c(1,2)]

plotcoord <- summary(FD.rda5)$sites[,c(1,2)]
str(plotcoord) #num [1:1111307, 1:2]
x.cut <- cut(plotcoord[,1], breaks=seq(min(plotcoord[,1]), max(plotcoord[,1]), by=0.01))
y.cut <- cut(plotcoord[,2], breaks=seq(min(plotcoord[,2]), max(plotcoord[,2]), by=0.01))
z.cut <- table(x.cut,y.cut)
head(z.cut)
dim(z.cut) #131 50
z.cut <- melt(z.cut)
str(z.cut) # 6550 obs. of  3 variables:
head(z.cut)
z.cut$xy <- paste(z.cut$x.cut,z.cut$y.cut,sep="_")
str(x.cut)
length(x.cut) # 1111307
xy <- paste(x.cut,y.cut,sep="_")
index30 <- match(xy,z.cut$xy)
length(index30)
any(is.na(index30))
head(log10(z.cut$value[index30]))
values.present <- vector(mode="numeric", length=length(xy))
min(log10(z.cut$value[index30]), na.rm=T) #0
max(log10(z.cut$value[index30]), na.rm=T) # 3.399154
values.cut <- cut(log10(z.cut$value[index30]), breaks=seq(min(log10(z.cut$value[index30]), na.rm=T),
                                                          4, by=0.4))

ordiplot(FD.rda5, type="n",  xlim=c(-10,10),ylim=c(-10,12))
points(plotcoord*15, cex=0.05, col=rev(greyPalette(n=10,  start = 0, end = 0.8, 
                                                   gamma = 2.2, alpha = NULL))[values.cut])
arrows(rep(0,2),rep(0,2),bioclimcoord[,1]*4,bioclimcoord[,2]*4, col="blue", length=0.1)
text(bioclimcoord*4,dimnames(bioclimcoord)[[1]], col="blue", cex=1)
text(traitcoord,traitnames, col="red")
# copied as fourth RDA graph into ppt

plot(FD[,"Seed.length.mean"]~splot.bioclim$BIO_14[index14],
     cex=0.05, xlab="BIO_14", ylab="FD ln(seed length)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(FD[,"Seed.length.mean"]~splot.bioclim$BIO_14[index14])
summary(model1)
'                               Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   2.584e-01  3.193e-04   809.1   <2e-16 ***
splot.bioclim$BIO_14[index14] 1.407e-04  6.813e-07   206.5   <2e-16 ***
Multiple R-squared:  0.03697,	Adjusted R-squared:  0.03697 
'
abline(model1, lwd=3, col="blue")
#

plot(FD[,"Seed.num.rep.unit.mean"]~splot.bioclim$BIO_02[index14],
     cex=0.05, xlab="BIO_02", ylab="FD ln(Seed number of reproductive unit)",cex.lab=1.5, cex.axis=1.5)
model1 <- lm(FD[,"Seed.num.rep.unit.mean"]~splot.bioclim$BIO_02[index14])
summary(model1)
'                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    1.251e+00  1.799e-03   695.3   <2e-16 ***
splot.bioclim$BIO_02[index14] -3.751e-03  1.927e-05  -194.6   <2e-16 ***
Multiple R-squared:  0.03296,	Adjusted R-squared:  0.03296  '
abline(model1, lwd=3, col="blue")


# mpd presence-absence
system.time(
  mpd.sla.no.abu <-  DT[,list(mpd.sla = mntd.fun.dt(Cover.., SLA.dist.mat, StandSpeciesName, abundance.weighted=FALSE)),by=PlotObservationID]
)


###########
levels(splot.header2$COUNTRY_CO)
levels(splot.header$Country)
CWM2$Country <- splot.header$Country[index12]
data(countryExData)
str(countryExData)
sPDF <- joinCountryData2Map(splot.header, joinCode = "NAME",nameJoinColumn = "Country")
?mapCountryData
#mapCountryData(sPDF, nameColumnToPlot="BIODIVERSITY" )

#### Climatic space, from Jonathon Lenoir ####
library(fBasics)
# Color palette

# Import bio1 (MAT) of the world
bio <- read.table(paste("/home/helge/sPlot/Bioclim/", "BIO.txt", sep = ""),sep=";", header=T)
str(bio) #9033453 obs. of  22 variables
r <- raster(nrows=33, ncols=33, xmn=min(bio$BIO_01), xmx=max(bio$BIO_01), ymn=min(bio$BIO_12), ymx=max(bio$BIO_12))
mattar <- data.frame(bio$BIO_01,bio$BIO_12)
str(mattar)
names(mattar) <- c("bio1","bio12")
mattar_r <- rasterize(mattar, r, fun="count")
str(mattar_r)
plot(log10(mattar_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     main="N 2.5 arc-minutes cells per bin (log10 scale)", cex.main=0.7,
     xlab="MAT (°C*10)",  ylab="TAR (mm)")
# Bio1_Bio12_world.png

#plot(mattar_r/max(getValues(mattar_r), na.rm=TRUE), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), main="Relative proportion of 10 arc-minutes cells per bin", xlab="MAR (°C*10)", ylab="TAR (mm)")
# Plot the sampling effort of sPlot across the MAT-TAR climatic space
str(splot.bioclim)
# Make it a spatial data frame
CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
splot.bioclim.coords <- cbind(splot.bioclim$X, splot.bioclim$Y)
splot.bioclim.coords <- SpatialPoints(splot.bioclim.coords, proj4string=CRSlonlat)
splot.bioclim2 <- data.frame(splot.bioclim$BIO_01,splot.bioclim$BIO_12)
names(splot.bioclim2) <- c("bio1","bio12")
#splot.bioclim2 <- SpatialPointsDataFrame(splot.bioclim.coords, splot.bioclim2, proj4string=CRSlonlat)
#str(splot.bioclim2)
#sPlot_mattar <- extract(bio1, coordinates(sPlot))
#sPlot_mattar <- as.data.frame(sPlot_mattar)
#names(sPlot_mattar) <- c("bio1")
#sPlot_mattar$bio12 <- extract(bio12, coordinates(sPlot))
sPlot_mattar_r <- rasterize(splot.bioclim2, r, fun="count")
str(sPlot_mattar_r)
plot(mattar_r, asp=0, col="grey", legend=FALSE, main="N vegetation plots per bin (log10 scale)",
     xlab="MAT (°C)", ylab="TAR (mm)")
# Bio1_Bio12_world_grey.png
plot(log10(sPlot_mattar_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     add=TRUE)
# Bio1_Bio12_sPlot_SLA.png

# Build the PC1-PC2 climatic space
load("/home/helge/sPlot/Bioclim/pca_data.RData")
str(pca.data)
r <- raster(nrows=33, ncols=33, xmn=min(pca.data$PC1), xmx=max(pca.data$PC1), ymn=min(pca.data$PC2), ymx=max(pca.data$PC2))
pca_r <- rasterize(pca.data[, c("PC1", "PC2")], r, fun="count")
plot(log10(pca_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     main="N 2.5 arc-minutes cells per bin (log10 scale)", 
     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
plot(pca_r/max(getValues(pca_r), na.rm=TRUE), asp=0, 
     col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     main="Relative proportion of 2.5 arc-minutes cells per bin", 
     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
# Plot the sampling effort of sPlot across the PC1-PC2 climatic space
index13 <- match(splot.bioclim$RAST_ID,pca.data$RAST_ID)
sPlot_pca <- pca.data[index13, c("PC1", "PC2")]
sPlot_pca_r <- rasterize(sPlot_pca, r, fun="count")
plot(pca_r, asp=0, col="grey", legend=FALSE, 
     main="N vegetation plots per bin (log10 scale)", 
     xlab="PC1 (cold-to-warm)", ylab="PC2 (dry-to-wet)")
plot(log10(sPlot_pca_r), asp=0, col=rev(divPalette(n=100, name=c("RdYlBu"))), 
     add=TRUE)

#### For Marten Winter ####
# species x country Matrix
species.country <- table(DT$species,splot.header$Country[index30])
str(species.country) # int [1:60908, 1:130]
names(dimnames(species.country))
names(dimnames(species.country)) <- c("species","country")

library(reshape2)
?melt.table
species.country2 <- melt(species.country)
str(species.country2) # 	188470 obs. of  3 variables:
names(species.country2)
names(species.country2)[3] <- "p_a"
table(species.country2$p_a)

species.country2$p_a[species.country2$p_a>0] <- 1
species.country2 <- species.country2[species.country2$p_a==1,]
str(species.country2) # 	188470 obs. of  3 variables:
species.country2 <- species.country2[,-3]
str(species.country2) # 188470
write.csv(species.country2,file = 
            paste("/home/oliver/shared/", "species.country2.csv", sep = ""), row.names = FALSE)
levels(species.country2$country) # 130
length(species.country2$country[species.country2$country=="Algeria"])
# 97
sum(species.country[,2]) 
# 799, this is the amount of records for DZA = Algeria
sum(species.country[,126]) 
# 1467200, sum of records for United States

TaxaNat <- read.csv(paste("/home/oliver/shared/Glonaf/", "TaxaNat.csv", sep = ""))
str(TaxaNat) #178779 obs. of  2

GlonafRegion <- read.csv(paste("/home/oliver/shared/Glonaf/", "GlonafRegion.csv", sep = ""))
str(GlonafRegion) #843 obs. of  10
trim.trailing <- function (x) sub("\\s+$", "", x)
# returns string w/o leading or trailing whitespace
GlonafRegion$Country_sPlot <- as.factor(trim.trailing(as.character(GlonafRegion$Country_sPlot)))
index14 <- match(levels(species.country2$country),levels(GlonafRegion$Country_sPlot))
index14
levels(GlonafRegion$Country)

data.frame(levels(species.country2$country),levels(GlonafRegion$Country_sPlot)[index14])
## all correct

index15 <- match(levels(GlonafRegion$Country_sPlot),GlonafRegion$Country_sPlot)
index15
index16 <- match(species.country2$country,levels(species.country2$country))
any(is.na(index16)) # F
length(levels(species.country2$species)) #60908
'species.country2$species.country <- paste(species.country2$species,
levels(species.country2$country)[index16],sep="_")
species.country2$species.country <- paste(species.country2$species,
levels(GlonafRegion$Country_sPlot)[index14][index16],sep="_")
species.country2$species.country <- paste(species.country2$species,
GlonafRegion$Country_sPlot[index15][index14][index16],sep="_")
'
species.country2$species.country <- paste(species.country2$species,
                                          GlonafRegion$ISO.country[index15][index14][index16],sep="_")
str(species.country2) # 188470 obs. of  3 variables:
head(species.country2)
species.country2[c(160000:160100),]
## fits!

index17 <- match(TaxaNat$Region,GlonafRegion$ID.region)
any(is.na(index17)) # F

TaxaNat$species.country <-paste(TaxaNat$Taxon2,GlonafRegion$ISO.country[index17], sep="_")

index18 <- match(species.country2$species.country,TaxaNat$species.country)
length(index18) #188470
length(index18[!is.na(index18)]) # 9283 out of 188470 species x country occurrences
# are alien occurrences
9283/188470*100 # 4.925452 % species x country occurrences
species.country2$alien <- F
species.country2$alien[!is.na(index18)] <- T
species.country2$ISO.country <- GlonafRegion$ISO.country[index15][index14][index16]
str(species.country2)
head(species.country2)
length(species.country2$ISO.country[species.country2$ISO.country=="DZA"])
# 12939, this cannot be
#species.country2$ISO.country[species.country2$ISO.country=="DZA"]
sum(species.country[,2]) 
# 799, this is the amount of records for DZA = Algeria
length(species.country2$ISO.country[species.country2$ISO.country=="DZA"
                                    & !is.na(species.country2$ISO.country)])
# 97!

#table(species.country2$ISO.country)
#species.country3 <- acast(species.country2, species~ISO.country, length)
length(species.country2$alien[species.country2$alien])
# 9283 alien records in sPlot
length(species.country2$alien[species.country2$alien & species.country2$ISO.country=="DZA"])
# 3 alien records in DZA = Algeria
length(species.country[species.country[,2]>0,2])
# 97, however there are only 97 records in total
length(species.country2$alien[species.country2$alien & species.country2$ISO.country=="USA"])
# 1911 alien species in USA
length(species.country[species.country[,126]>0,2])
# 9933 species in total in USA
alien.richness <- as.matrix(table(species.country2$ISO.country,species.country2$alien))[,2]

species.country3 <- species.country
species.country3[species.country3>0] <- 1
country.richness <- colSums(species.country3)
country.richness
names(country.richness)
data.frame(names(country.richness),GlonafRegion$ISO.country[index15][index14])
country.richness2 <- data.frame(GlonafRegion$ISO.country[index15][index14],as.numeric(country.richness))
names(country.richness2) <- c("ISO.country","richness")
country.richness2$ISO.country
country.richness2

index19 <- match(country.richness2$ISO.country,names(alien.richness))
index19
alien.richness
country.richness2$alien.richness <- alien.richness[index19]
country.richness2
country.richness2$alien.prop <- country.richness2$alien.richness/country.richness2$richness
country.richness2$alien.prop

sPDF <- joinCountryData2Map(country.richness2,joinCode = "ISO3",
                            nameJoinColumn = "ISO.country")
'112 codes from your data successfully matched countries in the map
18 codes from your data failed to match with a country code in the map
132 codes from the map werent represented in your data
'
mapParams <- mapCountryData(sPDF, nameColumnToPlot="richness",addLegend=FALSE)
do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))
mapParams <- mapCountryData(sPDF, nameColumnToPlot="richness",
                            catMethod=seq(0,10000,by=1000),addLegend=FALSE)
do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))

mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.richness",
                            catMethod=seq(0,2000,by=200),addLegend=FALSE)
do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))

max(country.richness2$alien.prop, na.rm=T) #0.2823779
mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.prop",addLegend=FALSE)
mapParams <- mapCountryData(sPDF, nameColumnToPlot="alien.prop",
                            catMethod=seq(0,0.3,by=0.02),addLegend=FALSE)
do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2,legendLabels="all"))


##################### OLD ###################
library(spdep)
library(maptools)
library(hexbin)
library(RColorBrewer)
# Use a lighter version of sPlot focusing on mean annual temperature MAT (BIO1)
# and total annual rainfall TAR (BIO12)
sPlot <- splot.header[, c("LONGITUDE", "LATITUDE", "BIO_1", "BIO_12")]
sPlot$LONGITUDE <- as.numeric(as.character(sPlot$LONGITUDE))
sPlot$LATITUDE <- as.numeric(as.character(sPlot$LATITUDE))
sPlot$BIO_1 <- as.numeric(sPlot$BIO_1)/10
sPlot$BIO_12 <- as.numeric(sPlot$BIO_12)
str(sPlot)
# Make it a spatial data frame
CRSlonlat <- CRS("+proj=longlat +datum=WGS84")
coords <- cbind(sPlot$LONGITUDE, sPlot$LATITUDE)
coords <- SpatialPoints(coords, proj4string=CRSlonlat)
sPlot <- SpatialPointsDataFrame(coords, sPlot, proj4string=CRSlonlat)
str(sPlot)
# Plot the data on the world map to check the coordinates and projection
data(wrld_simpl)
plot(wrld_simpl)
points(coordinates(sPlot), cex=0.1, col="red", pch=16)
# Set a color ramp
rf <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
# Plot the plot distribution within the MAT-TAR bidimentionnal climatic space
hexbinplot(BIO_12~BIO_1, data=sPlot@data, colramp=rf, trans=log, inv=exp,
           border="white", xlab="MAT", ylab="TAR", xbins=30)
# Display only bins with a minimum of 10 plots and a maximum of 1000 plots
hexbinplot(BIO_12~BIO_1, data=sPlot@data, colramp=rf, trans=log, inv=exp,
           border="white", xlab="MAT", ylab="TAR", xbins=30, mincnt=10, maxcnt=1000)
#


################ OLD ###################

###
load("C:\\Daten\\iDiv\\sPlot\\pca_data.Rdata")
str(pca.data)
any(is.na(pca.data$PC1)) #F

load("C:\\Daten\\iDiv\\sPlot\\sPlot_header_pca.RData")
str(splot.header2)
any(is.na(pca.data$PC1)) #F

library(raster)
r <- 
  splot_pca_r <- rasterize(splot_pca, r, run="count")
plot(splot_pca_r, asp=0, col=rev(divPalette(n=100, name="RdY1Bu"))))
plot(splot_r, asp=0, col=rev(divPalette(n=100, name="RdY1Bu"))))

index <- which(getValues(splot_pca_r)>0)
which(getValues(pca_r)>0)
poinra
getValues(splot_pca_r)[index] # 136 values
?sample
splo_pca$binid <- cellFromXY(pca_r, splot_pca)


###########################
load("C:\\Daten\\iDiv\\sPlot\\splot_pca2.Rdata")
str(splot_pca)
splot_pca <- data.table(splot_pca)
setkey(splot_pca,binid)
inter <- splot_pca[,list(count=length(PCA1)), by=binid]
splot_pca2 <- splot_pca[,list(PCA1, PCA2,count=length(PCA1)), by=binid]

str(splot_pca2)
hist(splot_pca2$binid)
inter2 <- splot_pca2[count>100,list(count=sum(PCA1)), by=binid]
str(inter2)
[,list(total.cover=sum(Cover..)), by=PlotObservationID]
<- DT[,list(total.cover=sum(Cover..)), by=PlotObservationID]

sample(splot_pca$PCA1,100)
splot_pca2 <-  splot_pca[table(splot_pca$binid)>100,sample(PCA1,100),by=binid]
str(splot_pca2)

resample <- function(PCA1_,PCA2_,binid_){
  if (length(PCA1)>100) {
    
  }
  else {
    1
  }
  ]index10 <- match(species,labels(d))
  d3 <- as.dist(dist[index10,index10])
  if (sum(d3)==0){
    NA
  }
  divc(as.data.frame(rel.cover),d3)$diversity
}
splot_pca2 <-  splot_pca[length(PCA1)<=100,res_ID := 1]
head(splot_pca2,100)
table(splot_pca2$binid,splot_pca2$count, exclude=NULL)
table(splot_pca$binid)
splot_pca2 <-  splot_pca[,res_ID := 0]
splot_pca2 <-  splot_pca[table(splot_pca2$binid)<=100,res_ID := 1]
splot.selection <-  splot_pca[,list(res = resample(PCA1,PCA2,binid)),by=binid]
#length(splot_pca[binid>100])
hist(splot_pca$binid)
splot.selection <-  splot_pca[,list(length(binid)),by=binid]
splot.selection <-  splot_pca[,list(count=length(PCA1)),by=binid]
splot.selection <-  splot_pca[,,by=binid]
hist(splot.selection$count)
str(splot.selection)
str(splot_pca2)
splot.selection <-  splot_pca[,list(nrow()),by=binid]
splot.selection <-  splot_pca[,count:=length(binid),by=binid]
length(splot_pca,by=binid)
########################################################################################

### CWM ###
#CWM1 <-  DT[!is.na(plot.species.trait.value),list(CWM = weighted.mean(plot.species.trait.value,Relative.cover)),by=PlotObservationID]
CWM1 <-  DT[,list(CWM = weighted.mean(plot.species.trait.value,Relative.cover)),by=PlotObservationID]
# see http://stackoverflow.com/questions/3367190/aggregate-and-weighted-mean-in-r
CWM1[1:100,]
hist(CWM1$CWM)
write.csv((x, file = paste(path.sPlot, "CWM_SLA_all.csv", sep = ""), row.names = FALSE))

### FD ###
library(ade4)    # f?r divc (Rao's Q)
# this does not work: ~50 GB RAM required for 40k x 40k species matrix:
'ktab1 <- ktab.list.df(list(as.data.frame(species.trait.value[!is.na(species.trait.value)])))
d <- dist.ktab(ktab1, type=c("Q"), option = "scaledBYrange")
#labels(d)
#str(d)
#attr(d,"Labels")
attr(d,"Labels") <- dimnames(species.trait.value)[[1]][!is.na(species.trait.value)]
labels(d)
any(is.na(d)) # F
dist <- as.matrix(d)

#tree.trait.weighted <- t(t(veg.KHG3.prop)  * all.traits.KHG[,j])
#dist.sqrt <- sqrt(dist)
d.sqrt <- sqrt(d)
labels(d.sqrt)
dist <- as.matrix(d.sqrt)
'
'Rao <- function(rel.cover,species){
index10 <- match(species,labels(d))
d3 <- as.dist(dist[index10,index10])
if (sum(d3)==0){
NA
}
divc(as.data.frame(rel.cover),d3)$diversity
}
FD.all <-  DT[!is.na(plot.species.trait.value),list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]
'
#species.trait.value.scaled <- species.trait.value/(max(species.trait.value)-min(species.trait.value))
species.trait.value.scaled <- TRY.mean.sd.splot$SLA.mean/(max(TRY.mean.sd.splot$SLA.mean)-min(TRY.mean.sd.splot$SLA.mean))

# scale by range, which saves time for distance calculations

library(reshape2)

package.size = 1000
#number.plots.total <- length(levels(DT[!is.na(plot.species.trait.value)]$PlotObservationID)) # 705272
number.plots.total <- length(levels(DT$PlotObservationID)) # 701611 vs. 705272
FD.all <- array(0,number.plots.total, dimnames=list(levels(DT$PlotObservationID)))
number.packages <-ceiling(number.plots.total/package.size) #702
pb <- winProgressBar(title = "progress bar", min = 0, max =number.packages, width = 300)
for (k in 1:number.packages) {
  setWinProgressBar(pb, k, title=paste("Row ",k,"of ", number.packages, " is processed"))
  ifelse (k==number.packages, package<-number.plots.total%%package.size, package <- package.size)
  #index3 <- match(DT[!is.na(plot.species.trait.value)]$PlotObservationID,levels(DT[!is.na(plot.species.trait.value)]$PlotObservationID)[seq((k-1)*package.size+1,k*package.size,1)])
  index3 <- match(DT$PlotObservationID,levels(DT$PlotObservationID)[seq((k-1)*package.size+1,k*package.size,1)])
  #DT2 <- DT[!is.na(plot.species.trait.value) & !is.na(index3)]
  #DT2 <- DT2[!is.na(plot.species.trait.value)]
  #length(index3[!is.na(index3)])
  DT2 <- DT[!is.na(index3)]
  #str(DT2)
  #veg.matrix1 <- acast(DT2, Matched.concept ~ PlotObservationID, value.var="Relative.cover", fill=0)  
  veg.matrix1 <- acast(DT2, names.stand ~ PlotObservationID, value.var="Relative.cover", fun.aggregate=sum, fill=0)  
  # fun.aggregate = sum -> sums up multiple entries per species!
  # this can happen with the unified species nomenclature!
  #length(unique(dimnames(veg.matrix1)[[2]]))# 1000
  #length(unique(DT2$PlotObservationID)) # 1000
  #veg.matrix1[veg.matrix1 > 0] <- 1
  #any(colSums(veg.matrix1)!=1) #F
  index4 <- match(dimnames(veg.matrix1)[[1]],species.list)
  species.trait.value.scaled2 <- species.trait.value.scaled[index4]
  species.list2 <- species.list[index4]
  ktab1 <- ktab.list.df(list(as.data.frame(species.trait.value.scaled2)))
  d <- dist.ktab(ktab1, type=c("Q"), option = "noscale")
  #labels(d)
  #str(d)
  #attr(d,"Labels")
  attr(d,"Labels") <- species.list2
  #labels(d)
  #any(is.na(d)) # F
  #dist <- as.matrix(d)
  #as.matrix(d)[1:8,1:8]
  d <- sqrt(d)
  #dist <- as.matrix(d)
  #rm(d)
  if (k<number.packages){
    FD.all[seq((k-1)*package.size+1,k*package.size,1)] <- 
      divc(as.data.frame(veg.matrix1),d)$diversity
    #DT2[,list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]$Rao.Q
    any(colSums(veg.matrix1)==0) # F o.K.
    any(colSums(veg.matrix1)!=1) # T ups....
    hist(colSums(veg.matrix1))
    any(rowSums(veg.matrix1)==0) # F o.K.
  } else {
    FD.all[seq((k-1)*package.size+1,number.plots.total,1)] <- 
      divc(as.data.frame(veg.matrix1),d)$diversity
    #DT2[,list(Rao.Q = Rao(Relative.cover,Matched.concept)),by=PlotObservationID]
  }
}
close(pb)
#FD.all
write.csv(FD.all, file = paste(path.sPlot, "FDall.csv", sep = ""), row.names = FALSE)