From cba6deee081bd54866753041f12ae7c35b5aa50e Mon Sep 17 00:00:00 2001 From: hbruelheide <helge.bruelheide@botanik.uni-halle.de> Date: Mon, 24 Oct 2016 10:42:14 +0200 Subject: [PATCH] Basic file to set up the workspace --- code/splot_trait_match1.R | 543 ++++++++++++++++++++++++++++++++++++++ code/splot_workspace1.R | 357 +++++++++++++++++++++++++ 2 files changed, 900 insertions(+) create mode 100644 code/splot_trait_match1.R create mode 100644 code/splot_workspace1.R diff --git a/code/splot_trait_match1.R b/code/splot_trait_match1.R new file mode 100644 index 0000000..4911976 --- /dev/null +++ b/code/splot_trait_match1.R @@ -0,0 +1,543 @@ +### Create the workspace for sPlot2.0 ### +### written by Helge Bruelheide, 20.10.2016 ### + +# There are x files: +# 1. DT.Rdata: a file in long format, including, among others, +# PlotObservationID, species, Cover +# and Relative.cover, while DT holds all original information, +# including the "Matched concept" which is the unified name from +# Turboveg 3, +# DT2 has been shortened to the relevant fields and +# contains only data with species that have a resolved name in the backbone +# Both DT and DT contain species for which we do not have traits +# 2. splot.header, read in from sPlot_2015_07_29_header.csv: +# header file for DT, holding plot information on database origin, +# country, lat, long, layer height and cover etc. +# splot.header matches to DT, but not to DT2 +# 3. backbone.splot2.1.try3.Rdata: taxonomic backbone holding all sPlot and +# TRY species names. These names are in names.sPlot.TRY, the resolved +# names are in name.short.correct +# 4. environment +# 5. Export_sPlot_2016_09_12.csv: gap filled data from TRY3.0 +# 6. CWM.Rdata: Community weighted means based on all TRY gap filled data +# 7. FD.Rao.Rdata, Rao's Q for all plots, using divc from ade4 +# 8. mpd.abu.Rdata, mntd.abu.Rdata, mpd.pa.Rdata, mntd.pa.Rdata +# mean pairwise distance and mean nearest neighbour distance, +# both based on abundances and presence/absence, using Picante + +library(data.table) + +load("/data/sPlot2.0/DT_20161021.RData") # server directory +load("data/DT_20161021.RData") # local directory +# 23586216 obs. of 14 variables. +# the field "species" holdes the resolved name according to the backbone +# the field "Matched concept" is the original Turboveg3 name. +setkey(DT,PlotObservationID) + +splot.header <- fread("/data/sPlot2.0/sPlot_2015_07_29_header.csv", + sep = "\t", na.strings=c("","NA")) +# You get a warning, which you can ignore. +# 1121244 rows and 43 columns. + + +###### Taxonomic Backbone ##### +load("/data/sPlot2.0/backbone.splot2.1.try3.Rdata") +str(backbone.splot2.1.try3) +#data.frame': 130602 obs. of 33 variables: +' $ Name_number : int 1 2 3 4 5 6 7 8 9 10 ... +$ names.sPlot.TRY : chr "?" "0" "[1269 Chlorophytum platt]" "[1284 Echinochloa]" ... +$ names.corr.string : chr "?" "0" "[1269 Chlorophytum platt]" "[1284 Echinochloa]" ... +$ sPlot.TRY : chr "S" "S" "S" "S" ... +$ Name_submitted : chr "Spermatophyta sp." "Spermatophyta sp." "Chlorophytum sp. [1269]" "Echinochloa sp." ... +$ Overall_score : num 0 0 0.9 0.9 0.9 0.9 0.9 0 0.9 0 ... +$ Name_matched : chr "No suitable matches found." "No suitable matches found." "Chlorophytum" "Echinochloa" ... +$ Name_matched_rank : chr "" "" "genus" "genus" ... +$ Name_score : num 0 0 1 1 1 1 1 0 1 0 ... +$ Family_score : num 0 0 NA NA NA NA 1 0 1 0 ... +$ Name_matched_accepted_family: chr "" "" "Asparagaceae" "Poaceae" ... +$ Genus_matched : chr "" "" "Chlorophytum" "Echinochloa" ... +$ Genus_score : num 0 0 1 1 1 1 NA 0 NA 0 ... +$ Specific_epithet_matched : chr "" "" "" "" ... +$ Specific_epithet_score : num 0 0 NA NA NA NA NA 0 NA 0 ... +$ Unmatched_terms : chr "" "" "\"\"sp. [1269]" "\"\"sp." ... +$ Taxonomic_status : chr "" "" "Accepted" "Accepted" ... +$ Accepted_name : chr "" "" "Chlorophytum" "Echinochloa" ... +$ Accepted_name_author : chr "" "" "" "" ... +$ Accepted_name_rank : chr "" "" "genus" "genus" ... +$ Accepted_name_url : chr "" "" "http://www.theplantlist.org/tpl1.1/search?q=Chlorophytum" "http://www.theplantlist.org/tpl1.1/search?q=Echinochloa" ... +$ Accepted_name_species : chr "" "" "" "" ... +$ Accepted_name_family : chr "" "" "Asparagaceae" "Poaceae" ... +$ Selected : chr "true" "true" "true" "true" ... +$ Source : chr "" "" "tpl" "tpl" ... +$ Warnings : chr " " " " " " " " ... +$ Manual.matching : chr NA NA NA NA ... +$ Status.correct : chr "No suitable matches found." "No suitable matches found." "Accepted" "Accepted" ... +$ name.correct : chr "No suitable matches found." "No suitable matches found." "Chlorophytum" "Echinochloa" ... +$ rank.correct : chr "higher" "higher" "genus" "genus" ... +$ family.correct : chr "" "" "Asparagaceae" "Poaceae" ... +$ name.short.correct : chr NA NA "Chlorophytum" "Echinochloa" ... +$ rank.short.correct : chr "higher" "higher" "genus" "genus" ... +' +# TRY 3.0 +TRY3.0 <- read.csv("/data/sPlot2.0/Export_sPlot_2016_09_12.csv") +str(TRY3.0) +#632938 obs. of 33 variables: +any(is.na(TRY3.0[,c(2:19)])) #F +any(TRY3.0[,c(2:19)]==0) #F + + +index2 <- match(TRY3.0$Species,backbone.splot2.1.try3$names.sPlot.TRY) +str(index2) +TRY3.0$name.short.correct <- backbone.splot2.1.try3$name.short.correct[index2] +library(stringr) +TRY3.0$genus.short.correct <- word(TRY3.0$name.short.correct,1) +head(TRY3.0[,c(34,35)],1000) +TRY3.0$rank.short.correct <- backbone.splot2.1.try3$rank.short.correct[index2] +table(TRY3.0$rank.short.correct, exclude=NULL) +'family genus higher species <NA> + 39 21021 1 611655 222 ' + +### Calculations ### +### take the log of all trait values in TRY3.0 ### +for (i in 2:19){ + TRY3.0[,i] <- log(TRY3.0[,i]) +} +any(is.na(TRY3.0[,c(2:19)])) #F + +TRY.all.n.3 <- aggregate(TRY3.0[,1], by=list(TRY3.0$name.short.correct), FUN=length) +head(TRY.all.n.3) +str(TRY.all.n.3) #52032 obs. of 2 variables: +names(TRY.all.n.3) +names(TRY.all.n.3) <- c("StandSpeciesName","n") + +TRY.all.mean.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$name.short.correct), FUN=mean) +str(TRY.all.mean.3) #52032 obs. of 19 variables: +names(TRY.all.mean.3) +' [1] "Group.1" "X1" "X4" "X11" "X13" "X14" "X15" "X18" "X26" "X27" "X47" +[12] "X50" "X56" "X78" "X138" "X163" "X169" "X237" "X282" ' +names(TRY.all.mean.3) <- c("StandSpeciesName","LeafArea.mean", "StemDens.mean", "SLA.mean", "LeafC.perdrymass.mean", + "LeafN.mean","LeafP.mean", "PlantHeight.mean", "SeedMass.mean", "Seed.length.mean", + "LDMC.mean","LeafNperArea.mean", "LeafNPratio.mean", "Leaf.delta.15N.mean", + "Seed.num.rep.unit.mean", "Leaffreshmass.mean", "Stem.cond.dens.mean", + "Disp.unit.leng.mean", "Wood.vessel.length.mean") +any(is.na(TRY.all.mean.3$SLA.mean)) #F + +TRY.all.sd.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$name.short.correct), FUN=sd) +str(TRY.all.sd.3) #52032 obs. of 19 variables: +names(TRY.all.sd.3) +names(TRY.all.sd.3) <- c("StandSpeciesName","LeafArea.sd", "StemDens.sd", "SLA.sd", "LeafC.perdrymass.sd", + "LeafN.sd","LeafP.sd", "PlantHeight.sd", "SeedMass.sd", "Seed.length.sd", + "LDMC.sd","LeafNperArea.sd", "LeafNPratio.sd", "Leaf.delta.15N.sd", + "Seed.num.rep.unit.sd", "Leaffreshmass.sd", "Stem.cond.dens.sd", + "Disp.unit.leng.sd", "Wood.vessel.length.sd") +any(is.na(TRY.all.sd.3$SLA.sd)) #T +TRY.all.mean.sd.3.by.taxon <- data.frame(TRY.all.n.3,TRY.all.mean.3[,c(2:19)],TRY.all.sd.3[,c(2:19)]) +str(TRY.all.mean.sd.3.by.taxon) #52032 obs. of 38 variables +'data.frame: 52032 obs. of 38 variables: +$ StandSpeciesName : chr "Aa" "Aaronsohnia pubescens" "Abarema" "Abarema adenophora" ... + $ n : int 1 1 8 4 1 1 1 38 4 87 ... +$ LeafArea.mean : num 6.61 5.33 7.2 6.96 7.37 ... +$ StemDens.mean : num -0.822 -0.823 -0.534 -0.427 -1.02 ... +$ SLA.mean : num 2.24 3.09 2.43 2.38 2.7 ... +$ LeafC.perdrymass.mean : num 6.17 6.11 6.18 6.26 6.14 ... +$ LeafN.mean : num 2.92 3.07 3.3 3.19 3.38 ... +$ LeafP.mean : num -0.0143 1.0433 -0.1211 0.3403 0.4916 ... +$ PlantHeight.mean : num -0.498 -1.611 2.598 2.874 1.83 ... +$ SeedMass.mean : num -4.08 -1.69 4.27 4.49 4.26 ... +$ Seed.length.mean : num -0.317 0.337 1.949 2.021 2.244 ... +$ LDMC.mean : num -1.471 -1.586 -1.01 -0.798 -0.986 ... +$ LeafNperArea.mean : num 0.8956 0.0878 0.8763 0.8343 0.6596 ... +$ LeafNPratio.mean : num 2.54 1.95 3.49 3.26 3.06 ... +$ Leaf.delta.15N.mean : num 0.38 0.4 0.888 1.258 1.36 ... +$ Seed.num.rep.unit.mean : num 9.56 10.73 2.04 1.64 5.76 ... +$ Leaffreshmass.mean : num -1.3303 -2.7662 0.0832 -0.2516 -0.7861 ... +$ Stem.cond.dens.mean : num 3.67 4.47 1.77 2.14 2.78 ... +$ Disp.unit.leng.mean : num -0.555 0.445 2.734 2.916 2.235 ... +$ Wood.vessel.length.mean: num 6.13 5.32 5.88 5.73 5.32 ... +$ LeafArea.sd : num NA NA 0.0698 0.0303 NA ... +$ StemDens.sd : num NA NA 0.00778 0.11012 NA ... +$ SLA.sd : num NA NA 0.0227 0.1063 NA ... +$ LeafC.perdrymass.sd : num NA NA 0.0014 0.0086 NA ... +$ LeafN.sd : num NA NA 0.025 0.0638 NA ... +$ LeafP.sd : num NA NA 0.0614 0.3936 NA ... +$ PlantHeight.sd : num NA NA 0.053 0.0626 NA ... +$ SeedMass.sd : num NA NA 0.0684 0.0315 NA ... +$ Seed.length.sd : num NA NA 0.0307 0.0457 NA ... +$ LDMC.sd : num NA NA 0.0346 0.0553 NA ... +$ LeafNperArea.sd : num NA NA 0.0412 0.098 NA ... +$ LeafNPratio.sd : num NA NA 0.0354 0.291 NA ... +$ Leaf.delta.15N.sd : num NA NA 0.0326 0.1651 NA ... +$ Seed.num.rep.unit.sd : num NA NA 0.192 0.211 NA ... +$ Leaffreshmass.sd : num NA NA 0.0702 0.0748 NA ... +$ Stem.cond.dens.sd : num NA NA 0.413 0.139 NA ... +$ Disp.unit.leng.sd : num NA NA 0.033 0.0927 NA ... +$ Wood.vessel.length.sd : num NA NA 0.288 0.147 NA ...' + +save(TRY.all.mean.sd.3.by.taxon, file="/data/sPlot2.0/TRY.all.mean.sd.3.by.taxon.Rdata") + + +TRY.all.n.3 <- aggregate(TRY3.0[,1], by=list(TRY3.0$genus.short.correct), FUN=length) +head(TRY.all.n.3) +str(TRY.all.n.3) #7873 obs. of 2 variables: +names(TRY.all.n.3) +names(TRY.all.n.3) <- c("StandSpeciesName","n") + +TRY.all.mean.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$genus.short.correct), FUN=mean) +str(TRY.all.mean.3) #7873 obs. of 19 variables: +names(TRY.all.mean.3) +' [1] "Group.1" "X1" "X4" "X11" "X13" "X14" "X15" "X18" "X26" "X27" "X47" +[12] "X50" "X56" "X78" "X138" "X163" "X169" "X237" "X282" ' +names(TRY.all.mean.3) <- c("StandSpeciesName","LeafArea.mean", "StemDens.mean", "SLA.mean", "LeafC.perdrymass.mean", + "LeafN.mean","LeafP.mean", "PlantHeight.mean", "SeedMass.mean", "Seed.length.mean", + "LDMC.mean","LeafNperArea.mean", "LeafNPratio.mean", "Leaf.delta.15N.mean", + "Seed.num.rep.unit.mean", "Leaffreshmass.mean", "Stem.cond.dens.mean", + "Disp.unit.leng.mean", "Wood.vessel.length.mean") +any(is.na(TRY.all.mean.3$SLA.mean)) #F + +TRY.all.sd.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$genus.short.correct), FUN=sd) +str(TRY.all.sd.3) #7873 obs. of 19 variables: +names(TRY.all.sd.3) +names(TRY.all.sd.3) <- c("StandSpeciesName","LeafArea.sd", "StemDens.sd", "SLA.sd", "LeafC.perdrymass.sd", + "LeafN.sd","LeafP.sd", "PlantHeight.sd", "SeedMass.sd", "Seed.length.sd", + "LDMC.sd","LeafNperArea.sd", "LeafNPratio.sd", "Leaf.delta.15N.sd", + "Seed.num.rep.unit.sd", "Leaffreshmass.sd", "Stem.cond.dens.sd", + "Disp.unit.leng.sd", "Wood.vessel.length.sd") +any(is.na(TRY.all.sd.3$SLA.sd)) #T + +TRY.all.mean.sd.3.by.genus.species <- TRY.all.mean.sd.3.by.taxon +names(TRY.all.mean.sd.3.by.genus.species) +index3 <- match(TRY.all.mean.sd.3.by.taxon$StandSpeciesName, + backbone.splot2.1.try3$name.short.correct) +any(is.na(index3)) #F + +which(backbone.splot2.1.try3$rank.short.correct[index3]=="genus") +table(backbone.splot2.1.try3$rank.short.correct[index3]) +'family genus higher species + 6 2342 1 49683 ' +#backbone.splot2.1.try3$rank.short.correct[index3]=="genus" + +index4 <- match(TRY.all.mean.sd.3.by.genus.species[backbone.splot2.1.try3$rank.short.correct[index3]=="genus",1], + TRY.all.n.3$StandSpeciesName) +str(index4) + + +TRY.all.mean.sd.3.by.genus.species[backbone.splot2.1.try3$rank.short.correct[index3]=="genus",2] <- TRY.all.n.3[index4,2] +for (i in 1:18){ + TRY.all.mean.sd.3.by.genus.species[backbone.splot2.1.try3$rank.short.correct[index3]=="genus",i+2] <- TRY.all.mean.3[index4,i+1] + TRY.all.mean.sd.3.by.genus.species[backbone.splot2.1.try3$rank.short.correct[index3]=="genus",i+20] <- TRY.all.sd.3[index4,i+1] +} +head(TRY.all.mean.sd.3.by.genus.species[,c(1:3)],50) +head(TRY.all.mean.sd.3.by.taxon[,c(1:3)],50) +save(TRY.all.mean.sd.3.by.genus.species, file="/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.Rdata") +#load("/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.Rdata") +any(is.na(TRY.all.mean.sd.3.by.genus.species[,c(3:21)])) +#T +any(is.na(TRY.all.mean.sd.3.by.taxon[,c(3:21)])) +#T +### + + +any(is.na(DT$species)) #T +# there NA cases as not all "Matched concept" names in DT have resolved names +length(unique(DT$PlotObservationID)) #1121244 +length(DT$species[!is.na(DT$species)]) #23555942 +length(DT$species) #23586216 +23586216-23555942 # 30274 NA names!!! +# it gives nonsense to match them with traits + +index7 <- match(DT$species,TRY.all.mean.sd.3.by.genus.species$StandSpeciesName) +length(index7) #23586216 +length(index7[!is.na(index7)]) +# 21172989, with TRY3.0 +# 21040927, with TRY2.0 +# 19841429, with first TRY version +23586216 - 21172989 # 2413227 entries have no species in CWM +# traits are fewer as not all CWM rows have traits +(23586216 - 21172989)/23586216*100 +# 10.23151% of all entries +(21172989)/23586216*100 +# which are 89.76849% of all entries +# previously: 86.79555% with TRY2.0 +# previously: 18.15247 % and with first TRY version + +DT2 <- DT[!is.na(DT$species),] +any(is.na(DT2$Relative.cover)) # F +length(unique(DT2$PlotObservationID)) #1121145 +#length(unique(sPlot3$PlotObservationID)) #1121245 +names(DT2) +names(DT2)[3] <- "Taxon.group" +names(DT2)[6] <- "Matched.concept" +names(DT2) +library(dplyr) +DT2 <- select(DT2,PlotObservationID,species, Taxon.group, Matched.concept, Layer, Cover, Relative.cover) +str(DT2) #23555942 obs. of 7 variables: +length(unique(DT2$PlotObservationID)) #1121145 +plot.total.cover4 <- DT2[, list(total.cover=sum(Cover)), by=PlotObservationID] +str(plot.total.cover4) #1121145 +min(plot.total.cover4$total.cover) #0.001 +max(plot.total.cover4$total.cover) #104704.1 +any(is.na(plot.total.cover4$total.cover)) # F +length(plot.total.cover4$total.cover[plot.total.cover4$total.cover==0]) +# 0 !!! +# all plots have cover + +index4 <- match(DT2$PlotObservationID,plot.total.cover4$PlotObservationID) +length(index4) #23555942 +any(is.na(index4)) # F + +# recalculate Relative cover +### CAREFUL! This has to be done by layer now ### +DT2$Relative.cover <- DT2$Cover/plot.total.cover4$total.cover[index4] + +plot.total.cover5 <- DT2[, list(total.cover=sum(Relative.cover)), by=PlotObservationID] +min(plot.total.cover5$total.cover) #1 +max(plot.total.cover5$total.cover) #1 + +save(DT2,file = "/data/sPlot2.0/DT2_20161021.RData") + + +length(unique(DT2$PlotObservationID)) #1121145 versus 1121244 before +index7 <- match(DT2$species,TRY.all.mean.sd.3.by.genus.species$StandSpeciesName) +length(index7) #23555942 +length(index7[!is.na(index7)]) +# 21172989, with TRY3.0 +23555942 - 21172989 +# 2382953 entries with valid species names have no species in the +# gap-filled trait file +# not all species there have traits +(23555942 - 21172989)/23555942*100 +# 10.11614% of all entries have no traits +(21172989)/23555942*100 +# which are 89.88386% of all entries + +### CWM ### +mean(TRY.all.mean.sd.3.by.genus.species$SLA.mean) # NA +mean(TRY.all.mean.sd.3.by.genus.species$SLA.mean,na.rm=T) # 2.63365 +# example +DT2$trait <- NA +DT2$trait <- TRY.all.mean.sd.3.by.genus.species$SLA.mean[index7] +length(DT2$trait[!is.na(DT2$trait)]) # 21172949 +length(DT2$trait[is.na(DT2$trait)]) # 2382993 +(21172949-2382993)/21172949 +# 0.8874511 % of all records in sPlot have a valid trait +length(unique(DT2$species)) #61131 +length(unique(DT2$species[!is.na(DT2$trait)])) #26666 +26666/61131 # 0.4362108 +str(DT2) + +colnames(TRY.all.mean.sd.3.by.genus.species) +CWM2 <- DT2[,list(CWM.SLA = weighted.mean(trait,Cover,na.rm = T)),by=PlotObservationID] +# I have checked that we do not need to prefilter only those entries that have a trait value +# this works fine +str(CWM2) +dim(CWM2) #1121145 +which(colnames(TRY.all.mean.sd.3.by.genus.species)=="LeafArea.mean") # 3 +which(colnames(TRY.all.mean.sd.3.by.genus.species)=="Wood.vessel.length.mean") # 20 +CWM <- array(NA,c(dim(CWM2)[1],18),dimnames=list(CWM2$PlotObservationID, + colnames(TRY.all.mean.sd.3.by.genus.species)[3:20])) +rm(CWM2) +for (i in 1:18){ + DT2$trait <- NA + DT2$trait <- TRY.all.mean.sd.3.by.genus.species[index7,i+2] + CWM[,i] <- DT2[,list(CWM.trait= weighted.mean(trait,Relative.cover,na.rm = T)),by=PlotObservationID]$CWM.trait +} +dim(CWM) #1121145 18 +length(CWM[,"SLA.mean"][!is.na(CWM[,"SLA.mean"])]) #1116113 +1116113/1121145 # 0.9955117% of all plots in sPlot2 have a CWM value + +save(CWM, file="/data/sPlot2.0/CWM.Rdata") # on the server +save(CWM, file="data/CWM.Rdata") # on the local system +#load("/data/sPlot2.0/CWM.Rdata") # on the server + +## FD Raos Q based on divc in ade4 +library(ade4) # f?r divc (Rao's Q) +FD.fun <- function(trait, abu){ + res <- as.double(NA) + if (length(trait[!is.na(trait)])>0){ + res <- 0 + #nam <- as.character(nam[!is.na(trait)]) + abu <- as.data.frame(abu[!is.na(trait)]) + #rownames(abu) <- nam + trait <- as.data.frame(trait[!is.na(trait)]) + #rownames(trait) <- nam + dis <- dist(trait,method="euclidean") + if (sum(dis)>1){ + sqrt.dis <- sqrt(dis) + # taking the square root is required to obtain the same result from the divc function + # as divc takes the square of distances values, as suggested by + # Champely & Chessel (2002, Env. Ecol. Stat. 9: 167-177) + #abu <- as.data.frame(abu) + res <- unlist(divc(abu, sqrt.dis)) + #Rao's Q: sumi sumj (dist ij prop i prop j)' + } + } + res +} + + +library(picante) +# define function for mean pairwise distance +'mpd.fun.dt <- function(abu, dis, nam, abundance.weighted){ + abu <- t(abu) + colnames(abu) <- nam + res <- mpd(abu, dis, abundance.weighted=abundance.weighted) +} +# define funtion of mean nearest neighbor distance +mntd.fun.dt <- function(abu, dis, nam, abundance.weighted){ + abu <- t(abu) + colnames(abu) <- nam + res <- mntd(abu, dis, abundance.weighted=abundance.weighted) + res +} +' + +mpd.fun <- function(nam, trait, abu,abundance.weighted){ + #res <- numeric(1) # carries result from the function + res <- as.double(NA) + nam <- nam[!is.na(trait)] + abu <- abu[!is.na(trait)] + trait <- trait[!is.na(trait)] + if (length(trait)>0){ + # dis <- as.matrix(dist(t(t1),method="euclidean")) + abu <- t(abu) + colnames(abu) <- nam + trait <- t(trait) + colnames(trait) <- nam + #dis <- as.matrix(vegdist(t(t1),method="euclidean", na.rm=T)) + dis <- as.matrix(dist(t(trait),method="euclidean")) + res <- mpd(abu, dis, abundance.weighted=abundance.weighted) + } + res +} +mntd.fun <- function(nam, trait, abu,abundance.weighted){ + #res <- numeric(1) # carries result from the function + res <- as.double(NA) + nam <- nam[!is.na(trait)] + abu <- abu[!is.na(trait)] + trait <- trait[!is.na(trait)] + if (length(trait)>0){ + # dis <- as.matrix(dist(t(t1),method="euclidean")) + abu <- t(abu) + colnames(abu) <- nam + trait <- t(trait) + colnames(trait) <- nam + #dis <- as.matrix(vegdist(t(t1),method="euclidean", na.rm=T)) + dis <- as.matrix(dist(t(trait),method="euclidean")) + res <- mntd(abu, dis, abundance.weighted=abundance.weighted) + } + res +} +test<- DT[c(1:10000),list(mpd.trait= mpd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID] +str(test) +length(test$mpd.trait[is.na(test$mpd.trait)]) +head(test$mpd.trait) +DT[PlotObservationID==30,] +test$mpd.trait[test$PlotObservationID==30] +length(test$mntd.trait[is.na(test$mntd.trait)]) + +test<- DT[c(1:10000),list(mntd.trait= mntd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID] +test$mntd.trait[test$PlotObservationID==30] + +FD.Rao <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]], + colnames(TRY.all.mean.sd.3.by.genus.species)[3:20])) +mpd.abu <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]], + colnames(TRY.all.mean.sd.3.by.genus.species)[3:20])) +mpd.pa <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]], + colnames(TRY.all.mean.sd.3.by.genus.species)[3:20])) +mntd.abu <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]], + colnames(TRY.all.mean.sd.3.by.genus.species)[3:20])) +mntd.pa <- array(NA,c(dim(CWM)[1],18),dimnames=list(dimnames(CWM)[[1]], + colnames(TRY.all.mean.sd.3.by.genus.species)[3:20])) + +str(FD.Rao) #logi [1:1121145, 1:18] + +for (i in 1:18){ + print(i) + DT2$trait <- NA + DT2$trait <- TRY.all.mean.sd.3.by.genus.species[index7,i+2] + #FD.Rao[,i] <- DT2[,list(FD.trait= FD.fun(trait, Relative.cover)),by=PlotObservationID]$FD.trait + #mpd.abu[,i] <- DT2[,list(mpd.trait= mpd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait + mpd.pa[,i] <- DT2[,list(mpd.trait= mpd.fun(species, trait, Relative.cover, abundance.weighted=FALSE)),by=PlotObservationID]$mpd.trait + mntd.abu[,i] <- DT2[,list(mntd.trait= mntd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID]$mntd.trait + #mntd.pa[,i] <- DT2[,list(mntd.trait= mntd.fun(species, trait, Relative.cover, abundance.weighted=FALSE)),by=PlotObservationID]$mntd.trait +} +save(FD.Rao, file="/data/sPlot2.0/FD.Rao.Rdata") # on the server +save(mpd.abu, file="/data/sPlot2.0/mpd.abu.Rdata") # on the server +save(mpd.pa, file="/data/sPlot2.0/mpd.pa.Rdata") # on the server +save(mntd.abu, file="/data/sPlot2.0/FD.Rao.Rdata") # on the server +save(mntd.pa, file="/data/sPlot2.0/mntd.pa.Rdata") # on the server +dim(mpd.pa) #1121145 18 +length(mpd.pa[,"SLA.mean"][!is.na(mpd.pa[,"SLA.mean"])]) #1100058 +1100058/1121145 #0.9811915 + +library(vegan) +head(dimnames(TRY.all.mean.sd.3.by.genus.species)[[1]]) +dimnames(TRY.all.mean.sd.3.by.genus.species)[[2]] +TRY.all.mean.sd.3.by.genus.species2 <- TRY.all.mean.sd.3.by.genus.species[!is.na(TRY.all.mean.sd.3.by.genus.species[,"SLA.mean"]),] +trait.pca1 <- rda(TRY.all.mean.sd.3.by.genus.species2[,c(5,9,10)],scale=T) +## do this for 3 traits +nam <- DT2$species[DT$PlotObservationID==1] +pca1 <- DT2$pca1[DT$PlotObservationID==1] +pca2 <- DT2$pca2[DT$PlotObservationID==1] +pca3 <- DT2$pca3[DT$PlotObservationID==1] +abu <- DT2$Relative.cover[DT$PlotObservationID==1] + +mpd.fun.multitrait <- function(nam, pca1, pca2, pca3, abu,abundance.weighted){ + #res <- numeric(1) # carries result from the function + trait <- as.matrix(cbind(pca1, pca2, pca3)) + res <- as.double(NA) + nam <- nam[!is.na(rowSums(trait))] + abu <- abu[!is.na(rowSums(trait))] + trait <- trait[!is.na(rowSums(trait)),] + if (length(trait)>3){ + # then there is more than one species (with 3 trait values) + abu <- t(abu) + colnames(abu) <- nam + trait <- t(trait) + colnames(trait) <- nam + dis <- as.matrix(dist(t(trait),method="euclidean")) + res <- mpd(abu, dis, abundance.weighted=abundance.weighted) + } + res +} +mntd.fun.multitrait <- function(nam, pca1, pca2, pca3, abu,abundance.weighted){ + #res <- numeric(1) # carries result from the function + trait <- as.matrix(cbind(pca1, pca2, pca3)) + res <- as.double(NA) + nam <- nam[!is.na(rowSums(trait))] + abu <- abu[!is.na(rowSums(trait))] + trait <- trait[!is.na(rowSums(trait)),] + if (length(trait)>3){ + abu <- t(abu) + colnames(abu) <- nam + trait <- t(trait) + colnames(trait) <- nam + dis <- as.matrix(dist(t(trait),method="euclidean")) + res <- mntd(abu, dis, abundance.weighted=abundance.weighted) + } + res +} + +identical(dimnames(TRY.all.mean.sd.3.by.genus.species2)[[1]], + dimnames(summary(trait.pca1)$sites)[[1]]) +#T +index8 <- match(DT2$species,TRY.all.mean.sd.3.by.genus.species2$StandSpeciesName) +length(index8) #23555942 +length(index8[!is.na(index8)]) #21172949 + + +DT2$pca1 <- summary(trait.pca1)$sites[index8,1] +DT2$pca2 <- summary(trait.pca1)$sites[index8,2] +DT2$pca3 <- summary(trait.pca1)$sites[index8,3] +names(DT2) +# pca scores in 9:11 +mpd.abu.LHS <- DT2[,list(mpd.trait= mpd.fun.multitrait(species, pca1, pca2, pca3, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID]$mpd.trait +mpd.pa.LHS <- DT2[,list(mpd.trait= mpd.fun.multitrait(species, pca1, pca2, pca3, Relative.cover, abundance.weighted=FALSE)),by=PlotObservationID]$mpd.trait +mntd.abu.LHS <- DT2[,list(mntd.trait= mntd.fun.multitrait(species, pca1, pca2, pca3, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID]$mntd.trait +mntd.pa.LHS <- DT2[,list(mntd.trait= mntd.fun.multitrait(species, pca1, pca2, pca3, Relative.cover, abundance.weighted=FALSE)),by=PlotObservationID]$mntd.trait + +mpd.mntd.LHS <- data.frame(mpd.abu.LHS,mpd.pa.LHS,mntd.abu.LHS, mntd.pa.LHS) +str(mpd.mntd.LHS) +#data.frame': 1121145 obs. of 4 variables: +save(mpd.mntd.LHS, file="/data/sPlot2.0/mpd.mntd.LHS.Rdata") # on the server + diff --git a/code/splot_workspace1.R b/code/splot_workspace1.R new file mode 100644 index 0000000..e6b384e --- /dev/null +++ b/code/splot_workspace1.R @@ -0,0 +1,357 @@ +### Create the workspace for sPlot2.0 ### +### written by Helge Bruelheide, 20.10.2016 ### + +# There are x files: +# 1. DT.Rdata: a file in long format, including, among others, +# PlotObservationID, species, Cover +# and Relative.cover, while DT holds all original information, +# including the "Matched concept" which is the unified name from +# Turboveg 3, +# DT2 has been shortened to the relevant fields and +# contains only data with species that have a resolved name in the backbone +# Both DT and DT contain species for which we do not have traits +# 2. splot.header, read in from sPlot_2015_07_29_header.csv: +# header file for DT, holding plot information on database origin, +# country, lat, long, layer height and cover etc. +# splot.header matches to DT, but not to DT2 +# 3. backbone.splot2.1.try3.Rdata: taxonomic backbone holding all sPlot and +# TRY species names. These names are in names.sPlot.TRY, the resolved +# names are in name.short.correct +# 4. environment +# 5. Export_sPlot_2016_09_12.csv: gap filled data from TRY3.0 +# 6. CWM.Rdata: Community weighted means based on all TRY gap filled data +# 7. FD.Rao.Rdata, Rao's Q for all plots, using divc from ade4 +# 8. mpd.abu.Rdata, mntd.abu.Rdata, mpd.pa.Rdata, mntd.pa.Rdata +# mean pairwise distance and mean nearest neighbour distance, +# both based on abundances and presence/absence, using Picante + +load("/data/sPlot2.0/DT_20161021.RData") + + +library(data.table) +splot.header <- fread("/data/sPlot2.0/sPlot_2015_07_29_header.csv", + sep = "\t", na.strings=c("","NA")) + +###### Backbone ##### +load("/data/sPlot2.0/backbone.splot2.1.try3.Rdata") +str(backbone.splot2.1.try3) +#data.frame': 130602 obs. of 33 variables: +' $ Name_number : int 1 2 3 4 5 6 7 8 9 10 ... +$ names.sPlot.TRY : chr "?" "0" "[1269 Chlorophytum platt]" "[1284 Echinochloa]" ... +$ names.corr.string : chr "?" "0" "[1269 Chlorophytum platt]" "[1284 Echinochloa]" ... +$ sPlot.TRY : chr "S" "S" "S" "S" ... +$ Name_submitted : chr "Spermatophyta sp." "Spermatophyta sp." "Chlorophytum sp. [1269]" "Echinochloa sp." ... +$ Overall_score : num 0 0 0.9 0.9 0.9 0.9 0.9 0 0.9 0 ... +$ Name_matched : chr "No suitable matches found." "No suitable matches found." "Chlorophytum" "Echinochloa" ... +$ Name_matched_rank : chr "" "" "genus" "genus" ... +$ Name_score : num 0 0 1 1 1 1 1 0 1 0 ... +$ Family_score : num 0 0 NA NA NA NA 1 0 1 0 ... +$ Name_matched_accepted_family: chr "" "" "Asparagaceae" "Poaceae" ... +$ Genus_matched : chr "" "" "Chlorophytum" "Echinochloa" ... +$ Genus_score : num 0 0 1 1 1 1 NA 0 NA 0 ... +$ Specific_epithet_matched : chr "" "" "" "" ... +$ Specific_epithet_score : num 0 0 NA NA NA NA NA 0 NA 0 ... +$ Unmatched_terms : chr "" "" "\"\"sp. [1269]" "\"\"sp." ... +$ Taxonomic_status : chr "" "" "Accepted" "Accepted" ... +$ Accepted_name : chr "" "" "Chlorophytum" "Echinochloa" ... +$ Accepted_name_author : chr "" "" "" "" ... +$ Accepted_name_rank : chr "" "" "genus" "genus" ... +$ Accepted_name_url : chr "" "" "http://www.theplantlist.org/tpl1.1/search?q=Chlorophytum" "http://www.theplantlist.org/tpl1.1/search?q=Echinochloa" ... +$ Accepted_name_species : chr "" "" "" "" ... +$ Accepted_name_family : chr "" "" "Asparagaceae" "Poaceae" ... +$ Selected : chr "true" "true" "true" "true" ... +$ Source : chr "" "" "tpl" "tpl" ... +$ Warnings : chr " " " " " " " " ... +$ Manual.matching : chr NA NA NA NA ... +$ Status.correct : chr "No suitable matches found." "No suitable matches found." "Accepted" "Accepted" ... +$ name.correct : chr "No suitable matches found." "No suitable matches found." "Chlorophytum" "Echinochloa" ... +$ rank.correct : chr "higher" "higher" "genus" "genus" ... +$ family.correct : chr "" "" "Asparagaceae" "Poaceae" ... +$ name.short.correct : chr NA NA "Chlorophytum" "Echinochloa" ... +$ rank.short.correct : chr "higher" "higher" "genus" "genus" ... +' +index2 <- match(TRY3.0$Species,backbone.splot2.1.try3$names.sPlot.TRY) +str(index2) +TRY3.0$name.short.correct <- backbone.splot2.1.try3$name.short.correct[index2] +library(stringr) +TRY3.0$genus.short.correct <- word(TRY3.0$name.short.correct,1) +head(TRY3.0[,c(34,35)],1000) +TRY3.0$rank.short.correct <- backbone.splot2.1.try3$rank.short.correct[index2] +table(TRY3.0$rank.short.correct, exclude=NULL) +'family genus higher species <NA> + 39 21021 1 611655 222 ' + +### Calculations ### + + +# TRY 3.0 +TRY3.0 <- read.csv("/data/sPlot2.0/Export_sPlot_2016_09_12.csv") +str(TRY3.0) +any(is.na(TRY3.0[,c(2:19)])) #F +any(TRY3.0[,c(2:19)]==0) #F + +for (i in 2:19){ + TRY3.0[,i] <- log(TRY3.0[,i]) +} +any(is.na(TRY3.0[,c(2:19)])) #F + +TRY.all.n.3 <- aggregate(TRY3.0[,1], by=list(TRY3.0$name.short.correct), FUN=length) +head(TRY.all.n.3) +str(TRY.all.n.3) #52032 obs. of 2 variables: +names(TRY.all.n.3) +names(TRY.all.n.3) <- c("StandSpeciesName","n") + +TRY.all.mean.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$name.short.correct), FUN=mean) +str(TRY.all.mean.3) #52032 obs. of 19 variables: +names(TRY.all.mean.3) +' [1] "Group.1" "X1" "X4" "X11" "X13" "X14" "X15" "X18" "X26" "X27" "X47" +[12] "X50" "X56" "X78" "X138" "X163" "X169" "X237" "X282" ' +names(TRY.all.mean.3) <- c("StandSpeciesName","LeafArea.mean", "StemDens.mean", "SLA.mean", "LeafC.perdrymass.mean", + "LeafN.mean","LeafP.mean", "PlantHeight.mean", "SeedMass.mean", "Seed.length.mean", + "LDMC.mean","LeafNperArea.mean", "LeafNPratio.mean", "Leaf.delta.15N.mean", + "Seed.num.rep.unit.mean", "Leaffreshmass.mean", "Stem.cond.dens.mean", + "Disp.unit.leng.mean", "Wood.vessel.length.mean") +any(is.na(TRY.all.mean.3$SLA.mean)) #F + +TRY.all.sd.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$name.short.correct), FUN=sd) +str(TRY.all.sd.3) #52032 obs. of 19 variables: +names(TRY.all.sd.3) +names(TRY.all.sd.3) <- c("StandSpeciesName","LeafArea.sd", "StemDens.sd", "SLA.sd", "LeafC.perdrymass.sd", + "LeafN.sd","LeafP.sd", "PlantHeight.sd", "SeedMass.sd", "Seed.length.sd", + "LDMC.sd","LeafNperArea.sd", "LeafNPratio.sd", "Leaf.delta.15N.sd", + "Seed.num.rep.unit.sd", "Leaffreshmass.sd", "Stem.cond.dens.sd", + "Disp.unit.leng.sd", "Wood.vessel.length.sd") +any(is.na(TRY.all.sd.3$SLA.sd)) #T +TRY.all.mean.sd.3.by.taxon <- data.frame(TRY.all.n.3,TRY.all.mean.3[,c(2:19)],TRY.all.sd.3[,c(2:19)]) +str(TRY.all.mean.sd.3.by.taxon) #52032 obs. of 38 variables +'data.frame: 52032 obs. of 38 variables: +$ StandSpeciesName : chr "Aa" "Aaronsohnia pubescens" "Abarema" "Abarema adenophora" ... + $ n : int 1 1 8 4 1 1 1 38 4 87 ... +$ LeafArea.mean : num 6.61 5.33 7.2 6.96 7.37 ... +$ StemDens.mean : num -0.822 -0.823 -0.534 -0.427 -1.02 ... +$ SLA.mean : num 2.24 3.09 2.43 2.38 2.7 ... +$ LeafC.perdrymass.mean : num 6.17 6.11 6.18 6.26 6.14 ... +$ LeafN.mean : num 2.92 3.07 3.3 3.19 3.38 ... +$ LeafP.mean : num -0.0143 1.0433 -0.1211 0.3403 0.4916 ... +$ PlantHeight.mean : num -0.498 -1.611 2.598 2.874 1.83 ... +$ SeedMass.mean : num -4.08 -1.69 4.27 4.49 4.26 ... +$ Seed.length.mean : num -0.317 0.337 1.949 2.021 2.244 ... +$ LDMC.mean : num -1.471 -1.586 -1.01 -0.798 -0.986 ... +$ LeafNperArea.mean : num 0.8956 0.0878 0.8763 0.8343 0.6596 ... +$ LeafNPratio.mean : num 2.54 1.95 3.49 3.26 3.06 ... +$ Leaf.delta.15N.mean : num 0.38 0.4 0.888 1.258 1.36 ... +$ Seed.num.rep.unit.mean : num 9.56 10.73 2.04 1.64 5.76 ... +$ Leaffreshmass.mean : num -1.3303 -2.7662 0.0832 -0.2516 -0.7861 ... +$ Stem.cond.dens.mean : num 3.67 4.47 1.77 2.14 2.78 ... +$ Disp.unit.leng.mean : num -0.555 0.445 2.734 2.916 2.235 ... +$ Wood.vessel.length.mean: num 6.13 5.32 5.88 5.73 5.32 ... +$ LeafArea.sd : num NA NA 0.0698 0.0303 NA ... +$ StemDens.sd : num NA NA 0.00778 0.11012 NA ... +$ SLA.sd : num NA NA 0.0227 0.1063 NA ... +$ LeafC.perdrymass.sd : num NA NA 0.0014 0.0086 NA ... +$ LeafN.sd : num NA NA 0.025 0.0638 NA ... +$ LeafP.sd : num NA NA 0.0614 0.3936 NA ... +$ PlantHeight.sd : num NA NA 0.053 0.0626 NA ... +$ SeedMass.sd : num NA NA 0.0684 0.0315 NA ... +$ Seed.length.sd : num NA NA 0.0307 0.0457 NA ... +$ LDMC.sd : num NA NA 0.0346 0.0553 NA ... +$ LeafNperArea.sd : num NA NA 0.0412 0.098 NA ... +$ LeafNPratio.sd : num NA NA 0.0354 0.291 NA ... +$ Leaf.delta.15N.sd : num NA NA 0.0326 0.1651 NA ... +$ Seed.num.rep.unit.sd : num NA NA 0.192 0.211 NA ... +$ Leaffreshmass.sd : num NA NA 0.0702 0.0748 NA ... +$ Stem.cond.dens.sd : num NA NA 0.413 0.139 NA ... +$ Disp.unit.leng.sd : num NA NA 0.033 0.0927 NA ... +$ Wood.vessel.length.sd : num NA NA 0.288 0.147 NA ...' + +save(TRY.all.mean.sd.3.by.taxon, file="/data/sPlot2.0/TRY.all.mean.sd.3.by.taxon.Rdata") + + +TRY.all.n.3 <- aggregate(TRY3.0[,1], by=list(TRY3.0$genus.short.correct), FUN=length) +head(TRY.all.n.3) +str(TRY.all.n.3) #7873 obs. of 2 variables: +names(TRY.all.n.3) +names(TRY.all.n.3) <- c("StandSpeciesName","n") + +TRY.all.mean.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$genus.short.correct), FUN=mean) +str(TRY.all.mean.3) #7873 obs. of 19 variables: +names(TRY.all.mean.3) +' [1] "Group.1" "X1" "X4" "X11" "X13" "X14" "X15" "X18" "X26" "X27" "X47" +[12] "X50" "X56" "X78" "X138" "X163" "X169" "X237" "X282" ' +names(TRY.all.mean.3) <- c("StandSpeciesName","LeafArea.mean", "StemDens.mean", "SLA.mean", "LeafC.perdrymass.mean", + "LeafN.mean","LeafP.mean", "PlantHeight.mean", "SeedMass.mean", "Seed.length.mean", + "LDMC.mean","LeafNperArea.mean", "LeafNPratio.mean", "Leaf.delta.15N.mean", + "Seed.num.rep.unit.mean", "Leaffreshmass.mean", "Stem.cond.dens.mean", + "Disp.unit.leng.mean", "Wood.vessel.length.mean") +any(is.na(TRY.all.mean.3$SLA.mean)) #F + +TRY.all.sd.3 <- aggregate(TRY3.0[,c(2:19)], by=list(TRY3.0$genus.short.correct), FUN=sd) +str(TRY.all.sd.3) #7873 obs. of 19 variables: +names(TRY.all.sd.3) +names(TRY.all.sd.3) <- c("StandSpeciesName","LeafArea.sd", "StemDens.sd", "SLA.sd", "LeafC.perdrymass.sd", + "LeafN.sd","LeafP.sd", "PlantHeight.sd", "SeedMass.sd", "Seed.length.sd", + "LDMC.sd","LeafNperArea.sd", "LeafNPratio.sd", "Leaf.delta.15N.sd", + "Seed.num.rep.unit.sd", "Leaffreshmass.sd", "Stem.cond.dens.sd", + "Disp.unit.leng.sd", "Wood.vessel.length.sd") +any(is.na(TRY.all.sd.3$SLA.sd)) #T + +TRY.all.mean.sd.3.by.genus.species <- TRY.all.mean.sd.3.by.taxon +names(TRY.all.mean.sd.3.by.genus.species) +index3 <- match(TRY.all.mean.sd.3.by.taxon$StandSpeciesName, + backbone.splot2.1.try3$name.short.correct) +any(is.na(index3)) #F + +which(backbone.splot2.1.try3$rank.short.correct[index3]=="genus") +table(backbone.splot2.1.try3$rank.short.correct[index3]) +'family genus higher species +6 2342 1 49683 ' +#backbone.splot2.1.try3$rank.short.correct[index3]=="genus" + +index4 <- match(TRY.all.mean.sd.3.by.genus.species[backbone.splot2.1.try3$rank.short.correct[index3]=="genus",1], + TRY.all.n.3$StandSpeciesName) +str(index4) + + +TRY.all.mean.sd.3.by.genus.species[backbone.splot2.1.try3$rank.short.correct[index3]=="genus",2] <- TRY.all.n.3[index4,2] +for (i in 1:18){ + TRY.all.mean.sd.3.by.genus.species[backbone.splot2.1.try3$rank.short.correct[index3]=="genus",i+2] <- TRY.all.mean.3[index4,i+1] + TRY.all.mean.sd.3.by.genus.species[backbone.splot2.1.try3$rank.short.correct[index3]=="genus",i+20] <- TRY.all.sd.3[index4,i+1] +} +head(TRY.all.mean.sd.3.by.genus.species[,c(1:3)],50) +head(TRY.all.mean.sd.3.by.taxon[,c(1:3)],50) +save(TRY.all.mean.sd.3.by.genus.species, file="/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.Rdata") + +any(is.na(TRY.all.mean.sd.3.by.genus.species[,c(3:21)])) +#T +any(is.na(TRY.all.mean.sd.3.by.taxon[,c(3:21)])) +#T +### + + +any(is.na(DT$species)) #T +# there NA cases as not all "Matched concept" names in DT have resolved names +length(unique(DT$PlotObservationID)) #1121244 +length(DT$species[!is.na(DT$species)]) #23555942 +length(DT$species) #23586216 +23586216-23555942 # 30274 NA names!!! +# it gives nonsense to match them with traits + +index7 <- match(DT$species,TRY.all.mean.sd.3.by.genus.species$StandSpeciesName) +length(index7) #23586216 +length(index7[!is.na(index7)]) +# 21172989, with TRY3.0 +# 21040927, with TRY2.0 +# 19841429, with first TRY version +23586216 - 21172989 # 2413227 entries have no traits +(23586216 - 21172989)/23586216*100 +# 10.23151% of all entries have no traits +(21172989)/23586216*100 +# which are 89.76849% of all entries +# previously: 86.79555% with TRY2.0 +# previously: 18.15247 % and with first TRY version + +DT <- DT[!is.na(DT$species),] +names(DT) +library(dplyr) +DT2 <- select(DT,PlotObservationID,species, Taxon group,Matched concept, Layer, Cover, Relative.cover) +length(unique(DT$PlotObservationID)) #1121145 versus 1121244 before +index7 <- match(DT$species,TRY.all.mean.sd.3.by.genus.species$StandSpeciesName) +length(index7) #23555942 +length(index7[!is.na(index7)]) +# 21172989, with TRY3.0 +23555942 - 21172989 # 2382953 entries with valid species names have no traits +(23555942 - 21172989)/23555942*100 +# 10.11614% of all entries have no traits +(21172989)/23555942*100 +# which are 89.88386% of all entries + +### CWM ### +mean(TRY.all.mean.sd.3.by.genus.species$SLA.mean) # NA +mean(TRY.all.mean.sd.3.by.genus.species$SLA.mean,na.rm=T) # 2.63365 +# example +DT$trait <- NA +DT$trait <- TRY.all.mean.sd.3.by.genus.species$SLA.mean[index7] +length(DT$trait[!is.na(DT$trait)]) # 21172949 +length(DT$trait[is.na(DT$trait)]) # 2382993 +str(DT) + +colnames(TRY.all.mean.sd.3.by.genus.species) +CWM2 <- DT[,list(CWM.SLA = weighted.mean(trait,Cover,na.rm = T)),by=PlotObservationID] +# I have checked that we do not need to prefilter only those entries that have a trait value +# this works fine +str(CWM2) +dim(CWM2) #1121145 +which(colnames(TRY.all.mean.sd.3.by.genus.species)=="LeafArea.mean") # 3 +which(colnames(TRY.all.mean.sd.3)=="Wood.vessel.length.mean") # 20 +CWM <- array(NA,c(dim(CWM2)[1],18),dimnames=list(CWM2$PlotObservationID, + colnames(TRY.all.mean.sd.3)[3:20])) +rm(CWM2) +for (i in 1:18){ + DT$trait <- NA + DT$trait <- TRY.all.mean.sd.3.by.genus.species[index7,i+2] + CWM[,i] <- DT[,list(CWM.trait= weighted.mean(trait,Relative.cover,na.rm = T)),by=PlotObservationID]$CWM.trait + +} + +## recalculate relative cover +## + +library(picante) +# define function for mean pairwise distance +mpd.fun.dt <- function(abu, dis, nam, abundance.weighted){ + abu <- t(abu) + colnames(abu) <- nam + res <- mpd(abu, dis, abundance.weighted=abundance.weighted) +} +# define funtion of mean nearest neighbor distance +mntd.fun.dt <- function(abu, dis, nam, abundance.weighted){ + abu <- t(abu) + colnames(abu) <- nam + res <- mntd(abu, dis, abundance.weighted=abundance.weighted) + res +} +test<- DT[c(1:1000000),list(mpd.trait= mpd.fun(species, trait, Relative.cover, abundance.weighted=TRUE)),by=PlotObservationID] + +mpd.fun <- function(nam, trait, abu,abundance.weighted){ + #res <- numeric(1) # carries result from the function + res <- as.double(NA) + if (length(trait[!is.na(trait)])>0){ + # dis <- as.matrix(dist(t(t1),method="euclidean")) + abu <- t(abu) + colnames(abu) <- nam + trait <- t(trait) + colnames(trait) <- nam + #dis <- as.matrix(vegdist(t(t1),method="euclidean", na.rm=T)) + dis <- as.matrix(dist(t(trait),method="euclidean")) + res <- mpd(abu, dis, abundance.weighted=abundance.weighted) + } + res +} + +str(CWM) #num [1:1117898, 1:18] +CWM[1:20,] +tail(CWM) +write.csv(CWM,file = "CWM_TRY3.csv", row.names = T) + + + +### OLD ### +index1 <- match(DT$PlotObservationID, splot.header$PlotObservationID) +any(is.na(index1)) #F +length(index1) # 24241941 +length(unique(index1[!is.na(index1)])) #1117940 + + +### OLD +index1 <- match(DT$"Matched concept",backbone.splot2.1.try3$names.sPlot.TRY) +any(is.na(index1)) #F +length(index1) #23586216 +length(index1[is.na(index1)]) #0 + +unique(backbone.splot2.1.try3$rank.short.correct) +# "higher" "genus" "family" "species" NA +DT$rank.short.correct <- backbone.splot2.1.try3$rank.short.correct[index1] +table(DT$rank.short.correct, exclude=NULL) +' family genus higher species <NA> + 25437 470078 6295 23053791 30615 ' + -- GitLab