Timestamp: Fri Mar 13 14:19:26 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents 1) the construction of Community Weighted Means (CWMs) and Variances (CWVs); and 2) the classification of plots into forest/non-forest based on species growth forms. It complements species composition data from sPlot 3.0 and gap-filled plant functional traits from TRY 5.0, as received from Jens Kattge on Jan 21, 2020.

library(tidyverse)
library(readr)
library(data.table)
library(knitr)
library(kableExtra)
library(stringr)
library(caret)
library(viridis)

1 Data import, preparation and cleaning

# Restore previously saved R objects from earlier steps of the workflow.
# NOTE(review): presumably these files provide `DT2` (species x plot table) and
# `Backbone` (taxonomic backbone), which are used below without being created
# in this script -- confirm against the scripts that wrote the .RData files.
load("../_output/DT_sPlot3.0.RData")
load("../_output/Backbone3.0.RData")

Import TRY data

# TRY 5.0 input tables. All three files are read as latin1-encoded CSV.

# Taxonomic hierarchy: species, genus, family
try.species <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv",
  locale = locale(encoding = "latin1")
)

# Original observations (no gap-filling), carrying species and trait labels.
# Column types: 3 numeric columns followed by 89 character columns (92 total).
try.allinfo <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv",
  col_types = paste0("dddccccc", strrep("c", 84)),
  locale = locale(encoding = "latin1")
)

# Individual-level gap-filled trait values; rows are in the same order as in
# try.allinfo
try.individuals0 <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv",
  locale = locale(encoding = "latin1")
)

There are 609355 individual observations from 52104 distinct (unresolved) species in 7960 distinct (unresolved) genera.

1.2 Attach resolved names from Backbone

# One row per TRY record: original species name, resolved short name from the
# Backbone, genus and growth form.
# NOTE(review): the result is later column-bound to the trait table, so the
# join must not add rows; this assumes Name_sPlot_TRY is unique in Backbone --
# TODO confirm.
try.species.names <- left_join(
  x = try.allinfo %>%
    dplyr::select(Species, Genus, GrowthForm),
  # Backbone lookup, keyed on the name as spelled in TRY
  y = Backbone %>%
    dplyr::select(Species = Name_sPlot_TRY, Name_short),
  by = "Species"
) %>%
  dplyr::select(Species, Name_short, Genus, GrowthForm)

After attaching resolved names, TRY data contains information on 50612 species.
Check how many of the species in sPlot have trait information available in TRY.

# Distinct species names occurring in the sPlot species x plot table
sPlot.species <- distinct(DT2, species)

# Subset of those names that also appear among the resolved TRY names
sPlot.in.TRY <- sPlot.species %>%
  filter(species %in% unique(try.species.names$Name_short))

Out of the 76912 standardized species names in sPlot 3.0, 29519 (38.4%) also occur in TRY 5.0. This number does not account for matches at the genus level.

1.3 Create legend of trait names

# Build a lookup table of TRY traits with columns:
#   traitcode - numeric TRY trait id, parsed from the column name
#   full      - full trait label (the text following the numeric id)
#   short     - short name used for the trait columns in the rest of the script
#   available - TRUE if a column "X<traitcode>" exists in try.individuals0
# Start from the "StdValue_*" column names of try.allinfo, prefix stripped.
trait.legend <- data.frame(full=try.allinfo %>% 
                             dplyr::select(starts_with("StdValue_")) %>% 
                             colnames() %>% 
                             gsub("StdValue_", "", .) %>% 
                             sort()) %>%
  # data.frame() may have turned the labels into a factor; force character
  mutate(full=as.character(full)) %>% 
  # the first number found in the label is taken as the trait code
  mutate(traitcode=parse_number(full)) %>% 
  arrange(traitcode) %>% 
  dplyr::select(traitcode, everything()) %>% 
  # drop the leading "<digits>_" from the label
  mutate(full=gsub(pattern = "^[0-9]+_", replacement="", full)) %>% 
  # Short names are assigned BY POSITION: this vector must hold exactly one
  # name per row, in ascending traitcode order (41 names). Adding, removing or
  # reordering traits upstream requires updating it by hand.
  mutate(short=c("StemDens", "RootingDepth","LeafC.perdrymass", "LeafN","LeafP",
                 "StemDiam","SeedMass", "Seed.length","LeafThickness","LDMC",
                 "LeafNperArea","LeafDryMass.single","Leaf.delta.15N","SeedGerminationRate",
                 "Seed.num.rep.unit","LeafLength","LeafWidth","LeafCN.ratio","Leaffreshmass",
                 "Stem.cond.dens","Chromosome.n","Chromosome.cDNAcont", 
                 "Disp.unit.leng","StemConduitDiameter","Wood.vessel.length",
                 "WoodFiberLength","SpecificRootLength.fine","SpecificRootLength",
                 "PlantHeight.veg","PlantHeight.generative","LeafArea.leaf.noPet",
                 "LeafArea.leaflet.noPet","LeafArea.leaf.wPet","LeafArea.leaflet.wPet",
                 "LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef",
                 "SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>% 
  ## Add SLA missing from allinfo file
  bind_rows(data.frame(traitcode=11, 
                       full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)",
                       short="SLA")) %>% 
  # Same for overall plant height (trait 18), also appended by hand
  # (presumably because it has no StdValue_ column either -- confirm)
  bind_rows(data.frame(traitcode=18, 
                       full="Plant height (vegetative + generative)", 
                       short="PlantHeight")) %>%
  # restore ascending traitcode order after appending the two rows
  arrange(traitcode) %>% 
  #create a column to mark traits for which gap filled data is available.
  mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0))
Legend of traits from TRY
traitcode full short available
4 Stem specific density (SSD) or wood density (stem dry mass per stem fresh volume)_g/cm3 StemDens TRUE
6 Root rooting depth_m RootingDepth TRUE
11 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA) SLA TRUE
13 Leaf carbon (C) content per leaf dry mass_mg/g LeafC.perdrymass TRUE
14 Leaf nitrogen (N) content per leaf dry mass_mg/g LeafN TRUE
15 Leaf phosphorus (P) content per leaf dry mass_mg/g LeafP TRUE
18 Plant height (vegetative + generative) PlantHeight TRUE
21 Stem diameter_m StemDiam TRUE
26 Seed dry mass_mg SeedMass TRUE
27 Seed length_mm Seed.length TRUE
46 Leaf thickness_mm LeafThickness TRUE
47 Leaf dry mass per leaf fresh mass (leaf dry matter content, LDMC)_g g-1 LDMC TRUE
50 Leaf nitrogen (N) content per leaf area_g m-2 LeafNperArea TRUE
55 Leaf dry mass (single leaf)_mg LeafDryMass.single TRUE
78 Leaf nitrogen (N) isotope signature (delta 15N)_per mill Leaf.delta.15N TRUE
95 Seed germination rate (germination efficiency)_% SeedGerminationRate TRUE
138 Seed number per reproducton unit_number Seed.num.rep.unit TRUE
144 Leaf length_mm LeafLength TRUE
145 Leaf width_cm LeafWidth TRUE
146 Leaf carbon/nitrogen (C/N) ratio_g/cm3 LeafCN.ratio TRUE
163 Leaf fresh mass_g Leaffreshmass TRUE
169 Stem conduit density (vessels and tracheids)_mm-2 Stem.cond.dens TRUE
223 Species genotype: chromosome number_dimensionless Chromosome.n TRUE
224 Species genotype: chromosome cDNA content_pg Chromosome.cDNAcont TRUE
237 Dispersal unit length_mm Disp.unit.leng TRUE
281 Stem conduit diameter (vessels, tracheids)_micro m StemConduitDiameter TRUE
282 Wood vessel element length; stem conduit (vessel and tracheids) element length_micro m Wood.vessel.length TRUE
289 Wood fiber lengths_micro m WoodFiberLength TRUE
614 Fine root length per fine root dry mass (specific fine root length, SRL)_cm/g SpecificRootLength.fine FALSE
1080 Root length per root dry mass (specific root length, SRL)_cm/g SpecificRootLength TRUE
3106 Plant height vegetative_m PlantHeight.veg FALSE
3107 Plant height generative_m PlantHeight.generative FALSE
3108 Leaf area (in case of compound leaves: leaf, petiole excluded)_mm2 LeafArea.leaf.noPet FALSE
3109 Leaf area (in case of compound leaves: leaflet, petiole excluded)_mm2 LeafArea.leaflet.noPet FALSE
3110 Leaf area (in case of compound leaves: leaf, petiole included)_mm2 LeafArea.leaf.wPet FALSE
3111 Leaf area (in case of compound leaves: leaflet, petiole included)_mm2 LeafArea.leaflet.wPet FALSE
3112 Leaf area (in case of compound leaves: leaf, undefined if petiole in- or excluded)_mm2 LeafArea.leaf.undef TRUE
3113 Leaf area (in case of compound leaves: leaflet, undefined if petiole is in- or excluded)_mm2 LeafArea.leaflet.undef TRUE
3114 Leaf area (in case of compound leaves undefined if leaf or leaflet, undefined if petiole is in- or excluded)_mm2 LeafArea.undef.undef TRUE
3115 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded_mm2 mg-1 SLA.noPet FALSE
3116 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole included_mm2 mg-1 SLA.wPet FALSE
3117 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded_mm2 mg-1 SLA.undef FALSE
3120 Leaf water content per leaf dry mass (not saturated)_g(W)/g(DM) LeafWaterCont TRUE

Use trait legend to change naming of try.individuals0 data.frame of traits

# Old -> new column names for the traits that have gap-filled data.
# Both vectors are taken from the same rows of trait.legend, in the same
# order, so element i of col.from is renamed to element i of col.to.
col.to <- trait.legend$short[trait.legend$available]
col.from <- paste0("X", trait.legend$traitcode[trait.legend$available])

# Replace the "X<traitcode>" headers of the gap-filled table with short names
try.individuals <- rename_at(
  try.individuals0,
  .vars = col.from,
  .funs = function(old.names) col.to
)

1.3 Fix some known errors in the gap-filled matrix

Check traits at the individual level. There are some traits with unexpected negative entries:

# Diagnostic: for each trait, count the individual records whose gap-filled
# value is negative. Records are labelled with their resolved species name;
# the X1 column is left out so only trait columns are reshaped.
bind_cols(
  dplyr::select(try.species.names, Name_short),
  dplyr::select(try.individuals, -X1)
) %>%
  gather(key = variable, value = value, -Name_short) %>%
  filter(value < 0) %>%
  count(variable)
## # A tibble: 5 x 2
##   variable                 n
##   <chr>                <int>
## 1 LDMC                   419
## 2 LeafC.perdrymass         9
## 3 Leaf.delta.15N      262283
## 4 SeedGerminationRate    120
## 5 StemDens               337

According to Jens Kattge, the entries for Leaf.delta.15N are legitimate, while in the other cases the negative values may be due to bad predictions. He suggested deleting these negative records.
Similarly, there are records with impossible values for height: some species are incorrectly predicted to have a height >100 meters, and some herbs are predicted to have a height >10 m.

# Prepend the resolved species name to every individual record
# (rows of both tables are matched by position).
try.individuals <- bind_cols(
  dplyr::select(try.species.names, Name_short),
  try.individuals
)

# 1) Records with a negative value in any trait other than Leaf.delta.15N.
#    A record appears once per offending trait, so ids may be repeated.
toexclude <- try.individuals %>%
  gather(key = variable, value = value, -X1, -Name_short) %>%
  filter(variable != "Leaf.delta.15N", value < 0) %>%
  pull(X1)

# 2) Records taller than 100 m, except for the two species listed
toexclude2 <- try.individuals %>%
  filter(
    PlantHeight > 100,
    !(Name_short %in% c("Pseudotsuga menziesii", "Sequoia sempervirens"))
  ) %>%
  pull(X1)

# 3) Records flagged as growth form "herb" in try.allinfo but taller than 10 m
toexclude3 <- try.individuals %>%
  filter(
    X1 %in% (try.allinfo %>%
               filter(GrowthForm == "herb") %>%
               pull(X1)),
    PlantHeight > 10
  ) %>%
  pull(X1)

# Drop all flagged records; the X1 identifier is not needed afterwards
try.individuals <- try.individuals %>%
  filter(!(X1 %in% c(toexclude, toexclude2, toexclude3))) %>%
  dplyr::select(-X1)

This results in the exclusion of 874 individuals. In this way, the total number of species included in TRY is reduced to 50404.

1.4 Calculate species and genus level trait means and sd

## Calculate species level trait means and sd.
# For every resolved species: n = number of individual records, followed by
# <trait>_mean for all traits and then <trait>_sd for all traits.
# sd is NA for taxa with a single record.
#
# The join is wrapped in braces on purpose. When the dot only occurs inside
# nested calls, magrittr ALSO passes the left-hand side as the first positional
# argument; with x, y and by all named, the grouped table ended up in
# left_join()'s `copy` argument and only worked because `copy` is never
# evaluated for local data frames. Inside `{ }` the dot is used only where
# written.
try.species.means <- try.individuals %>% 
  group_by(Name_short) %>% 
  {
    left_join(
      # number of observations per taxon
      x = summarize(., n = n()),
      # per-taxon mean and sd of every trait column
      y = summarize_at(.,
                       .vars = vars(StemDens:LeafWaterCont),
                       .funs = list(mean = mean, sd = sd)),
      by = "Name_short"
    )
  } %>% 
  dplyr::select(Name_short, n, everything())

## Calculate genus level trait means and sd.
# Same layout as above; the genus is the first word of the resolved name.
try.genus.means <- try.individuals %>% 
  mutate(Genus = word(Name_short, 1)) %>% 
  group_by(Genus) %>% 
  {
    left_join(
      x = summarize(., n = n()),
      y = summarize_at(.,
                       .vars = vars(StemDens:LeafWaterCont),
                       .funs = list(mean = mean, sd = sd)),
      by = "Genus"
    )
  } %>% 
  dplyr::select(Genus, n, everything())

The average number of observations per species and genus is 12.1 and 81.5, respectively. As many as 17443 species have only one observation (1250 at the genus level).

Example of trait means for 15 randomly selected species
Name_short n StemDens_mean RootingDepth_mean SLA_mean LeafC.perdrymass_mean LeafN_mean LeafP_mean PlantHeight_mean StemDiam_mean SeedMass_mean Seed.length_mean LeafThickness_mean LDMC_mean LeafNperArea_mean LeafDryMass.single_mean Leaf.delta.15N_mean SeedGerminationRate_mean Seed.num.rep.unit_mean LeafLength_mean LeafWidth_mean LeafCN.ratio_mean Leaffreshmass_mean Stem.cond.dens_mean Chromosome.n_mean Chromosome.cDNAcont_mean Disp.unit.leng_mean StemConduitDiameter_mean Wood.vessel.length_mean WoodFiberLength_mean SpecificRootLength_mean LeafArea.leaf.undef_mean LeafArea.leaflet.undef_mean LeafArea.undef.undef_mean LeafWaterCont_mean StemDens_sd RootingDepth_sd SLA_sd LeafC.perdrymass_sd LeafN_sd LeafP_sd PlantHeight_sd StemDiam_sd SeedMass_sd Seed.length_sd LeafThickness_sd LDMC_sd LeafNperArea_sd LeafDryMass.single_sd Leaf.delta.15N_sd SeedGerminationRate_sd Seed.num.rep.unit_sd LeafLength_sd LeafWidth_sd LeafCN.ratio_sd Leaffreshmass_sd Stem.cond.dens_sd Chromosome.n_sd Chromosome.cDNAcont_sd Disp.unit.leng_sd StemConduitDiameter_sd Wood.vessel.length_sd WoodFiberLength_sd SpecificRootLength_sd LeafArea.leaf.undef_sd LeafArea.leaflet.undef_sd LeafArea.undef.undef_sd LeafWaterCont_sd
Centaurium beyrichii 3 0.567 0.095 30.607 454.321 17.857 0.943 0.178 0.011 0.054 0.506 0.220 0.207 0.617 9.812 -0.503 96.860 1801.255 8.523 0.472 44.162 0.054 43.813 35.003 2.721 0.535 18.328 328.194 566.881 5999.193 302.882 126.018 151.069 6.505 0.001 0.004 0.589 1.391 0.149 0.013 0.018 0.001 0.016 0.044 0.002 0.004 0.010 0.397 0.153 2.941 122.488 0.209 0.020 0.793 0.002 1.845 0.046 0.007 0.043 0.726 5.599 16.255 103.513 9.602 0.965 7.487 0.082
Erythroxylum confusum 2 0.716 0.423 13.123 476.841 20.696 1.159 3.126 0.073 21.184 3.054 0.259 0.372 1.749 114.034 1.149 101.707 4564.633 28.599 1.095 24.657 0.312 86.468 26.279 5.787 3.586 34.429 628.565 1095.258 3670.488 760.530 896.085 2253.745 2.558 0.007 0.009 0.012 1.407 0.330 0.019 0.006 0.001 0.675 0.046 0.001 0.001 0.034 13.479 0.057 0.396 731.495 0.293 0.029 0.103 0.035 0.454 0.091 0.002 0.024 0.253 9.497 13.031 209.586 90.219 74.498 119.474 0.020
Macledium plantaginifolium 6 0.431 0.500 17.947 430.942 22.385 1.649 0.062 0.006 0.439 1.722 0.288 0.179 1.431 63.346 0.979 95.583 353.093 89.627 4.693 29.159 0.325 150.132 27.710 4.911 2.614 29.815 503.140 771.329 7342.450 875.005 711.051 1369.476 5.995 0.013 0.048 0.523 2.510 0.481 0.068 0.017 0.001 0.046 0.153 0.006 0.006 0.040 29.034 0.160 1.364 188.830 67.290 3.676 0.143 0.149 10.999 1.288 0.278 0.221 1.487 34.732 49.044 279.968 402.163 286.723 863.639 0.189
Dracocephalum stamineum 1 0.473 0.397 17.156 456.988 32.450 0.947 0.142 0.039 1.238 1.891 0.234 0.249 1.782 9.338 0.786 75.895 33.682 20.333 1.049 15.238 0.042 64.487 16.098 2.232 2.541 30.286 199.930 524.451 3880.706 143.676 281.742 438.693 4.710 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Gutenbergia westii 5 0.463 0.848 18.387 443.249 19.592 1.429 0.795 0.022 0.784 2.192 0.240 0.244 1.152 43.960 -0.106 69.580 877.181 59.294 1.850 33.899 0.195 82.515 28.917 4.468 2.872 35.926 427.801 908.704 5256.775 748.139 630.576 639.862 4.957 0.005 0.052 0.413 1.319 0.268 0.038 0.161 0.001 0.060 0.123 0.005 0.004 0.013 12.420 0.153 1.129 187.695 26.786 0.996 0.347 0.055 4.459 0.852 0.136 0.145 1.051 24.965 41.213 219.216 212.053 158.901 256.539 0.091
Ximenia americana 37 0.822 4.785 6.241 452.916 17.692 1.247 5.405 0.053 656.444 24.254 0.113 0.654 2.894 63.714 3.228 74.972 0.632 42.643 2.043 44.078 0.064 10.389 34.840 3.206 25.811 43.612 301.814 1267.811 1286.962 1183.807 1151.937 366.611 0.542 0.037 0.491 1.399 2.874 1.289 0.079 0.760 0.009 62.030 1.490 0.010 0.021 0.556 13.430 0.179 3.276 0.119 9.249 0.431 2.753 0.010 0.451 0.858 0.077 1.580 2.057 11.068 42.227 110.898 192.507 168.663 65.260 0.052
Erica johnstoniana 10 0.577 0.174 8.151 522.106 9.422 0.818 0.727 0.158 0.082 0.726 0.166 0.390 1.349 2.983 -2.699 81.563 2046.082 4.012 0.199 58.180 0.008 146.991 25.000 1.760 0.668 32.358 523.603 626.854 1284.830 23.114 12.807 13.890 2.340 0.007 0.022 0.158 2.409 0.226 0.031 0.410 0.035 0.015 0.075 0.003 0.007 0.022 0.848 0.196 1.049 1913.907 1.880 0.087 1.001 0.002 30.482 1.613 0.057 0.056 4.483 121.662 75.083 139.774 6.607 3.383 5.004 0.069
Aegiphila cephalophora 47 0.454 0.396 24.788 464.402 30.305 1.633 7.634 0.068 20.383 3.988 0.224 0.272 1.311 177.236 2.293 81.360 641.517 104.584 7.436 19.247 0.708 63.436 29.777 2.436 5.781 34.463 383.310 919.072 6514.713 4321.571 4416.020 4335.465 4.693 0.008 0.040 3.746 1.593 1.278 0.081 0.452 0.007 1.649 0.157 0.023 0.026 0.127 34.412 0.083 0.488 65.943 6.964 0.643 0.679 0.122 2.798 0.484 0.055 0.238 1.248 10.206 16.873 415.605 586.217 515.194 561.638 0.604
Tragia furialis 4 0.555 1.212 14.611 428.919 10.485 0.790 0.545 0.023 10.483 1.809 0.256 0.281 0.872 218.132 1.190 88.764 289.120 89.915 3.530 63.519 0.747 25.394 23.807 4.679 2.398 32.852 375.868 781.889 4291.968 2648.300 2712.412 4196.031 3.682 0.005 0.072 0.420 1.836 0.142 0.027 0.075 0.001 0.701 0.077 0.004 0.005 0.019 54.487 0.096 0.980 100.854 40.303 1.702 0.492 0.187 0.907 0.428 0.161 0.105 1.138 18.670 32.068 104.820 680.378 642.966 1517.291 0.065
Amberboa glauca 1 0.473 0.875 14.916 444.242 20.238 1.620 0.628 0.018 2.967 3.419 0.287 0.255 1.453 39.809 0.634 89.827 661.733 63.102 1.318 28.835 0.165 77.310 27.461 5.009 4.344 33.619 456.478 904.224 3589.555 528.584 545.589 642.198 4.578 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Artabotrys vanprukii 3 0.600 1.402 17.059 474.072 24.671 1.247 4.831 0.289 229.997 5.616 0.154 0.339 1.584 233.077 2.755 56.828 8.499 67.753 3.300 19.779 0.692 10.333 28.131 2.658 5.155 91.044 440.862 1024.450 545.714 3023.022 2610.311 3259.601 3.230 0.002 0.271 0.172 2.691 0.671 0.030 0.023 0.003 3.472 0.367 0.003 0.002 0.027 17.721 0.027 1.510 6.724 5.591 0.371 0.249 0.053 3.323 2.845 0.111 0.182 18.491 129.524 186.488 30.494 236.462 101.292 331.270 0.079
Hakea minyma 9 0.793 5.485 2.916 464.200 8.469 0.265 2.218 0.137 9.630 3.624 0.932 0.441 2.909 142.431 0.288 95.392 450.913 17.614 1.596 56.156 0.319 208.862 13.329 1.968 4.373 16.555 305.234 636.889 4860.843 376.332 386.763 566.380 2.277 0.004 0.250 0.060 1.099 0.077 0.008 0.116 0.007 1.633 0.136 0.009 0.003 0.062 14.007 0.119 0.444 56.085 1.199 0.111 0.526 0.031 6.075 0.122 0.028 0.168 0.354 3.615 7.742 230.112 37.559 32.186 62.014 0.030
Galanthus nivalis 51 0.319 0.165 26.178 446.117 30.725 2.383 0.147 0.010 6.470 3.624 0.309 0.104 1.363 52.496 1.109 81.973 18.746 122.501 0.371 16.405 0.347 6.740 23.947 17.114 4.587 34.396 456.795 1341.087 2230.299 1079.673 1489.264 447.483 7.410 0.004 0.010 1.717 1.024 0.570 0.052 0.028 0.001 0.674 0.236 0.008 0.007 0.062 7.422 0.091 0.423 5.430 9.929 0.032 0.289 0.044 0.273 0.264 0.255 0.280 0.989 12.361 30.863 89.896 125.672 185.267 54.073 0.238
Utricularia baoulensis 3 0.464 0.102 67.929 405.155 28.444 1.508 0.282 0.009 0.025 0.747 0.198 0.133 0.391 0.920 4.996 88.306 34.012 22.633 0.091 12.104 0.007 39.560 42.740 0.772 1.040 19.627 98.105 376.070 17173.734 62.818 118.325 64.294 15.393 0.005 0.008 3.112 0.937 0.853 0.011 0.046 0.000 0.002 0.028 0.002 0.007 0.007 0.271 0.149 0.907 3.289 8.145 0.045 0.185 0.002 0.627 0.656 0.040 0.025 0.470 5.702 11.754 425.072 19.667 32.632 24.557 0.490
Aspalathus pachyloba 1 0.571 0.979 11.230 464.302 23.558 1.296 1.994 0.130 5.021 3.027 0.244 0.354 2.188 66.808 0.944 95.009 78.946 29.737 0.935 22.215 0.196 26.015 21.585 2.848 4.317 19.210 267.666 619.429 1416.779 809.560 579.425 558.678 2.919 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA

1.5 Match taxa based on species, if available, or Genus

Combine the trait means based on species and genera into a single object, and check how many of these taxa match the (resolved) species names in DT2.

# Stack genus-level and species-level trait summaries into one table, keyed by
# taxon name and the rank ("genus" / "species") at which it was summarised.
try.combined.means <- bind_rows(
  try.genus.means %>%
    rename(Taxon_name = Genus) %>%
    mutate(Rank_correct = "genus"),
  try.species.means %>%
    rename(Taxon_name = Name_short) %>%
    mutate(Rank_correct = "species")
) %>%
  dplyr::select(Taxon_name, Rank_correct, everything())

# Number of distinct sPlot taxa (name + rank) that found a trait record;
# a non-missing SLA_mean is used as the indicator of a successful match.
total.matches <- DT2 %>%
  distinct(species, Rank_correct) %>%
  left_join(
    try.combined.means,
    by = c("species" = "Taxon_name", "Rank_correct" = "Rank_correct")
  ) %>%
  filter(!is.na(SLA_mean)) %>%
  nrow()

The total number of matched taxa (either at species, or genus level) is 31822.

1.6 Calculate summary statistics for species- and genus-level mean traits

# Distribution of the taxon-level trait means and sds, separately for species
# and genera. Result: one row per rank x trait x (mean|sd), with one column
# per summary statistic.
# NOTE(review): the column labelled "q025" holds the 0.25 quantile (first
# quartile), not the 2.5% quantile.
mysummary <- try.combined.means %>%
  group_by(Rank_correct) %>%
  summarize_at(
    .vars = vars(StemDens_mean:LeafWaterCont_sd),
    .funs = list(
      min  = ~min(., na.rm = TRUE),
      q025 = ~quantile(., probs = 0.25, na.rm = TRUE),
      q50  = ~quantile(., probs = 0.50, na.rm = TRUE),
      q75  = ~quantile(., probs = 0.75, na.rm = TRUE),
      max  = ~max(., na.rm = TRUE),
      mean = ~mean(., na.rm = TRUE),
      sd   = ~sd(., na.rm = TRUE)
    )
  ) %>%
  # long format: one row per summarised column
  gather(key = variable, value = value, -Rank_correct) %>%
  # column names look like <trait>_<mean|sd>_<stat>; split on the underscores
  separate(col = variable, into = c("variable", "mean.sd", "stat"), sep = "_") %>%
  # fix the left-to-right order of the statistic columns
  mutate(stat = factor(
    stat,
    levels = c("min", "q025", "q50", "q75", "max", "mean", "sd")
  )) %>%
  spread(key = stat, value = value) %>%
  arrange(desc(Rank_correct))
Summary statistics for each trait, when summarized across species or genera
Rank_correct variable mean.sd min q025 q50 q75 max mean sd
species Chromosome.cDNAcont mean 0.006 2.150 2.992 4.595 1.067308e+03 4.344000e+00 8.248000e+00
species Chromosome.cDNAcont sd 0.000 0.026 0.047 0.090 1.761220e+02 9.500000e-02 1.127000e+00
species Chromosome.n mean 0.062 22.492 28.266 35.685 7.671771e+03 3.109500e+01 6.145900e+01
species Chromosome.n sd 0.000 0.242 0.400 0.692 1.621340e+02 6.730000e-01 1.571000e+00
species Disp.unit.leng mean 0.003 2.275 3.864 6.895 1.336988e+04 6.340000e+00 7.992200e+01
species Disp.unit.leng sd 0.000 0.069 0.150 0.308 9.470706e+03 5.790000e-01 5.220200e+01
species LDMC mean 0.012 0.243 0.321 0.383 1.372000e+00 3.140000e-01 9.700000e-02
species LDMC sd 0.000 0.003 0.004 0.008 2.250000e-01 8.000000e-03 1.000000e-02
species LeafArea.leaflet.undef mean 0.000 341.100 1039.636 2649.837 1.740828e+13 3.518983e+08 7.755323e+10
species LeafArea.leaflet.undef sd 0.000 20.713 76.627 245.085 2.199776e+13 6.776027e+08 1.211795e+11
species LeafArea.leaf.undef mean 0.000 343.283 1100.777 3115.948 1.341026e+11 2.667355e+06 5.973172e+08
species LeafArea.leaf.undef sd 0.000 22.934 87.305 292.521 3.408568e+09 1.475566e+05 2.038124e+07
species LeafArea.undef.undef mean 0.000 345.679 1175.095 3505.504 1.524769e+11 3.042582e+06 6.791606e+08
species LeafArea.undef.undef sd 0.000 24.273 99.143 369.860 4.133230e+09 2.504824e+05 3.205707e+07
species LeafCN.ratio mean 0.213 20.712 25.571 32.672 1.599229e+03 2.958100e+01 2.170200e+01
species LeafCN.ratio sd 0.000 0.233 0.406 0.817 2.016540e+02 7.940000e-01 2.033000e+00
species LeafC.perdrymass mean 93.949 441.375 456.739 474.192 1.131692e+03 4.565940e+02 2.970600e+01
species LeafC.perdrymass sd 0.000 0.804 1.231 1.880 7.672000e+01 1.737000e+00 2.240000e+00
species Leaf.delta.15N mean -31.434 -0.612 0.753 2.226 4.910200e+01 8.140000e-01 2.464000e+00
species Leaf.delta.15N sd 0.000 0.074 0.111 0.162 3.888400e+01 1.470000e-01 2.690000e-01
species LeafDryMass.single mean 0.000 27.531 88.304 280.901 1.023338e+17 2.030271e+12 4.558127e+14
species LeafDryMass.single sd 0.000 1.980 7.662 27.349 3.076084e+15 9.332895e+10 1.694330e+13
species Leaffreshmass mean 0.000 0.099 0.306 0.835 2.906497e+13 5.766402e+08 1.294605e+11
species Leaffreshmass sd 0.000 0.006 0.023 0.077 8.032411e+11 2.437070e+07 4.424312e+09
species LeafLength mean 0.000 29.485 57.719 97.435 3.089342e+08 6.260771e+03 1.376050e+06
species LeafLength sd 0.000 1.513 3.977 9.912 3.388372e+05 3.044500e+01 2.484372e+03
species LeafN mean 0.343 16.117 19.914 24.175 1.310526e+03 2.062200e+01 1.042300e+01
species LeafN sd 0.000 0.159 0.288 0.618 5.074300e+01 5.850000e-01 8.910000e-01
species LeafNperArea mean 0.002 1.196 1.489 1.878 4.016178e+04 2.427000e+00 1.788850e+02
species LeafNperArea sd 0.000 0.017 0.031 0.073 2.411833e+03 1.450000e-01 1.328600e+01
species LeafP mean 0.000 0.996 1.359 1.803 6.392164e+04 3.184000e+00 2.962370e+02
species LeafP sd 0.000 0.017 0.032 0.058 9.827822e+03 3.540000e-01 5.413300e+01
species LeafThickness mean 0.005 0.188 0.229 0.288 1.197200e+02 2.720000e-01 7.950000e-01
species LeafThickness sd 0.000 0.002 0.003 0.008 5.255759e+03 1.670000e-01 2.894900e+01
species LeafWaterCont mean 0.096 2.525 3.307 4.734 4.260880e+02 3.940000e+00 2.942000e+00
species LeafWaterCont sd 0.000 0.038 0.073 0.149 2.759500e+01 1.410000e-01 2.630000e-01
species LeafWidth mean 0.000 0.691 1.902 4.051 3.477895e+04 6.675000e+00 2.505850e+02
species LeafWidth sd 0.000 0.042 0.131 0.376 5.949567e+05 2.779000e+01 3.687968e+03
species PlantHeight mean 0.000 0.403 1.356 7.556 9.476600e+01 5.124000e+00 7.274000e+00
species PlantHeight sd 0.000 0.040 0.133 0.508 2.685300e+01 5.430000e-01 1.101000e+00
species RootingDepth mean 0.000 0.410 0.845 1.617 2.519319e+09 5.003361e+04 1.122150e+07
species RootingDepth sd 0.000 0.019 0.046 0.107 8.971563e+07 2.724074e+03 4.941605e+05
species SeedGerminationRate mean 5.441 83.595 88.845 93.286 2.416300e+02 8.756700e+01 1.031000e+01
species SeedGerminationRate sd 0.000 0.345 0.523 0.843 1.864950e+02 9.190000e-01 1.904000e+00
species Seed.length mean 0.004 1.817 3.029 5.599 1.089329e+04 4.931000e+00 5.310000e+01
species Seed.length sd 0.000 0.056 0.121 0.242 3.333800e+01 2.150000e-01 3.970000e-01
species SeedMass mean 0.000 0.730 4.336 36.238 9.521438e+08 1.938338e+04 4.241478e+06
species SeedMass sd 0.000 0.054 0.356 2.815 4.003387e+07 2.057742e+03 2.619802e+05
species Seed.num.rep.unit mean 0.000 42.927 180.778 791.571 2.225078e+25 4.416757e+20 9.910894e+22
species Seed.num.rep.unit sd 0.000 5.626 28.349 158.023 2.160083e+19 1.105285e+15 1.443121e+17
species SLA mean 0.000 10.691 14.520 18.798 5.490787e+03 1.576800e+01 2.873900e+01
species SLA sd 0.000 0.168 0.331 0.845 1.012120e+02 9.090000e-01 1.679000e+00
species SpecificRootLength mean 0.000 1337.375 2584.070 5187.719 3.071495e+09 2.477014e+05 2.296632e+07
species SpecificRootLength sd 0.000 48.689 111.593 254.706 8.360206e+08 3.145860e+04 4.669425e+06
species Stem.cond.dens mean 0.000 21.114 48.153 91.796 1.988212e+08 7.928559e+03 1.206849e+06
species Stem.cond.dens sd 0.000 0.689 1.873 4.707 1.210633e+07 4.337880e+02 6.759687e+04
species StemConduitDiameter mean 0.001 25.103 35.442 50.841 8.065496e+07 1.667555e+03 3.592592e+05
species StemConduitDiameter sd 0.000 0.549 1.006 1.853 1.181921e+03 2.191000e+00 9.768000e+00
species StemDens mean 0.007 0.433 0.535 0.637 2.640000e+00 5.410000e-01 1.480000e-01
species StemDens sd 0.000 0.003 0.005 0.009 1.253000e+00 1.000000e-02 1.700000e-02
species StemDiam mean 0.000 0.014 0.056 0.147 1.398249e+09 2.791541e+04 6.228172e+06
species StemDiam sd 0.000 0.001 0.003 0.011 5.904310e+08 1.939426e+04 3.263229e+06
species WoodFiberLength mean 12.143 668.313 834.580 1030.174 1.068162e+07 1.129441e+03 4.764112e+04
species WoodFiberLength sd 0.000 9.803 17.565 31.011 1.166290e+04 2.805900e+01 8.083800e+01
species Wood.vessel.length mean 3.460 313.875 413.753 537.522 2.082033e+06 5.573660e+02 1.082510e+04
species Wood.vessel.length sd 0.000 4.775 9.037 16.889 1.457577e+03 1.660500e+01 3.369800e+01
genus Chromosome.cDNAcont mean 0.006 2.332 3.178 4.971 1.067308e+03 4.910000e+00 1.834900e+01
genus Chromosome.cDNAcont sd 0.000 0.050 0.093 0.180 1.761220e+02 2.320000e-01 2.534000e+00
genus Chromosome.n mean 0.062 22.564 28.269 35.909 7.671771e+03 3.408700e+01 1.361450e+02
genus Chromosome.n sd 0.000 0.400 0.753 1.466 1.621340e+02 1.499000e+00 3.564000e+00
genus Disp.unit.leng mean 0.003 2.350 3.834 6.221 1.336988e+04 9.928000e+00 2.071760e+02
genus Disp.unit.leng sd 0.000 0.135 0.297 0.650 9.470706e+03 2.212000e+00 1.202130e+02
genus LDMC mean 0.021 0.234 0.307 0.365 1.372000e+00 3.020000e-01 1.000000e-01
genus LDMC sd 0.000 0.005 0.010 0.020 1.260000e-01 1.500000e-02 1.500000e-02
genus LeafArea.leaflet.undef mean 0.000 451.346 1047.617 2688.736 1.740828e+13 2.375967e+09 2.015193e+11
genus LeafArea.leaflet.undef sd 0.000 44.133 147.050 601.772 2.199776e+13 3.593650e+09 2.790665e+11
genus LeafArea.leaf.undef mean 0.000 404.628 1116.991 3187.225 1.341026e+11 1.799434e+07 1.552110e+09
genus LeafArea.leaf.undef sd 0.000 46.194 170.877 688.171 3.408568e+09 7.812932e+05 4.693424e+07
genus LeafArea.undef.undef mean 0.000 411.128 1167.756 3672.239 1.524769e+11 2.052324e+07 1.764777e+09
genus LeafArea.undef.undef sd 0.000 47.796 194.844 833.147 4.133230e+09 1.326702e+06 7.382015e+07
genus LeafCN.ratio mean 0.213 21.214 26.130 32.273 1.599229e+03 3.011200e+01 3.461300e+01
genus LeafCN.ratio sd 0.001 0.435 0.922 2.062 2.016540e+02 2.135000e+00 7.000000e+00
genus LeafC.perdrymass mean 95.729 440.581 454.527 471.238 1.131692e+03 4.545320e+02 3.514300e+01
genus LeafC.perdrymass sd 0.003 1.329 2.310 4.378 5.565700e+01 3.914000e+00 4.711000e+00
genus Leaf.delta.15N mean -31.434 -0.359 0.963 2.262 4.885800e+01 1.011000e+00 2.858000e+00
genus Leaf.delta.15N sd 0.000 0.122 0.212 0.379 1.000600e+01 3.190000e-01 3.540000e-01
genus LeafDryMass.single mean 0.000 30.226 93.617 274.186 1.023338e+17 1.370848e+13 1.184415e+15
genus LeafDryMass.single sd 0.000 4.095 15.886 59.613 3.076084e+15 4.949663e+11 3.901914e+13
genus Leaffreshmass mean 0.000 0.122 0.319 0.858 2.906497e+13 3.893499e+09 3.363992e+11
genus Leaffreshmass sd 0.000 0.013 0.050 0.178 8.032411e+11 1.292490e+08 1.018885e+10
genus LeafLength mean 0.000 31.480 56.829 92.781 3.089342e+08 4.167893e+04 3.575619e+06
genus LeafLength sd 0.000 2.623 6.764 18.839 3.388372e+05 1.278230e+02 5.721569e+03
genus LeafN mean 0.343 16.841 20.136 23.942 1.310526e+03 2.133700e+01 2.132200e+01
genus LeafN sd 0.001 0.299 0.660 1.682 5.074300e+01 1.283000e+00 1.636000e+00
genus LeafNperArea mean 0.002 1.224 1.492 1.843 4.016178e+04 7.062000e+00 4.648240e+02
genus LeafNperArea sd 0.000 0.032 0.074 0.217 2.411833e+03 5.490000e-01 3.059500e+01
genus LeafP mean 0.000 1.079 1.357 1.759 6.392164e+04 1.289300e+01 7.697160e+02
genus LeafP sd 0.000 0.031 0.063 0.138 9.827822e+03 1.717000e+00 1.246630e+02
genus LeafThickness mean 0.005 0.196 0.234 0.290 1.197200e+02 3.280000e-01 2.032000e+00
genus LeafThickness sd 0.000 0.004 0.009 0.021 5.255759e+03 8.640000e-01 6.666700e+01
genus LeafWaterCont mean 0.096 2.687 3.531 4.930 4.260880e+02 4.253000e+00 5.594000e+00
genus LeafWaterCont sd 0.000 0.077 0.163 0.373 2.759500e+01 3.060000e-01 5.300000e-01
genus LeafWidth mean 0.000 0.806 1.791 3.847 3.477895e+04 2.419400e+01 6.266720e+02
genus LeafWidth sd 0.000 0.078 0.238 0.769 5.949567e+05 1.464300e+02 8.492696e+03
genus PlantHeight mean 0.000 0.417 1.193 5.767 8.203000e+01 4.524000e+00 6.885000e+00
genus PlantHeight sd 0.000 0.067 0.217 1.027 2.685300e+01 1.021000e+00 1.858000e+00
genus RootingDepth mean 0.000 0.450 0.813 1.475 2.519319e+09 3.378072e+05 2.915871e+07
genus RootingDepth sd 0.000 0.037 0.086 0.198 8.971563e+07 1.444732e+04 1.138014e+06
genus SeedGerminationRate mean 6.898 84.389 88.783 92.519 2.396310e+02 8.772900e+01 1.137400e+01
genus SeedGerminationRate sd 0.001 0.539 0.988 2.031 4.806300e+01 1.785000e+00 2.319000e+00
genus Seed.length mean 0.004 1.856 3.002 5.078 1.089329e+04 6.656000e+00 1.375240e+02
genus Seed.length sd 0.000 0.106 0.234 0.508 3.333800e+01 4.780000e-01 9.910000e-01
genus SeedMass mean 0.000 0.878 4.279 28.605 9.521438e+08 1.301089e+05 1.102133e+07
genus SeedMass sd 0.000 0.135 0.869 5.768 4.003387e+07 1.084308e+04 6.032767e+05
genus Seed.num.rep.unit mean 0.000 59.107 249.776 963.874 2.225078e+25 2.981679e+21 2.575315e+23
genus Seed.num.rep.unit sd 0.000 14.011 65.788 373.790 9.602388e+20 1.585957e+17 1.218345e+19
genus SLA mean 0.000 11.537 15.069 18.147 5.490787e+03 1.731800e+01 7.069700e+01
genus SLA sd 0.000 0.339 0.793 2.677 4.479200e+01 1.953000e+00 2.726000e+00
genus SpecificRootLength mean 0.000 1401.747 2645.280 5204.620 3.071495e+09 1.620677e+06 5.964294e+07
genus SpecificRootLength sd 0.000 98.616 224.938 532.960 8.360206e+08 1.501950e+05 1.062790e+07
genus Stem.cond.dens mean 0.000 23.214 50.056 90.841 1.912493e+08 2.745431e+04 2.217954e+06
genus Stem.cond.dens sd 0.000 1.464 3.724 9.577 1.210633e+07 4.009839e+03 2.065772e+05
genus StemConduitDiameter mean 0.001 25.009 34.985 49.387 8.065496e+07 1.093829e+04 9.335138e+05
genus StemConduitDiameter sd 0.000 1.016 2.027 4.366 5.503930e+03 6.333000e+00 7.618700e+01
genus StemDens mean 0.015 0.444 0.539 0.627 2.640000e+00 5.420000e-01 1.480000e-01
genus StemDens sd 0.000 0.006 0.011 0.023 3.250000e-01 2.000000e-02 2.500000e-02
genus StemDiam mean 0.000 0.015 0.054 0.135 1.398249e+09 1.884852e+05 1.618370e+07
genus StemDiam sd 0.000 0.002 0.007 0.021 5.904310e+08 1.028566e+05 7.514889e+06
genus WoodFiberLength mean 12.143 706.995 848.135 1034.349 1.068162e+07 2.486524e+03 1.237817e+05
genus WoodFiberLength sd 0.000 17.855 34.226 64.310 4.830371e+03 5.568300e+01 1.081420e+02
genus Wood.vessel.length mean 3.460 331.407 428.826 540.696 2.082033e+06 9.565180e+02 2.618349e+04
genus Wood.vessel.length sd 0.000 8.980 18.009 37.710 2.797090e+03 3.549700e+01 7.404900e+01

2 Calculate CWMs and CWVs for each plot

Merge vegetation layers, where necessary. Combine cover values across layers

# Ancillary function
# Merge several cover values (one per layer) into a single value.
# Values are folded pairwise as a + (100 - a) * b / 100, so each further
# layer only adds cover to the share left uncovered by the previous ones;
# the formula treats its input as percentages (0-100).
# Input of length 0 or 1 is returned unchanged.
combine.cover <- function(x){
  n.layers <- length(x)
  if (n.layers <= 1) {
    return(x)
  }
  pooled <- x[[1]]
  for (k in seq(2, n.layers)) {
    pooled <- pooled + (100 - pooled) * x[[k]] / 100
  }
  # write the result into the last slot and return that one-element vector
  x[n.layers] <- pooled
  x[n.layers]
}

# One record per plot x taxon (species + Rank_correct): cover values of the
# same taxon recorded in different layers are merged with combine.cover()
DT2.comb <- DT2 %>%
  group_by(PlotObservationID, species, Rank_correct) %>%
  summarize(Relative.cover = combine.cover(Relative.cover)) %>%
  ungroup() %>%
  # re-normalize: divide each record by the summed cover of its plot,
  # so that Relative.cover adds up to 1 within each plot
  group_by(PlotObservationID) %>%
  mutate(Relative.cover = Relative.cover / sum(Relative.cover)) %>%
  ungroup()

Calculate CWMs and CWVs, as well as plot coverage statistics (proportion of total cover for which trait info exists, and proportion of species for which we have trait info). To avoid misleading results, CWM is calculated ONLY for plots for which we have some abundance information. All plots where Ab_scale==“pa” in ANY of the layers are therefore excluded.

# Tag plots where at least one layer has only p\a information 
# (Ab_scale=="pa"). The resulting vector of plot IDs is used below to
# exclude these plots from the CWM calculation.
any_pa <- DT2 %>% 
  # one row per plot x abundance scale
  distinct(PlotObservationID, Ab_scale) %>% 
  group_by(PlotObservationID) %>% 
  # TRUE when any of the abundance scales recorded for the plot is "pa"
  summarize(any.pa=any(Ab_scale=="pa")) %>% 
  filter(any.pa==T) %>% 
  pull(PlotObservationID)
# number of tagged plots
length(any_pa)
## [1] 270855
# Exclude plots above and merge species data table with traits
# The join key is taxon name + taxonomic rank (species, Rank_correct);
# only the "*_mean" trait columns of try.combined.means are attached.
CWM0 <- DT2.comb %>%
  filter(!PlotObservationID %in% any_pa) %>% 
  left_join(try.combined.means %>%
              dplyr::rename(species=Taxon_name) %>% 
              dplyr::select(species, Rank_correct, ends_with("_mean")), 
            by=c("species", "Rank_correct"))

# Calculate CWM for each trait in each plot
# Mean of each trait weighted by Relative.cover; taxa without a trait value
# are dropped (na.rm=T). The trait columns are addressed as the positional
# range StemDens_mean:LeafWaterCont_mean, so the result depends on the
# column order of try.combined.means.
CWM1 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
               .funs = list(~weighted.mean(., Relative.cover, na.rm=T))) %>%
  # PlotObservationID first, then columns in alphabetical order
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  # long format: one row per plot x trait
  gather(key=variable, value=CWM, -PlotObservationID)

# Calculate coverage for each trait in each plot
# i.e. the summed Relative.cover of the taxa that have a value for the trait
CWM2 <- CWM0 %>%
  # replace each trait value by the taxon's cover (0 when the trait is NA)
  mutate_at(.funs = list(~if_else(is.na(.),0,1) * Relative.cover), 
            .vars = vars(StemDens_mean:LeafWaterCont_mean)) %>%
  group_by(PlotObservationID) %>%
  summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
               .funs = list(~sum(., na.rm=T))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key=variable, value=trait.coverage, -PlotObservationID)
  
# Calculate CWV
# Ancillary function
# Abundance-weighted variance of a trait within one plot.
#   trait: numeric vector of trait values (may contain NA)
#   abu:   abundances (weights), same length and order as trait
# Observations with missing trait are discarded and the remaining weights
# are rescaled to sum to 1. Returns sum(w * (trait - weighted mean)^2),
# or NA (double) when fewer than two trait values are available.
# for calculation see 
# http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
variance2.fun <- function(trait, abu){
  has.value <- !is.na(trait)
  w <- abu[has.value]
  obs <- trait[has.value]
  w <- w / sum(w)
  # you need more than 1 observation to calculate variance
  if (length(obs) <= 1) {
    return(as.double(NA))
  }
  centre <- weighted.mean(obs, w)
  sum(w * (obs - centre)^2)
}

# Community weighted variance of each trait in each plot, computed with
# variance2.fun() using Relative.cover as weights
CWM3 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
               .funs = list(~variance2.fun(., Relative.cover))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key=variable, value=CWV, -PlotObservationID)

## Calculate proportion of species having traits
# Here: number of records per plot with a non-missing value for each trait;
# the proportion itself is derived in the final join below.
CWM4 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  #distinct(PlotObservationID, species, .keep_all = T) %>% 
  summarize_at(.vars= vars(StemDens_mean:LeafWaterCont_mean),
               .funs = list(~sum(!is.na(.)))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key=variable, value=n.sp.with.trait, -PlotObservationID)

# Join together
# All four tables are in long format and keyed by plot x trait ("variable")
CWM <- CWM1 %>%
  left_join(CWM2, by=c("PlotObservationID", "variable")) %>%
  left_join(CWM3, by=c("PlotObservationID", "variable")) %>%
  left_join(CWM4, by=c("PlotObservationID", "variable")) %>%
  # sp.richness = number of records of the plot in CWM0
  left_join(CWM0 %>% 
              group_by(PlotObservationID) %>%
              summarize(sp.richness=n()), by=c("PlotObservationID")) %>%
  mutate(prop.sp.with.trait=n.sp.with.trait/sp.richness) %>%
  dplyr::select(PlotObservationID, variable, sp.richness, prop.sp.with.trait, trait.coverage, CWM, CWV) %>% 
  arrange(PlotObservationID)

2.1 Explore CWM output

Community weighted means of 3 randomly selected plots
PlotObservationID variable sp.richness prop.sp.with.trait trait.coverage CWM CWV
62739 Chromosome.cDNAcont_mean 26 0.846 0.941 5.244 6.690
62739 Chromosome.n_mean 26 0.846 0.941 30.680 69.086
62739 Disp.unit.leng_mean 26 0.846 0.941 3.694 1.455
62739 LDMC_mean 26 0.846 0.941 0.320 0.006
62739 LeafArea.leaflet.undef_mean 26 0.846 0.941 745.622 697935.874
62739 LeafArea.leaf.undef_mean 26 0.846 0.941 564.225 689875.144
62739 LeafArea.undef.undef_mean 26 0.846 0.941 469.819 299953.165
62739 LeafCN.ratio_mean 26 0.846 0.941 26.432 37.262
62739 LeafC.perdrymass_mean 26 0.846 0.941 458.715 183.249
62739 Leaf.delta.15N_mean 26 0.846 0.941 -1.825 2.753
62739 LeafDryMass.single_mean 26 0.846 0.941 33.667 1627.341
62739 Leaffreshmass_mean 26 0.846 0.941 0.142 0.090
62739 LeafLength_mean 26 0.846 0.941 114.563 3366.291
62739 LeafN_mean 26 0.846 0.941 19.105 35.752
62739 LeafNperArea_mean 26 0.846 0.941 1.181 0.044
62739 LeafP_mean 26 0.846 0.941 1.594 0.115
62739 LeafThickness_mean 26 0.846 0.941 0.221 0.004
62739 LeafWaterCont_mean 26 0.846 0.941 3.601 4.543
62739 LeafWidth_mean 26 0.846 0.941 0.997 0.440
62739 PlantHeight_mean 26 0.846 0.941 0.356 0.016
62739 RootingDepth_mean 26 0.846 0.941 0.282 0.030
62739 SeedGerminationRate_mean 26 0.846 0.941 95.724 50.734
62739 Seed.length_mean 26 0.846 0.941 3.000 0.884
62739 SeedMass_mean 26 0.846 0.941 1.662 3.275
62739 Seed.num.rep.unit_mean 26 0.846 0.941 312.290 478574.430
62739 SLA_mean 26 0.846 0.941 16.936 21.241
62739 SpecificRootLength_mean 26 0.846 0.941 5011.526 25625710.566
62739 Stem.cond.dens_mean 26 0.846 0.941 60.399 772.599
62739 StemConduitDiameter_mean 26 0.846 0.941 44.649 1651.302
62739 StemDens_mean 26 0.846 0.941 0.363 0.005
62739 StemDiam_mean 26 0.846 0.941 0.006 0.000
62739 WoodFiberLength_mean 26 0.846 0.941 804.468 35793.176
62739 Wood.vessel.length_mean 26 0.846 0.941 492.260 27213.267
873674 Chromosome.cDNAcont_mean 6 1.000 1.000 4.718 11.528
873674 Chromosome.n_mean 6 1.000 1.000 41.252 106.114
873674 Disp.unit.leng_mean 6 1.000 1.000 1.831 0.969
873674 LDMC_mean 6 1.000 1.000 0.239 0.002
873674 LeafArea.leaflet.undef_mean 6 1.000 1.000 348.742 288472.020
873674 LeafArea.leaf.undef_mean 6 1.000 1.000 251.644 45279.816
873674 LeafArea.undef.undef_mean 6 1.000 1.000 411.875 353167.476
873674 LeafCN.ratio_mean 6 1.000 1.000 20.280 23.234
873674 LeafC.perdrymass_mean 6 1.000 1.000 441.364 126.327
873674 Leaf.delta.15N_mean 6 1.000 1.000 -0.554 0.129
873674 LeafDryMass.single_mean 6 1.000 1.000 12.685 614.748
873674 Leaffreshmass_mean 6 1.000 1.000 0.054 0.008
873674 LeafLength_mean 6 1.000 1.000 50.888 1175.144
873674 LeafN_mean 6 1.000 1.000 24.651 11.496
873674 LeafNperArea_mean 6 1.000 1.000 1.188 0.128
873674 LeafP_mean 6 1.000 1.000 1.876 0.096
873674 LeafThickness_mean 6 1.000 1.000 0.187 0.004
873674 LeafWaterCont_mean 6 1.000 1.000 4.692 0.884
873674 LeafWidth_mean 6 1.000 1.000 0.677 0.181
873674 PlantHeight_mean 6 1.000 1.000 0.347 0.015
873674 RootingDepth_mean 6 1.000 1.000 0.209 0.022
873674 SeedGerminationRate_mean 6 1.000 1.000 89.701 74.768
873674 Seed.length_mean 6 1.000 1.000 1.406 0.187
873674 SeedMass_mean 6 1.000 1.000 0.596 0.311
873674 Seed.num.rep.unit_mean 6 1.000 1.000 3636.007 74356462.209
873674 SLA_mean 6 1.000 1.000 24.377 65.668
873674 SpecificRootLength_mean 6 1.000 1.000 12928.108 96188218.588
873674 Stem.cond.dens_mean 6 1.000 1.000 72.800 8210.119
873674 StemConduitDiameter_mean 6 1.000 1.000 46.710 332.728
873674 StemDens_mean 6 1.000 1.000 0.360 0.003
873674 StemDiam_mean 6 1.000 1.000 0.005 0.000
873674 WoodFiberLength_mean 6 1.000 1.000 714.187 8195.128
873674 Wood.vessel.length_mean 6 1.000 1.000 331.944 6284.534
1812243 Chromosome.cDNAcont_mean 19 0.789 0.982 1.984 7.342
1812243 Chromosome.n_mean 19 0.789 0.982 40.622 103.334
1812243 Disp.unit.leng_mean 19 0.789 0.982 3.090 0.817
1812243 LDMC_mean 19 0.789 0.982 0.348 0.002
1812243 LeafArea.leaflet.undef_mean 19 0.789 0.982 497.877 47650.175
1812243 LeafArea.leaf.undef_mean 19 0.789 0.982 699.620 124626.639
1812243 LeafArea.undef.undef_mean 19 0.789 0.982 722.654 84269.641
1812243 LeafCN.ratio_mean 19 0.789 0.982 23.206 12.241
1812243 LeafC.perdrymass_mean 19 0.789 0.982 479.582 185.281
1812243 Leaf.delta.15N_mean 19 0.789 0.982 -1.756 0.667
1812243 LeafDryMass.single_mean 19 0.789 0.982 47.268 297.442
1812243 Leaffreshmass_mean 19 0.789 0.982 0.138 0.002
1812243 LeafLength_mean 19 0.789 0.982 48.438 123.220
1812243 LeafN_mean 19 0.789 0.982 21.376 5.401
1812243 LeafNperArea_mean 19 0.789 0.982 1.588 0.116
1812243 LeafP_mean 19 0.789 0.982 1.810 0.022
1812243 LeafThickness_mean 19 0.789 0.982 0.179 0.003
1812243 LeafWaterCont_mean 19 0.789 0.982 2.983 0.563
1812243 LeafWidth_mean 19 0.789 0.982 1.789 0.608
1812243 PlantHeight_mean 19 0.789 0.982 4.703 3.057
1812243 RootingDepth_mean 19 0.789 0.982 0.509 0.061
1812243 SeedGerminationRate_mean 19 0.789 0.982 80.750 138.314
1812243 Seed.length_mean 19 0.789 0.982 1.832 0.847
1812243 SeedMass_mean 19 0.789 0.982 1.114 8.135
1812243 Seed.num.rep.unit_mean 19 0.789 0.982 4744.166 12554159.898
1812243 SLA_mean 19 0.789 0.982 14.573 9.654
1812243 SpecificRootLength_mean 19 0.789 0.982 3013.360 2673995.849
1812243 Stem.cond.dens_mean 19 0.789 0.982 174.147 2100.541
1812243 StemConduitDiameter_mean 19 0.789 0.982 43.061 235.841
1812243 StemDens_mean 19 0.789 0.982 0.505 0.006
1812243 StemDiam_mean 19 0.789 0.982 0.293 0.034
1812243 WoodFiberLength_mean 19 0.789 0.982 680.083 2680.235
1812243 Wood.vessel.length_mean 19 0.789 0.982 443.884 3804.891

Scatterplot comparing coverage of traits values across plots, when based on relative cover and when based on proportion of species richness

# Scatterplot of cover-based trait coverage (x) against the proportion of
# species with trait data (y), one point per plot, coloured by
# log(species richness). The dashed line is the 1:1 line.
ggplot(data=CWM %>% 
         #all variables have the same coverage. Showcase with LDMC
         filter(variable=="LDMC_mean"), aes(x=trait.coverage, y=prop.sp.with.trait, col=log(sp.richness))) + 
  geom_point(pch="+", alpha=1/3) + 
  geom_abline(intercept = 0, slope=1, col=2, lty=2, lwd=.7) + 
  # both axes are proportions
  xlim(c(0,1)) + 
  ylim(c(0,1)) + 
  scale_color_viridis() + 
  theme_bw() +
  xlab("Trait coverage (Relative  cover)") + 
  ylab("Trait coverage (Proportion of species)") + 
  coord_equal()

Calculate summary statistics for trait coverage in plots

# Summary statistics of the two plot-level coverage measures
# (trait.coverage and prop.sp.with.trait), computed on the LDMC rows only.
# num.0s counts the plots with zero coverage; like all the other
# statistics it now ignores missing values, so that a single NA does not
# turn the whole count into NA.
CWM.coverage <- CWM %>% 
  filter(variable=="LDMC_mean") %>% 
  summarize_at(.vars=vars(trait.coverage, prop.sp.with.trait),
                .funs=list(num.0s=~sum(.==0, na.rm=TRUE),
                           min=~min(., na.rm=TRUE),
                           q025=~quantile(., 0.25, na.rm=TRUE), 
                           q50=~quantile(., 0.50, na.rm=TRUE), 
                           q75=~quantile(., .75, na.rm=TRUE), 
                           max=~max(., na.rm=TRUE), 
                           mean=~mean(., na.rm=TRUE), 
                           sd=~sd(., na.rm=TRUE))) %>% 
  # columns are named <metric>_<stat>: reshape to one row per metric
  gather(key=variable, value=value) %>% 
  separate(variable, sep="_", into=c("metric", "stat")) %>% 
  # factor levels fix the column order produced by spread()
  mutate(stat=factor(stat, levels=c("num.0s", "min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
  spread(key=stat, value=value) 
Summary of plot-level coverage of CWM and CWVs
metric num.0s min q025 q50 q75 max mean sd
prop.sp.with.trait 12153 0 0.778 0.895 0.976 1 0.843 0.178
trait.coverage 14465 0 0.780 0.948 0.995 1 0.842 0.226

Calculate summary statistics for CWMs and CWVs

# Summary statistics of CWM and CWV across all plots, separately per trait.
# "variable" is renamed to myvar first, because the name "variable" is
# reused below as the key column of gather().
CWM.summary <- CWM %>% 
  rename(myvar=variable) %>% 
  group_by(myvar) %>% 
  summarize_at(.vars=vars(CWM:CWV),
                .funs=list(min=~min(., na.rm=T),
                           q025=~quantile(., 0.25, na.rm=T), 
                           q50=~quantile(., 0.50, na.rm=T), 
                           q75=~quantile(., .75, na.rm=T), 
                           max=~max(., na.rm=T), 
                           mean=~mean(., na.rm=T), 
                           sd=~sd(., na.rm=T))) %>% 
  # columns are named <metric>_<stat>, e.g. CWM_min: reshape so that each
  # row is a trait x metric combination and each column a statistic
  gather(key=variable, value=value, -myvar) %>% 
  separate(variable, sep="_", into=c("metric", "stat")) %>% 
  mutate(stat=factor(stat, levels=c("min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
  spread(key=stat, value=value) %>% 
  arrange(metric, myvar)
Summary of CWMs and CWVs across all plots
myvar metric min q025 q50 q75 max mean sd
Chromosome.cDNAcont_mean CWM 0.084 3.397 4.995 7.221000e+00 9.037800e+01 6.325000e+00 5.062000e+00
Chromosome.n_mean CWM 0.114 27.419 31.729 3.879600e+01 4.166200e+03 3.476300e+01 1.394100e+01
Disp.unit.leng_mean CWM 0.019 2.672 3.575 6.217000e+00 3.592797e+03 1.407900e+01 9.290000e+01
LDMC_mean CWM 0.014 0.242 0.283 3.330000e-01 9.680000e-01 2.860000e-01 6.900000e-02
LeafArea.leaflet.undef_mean CWM 0.001 507.129 1050.050 2.172636e+03 2.896005e+08 3.226197e+03 4.104026e+05
LeafArea.leaf.undef_mean CWM 0.007 483.762 1047.625 2.246030e+03 1.714323e+08 3.726639e+05 3.751084e+06
LeafArea.undef.undef_mean CWM 0.001 544.712 1187.907 2.191171e+03 2.239247e+07 2.033596e+03 2.517515e+04
LeafCN.ratio_mean CWM 0.986 20.056 23.395 2.795100e+01 8.780210e+02 2.557300e+01 1.485100e+01
LeafC.perdrymass_mean CWM 96.454 440.810 450.720 4.661430e+02 9.991090e+02 4.532800e+02 2.608000e+01
Leaf.delta.15N_mean CWM -12.462 -1.075 -0.126 7.990000e-01 4.310700e+01 -1.000000e-03 2.020000e+00
LeafDryMass.single_mean CWM 0.000 30.722 63.056 1.287740e+02 1.013838e+05 1.141790e+02 3.650770e+02
Leaffreshmass_mean CWM 0.000 0.123 0.248 4.820000e-01 1.641220e+02 4.560000e-01 9.660000e-01
LeafLength_mean CWM 0.013 48.237 74.735 1.025800e+02 6.731693e+04 8.650600e+01 2.036740e+02
LeafN_mean CWM 3.830 19.173 22.307 2.518600e+01 2.882780e+02 2.249900e+01 5.312000e+00
LeafNperArea_mean CWM 0.002 1.141 1.309 1.582000e+00 8.900100e+01 1.429000e+00 5.530000e-01
LeafP_mean CWM 0.014 1.530 1.836 2.204000e+00 6.392164e+04 1.198250e+02 1.329472e+03
LeafThickness_mean CWM 0.005 0.188 0.220 2.760000e-01 5.413100e+01 3.160000e-01 1.071000e+00
LeafWaterCont_mean CWM 0.387 3.479 4.436 5.455000e+00 4.260880e+02 5.551000e+00 9.423000e+00
LeafWidth_mean CWM 0.005 0.766 1.348 2.516000e+00 2.675318e+04 6.202000e+00 1.380740e+02
PlantHeight_mean CWM 0.005 0.334 0.600 4.256000e+00 6.994000e+01 3.017000e+00 4.592000e+00
RootingDepth_mean CWM 0.004 0.352 0.511 7.590000e-01 7.274138e+04 8.000000e-01 5.744000e+01
SeedGerminationRate_mean CWM 7.122 84.121 88.935 9.255000e+01 2.416300e+02 8.800300e+01 6.843000e+00
Seed.length_mean CWM 0.061 1.906 2.492 4.018000e+00 1.089329e+04 1.521500e+01 2.169080e+02
SeedMass_mean CWM 0.000 0.929 2.201 1.680400e+01 3.486900e+06 6.139530e+02 1.817469e+04
Seed.num.rep.unit_mean CWM 0.000 1048.176 3666.780 1.535367e+04 1.747089e+21 7.757933e+15 2.941775e+18
SLA_mean CWM 1.473 15.395 20.164 2.464200e+01 5.490787e+03 3.446600e+01 1.274150e+02
SpecificRootLength_mean CWM 0.000 4402.671 7311.149 1.206094e+04 3.071495e+09 4.833799e+06 6.390279e+07
Stem.cond.dens_mean CWM 0.013 72.161 97.866 1.396460e+02 4.878582e+06 2.509100e+02 8.898355e+03
StemConduitDiameter_mean CWM 0.001 33.009 41.673 5.289200e+01 1.664311e+07 9.358500e+01 2.039292e+04
StemDens_mean CWM 0.053 0.361 0.420 4.960000e-01 2.640000e+00 4.350000e-01 1.160000e-01
StemDiam_mean CWM 0.000 0.010 0.024 9.600000e-02 8.884200e+01 6.700000e-02 1.660000e-01
WoodFiberLength_mean CWM 116.018 721.172 810.433 9.265990e+02 2.204640e+06 1.344704e+03 1.068620e+04
Wood.vessel.length_mean CWM 31.679 366.971 430.588 5.216400e+02 4.298853e+05 4.660700e+02 5.613640e+02
Chromosome.cDNAcont_mean CWV 0.000 5.199 13.316 3.286200e+01 2.143228e+04 4.214700e+01 8.687900e+01
Chromosome.n_mean CWV 0.000 67.635 146.115 3.293860e+02 1.451823e+07 4.478910e+02 2.884217e+04
Disp.unit.leng_mean CWV 0.000 1.423 3.803 1.558700e+01 3.227013e+06 1.993778e+04 1.687256e+05
LDMC_mean CWV 0.000 0.002 0.004 6.000000e-03 1.570000e-01 5.000000e-03 5.000000e-03
LeafArea.leaflet.undef_mean CWV 0.000 293015.667 1450730.566 5.990615e+06 3.653300e+16 2.210796e+11 7.911348e+13
LeafArea.leaf.undef_mean CWV 0.000 273271.206 1254786.343 6.694266e+06 7.347254e+15 5.016001e+13 4.280153e+14
LeafArea.undef.undef_mean CWV 0.000 404274.008 1201009.405 4.885570e+06 1.809148e+15 1.479687e+10 3.237818e+12
LeafCN.ratio_mean CWV 0.000 14.700 34.954 8.120700e+01 6.171788e+05 4.188350e+02 4.284321e+03
LeafC.perdrymass_mean CWV 0.000 173.719 338.389 5.940400e+02 1.057395e+05 6.283850e+02 2.957771e+03
Leaf.delta.15N_mean CWV 0.000 1.674 2.964 4.794000e+00 6.184630e+02 6.734000e+00 2.510900e+01
LeafDryMass.single_mean CWV 0.000 1063.567 5482.848 2.761995e+04 7.538101e+09 3.614544e+05 3.418852e+07
Leaffreshmass_mean CWV 0.000 0.019 0.088 4.620000e-01 4.203363e+04 2.092000e+00 1.115460e+02
LeafLength_mean CWV 0.000 987.260 2310.388 5.021992e+03 1.874145e+10 2.481698e+05 2.770738e+07
LeafN_mean CWV 0.000 13.152 23.690 3.797600e+01 2.717296e+05 3.413700e+01 3.774750e+02
LeafNperArea_mean CWV 0.000 0.064 0.122 2.440000e-01 1.125946e+04 8.500000e-01 2.869200e+01
LeafP_mean CWV 0.000 0.144 0.278 5.170000e-01 1.021494e+09 4.016335e+06 4.197853e+07
LeafThickness_mean CWV 0.000 0.002 0.005 1.200000e-02 3.430838e+03 2.631000e+00 3.093600e+01
LeafWaterCont_mean CWV 0.000 1.439 2.856 4.859000e+00 4.497611e+04 3.115000e+02 2.596554e+03
LeafWidth_mean CWV 0.000 0.484 1.745 4.710000e+00 3.023911e+08 1.310860e+05 3.412976e+06
PlantHeight_mean CWV 0.000 0.021 0.094 2.408100e+01 8.370440e+02 2.190000e+01 4.601400e+01
RootingDepth_mean CWV 0.000 0.041 0.101 2.490000e-01 1.871033e+10 1.273134e+04 1.455312e+07
SeedGerminationRate_mean CWV 0.000 26.381 50.523 9.287700e+01 7.015952e+03 8.142900e+01 1.357510e+02
Seed.length_mean CWV 0.000 0.534 1.282 4.575000e+00 2.966463e+07 6.939615e+04 1.200800e+06
SeedMass_mean CWV 0.000 0.701 8.150 1.076864e+03 2.901391e+13 2.281825e+09 6.879021e+10
Seed.num.rep.unit_mean CWV 0.000 3535043.315 68066414.372 1.268117e+09 5.087198e+42 2.752668e+37 9.727433e+39
SLA_mean CWV 0.000 19.547 36.728 7.503500e+01 7.533142e+06 5.391560e+04 4.390777e+05
SpecificRootLength_mean CWV 0.000 10764758.164 34930547.576 7.815094e+07 2.358521e+18 6.700197e+15 9.603623e+16
Stem.cond.dens_mean CWV 0.000 1822.666 4341.535 1.031477e+04 3.534652e+13 1.494370e+09 8.989725e+10
StemConduitDiameter_mean CWV 0.000 216.543 492.647 1.049426e+03 1.065354e+15 2.249810e+09 1.361695e+12
StemDens_mean CWV 0.000 0.005 0.009 1.500000e-02 1.447000e+00 1.600000e-02 5.400000e-02
StemDiam_mean CWV 0.000 0.000 0.001 8.000000e-03 7.519691e+04 2.810000e-01 9.473500e+01
WoodFiberLength_mean CWV 0.000 27192.902 50141.402 8.514083e+04 1.868337e+13 1.962241e+08 2.403650e+10
Wood.vessel.length_mean CWV 0.000 13052.293 26281.636 5.234523e+04 7.096939e+11 1.573668e+06 9.069681e+08

2.2 Export CWM and species mean trait values

# Export taxon-level trait means and plot-level CWMs/CWVs.
# NOTE: the same file is written again in section 4, where sPlot.traits
# is added to the saved objects.
save(try.combined.means, CWM, file="../_output/Traits_CWMs_sPlot3.RData")

3 Classify plots in is.forest or is.non.forest based on species traits

sPlot has two independent systems for classifying plots to vegetation types. The first relies on the expert opinion of data contributors and classifies plots into broad habitat types. These broad habitat types are coded using 5, non-mutually exclusive dummy variables:
1) Forest
2) Grassland
3) Shrubland
4) Sparse vegetation
5) Wetland
A plot may belong to more than one formation, e.g. a Savannah is categorized as Forest + Grassland (FG). Unfortunately, this system is not consistently available across all plots: the large majority of the classified plots are located in Europe.
There is therefore the need to give at least some indication to the remaining unclassified plots. To achieve this, already from v2.1, sPlot started using a classification into forest and non-forest, based on the share of trees, and the layering of vegetation. Here, we derived the (mutually exclusive) is.forest and is.non.forest classification of plots.

3.1 Derive species level information on Growth Forms

We used different sources of information:
1) Data from the gap-filled trait matrix
2) Manual cleaning of the most common species for which growth trait info is not available
3) Data from TRY (public dataset only) on all species with growth form info (Trait ID = 42)
4) Cross-match with species assigned to tree layer in DT table.

Step 1: Attach growth form trait information to DT table. Growth form information derives from TRY

# Species records of vascular plants, with the growth form and the mean
# plant height of each species attached from TRY
DT.gf <- DT2 %>% 
  filter(taxon_group=="Vascular plant") %>% 
  #join with try names, using resolved species names as key
  left_join(try.species.names %>% 
              dplyr::select(Name_short, GrowthForm) %>% 
              rename(species=Name_short) %>% 
              # keep the first growth form listed for each resolved name
              distinct(species, .keep_all=T), 
            by="species") %>% 
  # NOTE(review): no distinct() here - presumably try.species.means has one
  # row per Name_short; otherwise records would be duplicated. Confirm.
  left_join(try.species.means %>% 
              dplyr::select(Name_short, PlantHeight_mean) %>% 
              rename(species=Name_short), 
            by="species")
# number of records without Growth Form info
sum(is.na(DT.gf$GrowthForm))
## [1] 4998815

Step 2: Select most common species without growth-trait information to export and check manually

# Species lacking growth-form information, ranked by their number of
# records in DT.gf (most frequent first)
top.gf.nas <- DT.gf %>%
  filter(is.na(GrowthForm)) %>%
  count(species) %>%
  arrange(desc(n))
# Export the species with more than 1000 records for manual classification
write_csv(filter(top.gf.nas, n>1000),
  path="../_derived/Species_missingGF.csv")

The first 47567 species account for 56.59% of the missing records. Assign growth forms manually, reimport and coalesce into DT.gf

# Import manually classified species - this info is also reported in Appendix 1
gf.manual <- read_csv("../_derived/Species_missingGF_complete.csv")
## Parsed with column specification:
## cols(
##   species = col_character(),
##   GrowthForm = col_character()
## )
# Fill in missing growth forms with the manually assigned ones.
# coalesce() keeps the growth form already present and only falls back on
# the manual value (GrowthForm.m) where GrowthForm is NA.
DT.gf <- DT.gf %>% 
  left_join(gf.manual %>% 
              rename(GrowthForm.m=GrowthForm),
            by="species") %>% 
   mutate(GrowthForm=coalesce(GrowthForm, GrowthForm.m)) %>% 
   # drop the helper column
   dplyr::select(-GrowthForm.m)

After manual completion, the number of records without growth form information decreased to 2332279.

Step 3: Import additional data on growth-form from TRY (Accessed 10 March 2020).
All public data on growth form downloaded. First take care of unmatched quotation marks in the txt file. Do this from command line.

# escape all unmatched quotation marks. Run in Linux terminal
#sed 's/"/\\"/g' 8854.txt > 8854_test.csv
#sed "s/'/\\'/g" 8854.txt > 8854_test.csv

Information on growth form is not organized and has a myriad of levels. Extract and simplify to the set of few types used so far. In case a species is attributed to multiple growth forms use a majority vote.

all.gf0 <- read_delim("../_input/TRY5.0_v1.1/8854_test.txt", delim="\t") 

# Regular expressions mapping TRY's free-text growth forms to four classes.
# The long pattern is assembled with paste0(): a string literal broken
# across source lines would embed the line break and the indentation in the
# regex, so the first alternative on the continuation line
# ("hydrohalophyte") could never match.
# NOTE(review): "cactous" and "chaemaephyte" look like misspellings of
# "cactus" and "chamaephyte"; kept as they are because they may mirror the
# spelling used in the TRY export - confirm against the data.
gf.regex.other <- paste0(
  "vine|climber|liana|carnivore|epiphyte|^succulent|lichen|parasite|",
  "hydrohalophyte|aquatic|cactous|parasitic|hydrophytes|carnivorous")
gf.regex.tree  <- "tree|conifer|^woody$|palmoid|mangrove|gymnosperm"
gf.regex.shrub <- "shrub|scrub|bamboo"
gf.regex.herb  <- 
  "herb|sedge|graminoid|fern|forb|herbaceous|grass|chaemaephyte|geophyte|annual"

all.gf <- all.gf0 %>% 
  # TraitID 42 = growth form
  filter(TraitID==42) %>% 
  distinct(AccSpeciesName, OrigValueStr) %>% 
  rename(GrowthForm0=OrigValueStr) %>% 
  mutate(GrowthForm0=tolower(GrowthForm0)) %>%
  # only species occurring in sPlot
  filter(AccSpeciesName %in% sPlot.species$species) %>% 
  mutate(GrowthForm_simplified= GrowthForm0) %>% 
  # The four replacements are applied in sequence and each one tests the
  # ORIGINAL string (GrowthForm0): when a string matches several patterns
  # the last match wins, i.e. herb > shrub > tree > other.
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0, gf.regex.other), 
                                       "other")) %>%
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0, gf.regex.tree), 
                                       "tree")) %>% 
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0, gf.regex.shrub), 
                                       "shrub")) %>%
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0, gf.regex.herb),
                                       "herb")) %>%
  # strings that matched none of the patterns are set to NA and dropped
  mutate(GrowthForm_simplified=ifelse(GrowthForm_simplified %in% c("other", "herb", "shrub", "tree"), 
                                      GrowthForm_simplified, NA)) %>% 
  filter(!is.na(GrowthForm_simplified)) 

#Some species have multiple attributions - use a majority vote.
# Returns the most frequent value of x as a character string, or "Unknown"
# when the two most frequent values are tied. 
# - If x holds a single distinct value, that value is returned as is
#   (NA_character_ for all-NA input).
# - NAs are ignored by the vote, so c("tree", NA) gives "tree".
# - Empty input gives "Unknown".
get.mode <- function(x){
  distinct.values <- unique(x)
  if (length(distinct.values) == 1) {
    return(as.character(distinct.values))
  }
  # table() drops NAs, so fewer than two levels may be left at this point
  counts <- table(x)
  if (length(counts) == 0) {
    return("Unknown")
  }
  counts <- sort(counts, decreasing = TRUE)
  if (length(counts) == 1 || counts[[1]] != counts[[2]]) {
    return(names(counts)[1])
  }
  "Unknown"
}

# One growth form per species: majority vote across the simplified
# attributions; species for which get.mode() returns "Unknown" are dropped
all.gf <- all.gf %>% 
  group_by(AccSpeciesName) %>% 
  summarize(GrowthForm_simplified=get.mode(GrowthForm_simplified)) %>% 
  filter(GrowthForm_simplified!="Unknown")

# number of species per growth form (NAs shown, if any)
table(all.gf$GrowthForm_simplified, exclude=NULL)  
## 
##  herb other shrub  tree 
## 21467  3429  7406  9194
#coalesce this info into DT.gf
# Growth forms already present in DT.gf take precedence; the TRY-derived
# value is only used where GrowthForm is still NA.
DT.gf <- DT.gf %>% 
  left_join(all.gf %>% 
              rename(species=AccSpeciesName), 
            by="species") %>% 
  mutate(GrowthForm=coalesce(GrowthForm, GrowthForm_simplified)) %>% 
  dplyr::select(-GrowthForm_simplified)

Step 4: Cross-match. Assign all species occurring in at least one relevé in the tree layer as tree. Conservatively, do this only when the record is at species level (exclude records at genus\family level)

# Names of the taxa that still lack a growth form, are resolved at species
# level and have at least one record in Layer 1
other.trees <- DT.gf %>% 
  filter(Layer==1 & is.na(GrowthForm)) %>% 
  filter(Rank_correct=="species") %>% 
  distinct(species, Layer, GrowthForm) %>% 
  pull(species)

# Set the growth form of these species to "tree" in ALL their records,
# whatever the layer of the individual record
DT.gf <- DT.gf %>% 
  mutate(GrowthForm=replace(GrowthForm, 
                            list=species %in% other.trees, 
                            values="tree"))

After cross-matching, the number of records without growth form information decreased to 1264978.

Average height per growth form
GrowthForm Height
herb 0.520
herb/shrub 1.982
herb 1.522
herb/shrub/tree 5.241
other 4.550
shrub 2.351
shrub/tree 5.094
shrub 4.644
tree 13.077
NA 2.507

Classify species as trees or tall shrubs vs. other. Make a compact table of species growth forms and create the fields is.tree.or.tall.shrub and is.not.tree.or.shrub.and.small.
Define a species as is.tree.or.tall.shrub when it is either classified as a tree, OR has a height >=10.
Define a species as is.not.tree.or.shrub.and.small when it has a height <10, as long as it is not classified as a tree. When height is not available, it is sufficient that the species is classified as “herb” or “other”.

# Compact species table with a logical flag is.tree.or.tall.shrub:
# TRUE = tree or tall shrub, FALSE = small non-tree, NA = undetermined.
# The steps below are order-dependent: both helper fields start as NA, are
# set to TRUE where their own rule applies, and are then used in turn to set
# the other field to FALSE.
GF <- DT.gf %>% 
  distinct(species, GrowthForm, PlantHeight_mean) %>% 
  ## define is.tree.or.tall
  mutate(is.tree.or.tall.shrub=NA) %>% 
  # TRUE when the growth form string contains "tree" ...
  mutate(is.tree.or.tall.shrub=replace(is.tree.or.tall.shrub, 
                                       list=str_detect(GrowthForm, "tree"), 
                                       T)) %>% 
  # ... or when mean plant height is at least 10
  mutate(is.tree.or.tall.shrub=replace(is.tree.or.tall.shrub, 
                                       list=PlantHeight_mean>=10, 
                                       T)) %>% 
  ## define is.not.tree.or.shrub.and.small 
  mutate(is.not.tree.or.shrub.and.small=NA) %>% 
  # TRUE when mean plant height is below 10 ...
  mutate(is.not.tree.or.shrub.and.small=replace(is.not.tree.or.shrub.and.small,
                                       list=PlantHeight_mean<10, 
                                       T)) %>% 
  # ... or, without height data, when the growth form contains "herb" or "other"
  mutate(is.not.tree.or.shrub.and.small=replace(is.not.tree.or.shrub.and.small,
                                       list=is.na(PlantHeight_mean) & str_detect(GrowthForm, "herb|other"), 
                                       T)) %>%   
  ## use each field in turn to define which of the records in the other is F
  # trees/tall shrubs cannot be "small non-trees": the tree rule wins
  mutate(is.not.tree.or.shrub.and.small=replace(is.not.tree.or.shrub.and.small,
                                       list= is.tree.or.tall.shrub==T,
                                       F)) %>% 
  # what is still flagged as small non-tree is, conversely, not a tree
  mutate(is.tree.or.tall.shrub=replace(is.tree.or.tall.shrub,
                                       list= is.not.tree.or.shrub.and.small==T,
                                       F)) %>% 
  ## drop redundant field
  dplyr::select(-is.not.tree.or.shrub.and.small)
  

## cross-check classification  
table(GF$GrowthForm, GF$is.tree.or.tall.shrub, exclude=NULL)
##                  
##                   FALSE  TRUE  <NA>
##   herb            22429     2     0
##   herb/shrub         39     1     0
##   herb\\shrub         8     0     0
##   herb/shrub/tree     0     2     0
##   other            1646    42     0
##   shrub            5410    93  2323
##   shrub/tree          0   105     0
##   shrub\\tree         0    28     0
##   tree                0 13458     0
##   <NA>              818    50 26690
## Check for herb species classified as trees
GF %>% 
  filter(is.tree.or.tall.shrub & GrowthForm=="herb")
## # A tibble: 2 x 4
##   species                   GrowthForm PlantHeight_mean is.tree.or.tall.shrub
##   <chr>                     <chr>                 <dbl> <lgl>                
## 1 Phyllostachys bambusoides herb                   16.6 TRUE                 
## 2 Bambusa vulgaris          herb                   14.2 TRUE

These are bamboo species and their heights seem reasonable.

3.2 Classify plots into forest\non-forest

Define a plot as forest if:
1) Has a total cover of the tree layer >=25% (from header)
2) Has a total cover in Layers 1–3, combined across layers, >=25% (from DT)
3) Has a total cover of tree or tall shrub species >=25% (from DT + TRY)
4) Has data on Basal area summing to more than 10 m2/ha

The first three criteria are adapted to define non-forest as follows:
1) Info on total cover of the tree layer is available and <25%
2) Info on total cover in Layers 1–3 is available and <25%
3) The relative cover of non tree species is >75%

Criteria 2 and 3 only apply to plots having cover data in percentage.
Reimport header file

load("../_output/header_sPlot3.0.RData")

Criterium 1

# Criterion 1: tree-layer cover as reported in the header.
# is.forest is NA where the tree-layer cover is not reported.
plot.vegtype1 <- header %>% 
  dplyr::select(PlotObservationID, `Cover tree layer (%)`) %>% 
  rename(Cover_trees=`Cover tree layer (%)`) %>% 
  mutate(is.forest=Cover_trees>=25) 

# number of plots per class (NAs included)
table(plot.vegtype1 %>% dplyr::select(is.forest), exclude=NULL)
## 
##   FALSE    TRUE    <NA> 
##   26211  191834 1760641

Criterium 2

# Select only plots having cover data in percentage
# i.e. plots where every abundance scale in use is "CoverPerc"
mysel <- (DT.gf %>% 
            distinct(PlotObservationID, Ab_scale) %>% 
            group_by(PlotObservationID) %>% 
            summarize(AllCovPer=all(Ab_scale=="CoverPerc")) %>% 
            filter(AllCovPer==T) %>% 
            pull(PlotObservationID))
# Excluded plots: number of header plots not in the selection
nrow(header)-length(mysel)
## [1] 294879
# Criterion 2: combined cover of Layers 1-3, computed from species records.
# Only plots recorded entirely in percentage cover (mysel) are considered.
plot.vegtype2 <- DT.gf %>% 
  filter(PlotObservationID %in% mysel ) %>% 
  filter(Layer %in% c(1,2,3)) %>% 
  # first sum the cover of all species in a layer
  group_by(PlotObservationID, Layer) %>% 
  summarize(cover_perc=sum(cover_perc)) %>% 
  # then combine cover across layers
  group_by(PlotObservationID) %>% 
  summarize(cover_perc=combine.cover(cover_perc)) %>% 
  mutate(is.forest=cover_perc>=25) 

# number of plots per class (NAs included). This tabulates plot.vegtype2:
# the previous version tabulated plot.vegtype1 by mistake and therefore
# just repeated the output of criterion 1.
table(plot.vegtype2 %>% dplyr::select(is.forest), exclude=NULL)
## 
##   FALSE    TRUE    <NA> 
##   26211  191834 1760641

Criterium 3

# Criterion 3: cover of tree / tall-shrub species vs. other species,
# based on the species-level flag is.tree.or.tall.shrub in GF
plot.vegtype3 <- DT.gf %>% 
  #filter plots where all records are recorded as percentage cover
  filter(PlotObservationID %in% mysel ) %>% 
  # combine cover across layers
  group_by(PlotObservationID, species) %>%
  summarize(cover_perc=combine.cover(cover_perc)) %>%
  ungroup() %>% 
  # attach species Growth Form information
  left_join(GF, by="species")%>% 
  group_by(PlotObservationID) %>% 
  # summed cover of tree species, of non-tree species and of species whose
  # flag is NA. NOTE(review): the third sum has no na.rm, unlike the first
  # two, so a missing cover value makes cover_unknown NA - confirm intended.
  summarize(cover_tree=sum(cover_perc*is.tree.or.tall.shrub, na.rm=T), 
            cover_non_tree=sum(cover_perc*(!is.tree.or.tall.shrub), na.rm=T), 
            cover_unknown=sum(cover_perc* is.na(is.tree.or.tall.shrub))) %>% 
  # rowwise() so that sum() below adds up the three columns within each row;
  # the result stays a rowwise data frame
  rowwise() %>% 
  ## classify plots based on cover of different growth forms
  mutate(tot.cover=sum(cover_tree, cover_non_tree, cover_unknown, na.rm=T)) %>% 
  mutate(is.forest=cover_tree>=25) %>% 
  # non-forest: tree cover <25 AND non-trees make up >75% of total cover
  mutate(is.non.forest=cover_tree<25 & (cover_non_tree/tot.cover)>.75)

# cross-tabulation of the two flags (NAs included)
table(plot.vegtype3 %>% dplyr::select(is.forest, is.non.forest), exclude=NULL)
##          is.non.forest
## is.forest   FALSE    TRUE    <NA>
##     FALSE   72115 1137090      10
##     TRUE   474592       0       0

Criterium 4

# Criterion 4: total basal area, for records with Ab_scale=="x_BA".
# NOTE(review): the threshold is strict (>10) whereas the cover-based
# criteria use >=; confirm which is intended.
plot.vegtype4 <-  DT.gf %>% 
  filter(Ab_scale=="x_BA") %>% 
  group_by(PlotObservationID) %>% 
  summarize(tot.ba=sum(Abundance)) %>% 
  mutate(is.forest=tot.ba>10)

# number of plots per class (NAs included)
table(plot.vegtype4 %>% dplyr::select(is.forest), exclude=NULL)
## 
## FALSE  TRUE 
##  1358  5558

Combine classifications from the four criteria. Use majority vote to assign plots. In case of ties, a progressively lower priority is given from criterion 1 to criterion 4.

# Combine the four criteria into one is.forest / is.non.forest flag per plot.
# Each criterion column is named explicitly when it is joined:
#   is.forest.x   = criterion 1 (tree-layer cover from header)
#   is.forest.y   = criterion 2 (cover of Layers 1-3)
#   is.forest.x.x / is.non.forest.x.x = criterion 3 (tree species cover)
#   is.forest.y.y = criterion 4 (basal area)
# These are the names that the successive joins used to generate implicitly
# through cascaded ".x"/".y" suffixes; naming them explicitly keeps the
# resulting columns identical without relying on that mechanism.
plot.vegtype <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  left_join(plot.vegtype1 %>% 
              dplyr::select(PlotObservationID, is.forest.x=is.forest), 
            by="PlotObservationID") %>% 
  left_join(plot.vegtype2 %>% 
              dplyr::select(PlotObservationID, is.forest.y=is.forest), 
            by="PlotObservationID") %>% 
  left_join(plot.vegtype3 %>% 
              dplyr::select(PlotObservationID, 
                            is.forest.x.x=is.forest, 
                            is.non.forest.x.x=is.non.forest), 
            by="PlotObservationID") %>% 
  left_join(plot.vegtype4 %>% 
              dplyr::select(PlotObservationID, is.forest.y.y=is.forest), 
            by="PlotObservationID") %>% 
  ## assign vegtype based on majority vote. In case of ties use the order of criteria as ranking
  rowwise() %>% 
  # share of the available (non-NA) criteria voting "forest"
  mutate(mean.forest=mean(c(is.forest.x, is.forest.y, is.forest.x.x, is.forest.y.y), na.rm=TRUE)) %>% 
  # tie-breaker: vote of the first criterion that is not NA
  mutate(mean.forest2=coalesce(is.forest.x, is.forest.y, is.forest.x.x, is.forest.y.y)) %>% 
  mutate(is.forest=ifelse(mean.forest==0.5, mean.forest2, mean.forest>0.5)) %>%  
  # same for is.non.forest
  # (criteria 1, 2 and 4 vote non-forest when they are not forest;
  #  criterion 3 has its own is.non.forest flag)
  mutate(mean.non.forest=mean(c( (!is.forest.x), (!is.forest.y), is.non.forest.x.x, (!is.forest.y.y)), na.rm=TRUE)) %>% 
  mutate(mean.non.forest2=coalesce( (!is.forest.x), (!is.forest.y), is.non.forest.x.x, (!is.forest.y.y))) %>% 
  mutate(is.non.forest=ifelse(mean.non.forest==0.5, mean.non.forest2, mean.non.forest>0.5)) %>% 
  # when both is.forest & is.non.forest are F transform to NA
  mutate(both.F=ifelse( ( (is.forest==FALSE | is.na(is.forest)) & is.non.forest==FALSE), TRUE, FALSE)) %>% 
  mutate(is.forest=replace(is.forest, list=both.F==TRUE, values=NA)) %>% 
  mutate(is.non.forest=replace(is.non.forest, list=both.F==TRUE, values=NA))

# cross-tabulation of the final flags (NAs included)
table(plot.vegtype %>% dplyr::select(is.forest, is.non.forest), exclude=NULL)
##          is.non.forest
## is.forest   FALSE    TRUE    <NA>
##     FALSE       0 1161496       7
##     TRUE   468261       0       0
##     <NA>        0       0  348922

3.3 Cross-check and validate

Cross check with sPlot’s 5-class (incomplete) native classification deriving from data contributors. Build a Confusion matrix.

cross.check <- header %>% 
  dplyr::select(PlotObservationID, Forest) %>% 
  left_join(plot.vegtype %>% 
              dplyr::select(PlotObservationID, is.forest, is.non.forest) %>% 
              rename(Forest=is.forest, Other=is.non.forest) %>% 
              gather(isfor_isnonfor, value, -PlotObservationID) %>% 
              filter(value==T) %>% 
              dplyr::select(-value), 
            by="PlotObservationID") %>% 
  mutate(Other=1*Forest!=1) %>% 
  gather(veg_type, value, -PlotObservationID, -isfor_isnonfor) %>% 
  filter(value==1) %>% 
  dplyr::select(-value)

# Build a confusion matrix to evaluate the comparison.
# Levels are fixed explicitly so that "Forest" is always the first level (the
# class caret reports sensitivity/specificity for), independent of row order in
# cross.check. Deriving them with union() ordered the levels by first
# appearance and also carried along the NA of plots without a growth-form
# based label.
u <- c("Forest", "Other")
# rows = growth-form based classification, columns = sPlot's native classification
# NOTE(review): `t` masks base::t() for variable lookup; name kept unchanged in
# case later chunks refer to it
t <- table( factor(cross.check$isfor_isnonfor, levels=u), factor(cross.check$veg_type, levels=u))
confm <- caret::confusionMatrix(t, positive="Forest")
Confusion matrix between sPlot’s native classification of habitats (columns) and the classification derived from the four criteria on vegetation layers and growth forms (rows)
Forest Other
Forest 381412 25588
Other 28027 974204
Formulas for the associated statistics are available on the help page of the caret package and in the references cited there. The overall accuracy of the classification based on is.forest\is.non.forest, when tested against sPlot’s native habitat classification, is 0.96; the Kappa statistic is 0.91.
Associated statistics of confusion matrix by class
x
Sensitivity 0.9315478
Specificity 0.9744067
Pos Pred Value 0.9371302
Neg Pred Value 0.9720354
Precision 0.9371302
Recall 0.9315478
F1 0.9343307
Prevalence 0.2905407
Detection Rate 0.2706526
Detection Prevalence 0.2888100
Balanced Accuracy 0.9529772
## [1] TRUE

Through the process described above, we managed to classify 1629757 plots, of which 468261 are forest and 1161496 are non-forest.
The total number of plots with attribution to forest\non-forest (either coming from sPlot’s native classification, or from the process above) is: 1727553.

4 Export and update other objects

# Species-level table: sPlot species (sorted by name) joined with their growth
# form from GF and with the matching rows of try.combined.means
sPlot.traits <- left_join(
    sPlot.species %>% arrange(species), 
    dplyr::select(GF, species, GrowthForm, is.tree.or.tall.shrub), 
    by="species") %>% 
  # match on the taxon name directly instead of renaming the key first
  left_join(try.combined.means, by=c("species"="Taxon_name")) %>% 
  dplyr::select(-Rank_correct)

# Store species-level trait means, CWMs and the species table in one file
save(list=c("try.combined.means", "CWM", "sPlot.traits"), 
     file="../_output/Traits_CWMs_sPlot3.RData")

# Append the two forest/non-forest flags to header and place them right after
# the PlotObservationID:ESY columns
header <- left_join(
    header, 
    dplyr::select(plot.vegtype, PlotObservationID, is.forest, is.non.forest), 
    by="PlotObservationID") %>% 
  dplyr::select(PlotObservationID:ESY, is.forest, is.non.forest, everything())

# Write the updated header to disk
save(list="header", file="../_output/header_sPlot3.0.RData")

APPENDIX

Appendix 1 - Growth forms of most common species

As assigned manually.

# Print the growth-form CSV verbatim, one line of the file per output line
"../_derived/Species_missingGF_complete.csv" %>% 
  readLines() %>% 
  cat(sep="\n")
species,GrowthForm
Taraxacum,herb
Quercus robur,tree
Corylus avellana,tree
Frangula alnus,shrub
Festuca ovina,herb
Vaccinium vitis-idaea,shrub
NA,NA
Rubus,shrub
Capsella bursa-pastoris,herb
Salix cinerea,tree
Solanum dulcamara,herb
Tripolium pannonicum,herb
Impatiens noli-tangere,herb
Ononis spinosa,shrub
Centaurea nigra,herb
Rubus ulmifolius,shrub
Alisma plantago-aquatica,herb
Spirodela polyrhiza,herb
Salix,NA
Helictochloa pratensis,herb
Ruscus aculeatus,shrub
Lophozonia,tree
Stachys recta,herb
Crataegus laevigata,shrub
Festuca rupicola,herb
Metrosideros diffusa,herb
Rhamnus cathartica,shrub\tree
Helianthemum oelandicum,herb
Dicksonia squarrosa,herb
Rosa,shrub
Carex viridula,herb
Podocarpus spinulosus,shrub
Pinus mugo,tree
Orthilia secunda,herb
Cyathea smithii,tree
Erica arborea,shrub\tree
Hippocrepis emerus,herb
Phillyrea latifolia,tree
Triglochin palustris,herb
Metrosideros fulgens,other
Apera spica-venti,herb
Crataegus,shrub
Blechnum discolor,herb
Blechnum novae-zelandiae,herb
Tragopogon pratensis,herb
Bellidiastrum michelii,herb
Sedum album,herb
Raphanus raphanistrum,herb
Quercus coccifera,tree
Quercus mongolica,tree
Hydrocharis morsus-ranae,herb
Camellia japonica,shrub\tree
Arbutus unedo,shrub\tree
Dactylorhiza majalis,herb
Trachelospermum asiaticum,other
Myosotis laxa,herb
Valeriana crispa,herb
Hieracium lachenalii,herb
Festuca drymeja,herb
Asplenium flaccidum,herb
Rubus australis,other
Adenostyles alpina,herb
Viola,herb
Hymenophyllum demissum,herb
Hieracium,herb
Senecio nemorensis,herb
Lemna,herb
Microsorum pustulatum,herb
Epilobium ciliatum,herb
Paederia foetida,herb
Ledum palustre,shrub
Arctostaphylos uva-ursi,shrub
Poaceae,herb
Epilobium,herb
Alchemilla,herb
Genista sagittalis,shrub
Blechnum nipponicum,herb
Biscutella laevigata,herb
Galeopsis,herb
Ribes uva-crispa,shrub
Prunus mahaleb,shrub\tree
Asparagus officinalis,shrub
Disporum smilacinum,herb
Brunella vulgaris,herb
Veronica anagallis-aquatica,herb
Rhododendron kaempferi,shrub
Festuca,herb
Lipandra polysperma,herb
Sedum rupestre,herb
Helictochloa versicolor,herb
Hymenophyllum nephrophyllum,herb
Cephalotaxus harringtonia,shrub\tree
Helleborus odorus,herb
Hyacinthoides non-scripta,herb
Artemisia maritima,shrub
Helictochloa bromoides,herb
Salix euxina,tree
Viburnum furcatum,shrub
Hymenophyllum multifidum,herb
Asplenium bulbiferum,herb
Cotinus coggygria,shrub
Juniperus phoenicea,shrub\tree
Artemisia indica,herb
Pieris japonica,shrub\tree
Genista scorpius,shrub
Viburnum wrightii,shrub
Ampelopsis glandulosa,other
Potentilla pusilla,herb
Blechnum fluviatile,herb
Rubus palmatus,shrub
Artemisia santonicum,herb\shrub
Senecio leucanthemifolius,herb
Thymus,herb
Solidago canadensis,herb
Echinops ritro,herb
Seseli elatum,herb
Cymbidium goeringii,herb
Pleioblastus argenteostriatus,herb
Reynoutria japonica,herb
Rubus angloserpens,shrub
Noccaea,herb
Smilax glauca,other
Polystichum spinulosum,herb
Scirpus maritimus,herb
Luzuriaga parviflora,herb
Bryonia cretica,other
Kadsura japonica,other
Betula,tree
Carex goodenoughii,herb
Thymus longicaulis,herb
Thelypteris limbosperma,herb
Callitriche,herb
Salix pentandra,tree
Chenopodiastrum murale,herb
Quercus,tree
Parthenocissus tricuspidata,other
Aria alnifolia,tree
Callicarpa mollis,shrub
Amaranthus hybridus,herb
Leptospermum scoparium,shrub\tree
Corylus sieboldiana,shrub
Pittosporum tobira,shrub\tree
Torilis arvensis,herb
Zanthoxylum bungeanum,shrub\tree
Crepis vesicaria,herb
Dioscorea tokoro,herb
Leptopteris superba,herb
Cyanus montanus,herb
Prunus cerasifera,shrub\tree
Salix appendiculata,shrub
Lathyrus laxiflorus,herb
Galeopsis ladanum,herb
Ericameria nauseosa,shrub
Cyclamen hederifolium,herb
Hymenophyllum revolutum,herb
Dendropanax trifidus,shrub\tree
Lastreopsis hispida,herb
Pilosella hoppeana,herb
Vandasina retusa,other
Oxybasis rubra,herb
Dianthus hyssopifolius,herb
Clinopodium nepeta,herb
Cardamine glanduligera,herb
Chamaesyce peplis,herb
Pueraria montana,other
Alyssum turkestanicum,herb
Minuartia sedoides,herb
Cyanus triumfettii,herb
Cyclosorus pozoi,herb
Cyclamen repandum,herb
Astilbe thunbergii,herb
Anthyllis montana,herb
Mitchella undulata,herb
Krascheninnikovia ceratoides,shrub
Dioscorea japonica,other
Sibbaldianthe bifurca,herb
Tripterospermum trinervium,NA
Cerasus jamasakura,tree
Hierochloe repens,herb
Festuca gautieri,herb
Salicornia perennans,herb
Salix atrocinerea,tree
Agrostis,herb
Oxybasis glauca,herb
Saxifraga exarata,herb
Hymenophyllum flabellatum,herb
Salix viminalis,shrub
Sasa borealis,herb\shrub
Puccinellia festuciformis,herb
Symplocos sawafutagi,shrub
Athyrium yokoscense,herb
Rubus buergeri,shrub
Prunus leveilleana,tree
Pertya scandens,shrub
Dioscorea quaternata,other
Cyathea dealbata,shrub\tree
Calamagrostis stricta,herb
Soldanella carpatica,herb
Selinum pyrenaeum,herb
Laurus nobilis,shrub\tree
Ononis natrix,shrub
Farfugium japonicum,herb
Cornus sanguinea,shrub
Vaccinium microcarpum,shrub
Limonium meyeri,herb
Vaccinium japonicum,shrub
Scandix pecten-veneris,herb
Lemmaphyllum microphyllum,herb
Amaranthus blitum,herb
Chimaphila maculata,herb
Euphorbia nicaeensis,herb\shrub
Dodonaea viscosa,shrub\tree
Coprosma microcarpa,shrub
Lomandra multiflora,herb
Microlaena stipoides,herb
Microstegium vimineum,herb
Pteretis struthiopteris,herb
Rumex scutatus,herb
Podospermum canum,herb
Ampelodesmos mauritanicus,herb
Tmesipteris tannensis,herb
Allium carinatum,herb
Hymenophyllum dilatatum,herb
Lindsaea trichomanoides,herb
Pilosella bauhini,herb
Hymenophyllum sanguinolentum,herb
Elaeagnus pungens,shrub
Vitis vinifera,other
Mespilus germanica,shrub\tree
Odontarrhena,NA
Myosotis,herb
Teucrium pyrenaicum,herb
Centaurea thuillieri,herb
Vaccinium smallii,shrub
Hymenophyllum,herb
Carex kitaibeliana,herb
Pogostemon stellatus,herb
Vicia,herb
Quercus dalechampii,tree
Sedum roseum,herb
Stauntonia hexaphylla,other
Pulmonaria affinis,herb
Vaccinium bracteatum,shrub\tree
Lonicera gracilipes,shrub
Dryopteris setosa,herb
Herniaria hirsuta,herb
Aralia elata,shrub\tree
Eurybia divaricata,herb
Hydrangea scandens,shrub
Mentha,herb
Lindera benzoin,shrub
Juniperus virginiana,tree
Ainsliaea acerifolia,herb
x Ammocalamagrostis,NA
Galium,herb
Ligustrum tschonoskii,shrub
Blechnum chambersii,herb
Ulex parviflorus,shrub
Artemisia gmelinii,herb
Paliurus spina-christi,shrub
Luzula,herb
Piper kadsura,other
Polygonum maritimum,herb
Ulmus,tree
Actinidia arguta,other
Chenopodiastrum hybridum,herb
Stemona lucida,other
Rubia tatarica,herb
Vaccinium hirtum,shrub
Rhododendron maximum,shrub
Anisocampium niponicum,herb
Sticherus cunninghamii,herb
Smilax sieboldii,other
Potentilla humifusa,herb
Cyathea colensoi,herb\shrub
Endiandra virens,tree
Polygonum equisetiforme,herb
Dryopteris lacera,herb
Hylodesmum podocarpum,herb
Rumex,herb
Aphananthe aspera,tree
Geranium solanderi,herb
Pseudopanax linearis,shrub
Sedum alpestre,herb
Lepisorus thunbergianus,herb
Aria japonica,tree
Elytrigia repens,herb
Ainsliaea apiculata,herb
Senecio,NA
Schisandra repanda,other
Cardamine,herb
Carex dolichostachya,herb
Potentilla supina,herb
Schizocodon soldanelloides,herb
Rhaphiolepis indica,shrub
Scilla lilio-hyacinthus,herb
Clinopodium menthifolium,herb
Aster,NA
Sasa palmata,herb
Brucea javanica,shrub
Anemone scherfelii,herb
Arundinella hirta,herb
Thymus nervosus,herb
Laportea bulbifera,herb
No suitable,NA
Potentilla montana,herb
Leptopteris hymenophylloides,herb
Solidago,herb
Compositae,NA
Pimpinella tragium,herb
Soldanella hungarica,herb
Leptorumohra mutica,herb
Artemisia pontica,herb
Verbascum,herb
Carex lenta,herb
Fraxinus chinensis,tree
Centranthus ruber,herb
Sesbania sesban,tree
Phormium colensoi,herb
Asparagus aphyllus,herb\shrub
Nasturtium,herb
Carex conica,herb
Lauraceae,NA
Dumasia truncata,other
Pilosella floribunda,herb
Goodenia geniculata,herb
Medicago intertexta,herb
Prunus,shrub\tree
Austrostipa scabra,herb
Juncus,herb
Sempervivum arachnoideum,herb
Thymus striatus,herb
Jasione crispa,herb
Echinochloa crusgalli,herb
Lindera glauca,shrub
Laburnum anagyroides,shrub
Oxalis pes-caprae,herb
Dianella nigra,herb
Jacobaea subalpina,herb
Campanula serrata,herb
Piptatherum coerulescens,herb
Carex pisiformis,herb
Geum sylvaticum,herb
Minuartia recurva,herb
Globularia repens,herb
Fraxinus,tree
Eucalyptus phaenophylla,tree
Osmorhiza aristata,herb
Leguminosae,NA
Helictochloa marginata,herb
Polygonatum lasianthum,herb
Rosa dumalis,shrub
Hymenophyllum scabrum,herb
Puccinellia gigantea,herb
Heloniopsis orientalis,herb
Anthemis cretica,herb
Styrax officinalis,shrub
Hosta sieboldiana,herb
Earina mucronata,herb
Calamagrostis hakonensis,herb
Tragopogon podolicus,herb
Thymus pulcherrimus,herb
Adenophora triphylla,herb
Aster ovatus,herb
Crepis lampsanoides,herb
Panicum boscii,herb
Pluchea dioscoridis,shrub
Amelanchier laevis,tree
Silene pusilla,herb
Eupatorium makinoi,herb
Polyphlebium venosum,herb
Uncinia,herb
Rubia argyi,other
Plagiogyria matsumureana,herb
Dryopteris,herb
Symphytum cordatum,herb
Ononis striata,herb
Allium,herb
Ruscus hypoglossum,shrub
Parathelypteris japonica,herb
Cyrtomium fortunei,herb
Ranunculus taisanensis,herb
Desmodium brachypodum,herb
Carex blepharicarpa,herb
Viburnum phlebotrichum,shrub
Atractylodes ovata,NA
Cichorium pumilum,herb
Ranunculus,herb
Cyperus gracilis,herb
Carex stenostachys,herb
Diplopterygium glaucum,herb
Sesleria rigida,herb
Centaurea,herb
Opuntia,other
Galium octonarium,herb
Pseudowintera axillaris,shrub\tree
Tricyrtis affinis,herb
Asplenium platyneuron,herb
Clematis terniflora,other
Parsonsia heterophylla,other
Raukaua edgerleyi,tree
Dianthus giganteiformis,herb
Viola sieheana,herb
Hosta sieboldii,herb
Sasa nipponica,herb
Cirsium,herb
Arachniodes standishii,NA
Paspalidium geminatum,herb
Alhagi graecorum,shrub
Cuscuta campestris,other
Allium saxatile,herb
Trifolium,herb
Persicaria longiseta,NA
Jacobaea maritima,NA
Acer shirasawanum,tree
Athyrium vidalii,herb
Centaurea nemoralis,herb
Circaea ×,herb
Dactylorhiza,herb
Xanthorrhoea acaulis,other
Cynoglossum,herb
Boehmeria silvestrii,herb\shrub
Serratula coronata,herb
Salix phylicifolia,shrub
Genista depressa,NA
Populus,tree
Phlegmariurus,NA
Atropa bella-donna,herb
Bignonia capreolata,other
Amelanchier,shrub\tree
Launaea nudicaulis,herb
Photinia glabra,tree
Suaeda acuminata,herb
Gonocarpus teucrioides,herb\shrub
Pulsatilla grandis,herb
Sesleria comosa,herb
Patzkea spadicea,herb
Koeleria nitidula,herb
Orobanche crenata,other
Achillea asiatica,herb
Paris tetraphylla,herb
Edraianthus graminifolius,herb
Clematis apiifolia,other
Thelypteris acuminata,herb
Patzkea paniculata,herb
Dichondra,herb
Dryopteris pseudomas,herb
Festuca hystrix,herb
Blechnum minus,herb
Maianthemum japonicum,herb
Millettia japonica,NA
Pteris cretica,herb
Leucanthemum rotundifolium,herb
Pyrrosia eleagnifolia,other
Elionurus citreus,herb
Ochlopoa supina,NA
Crocus veluchensis,herb
Galium maritimum,herb
Crepis albida,herb
Solidago curtisii,herb
Coptis trifolia,herb
Syneilesis palmata,herb
Chenopodium bonus-henricus,herb
Potentilla,herb
Artemisia lerchiana,herb
Lathyrus pisiformis,herb
Euphorbia plumerioides,NA
Ophiopogon planiscapus,herb
Ranunculus aduncus,herb
Scabiosa triniifolia,herb
Viola kusanoana,herb
Rytidosperma linkii,herb
Festuca dalmatica,herb
Berchemia racemosa,shrub
Lespedeza maximowiczii,shrub
Wisteria brachybotrys,NA
Quercus infectoria,shrub\tree
Asarum caucasicum,herb
Centaurea aspera,herb
Lechenaultia filiformis,NA
Tragopogon porrifolius,herb
Athyrium asplenioides,herb
Silene sericea,herb
Scrophularia alpestris,herb
Rhododendron pentandrum,NA
Thymus comosus,herb
Sanicula chinensis,herb
Inula oculus-christi,herb
Lamium,herb
Arachniodes aristata,NA
Onosma simplicissima,NA
Ranunculus pseudomontanus,herb
Corylus cornuta,shrub
Arachniodes sporadosora,NA
Orostachys spinosa,other
Olearia lacunosa,shrub\tree
Carthamus mitissimus,herb
Stewartia pseudocamellia,tree
Eucalyptus indurata,tree
Prosopis glandulosa,shrub\tree
Aurinia saxatilis,herb
Dampiera purpurea,herb\shrub
Cirsium nipponicum,NA
Patrinia villosa,NA
Galium pseudoaristatum,herb
Rhinanthus,herb
Leionema elatius,shrub
Arrhenatherum longifolium,herb
Limonium bellidifolium,herb
Brachiaria whiteana,herb
Adiantum capillus-veneris,herb
Vittadinia cuneata,herb
Carex rhizina,herb
Tephrosia,NA
Leontopodium nivale,herb
Crocus caeruleus,herb
Cuscuta,other
Pyrrosia lingua,herb
Ficaria fascicularis,herb
Pilosella peleteriana,herb
Dinebra decipiens,herb
Psychotria asiatica,shrub
Vicia pyrenaica,herb
Galax urceolata,herb
Aristolochia serpentaria,herb
Sedum brevifolium,herb
Impatiens atrosanguinea,herb
Dapsilanthus ramosus,herb
Nephrodium sabaei,herb
Silene rubella,herb
Blechnum procerum,herb
Phyllanthera grayi,tree
Lycopodium alpinum,herb
Codonopsis lanceolata,other
Persicaria senegalensis,herb
Bolboschoenus glaucus,herb
Clematis japonica,NA
Asplenium incisum,herb
Chrysothamnus,NA
Kunzea ericoides,shrub\tree
Elatostema involucratum,herb
Liriope minor,herb
Campanula spatulata,herb
Orobanche,other
Laserpitium krapffii,herb
Picrothamnus,NA
Thymus roegneri,herb
Achillea coarctata,herb
Cephalaria uralensis,herb
Artemisia nitrosa,herb
Ozothamnus tesselatus,NA
Sedum urvillei,herb
Lamium garganicum,herb
Pyrola asarifolia,herb
Orites lancifolius,shrub
Polygonatum falcatum,herb
Cerastium,herb
Gaultheria procumbens,herb
Keraudrenia hookeriana,NA
Polystichum polyblepharum,herb
Lindera sericea,NA
Paesia scaberula,herb
Litsea japonica,shrub
Crepis fraasii,herb
Hypecoum imberbe,herb
Plantago monosperma,herb
Quercus rosacea,tree
Halesia tetraptera,tree
Polystichum retrosopaleaceum,herb
Leptorumohra miqueliana,herb
Boehmeria spicata,shrub
lachenalii subsp.,NA
Amaranthus graecizans,herb
Cephalomanes obscurum,herb
Sedum amplexicaule,herb
Alectryon oleifolius,tree
Galium bungei,herb
Tmesipteris,NA
Blechnum filiforme,herb
Hieracium transylvanicum,herb
Viola orbiculata,herb
Spiraea crenata,shrub
Molinia japonica,herb
Actinidia polygama,other
Bursaria spinosa,shrub\tree
Acacia aneura,tree
Heterachne,NA
Oenanthe javanica,herb
Lemna aequinoctialis,herb
Calythrix,shrub
Senecio aegyptius,NA
Petasites frigidus,herb
Dalbergia densa,other
Carex morrowii,herb
Viola vaginata,herb
Alpinia intermedia,NA
Enkianthus campanulatus,NA
Leucopogon,NA
Menziesia ferruginea,shrub
Spiraea media,shrub
Dryopteris pacifica,herb
Minuartia setacea,herb
Salvia officinalis,herb
Coprosma dumosa,shrub
Bidens,NA
Aristida vagans,herb
Phragmites japonicus,herb
Lysimachia japonica,NA
Knautia arvernensis,herb
Ononis cristata,NA
Lamyropsis cynaroides,NA
Puccinellia tenuissima,NA
Burchardia congesta,herb
Galium trifidum,herb
Armeria canescens,herb
Minuartia laricifolia,herb
Carex reinii,herb
Picea,tree
Senna,NA
Asarum sieboldii,herb
Atriplex,NA
Pseudoraphis,NA
Symphyotrichum lateriflorum,herb
Panicum effusum,herb
Microlepia marginata,NA
Prunus apetala,shrub\tree
Alyssum obovatum,herb
Bromus,herb
Rubus pannosus,shrub
Sedobassia sedoides,herb
Alyssum hirsutum,herb
Astelia,NA
Prosartes lanuginosa,herb
Jacobaea adonidifolia,herb
Helleborus purpurascens,herb
Ulmus davidiana,tree
Campanula sparsa,herb
Gleichenia,NA
Veratrum maackii,NA
Sorghum virgatum,herb
Rhododendron lagopus,shrub
Blechnum nigrum,herb
Leucopogon muticus,shrub
Biscutella auriculata,herb
Geranium collinum,herb
Centranthus calcitrapae,herb
Oxalis griffithii,herb
Festuca pseudodalmatica,herb
Galatella angustissima,herb
Prenanthes,herb
Gaultheria myrsinoides,shrub
Sarcobatus baileyi,shrub
Vitis heyneana,other
Dioscorea gracillima,NA
Launaea fragilis,herb
Sonchus bulbosus,herb
Leptospermum polygalifolium,shrub
Digitaria,herb
Lycopodium volubile,herb
Aralia cordata,herb
Carex concinnoides,herb
Avenula pubescens,herb
Pleurospermum uralense,herb
Taraxacum hamatum,herb
Ranunculus reflexus,herb
Euphorbia subcordata,herb
Ferulago sylvatica,herb
Carthamus carduncellus,herb
Psychotria serpens,other
Sonchus,NA

SessionInfo

# Report the R version, platform, locale and package versions of this session
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] viridis_0.5.1     viridisLite_0.3.0 caret_6.0-84      lattice_0.20-40  
##  [5] kableExtra_1.1.0  knitr_1.28        data.table_1.12.8 forcats_0.5.0    
##  [9] stringr_1.4.0     dplyr_0.8.5       purrr_0.3.3       readr_1.3.1      
## [13] tidyr_1.0.2       tibble_2.1.3      ggplot2_3.3.0     tidyverse_1.3.0  
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.1           jsonlite_1.6.1       splines_3.6.3       
##  [4] foreach_1.4.8        prodlim_2019.11.13   modelr_0.1.6        
##  [7] assertthat_0.2.1     highr_0.8            stats4_3.6.3        
## [10] cellranger_1.1.0     yaml_2.2.1           ipred_0.9-9         
## [13] pillar_1.4.2         backports_1.1.5      glue_1.3.1          
## [16] digest_0.6.23        rvest_0.3.5          colorspace_1.4-1    
## [19] recipes_0.1.9        htmltools_0.4.0      Matrix_1.2-18       
## [22] plyr_1.8.6           timeDate_3043.102    pkgconfig_2.0.3     
## [25] broom_0.5.5          haven_2.2.0          scales_1.1.0        
## [28] webshot_0.5.2        gower_0.2.1          lava_1.6.6          
## [31] generics_0.0.2       ellipsis_0.3.0       withr_2.1.2         
## [34] nnet_7.3-13          cli_2.0.2            survival_3.1-11     
## [37] magrittr_1.5         crayon_1.3.4         readxl_1.3.1        
## [40] evaluate_0.14        fs_1.3.2             fansi_0.4.1         
## [43] nlme_3.1-145         MASS_7.3-51.5        xml2_1.2.2          
## [46] class_7.3-15         tools_3.6.3          hms_0.5.3           
## [49] lifecycle_0.2.0      munsell_0.5.0        reprex_0.3.0        
## [52] e1071_1.7-3          compiler_3.6.3       rlang_0.4.4         
## [55] grid_3.6.3           iterators_1.0.12     rstudioapi_0.11     
## [58] rmarkdown_2.1        gtable_0.3.0         ModelMetrics_1.2.2.1
## [61] codetools_0.2-16     DBI_1.1.0            reshape2_1.4.3      
## [64] R6_2.4.1             gridExtra_2.3        lubridate_1.7.4     
## [67] utf8_1.1.4           stringi_1.4.6        Rcpp_1.0.3          
## [70] vctrs_0.2.3          rpart_4.1-15         dbplyr_1.4.2        
## [73] tidyselect_1.0.0     xfun_0.12