Timestamp: Mon Nov 30 12:32:53 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents 1) the construction of Community Weighted Means (CWMs) and Variances (CWVs); and 2) the classification of plots into forest/non-forest based on species growth forms. It complements species composition data from sPlot 3.0 and gap-filled plant functional traits from TRY 5.0, as received from Jens Kattge on Jan 21, 2020.

Changes in version 1.1 - Standardized Growth form names in sPlot_traits.

library(tidyverse)
library(readr)
library(data.table)
library(knitr)
library(kableExtra)
library(stringr)
library(caret)
library(viridis)

1 Data import, preparation and cleaning

# Alternative (server) locations of files with the same names, kept commented out
#load("/data/sPlot/releases/sPlot3.0/DT_sPlot3.0.RData")
#load("/data/sPlot/releases/sPlot3.0/Backbone3.0.RData")
# Load the local copies of the two sPlot 3.0 .RData files into the workspace.
# NOTE(review): the code further down uses `Backbone` and `DT2` without
# creating them, so they presumably come from these files - confirm.
load("../_output/Backbone3.0.RData")
load("../_output/DT_sPlot3.0.RData")

Import TRY data

# Taxonomic lookup table shipped with the gap-filled data (Species, Genus, Family)
try.species <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv",
  locale = locale(encoding = "latin1")
)

# Original (not gap-filled) data, carrying the species and trait labels.
# Column types: three doubles followed by 89 character columns (5 + 84).
try.allinfo <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv",
  col_types = paste0("dddccccc", strrep("c", 84)),
  locale = locale(encoding = "latin1")
)

# Gap-filled trait values, one row per individual - rows ordered as in try.allinfo
try.individuals0 <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv",
  locale = locale(encoding = "latin1")
)

There are 609355 individual observations from 52104 distinct (unresolved) species in 7960 distinct (unresolved) genera.

1.2 Attach resolved names from Backbone

# For every TRY record, look up the resolved name (Name_short) by joining the
# Backbone on the TRY species name. Records without a match get Name_short = NA.
try.species.names <- left_join(
  x = try.allinfo %>% 
    dplyr::select(Species, Genus, GrowthForm),
  y = Backbone %>% 
    dplyr::select(Species = Name_sPlot_TRY, Name_short),
  by = "Species") %>% 
  dplyr::select(Species, Name_short, Genus, GrowthForm)

After attaching resolved names, TRY data contains information on 50612 species.
Check for how many of the species in sPlot, trait information is available in TRY.

# Distinct species names occurring in the sPlot species table
sPlot.species <- distinct(DT2, Species)

# Subset of those names that also occur among the resolved TRY names
sPlot.in.TRY <- sPlot.species %>% 
  filter(Species %in% unique(try.species.names$Name_short))

Out of the 76912 standardized species names in sPlot 3.0, 29519 (38.4%) also occur in TRY 5.0. This number does not account for matches at the genus level.

1.3 Create legend of trait names

# Build a legend with one row per trait: numeric trait code, full trait name,
# short name used as column label, and a flag telling whether a gap-filled
# column for that trait exists in try.individuals0.
# Start from the names of the "StdValue_" columns of try.allinfo, prefix removed.
trait.legend <- data.frame(full=try.allinfo %>% 
                             dplyr::select(starts_with("StdValue_")) %>% 
                             colnames() %>% 
                             gsub("StdValue_", "", .) %>% 
                             sort()) %>%
  mutate(full=as.character(full)) %>% 
  # the first number found in each name is taken as the trait code
  mutate(traitcode=parse_number(full)) %>% 
  arrange(traitcode) %>% 
  dplyr::select(traitcode, everything()) %>% 
  # drop the leading "<digits>_" from the full name
  mutate(full=gsub(pattern = "^[0-9]+_", replacement="", full)) %>% 
  # Short names are assigned BY POSITION: the 41 entries below must stay in
  # ascending traitcode order, one per "StdValue_" column.
  mutate(short=c("StemDens", "RootingDepth","LeafC.perdrymass", "LeafN","LeafP",
                 "StemDiam","SeedMass", "Seed.length","LeafThickness","LDMC",
                 "LeafNperArea","LeafDryMass.single","Leaf.delta.15N","SeedGerminationRate",
                 "Seed.num.rep.unit","LeafLength","LeafWidth","LeafCN.ratio","Leaffreshmass",
                 "Stem.cond.dens","Chromosome.n","Chromosome.cDNAcont", 
                 "Disp.unit.leng","StemConduitDiameter","Wood.vessel.length",
                 "WoodFiberLength","SpecificRootLength.fine","SpecificRootLength",
                 "PlantHeight.veg","PlantHeight.generative","LeafArea.leaf.noPet",
                 "LeafArea.leaflet.noPet","LeafArea.leaf.wPet","LeafArea.leaflet.wPet",
                 "LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef",
                 "SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>% 
  ## Add SLA missing from allinfo file
  bind_rows(data.frame(traitcode=11, 
                       full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)",
                       short="SLA")) %>% 
  # Same for plant height (code 18), added by hand like SLA above
  bind_rows(data.frame(traitcode=18, 
                       full="Plant height (vegetative + generative)", 
                       short="PlantHeight")) %>%
  arrange(traitcode) %>% 
  #create a column to mark traits for which gap filled data is available.
  # (a trait is available when try.individuals0 has a column named "X<traitcode>")
  mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0))
Legend of traits from TRY
traitcode full short available
4 Stem specific density (SSD) or wood density (stem dry mass per stem fresh volume)_g/cm3 StemDens TRUE
6 Root rooting depth_m RootingDepth TRUE
11 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA) SLA TRUE
13 Leaf carbon (C) content per leaf dry mass_mg/g LeafC.perdrymass TRUE
14 Leaf nitrogen (N) content per leaf dry mass_mg/g LeafN TRUE
15 Leaf phosphorus (P) content per leaf dry mass_mg/g LeafP TRUE
18 Plant height (vegetative + generative) PlantHeight TRUE
21 Stem diameter_m StemDiam TRUE
26 Seed dry mass_mg SeedMass TRUE
27 Seed length_mm Seed.length TRUE
46 Leaf thickness_mm LeafThickness TRUE
47 Leaf dry mass per leaf fresh mass (leaf dry matter content, LDMC)_g g-1 LDMC TRUE
50 Leaf nitrogen (N) content per leaf area_g m-2 LeafNperArea TRUE
55 Leaf dry mass (single leaf)_mg LeafDryMass.single TRUE
78 Leaf nitrogen (N) isotope signature (delta 15N)_per mill Leaf.delta.15N TRUE
95 Seed germination rate (germination efficiency)_% SeedGerminationRate TRUE
138 Seed number per reproducton unit_number Seed.num.rep.unit TRUE
144 Leaf length_mm LeafLength TRUE
145 Leaf width_cm LeafWidth TRUE
146 Leaf carbon/nitrogen (C/N) ratio_g/cm3 LeafCN.ratio TRUE
163 Leaf fresh mass_g Leaffreshmass TRUE
169 Stem conduit density (vessels and tracheids)_mm-2 Stem.cond.dens TRUE
223 Species genotype: chromosome number_dimensionless Chromosome.n TRUE
224 Species genotype: chromosome cDNA content_pg Chromosome.cDNAcont TRUE
237 Dispersal unit length_mm Disp.unit.leng TRUE
281 Stem conduit diameter (vessels, tracheids)_micro m StemConduitDiameter TRUE
282 Wood vessel element length; stem conduit (vessel and tracheids) element length_micro m Wood.vessel.length TRUE
289 Wood fiber lengths_micro m WoodFiberLength TRUE
614 Fine root length per fine root dry mass (specific fine root length, SRL)_cm/g SpecificRootLength.fine FALSE
1080 Root length per root dry mass (specific root length, SRL)_cm/g SpecificRootLength TRUE
3106 Plant height vegetative_m PlantHeight.veg FALSE
3107 Plant height generative_m PlantHeight.generative FALSE
3108 Leaf area (in case of compound leaves: leaf, petiole excluded)_mm2 LeafArea.leaf.noPet FALSE
3109 Leaf area (in case of compound leaves: leaflet, petiole excluded)_mm2 LeafArea.leaflet.noPet FALSE
3110 Leaf area (in case of compound leaves: leaf, petiole included)_mm2 LeafArea.leaf.wPet FALSE
3111 Leaf area (in case of compound leaves: leaflet, petiole included)_mm2 LeafArea.leaflet.wPet FALSE
3112 Leaf area (in case of compound leaves: leaf, undefined if petiole in- or excluded)_mm2 LeafArea.leaf.undef TRUE
3113 Leaf area (in case of compound leaves: leaflet, undefined if petiole is in- or excluded)_mm2 LeafArea.leaflet.undef TRUE
3114 Leaf area (in case of compound leaves undefined if leaf or leaflet, undefined if petiole is in- or excluded)_mm2 LeafArea.undef.undef TRUE
3115 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded_mm2 mg-1 SLA.noPet FALSE
3116 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole included_mm2 mg-1 SLA.wPet FALSE
3117 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded_mm2 mg-1 SLA.undef FALSE
3120 Leaf water content per leaf dry mass (not saturated)_g(W)/g(DM) LeafWaterCont TRUE

Use trait legend to change naming of try.individuals0 data.frame of traits

# Rename the gap-filled trait columns from "X<traitcode>" to their short names.
# Only legend rows flagged as available are used; col.from and col.to are
# parallel vectors (same legend rows, same order).
col.to <- trait.legend$short[trait.legend$available]
col.from <- paste0("X", trait.legend$traitcode[trait.legend$available])

try.individuals <- rename_at(try.individuals0, 
                             col.from, 
                             .funs = function(old.names) col.to)

1.3 Fix some known errors in the gap-filled matrix

Check traits at the individual level. There are some traits with unexpected negative entries:

# Count, for each trait, the individual records holding a negative value.
# X1 (the record index) is dropped so that only trait columns are reshaped.
try.individuals %>% 
  dplyr::select(-X1) %>% 
  pivot_longer(cols = everything(), names_to = "variable", values_to = "value") %>% 
  filter(value < 0) %>% 
  count(variable)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 2
##   variable                 n
##   <chr>                <int>
## 1 LDMC                   419
## 2 LeafC.perdrymass         9
## 3 Leaf.delta.15N      262283
## 4 SeedGerminationRate    120
## 5 StemDens               337

According to Jens Kattge, the entries for Leaf.delta.15N are legitimate, while in the other cases the negative values may be due to bad predictions. He suggested deleting these negative records.
Similarly, there are records with impossible values for height. Some species were incorrectly predicted to have a height >100 meters, and some herbs were predicted to have a height >10 m.

# Prepend the resolved species name to every individual record. The two tables
# are bound by row position, so they must hold the same records in the same order.
try.individuals <- bind_cols(
  try.species.names %>% 
    dplyr::select(Name_short), 
  try.individuals)

# (1) Records with a negative value in any trait other than Leaf.delta.15N.
#     A record appears once per offending trait, so IDs can be repeated.
toexclude <- try.individuals %>% 
  gather(variable, value, -X1, -Name_short) %>% 
  filter(variable != "Leaf.delta.15N", value < 0) %>% 
  pull(X1)

# (2) Records taller than 100 m, unless they belong to one of the two species
#     listed below
toexclude2 <- try.individuals %>% 
  filter(PlantHeight > 100, 
         !(Name_short %in% c("Pseudotsuga menziesii", "Sequoia sempervirens"))) %>% 
  pull(X1)

# (3) Records labelled as "herb" in try.allinfo but taller than 10 m
toexclude3 <- try.individuals %>% 
  filter(PlantHeight > 10, 
         X1 %in% (try.allinfo %>% 
                    filter(GrowthForm == "herb") %>% 
                    pull(X1))) %>% 
  pull(X1)

# Drop every flagged record, then the record index, which is no longer needed
try.individuals <- try.individuals %>% 
  filter(!(X1 %in% c(toexclude, toexclude2, toexclude3))) %>% 
  dplyr::select(-X1)

This results in the exclusion of 874 individuals. In this way the total number of species included in TRY reduces to 50404

1.4 Calculate species and genus level trait means and sd

## Calculate species level trait means and sd. 
# Output: one row per Name_short, with n (number of individual records) followed
# by all <trait>_mean columns and then all <trait>_sd columns. summarize_at() is
# kept on purpose: later code selects column ranges such as
# StemDens_mean:LeafWaterCont_sd, which depend on this column order.
# sd is NA for taxa with a single record.
# The join is wrapped in {} so that magrittr does not ALSO inject the grouped
# table as an extra positional argument of left_join() (it would silently end
# up in `copy`).
# NOTE(review): records whose Name_short is NA are pooled into one NA group -
# confirm this is intended.
try.species.means <- try.individuals %>% 
  group_by(Name_short) %>% 
  {left_join(
    #Add a field to indicate the number of observations per taxon
    x=summarize(., n=n()), 
    y=summarize_at(., 
                   .vars=vars(StemDens:LeafWaterCont),
                   .funs=list(mean=~mean(.), sd=~sd(.))),
    by="Name_short")} %>% 
  dplyr::select(Name_short, n, everything())
## `summarise()` ungrouping output (override with `.groups` argument)
## Calculate genus level trait means and sd.
# Genus is taken as the first word of the resolved name (Name_short).
# Output: one row per Genus, with n (number of individual records) followed by
# all <trait>_mean columns and then all <trait>_sd columns (same layout as the
# species-level table).
# The join is wrapped in {} so that magrittr does not ALSO inject the grouped
# table as an extra positional argument of left_join() (it would silently end
# up in `copy`).
# NOTE(review): records whose Name_short is NA yield Genus = NA and are pooled
# into one NA group - confirm this is intended.
try.genus.means <- try.individuals %>% 
  mutate(Genus=word(Name_short, 1)) %>% 
  group_by(Genus) %>% 
  {left_join(
    x=summarize(., n=n()), 
    y=summarize_at(., 
                   .vars=vars(StemDens:LeafWaterCont),
                   .funs=list(mean=~mean(.), sd=~sd(.))),
    by="Genus")} %>% 
  dplyr::select(Genus, n, everything())
## `summarise()` ungrouping output (override with `.groups` argument)

The average number of observations per species and genus is 12.1 and 81.5, respectively. As many as 17443 species have only one observation (1250 at the genus level).

Example of trait means for 15 randomly selected species
Name_short n StemDens_mean RootingDepth_mean SLA_mean LeafC.perdrymass_mean LeafN_mean LeafP_mean PlantHeight_mean StemDiam_mean SeedMass_mean Seed.length_mean LeafThickness_mean LDMC_mean LeafNperArea_mean LeafDryMass.single_mean Leaf.delta.15N_mean SeedGerminationRate_mean Seed.num.rep.unit_mean LeafLength_mean LeafWidth_mean LeafCN.ratio_mean Leaffreshmass_mean Stem.cond.dens_mean Chromosome.n_mean Chromosome.cDNAcont_mean Disp.unit.leng_mean StemConduitDiameter_mean Wood.vessel.length_mean WoodFiberLength_mean SpecificRootLength_mean LeafArea.leaf.undef_mean LeafArea.leaflet.undef_mean LeafArea.undef.undef_mean LeafWaterCont_mean StemDens_sd RootingDepth_sd SLA_sd LeafC.perdrymass_sd LeafN_sd LeafP_sd PlantHeight_sd StemDiam_sd SeedMass_sd Seed.length_sd LeafThickness_sd LDMC_sd LeafNperArea_sd LeafDryMass.single_sd Leaf.delta.15N_sd SeedGerminationRate_sd Seed.num.rep.unit_sd LeafLength_sd LeafWidth_sd LeafCN.ratio_sd Leaffreshmass_sd Stem.cond.dens_sd Chromosome.n_sd Chromosome.cDNAcont_sd Disp.unit.leng_sd StemConduitDiameter_sd Wood.vessel.length_sd WoodFiberLength_sd SpecificRootLength_sd LeafArea.leaf.undef_sd LeafArea.leaflet.undef_sd LeafArea.undef.undef_sd LeafWaterCont_sd
Erythronium helenae 1 0.317 0.257 19.784 477.909 32.024 2.287 0.195 0.001 4.736 6.676 0.312 0.166 1.682 86.472 -0.894 89.529 1491.996 138.658 1.370 15.931 0.504 138.997 30.808 43.594 7.628 30.327 910.255 1607.261 7143.899 1789.947 1969.877 1509.740 7.255 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Persoonia arborea 1 0.760 2.559 5.008 485.746 7.041 0.327 2.397 0.262 182.507 9.175 0.454 0.412 1.487 90.245 2.701 82.643 157.004 67.807 1.064 67.115 0.202 44.755 11.316 3.298 11.606 21.269 318.888 960.371 3195.023 327.684 410.933 361.406 2.493 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Trichilia japurensis 1 0.609 0.835 15.914 485.048 26.647 1.773 14.551 0.167 192.250 19.486 0.177 0.360 1.688 452.891 4.548 86.983 4220.298 154.101 6.125 18.111 1.335 100.418 22.844 4.110 31.558 29.374 741.991 1699.542 4103.808 7449.858 3474.489 7109.743 3.144 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Thymus odoratissimus 13 0.288 0.295 18.776 453.988 19.238 1.786 0.082 0.011 0.256 0.873 0.264 0.347 1.046 1.373 -2.206 98.813 78.585 4.288 0.144 26.003 0.005 162.487 39.577 2.305 1.055 26.507 288.931 335.028 2626.616 29.739 35.015 11.680 3.361 0.006 0.017 2.408 1.619 0.848 0.088 0.012 0.001 0.018 0.037 0.014 0.021 0.096 0.171 0.138 0.510 11.950 0.274 0.012 0.900 0.000 7.779 0.668 0.029 0.035 0.559 6.836 5.787 201.291 3.780 3.686 1.209 0.296
Machilus kwangtungensis 1 0.490 1.244 12.037 486.193 15.901 0.871 9.986 0.098 316.964 11.265 0.251 0.391 1.311 238.098 -1.585 92.824 7.505 94.124 2.713 33.317 0.611 15.899 20.120 2.719 17.084 30.685 287.657 751.789 1583.783 2439.004 1700.093 2380.238 2.262 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Lettowianthus stellatus 2 0.570 1.653 13.146 477.414 22.917 1.296 13.352 0.196 297.448 7.565 0.204 0.389 1.724 677.274 2.056 60.784 47.077 138.938 6.902 20.666 1.773 13.145 31.287 2.212 6.534 96.850 512.782 1141.476 659.456 8909.863 5775.238 10375.603 2.767 0.003 0.057 0.279 0.741 0.070 0.021 0.747 0.018 49.588 0.146 0.002 0.005 0.029 21.654 0.000 0.141 0.541 5.675 0.112 0.135 0.023 0.885 0.205 0.024 0.053 3.579 1.927 4.150 53.949 14.852 47.986 126.039 0.081
Sesamum alatum 11 0.655 9.401 14.764 386.093 15.164 2.320 0.865 0.179 1.875 1.096 0.265 0.237 1.478 35.658 0.754 86.328 25.000 48.712 0.775 34.514 0.122 9.431 20.797 2.475 1.415 56.033 251.858 547.576 1206.730 335.724 1024.770 67.354 2.910 0.007 0.587 0.408 1.831 0.326 0.053 0.251 0.018 0.217 0.084 0.005 0.005 0.029 11.083 0.135 0.991 8.416 26.095 0.443 0.515 0.038 0.586 0.785 0.099 0.098 1.600 12.661 23.197 50.856 104.516 273.470 28.862 0.071
Athyrium 2 0.225 0.439 33.111 431.168 24.312 4.039 0.550 0.033 1.364 3.527 0.114 0.196 0.913 397.442 -1.543 70.828 1914970.336 474.375 0.625 18.574 1.791 78.770 72.317 11.123 6.974 75.307 1575.374 1440.113 1000.705 12708.255 3095.665 419.890 7.674 0.026 0.112 15.979 15.422 4.625 0.825 0.042 0.013 0.021 0.087 0.026 0.039 0.319 179.317 0.234 0.189 410118.491 9.989 0.041 5.523 0.494 2.007 1.105 0.033 0.038 2.284 2.487 68.987 129.182 513.560 218.896 22.445 1.675
Lupinus princei 2 0.382 1.578 15.156 411.141 35.057 2.238 0.639 0.014 122.001 7.947 0.246 0.191 2.270 95.754 0.571 96.377 41.579 39.911 1.446 12.700 0.501 47.363 39.739 1.830 8.344 95.265 365.863 715.749 1340.251 1181.745 1500.171 2357.628 6.642 0.000 0.132 0.288 0.187 0.665 0.085 0.081 0.001 54.627 1.060 0.001 0.005 0.063 3.349 0.026 0.036 3.201 0.884 0.099 0.210 0.011 1.105 0.493 0.014 0.988 0.154 2.228 2.366 138.781 40.374 42.762 45.402 0.041
Tarasa operculata 1 0.478 0.866 15.237 447.580 22.084 1.349 2.700 0.108 0.833 1.429 0.226 0.301 1.591 270.698 1.421 90.368 430.270 78.956 3.379 23.971 0.894 24.820 37.151 3.244 2.027 35.261 447.278 748.905 2093.461 3285.121 2129.248 3274.885 3.425 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Cryptantha flava 1 0.537 0.867 9.682 417.567 15.933 1.351 0.597 0.081 0.705 1.668 0.333 0.228 1.710 110.548 2.191 85.209 163.982 38.579 1.534 29.334 0.462 32.462 31.982 3.762 2.106 34.258 457.839 861.501 3903.507 784.048 1052.251 770.115 3.934 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Plantago webbii 1 0.255 0.448 13.952 423.873 19.864 1.546 0.396 0.008 1.091 2.282 0.277 0.231 1.385 57.559 2.186 94.312 1071.165 87.469 2.117 21.221 0.269 63.243 14.664 2.354 2.437 31.656 305.043 623.819 12785.441 608.057 802.403 1188.235 5.377 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Lepidium montanum 1 0.436 0.758 19.024 427.743 32.777 2.407 0.218 0.011 0.236 1.342 0.178 0.191 1.713 5.928 1.780 93.296 1098.633 18.435 0.418 15.082 0.030 78.726 24.185 1.300 1.652 38.886 160.863 424.620 11908.769 125.143 154.046 61.024 6.687 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Carex coriacea 2 0.328 0.356 14.387 451.291 19.110 1.438 0.306 0.009 0.921 2.060 0.214 0.365 1.429 33.458 -0.461 85.218 99.079 78.098 0.526 24.389 0.092 38.384 57.306 1.453 3.052 125.713 399.304 862.064 5770.978 539.495 593.549 613.632 2.383 0.001 0.017 0.170 0.034 0.051 0.022 0.007 0.000 0.015 0.001 0.001 0.000 0.025 3.382 0.103 0.313 7.590 4.617 0.045 0.148 0.010 1.194 0.075 0.033 0.042 0.533 4.443 3.451 119.024 63.180 50.278 75.894 0.013
Sinobambusa tootsik 2 0.434 0.903 18.260 451.563 20.126 1.411 9.253 0.023 2.999 4.294 0.165 0.358 1.083 46.928 1.306 85.368 2598.157 88.123 0.987 21.096 0.143 32.831 27.569 6.129 4.890 41.483 440.927 1109.670 7099.183 897.278 936.116 944.993 2.658 0.002 0.060 0.685 1.418 0.035 0.011 0.888 0.001 0.111 0.091 0.003 0.004 0.038 2.224 0.050 0.231 457.735 1.700 0.004 0.029 0.005 0.375 0.251 0.016 0.029 1.191 4.326 15.199 36.147 20.688 13.544 13.579 0.040

1.5 Match taxa based on species, if available, or Genus

Combine the trait means based on species and genera into a single object, and check how many of these taxa match the (resolved) species names in DT2.

# Stack genus-level and species-level means (genus rows first) in one table,
# keyed by taxon name and taxonomic rank
try.combined.means <- bind_rows(
  try.genus.means %>% 
    rename(Taxon_name=Genus) %>% 
    mutate(Rank_correct="genus"),
  try.species.means %>% 
    rename(Taxon_name=Name_short) %>% 
    mutate(Rank_correct="species")) %>% 
  dplyr::select(Taxon_name, Rank_correct, everything())

# Number of distinct (Species, Rank_correct) pairs in DT2 that find a trait
# record with a non-missing SLA mean
total.matches <- DT2 %>%
  distinct(Species, Rank_correct) %>%
  inner_join(try.combined.means %>%
               dplyr::select(Species=Taxon_name, Rank_correct, SLA_mean), 
             by=c("Species", "Rank_correct")) %>% 
  filter(!is.na(SLA_mean)) %>% 
  nrow()

The total number of matched taxa (either at species, or genus level) is 31822.

1.6 Calculate summary statistics for species- and genus-level mean traits

# Distribution of the taxon-level trait means and sds, separately for species
# and genera. Every statistic ignores missing values.
# Note: the column labelled "q025" holds the 25th percentile (probability 0.25).
mysummary <- try.combined.means %>% 
  group_by(Rank_correct) %>% 
  summarize_at(.vars=vars(StemDens_mean:LeafWaterCont_sd),
               .funs=list(min=function(v) min(v, na.rm=TRUE),
                          q025=function(v) quantile(v, 0.25, na.rm=TRUE), 
                          q50=function(v) quantile(v, 0.50, na.rm=TRUE), 
                          q75=function(v) quantile(v, .75, na.rm=TRUE), 
                          max=function(v) max(v, na.rm=TRUE), 
                          mean=function(v) mean(v, na.rm=TRUE), 
                          sd=function(v) sd(v, na.rm=TRUE))) %>% 
  # long format: one row per rank x "<trait>_<mean|sd>_<stat>"
  gather(variable, value, -Rank_correct) %>% 
  # split the compound name; trait names contain dots but no underscores
  separate(variable, sep="_", into=c("variable", "mean.sd", "stat")) %>% 
  # factor levels fix the order of the statistic columns after spreading
  mutate(stat=factor(stat, levels=c("min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
  spread(key=stat, value=value) %>% 
  # species rows first, then genus
  arrange(desc(Rank_correct)) %>% 
  mutate_at(.vars=vars(min:sd), .funs=round, digits=3)
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for each trait, when summarized across species or genera
Rank_correct variable mean.sd min q025 q50 q75 max mean sd
species Chromosome.cDNAcont mean 0.006 2.150 2.992 4.595 1.067308e+03 4.344000e+00 8.248000e+00
species Chromosome.cDNAcont sd 0.000 0.026 0.047 0.090 1.761220e+02 9.500000e-02 1.127000e+00
species Chromosome.n mean 0.062 22.492 28.266 35.685 7.671771e+03 3.109500e+01 6.145900e+01
species Chromosome.n sd 0.000 0.242 0.400 0.692 1.621340e+02 6.730000e-01 1.571000e+00
species Disp.unit.leng mean 0.003 2.275 3.864 6.895 1.336988e+04 6.340000e+00 7.992200e+01
species Disp.unit.leng sd 0.000 0.069 0.150 0.308 9.470706e+03 5.790000e-01 5.220200e+01
species LDMC mean 0.012 0.243 0.321 0.383 1.372000e+00 3.140000e-01 9.700000e-02
species LDMC sd 0.000 0.003 0.004 0.008 2.250000e-01 8.000000e-03 1.000000e-02
species LeafArea.leaflet.undef mean 0.000 341.100 1039.636 2649.837 1.740828e+13 3.518983e+08 7.755323e+10
species LeafArea.leaflet.undef sd 0.000 20.713 76.627 245.085 2.199776e+13 6.776027e+08 1.211795e+11
species LeafArea.leaf.undef mean 0.000 343.283 1100.777 3115.948 1.341026e+11 2.667355e+06 5.973172e+08
species LeafArea.leaf.undef sd 0.000 22.934 87.305 292.521 3.408568e+09 1.475566e+05 2.038124e+07
species LeafArea.undef.undef mean 0.000 345.679 1175.095 3505.504 1.524769e+11 3.042582e+06 6.791606e+08
species LeafArea.undef.undef sd 0.000 24.273 99.143 369.860 4.133230e+09 2.504824e+05 3.205707e+07
species LeafCN.ratio mean 0.213 20.712 25.571 32.672 1.599229e+03 2.958100e+01 2.170200e+01
species LeafCN.ratio sd 0.000 0.233 0.406 0.817 2.016540e+02 7.940000e-01 2.033000e+00
species LeafC.perdrymass mean 93.949 441.375 456.739 474.192 1.131692e+03 4.565940e+02 2.970600e+01
species LeafC.perdrymass sd 0.000 0.804 1.231 1.880 7.672000e+01 1.737000e+00 2.240000e+00
species Leaf.delta.15N mean -31.434 -0.612 0.753 2.226 4.910200e+01 8.140000e-01 2.464000e+00
species Leaf.delta.15N sd 0.000 0.074 0.111 0.162 3.888400e+01 1.470000e-01 2.690000e-01
species LeafDryMass.single mean 0.000 27.531 88.304 280.901 1.023338e+17 2.030271e+12 4.558127e+14
species LeafDryMass.single sd 0.000 1.980 7.662 27.349 3.076084e+15 9.332895e+10 1.694330e+13
species Leaffreshmass mean 0.000 0.099 0.306 0.835 2.906497e+13 5.766402e+08 1.294605e+11
species Leaffreshmass sd 0.000 0.006 0.023 0.077 8.032411e+11 2.437070e+07 4.424312e+09
species LeafLength mean 0.000 29.485 57.719 97.435 3.089342e+08 6.260771e+03 1.376050e+06
species LeafLength sd 0.000 1.513 3.977 9.912 3.388372e+05 3.044500e+01 2.484372e+03
species LeafN mean 0.343 16.117 19.914 24.175 1.310526e+03 2.062200e+01 1.042300e+01
species LeafN sd 0.000 0.159 0.288 0.618 5.074300e+01 5.850000e-01 8.910000e-01
species LeafNperArea mean 0.002 1.196 1.489 1.878 4.016178e+04 2.427000e+00 1.788850e+02
species LeafNperArea sd 0.000 0.017 0.031 0.073 2.411833e+03 1.450000e-01 1.328600e+01
species LeafP mean 0.000 0.996 1.359 1.803 6.392164e+04 3.184000e+00 2.962370e+02
species LeafP sd 0.000 0.017 0.032 0.058 9.827822e+03 3.540000e-01 5.413300e+01
species LeafThickness mean 0.005 0.188 0.229 0.288 1.197200e+02 2.720000e-01 7.950000e-01
species LeafThickness sd 0.000 0.002 0.003 0.008 5.255759e+03 1.670000e-01 2.894900e+01
species LeafWaterCont mean 0.096 2.525 3.307 4.734 4.260880e+02 3.940000e+00 2.942000e+00
species LeafWaterCont sd 0.000 0.038 0.073 0.149 2.759500e+01 1.410000e-01 2.630000e-01
species LeafWidth mean 0.000 0.691 1.902 4.051 3.477895e+04 6.675000e+00 2.505850e+02
species LeafWidth sd 0.000 0.042 0.131 0.376 5.949567e+05 2.779000e+01 3.687968e+03
species PlantHeight mean 0.000 0.403 1.356 7.556 9.476600e+01 5.124000e+00 7.274000e+00
species PlantHeight sd 0.000 0.040 0.133 0.508 2.685300e+01 5.430000e-01 1.101000e+00
species RootingDepth mean 0.000 0.410 0.845 1.617 2.519319e+09 5.003361e+04 1.122150e+07
species RootingDepth sd 0.000 0.019 0.046 0.107 8.971563e+07 2.724074e+03 4.941605e+05
species SeedGerminationRate mean 5.441 83.595 88.845 93.286 2.416300e+02 8.756700e+01 1.031000e+01
species SeedGerminationRate sd 0.000 0.345 0.523 0.843 1.864950e+02 9.190000e-01 1.904000e+00
species Seed.length mean 0.004 1.817 3.029 5.599 1.089329e+04 4.931000e+00 5.310000e+01
species Seed.length sd 0.000 0.056 0.121 0.242 3.333800e+01 2.150000e-01 3.970000e-01
species SeedMass mean 0.000 0.730 4.336 36.238 9.521438e+08 1.938338e+04 4.241478e+06
species SeedMass sd 0.000 0.054 0.356 2.815 4.003387e+07 2.057742e+03 2.619802e+05
species Seed.num.rep.unit mean 0.000 42.927 180.778 791.571 2.225078e+25 4.416757e+20 9.910894e+22
species Seed.num.rep.unit sd 0.000 5.626 28.349 158.023 2.160083e+19 1.105285e+15 1.443121e+17
species SLA mean 0.000 10.691 14.520 18.798 5.490787e+03 1.576800e+01 2.873900e+01
species SLA sd 0.000 0.168 0.331 0.845 1.012120e+02 9.090000e-01 1.679000e+00
species SpecificRootLength mean 0.000 1337.375 2584.070 5187.719 3.071495e+09 2.477014e+05 2.296632e+07
species SpecificRootLength sd 0.000 48.689 111.593 254.706 8.360206e+08 3.145860e+04 4.669425e+06
species Stem.cond.dens mean 0.000 21.114 48.153 91.796 1.988212e+08 7.928559e+03 1.206849e+06
species Stem.cond.dens sd 0.000 0.689 1.873 4.707 1.210633e+07 4.337880e+02 6.759687e+04
species StemConduitDiameter mean 0.001 25.103 35.442 50.841 8.065496e+07 1.667555e+03 3.592592e+05
species StemConduitDiameter sd 0.000 0.549 1.006 1.853 1.181921e+03 2.191000e+00 9.768000e+00
species StemDens mean 0.007 0.433 0.535 0.637 2.640000e+00 5.410000e-01 1.480000e-01
species StemDens sd 0.000 0.003 0.005 0.009 1.253000e+00 1.000000e-02 1.700000e-02
species StemDiam mean 0.000 0.014 0.056 0.147 1.398249e+09 2.791541e+04 6.228172e+06
species StemDiam sd 0.000 0.001 0.003 0.011 5.904310e+08 1.939426e+04 3.263229e+06
species WoodFiberLength mean 12.143 668.313 834.580 1030.174 1.068162e+07 1.129441e+03 4.764112e+04
species WoodFiberLength sd 0.000 9.803 17.565 31.011 1.166290e+04 2.805900e+01 8.083800e+01
species Wood.vessel.length mean 3.460 313.875 413.753 537.522 2.082033e+06 5.573660e+02 1.082510e+04
species Wood.vessel.length sd 0.000 4.775 9.037 16.889 1.457577e+03 1.660500e+01 3.369800e+01
genus Chromosome.cDNAcont mean 0.006 2.332 3.178 4.971 1.067308e+03 4.910000e+00 1.834900e+01
genus Chromosome.cDNAcont sd 0.000 0.050 0.093 0.180 1.761220e+02 2.320000e-01 2.534000e+00
genus Chromosome.n mean 0.062 22.564 28.269 35.909 7.671771e+03 3.408700e+01 1.361450e+02
genus Chromosome.n sd 0.000 0.400 0.753 1.466 1.621340e+02 1.499000e+00 3.564000e+00
genus Disp.unit.leng mean 0.003 2.350 3.834 6.221 1.336988e+04 9.928000e+00 2.071760e+02
genus Disp.unit.leng sd 0.000 0.135 0.297 0.650 9.470706e+03 2.212000e+00 1.202130e+02
genus LDMC mean 0.021 0.234 0.307 0.365 1.372000e+00 3.020000e-01 1.000000e-01
genus LDMC sd 0.000 0.005 0.010 0.020 1.260000e-01 1.500000e-02 1.500000e-02
genus LeafArea.leaflet.undef mean 0.000 451.346 1047.617 2688.736 1.740828e+13 2.375967e+09 2.015193e+11
genus LeafArea.leaflet.undef sd 0.000 44.133 147.050 601.772 2.199776e+13 3.593650e+09 2.790665e+11
genus LeafArea.leaf.undef mean 0.000 404.628 1116.991 3187.225 1.341026e+11 1.799434e+07 1.552110e+09
genus LeafArea.leaf.undef sd 0.000 46.194 170.877 688.171 3.408568e+09 7.812932e+05 4.693424e+07
genus LeafArea.undef.undef mean 0.000 411.128 1167.756 3672.239 1.524769e+11 2.052324e+07 1.764777e+09
genus LeafArea.undef.undef sd 0.000 47.796 194.844 833.147 4.133230e+09 1.326702e+06 7.382015e+07
genus LeafCN.ratio mean 0.213 21.214 26.130 32.273 1.599229e+03 3.011200e+01 3.461300e+01
genus LeafCN.ratio sd 0.001 0.435 0.922 2.062 2.016540e+02 2.135000e+00 7.000000e+00
genus LeafC.perdrymass mean 95.729 440.581 454.527 471.238 1.131692e+03 4.545320e+02 3.514300e+01
genus LeafC.perdrymass sd 0.003 1.329 2.310 4.378 5.565700e+01 3.914000e+00 4.711000e+00
genus Leaf.delta.15N mean -31.434 -0.359 0.963 2.262 4.885800e+01 1.011000e+00 2.858000e+00
genus Leaf.delta.15N sd 0.000 0.122 0.212 0.379 1.000600e+01 3.190000e-01 3.540000e-01
genus LeafDryMass.single mean 0.000 30.226 93.617 274.186 1.023338e+17 1.370848e+13 1.184415e+15
genus LeafDryMass.single sd 0.000 4.095 15.886 59.613 3.076084e+15 4.949663e+11 3.901914e+13
genus Leaffreshmass mean 0.000 0.122 0.319 0.858 2.906497e+13 3.893499e+09 3.363992e+11
genus Leaffreshmass sd 0.000 0.013 0.050 0.178 8.032411e+11 1.292490e+08 1.018885e+10
genus LeafLength mean 0.000 31.480 56.829 92.781 3.089342e+08 4.167893e+04 3.575619e+06
genus LeafLength sd 0.000 2.623 6.764 18.839 3.388372e+05 1.278230e+02 5.721569e+03
genus LeafN mean 0.343 16.841 20.136 23.942 1.310526e+03 2.133700e+01 2.132200e+01
genus LeafN sd 0.001 0.299 0.660 1.682 5.074300e+01 1.283000e+00 1.636000e+00
genus LeafNperArea mean 0.002 1.224 1.492 1.843 4.016178e+04 7.062000e+00 4.648240e+02
genus LeafNperArea sd 0.000 0.032 0.074 0.217 2.411833e+03 5.490000e-01 3.059500e+01
genus LeafP mean 0.000 1.079 1.357 1.759 6.392164e+04 1.289300e+01 7.697160e+02
genus LeafP sd 0.000 0.031 0.063 0.138 9.827822e+03 1.717000e+00 1.246630e+02
genus LeafThickness mean 0.005 0.196 0.234 0.290 1.197200e+02 3.280000e-01 2.032000e+00
genus LeafThickness sd 0.000 0.004 0.009 0.021 5.255759e+03 8.640000e-01 6.666700e+01
genus LeafWaterCont mean 0.096 2.687 3.531 4.930 4.260880e+02 4.253000e+00 5.594000e+00
genus LeafWaterCont sd 0.000 0.077 0.163 0.373 2.759500e+01 3.060000e-01 5.300000e-01
genus LeafWidth mean 0.000 0.806 1.791 3.847 3.477895e+04 2.419400e+01 6.266720e+02
genus LeafWidth sd 0.000 0.078 0.238 0.769 5.949567e+05 1.464300e+02 8.492696e+03
genus PlantHeight mean 0.000 0.417 1.193 5.767 8.203000e+01 4.524000e+00 6.885000e+00
genus PlantHeight sd 0.000 0.067 0.217 1.027 2.685300e+01 1.021000e+00 1.858000e+00
genus RootingDepth mean 0.000 0.450 0.813 1.475 2.519319e+09 3.378072e+05 2.915871e+07
genus RootingDepth sd 0.000 0.037 0.086 0.198 8.971563e+07 1.444732e+04 1.138014e+06
genus SeedGerminationRate mean 6.898 84.389 88.783 92.519 2.396310e+02 8.772900e+01 1.137400e+01
genus SeedGerminationRate sd 0.001 0.539 0.988 2.031 4.806300e+01 1.785000e+00 2.319000e+00
genus Seed.length mean 0.004 1.856 3.002 5.078 1.089329e+04 6.656000e+00 1.375240e+02
genus Seed.length sd 0.000 0.106 0.234 0.508 3.333800e+01 4.780000e-01 9.910000e-01
genus SeedMass mean 0.000 0.878 4.279 28.605 9.521438e+08 1.301089e+05 1.102133e+07
genus SeedMass sd 0.000 0.135 0.869 5.768 4.003387e+07 1.084308e+04 6.032767e+05
genus Seed.num.rep.unit mean 0.000 59.107 249.776 963.874 2.225078e+25 2.981679e+21 2.575315e+23
genus Seed.num.rep.unit sd 0.000 14.011 65.788 373.790 9.602388e+20 1.585957e+17 1.218345e+19
genus SLA mean 0.000 11.537 15.069 18.147 5.490787e+03 1.731800e+01 7.069700e+01
genus SLA sd 0.000 0.339 0.793 2.677 4.479200e+01 1.953000e+00 2.726000e+00
genus SpecificRootLength mean 0.000 1401.747 2645.280 5204.620 3.071495e+09 1.620677e+06 5.964294e+07
genus SpecificRootLength sd 0.000 98.616 224.938 532.960 8.360206e+08 1.501950e+05 1.062790e+07
genus Stem.cond.dens mean 0.000 23.214 50.056 90.841 1.912493e+08 2.745431e+04 2.217954e+06
genus Stem.cond.dens sd 0.000 1.464 3.724 9.577 1.210633e+07 4.009839e+03 2.065772e+05
genus StemConduitDiameter mean 0.001 25.009 34.985 49.387 8.065496e+07 1.093829e+04 9.335138e+05
genus StemConduitDiameter sd 0.000 1.016 2.027 4.366 5.503930e+03 6.333000e+00 7.618700e+01
genus StemDens mean 0.015 0.444 0.539 0.627 2.640000e+00 5.420000e-01 1.480000e-01
genus StemDens sd 0.000 0.006 0.011 0.023 3.250000e-01 2.000000e-02 2.500000e-02
genus StemDiam mean 0.000 0.015 0.054 0.135 1.398249e+09 1.884852e+05 1.618370e+07
genus StemDiam sd 0.000 0.002 0.007 0.021 5.904310e+08 1.028566e+05 7.514889e+06
genus WoodFiberLength mean 12.143 706.995 848.135 1034.349 1.068162e+07 2.486524e+03 1.237817e+05
genus WoodFiberLength sd 0.000 17.855 34.226 64.310 4.830371e+03 5.568300e+01 1.081420e+02
genus Wood.vessel.length mean 3.460 331.407 428.826 540.696 2.082033e+06 9.565180e+02 2.618349e+04
genus Wood.vessel.length sd 0.000 8.980 18.009 37.710 2.797090e+03 3.549700e+01 7.404900e+01

2 Calculate CWMs and CWVs for each plot

Merge vegetation layers, where necessary. Combine cover values across layers

# Ancillary function
# Combine percentage cover values of overlapping layers into a single value.
# Each further layer only adds cover to the share (100 - current cover) that
# is not yet covered, i.e. c_new = c + (100 - c) * next / 100.
# x: numeric vector of cover values in percent. Vectors of length 0 or 1 are
# returned unchanged; otherwise a single combined value is returned.
combine.cover <- function(x){
  n.layers <- length(x)
  if (n.layers <= 1) {
    return(x)
  }
  combined <- x[[1]]
  for (i in 2:n.layers) {
    combined <- combined + (100 - combined) * x[[i]] / 100
  }
  # keep the container (and name) of the last element, as the iterative
  # drop-first-element formulation does
  out <- x[n.layers]
  out[1] <- combined
  out
}

# One row per plot x taxon x rank: covers recorded in different layers are
# merged with combine.cover(); the merged covers are then rescaled so that
# they sum to 1 within each plot
DT2.comb <- DT2 %>%
  group_by(PlotObservationID, Species, Rank_correct) %>%
  summarize(Relative_cover = combine.cover(Relative_cover)) %>%
  ungroup() %>%
  # re-normalize by the plot total
  group_by(PlotObservationID) %>%
  mutate(Relative_cover = Relative_cover / sum(Relative_cover)) %>%
  ungroup()
## `summarise()` regrouping output by 'PlotObservationID', 'Species' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading results, CWM is calculated ONLY for plots for which we have some abundance information. All plots where Ab_scale==“pa” in ANY of the layers are therefore excluded.

# IDs of plots in which at least one abundance scale is presence/absence ("pa")
any_pa <- DT2 %>%
  distinct(PlotObservationID, Ab_scale) %>%
  group_by(PlotObservationID) %>%
  summarize(any.pa = "pa" %in% Ab_scale) %>%
  filter(any.pa) %>%
  pull(PlotObservationID)
## `summarise()` ungrouping output (override with `.groups` argument)
# number of plots tagged
length(any_pa)
## [1] 272981
# Drop the plots tagged in any_pa, then attach the trait means to each
# plot x taxon record (key: taxon name + taxonomic rank)
CWM0 <- left_join(
  DT2.comb %>%
    filter(!(PlotObservationID %in% any_pa)),
  try.combined.means %>%
    dplyr::select(Species = Taxon_name, Rank_correct, ends_with("_mean")),
  by = c("Species", "Rank_correct")
)

# Community weighted mean of every trait in every plot: trait means weighted
# by relative cover (records with a missing trait value are dropped by na.rm).
# Output is in long format: one row per plot x trait
CWM1 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize(across(StemDens_mean:LeafWaterCont_mean,
                   ~weighted.mean(.x, Relative_cover, na.rm = TRUE))) %>%
  # trait columns in alphabetical order
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key = variable, value = CWM, -PlotObservationID)

# Trait coverage of every plot: summed relative cover of the records
# for which a trait value is available
CWM2 <- CWM0 %>%
  # replace each trait value by the record's cover (0 when the trait is missing)
  mutate(across(StemDens_mean:LeafWaterCont_mean,
                ~if_else(is.na(.x), 0, 1) * Relative_cover)) %>%
  group_by(PlotObservationID) %>%
  summarize(across(StemDens_mean:LeafWaterCont_mean,
                   ~sum(.x, na.rm = TRUE))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key = variable, value = trait.coverage, -PlotObservationID)
  
# Calculate CWV
# Ancillary function: abundance-weighted variance of a trait.
# trait: numeric vector of trait values (may contain NA)
# abu:   abundances (weights) matching `trait`
# Records with a missing trait are discarded and the remaining abundances are
# rescaled to sum to 1. Returns NA (double) unless more than one trait value
# is left, since a variance needs at least two observations.
# For the calculation see
# http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
variance2.fun <- function(trait, abu){
  has.trait <- !is.na(trait)
  w <- abu[has.trait]
  values <- trait[has.trait]
  w <- w / sum(w)
  if (length(values) <= 1) {
    return(as.double(NA))
  }
  centre <- weighted.mean(values, w)
  sum(w * (values - centre)^2)
}

# Community weighted variance of every trait in every plot (long format)
CWM3 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize(across(StemDens_mean:LeafWaterCont_mean,
                   ~variance2.fun(.x, Relative_cover))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key = variable, value = CWV, -PlotObservationID)

## Number of records per plot with a non-missing value, for each trait
## (turned into a proportion of species when the pieces are joined)
CWM4 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  #distinct(PlotObservationID, species, .keep_all = T) %>%
  summarize(across(StemDens_mean:LeafWaterCont_mean,
                   ~sum(!is.na(.x)))) %>%
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  gather(key = variable, value = n.sp.with.trait, -PlotObservationID)

# Assemble the final table: CWM, coverage, CWV and number of records with
# trait data are matched by plot and trait; the number of records per plot
# in CWM0 is used as species richness
CWM <- CWM1 %>%
  left_join(CWM2, by = c("PlotObservationID", "variable")) %>%
  left_join(CWM3, by = c("PlotObservationID", "variable")) %>%
  left_join(CWM4, by = c("PlotObservationID", "variable")) %>%
  left_join(CWM0 %>%
              count(PlotObservationID, name = "sp.richness"),
            by = c("PlotObservationID")) %>%
  transmute(PlotObservationID,
            variable,
            sp.richness,
            prop.sp.with.trait = n.sp.with.trait / sp.richness,
            trait.coverage,
            CWM,
            CWV) %>%
  arrange(PlotObservationID)
## `summarise()` ungrouping output (override with `.groups` argument)

2.1 Explore CWM output

Community weighted means of 3 randomly selected plots
PlotObservationID variable sp.richness prop.sp.with.trait trait.coverage CWM CWV
796833 Chromosome.cDNAcont_mean 44 0.909 0.944 4.610 3.962500e+01
796833 Chromosome.n_mean 44 0.909 0.944 31.220 1.620460e+02
796833 Disp.unit.leng_mean 44 0.909 0.944 42.398 9.593643e+04
796833 LDMC_mean 44 0.909 0.944 0.323 5.000000e-03
796833 LeafArea.leaflet.undef_mean 44 0.909 0.944 1791.514 8.296512e+06
796833 LeafArea.leaf.undef_mean 44 0.909 0.944 2272310.513 3.841014e+14
796833 LeafArea.undef.undef_mean 44 0.909 0.944 1702.047 1.776553e+06
796833 LeafCN.ratio_mean 44 0.909 0.944 26.310 9.970200e+01
796833 LeafC.perdrymass_mean 44 0.909 0.944 469.199 4.439610e+02
796833 Leaf.delta.15N_mean 44 0.909 0.944 -0.560 2.469000e+01
796833 LeafDryMass.single_mean 44 0.909 0.944 123.828 2.821406e+04
796833 Leaffreshmass_mean 44 0.909 0.944 0.574 3.567000e+00
796833 LeafLength_mean 44 0.909 0.944 93.050 5.240739e+03
796833 LeafN_mean 44 0.909 0.944 19.974 2.104200e+01
796833 LeafNperArea_mean 44 0.909 0.944 1.330 2.110000e-01
796833 LeafP_mean 44 0.909 0.944 243.347 4.360164e+06
796833 LeafThickness_mean 44 0.909 0.944 0.219 8.000000e-03
796833 LeafWaterCont_mean 44 0.909 0.944 9.009 2.336575e+03
796833 LeafWidth_mean 44 0.909 0.944 2.328 4.678000e+00
796833 PlantHeight_mean 44 0.909 0.944 4.929 6.164000e+01
796833 RootingDepth_mean 44 0.909 0.944 0.555 1.010000e-01
796833 SeedGerminationRate_mean 44 0.909 0.944 85.594 8.822800e+01
796833 Seed.length_mean 44 0.909 0.944 13.557 6.270275e+03
796833 SeedMass_mean 44 0.909 0.944 242.245 4.263994e+05
796833 Seed.num.rep.unit_mean 44 0.909 0.944 10154.788 3.771562e+09
796833 SLA_mean 44 0.909 0.944 89.706 3.915915e+05
796833 SpecificRootLength_mean 44 0.909 0.944 11380611.499 9.640858e+15
796833 Stem.cond.dens_mean 44 0.909 0.944 95.568 4.202752e+03
796833 StemConduitDiameter_mean 44 0.909 0.944 47.843 6.853140e+02
796833 StemDens_mean 44 0.909 0.944 0.449 1.500000e-02
796833 StemDiam_mean 44 0.909 0.944 0.090 1.700000e-02
796833 WoodFiberLength_mean 44 0.909 0.944 858.274 8.980877e+04
796833 Wood.vessel.length_mean 44 0.909 0.944 482.047 4.479830e+04
1668105 Chromosome.cDNAcont_mean 20 0.600 0.290 4.084 5.183000e+00
1668105 Chromosome.n_mean 20 0.600 0.290 29.592 4.619400e+01
1668105 Disp.unit.leng_mean 20 0.600 0.290 2.057 3.770000e-01
1668105 LDMC_mean 20 0.600 0.290 0.201 3.000000e-03
1668105 LeafArea.leaflet.undef_mean 20 0.600 0.290 5045.735 9.275990e+07
1668105 LeafArea.leaf.undef_mean 20 0.600 0.290 2894.653 1.774991e+07
1668105 LeafArea.undef.undef_mean 20 0.600 0.290 5887.095 1.294313e+08
1668105 LeafCN.ratio_mean 20 0.600 0.290 16.681 5.016600e+01
1668105 LeafC.perdrymass_mean 20 0.600 0.290 454.165 2.367720e+02
1668105 Leaf.delta.15N_mean 20 0.600 0.290 1.766 3.035000e+00
1668105 LeafDryMass.single_mean 20 0.600 0.290 171.790 9.171535e+04
1668105 Leaffreshmass_mean 20 0.600 0.290 0.907 2.377000e+00
1668105 LeafLength_mean 20 0.600 0.290 63.229 6.673616e+03
1668105 LeafN_mean 20 0.600 0.290 36.449 1.239280e+02
1668105 LeafNperArea_mean 20 0.600 0.290 1.532 1.660000e-01
1668105 LeafP_mean 20 0.600 0.290 2.557 1.340000e+00
1668105 LeafThickness_mean 20 0.600 0.290 0.281 9.000000e-03
1668105 LeafWaterCont_mean 20 0.600 0.290 6.415 3.361000e+00
1668105 LeafWidth_mean 20 0.600 0.290 2.472 1.144000e+01
1668105 PlantHeight_mean 20 0.600 0.290 0.395 4.800000e-02
1668105 RootingDepth_mean 20 0.600 0.290 0.523 1.030000e-01
1668105 SeedGerminationRate_mean 20 0.600 0.290 94.334 2.722900e+01
1668105 Seed.length_mean 20 0.600 0.290 1.811 2.170000e-01
1668105 SeedMass_mean 20 0.600 0.290 1.213 8.220000e-01
1668105 Seed.num.rep.unit_mean 20 0.600 0.290 5626.976 1.077128e+08
1668105 SLA_mean 20 0.600 0.290 25.715 3.934200e+01
1668105 SpecificRootLength_mean 20 0.600 0.290 12045.855 1.508776e+08
1668105 Stem.cond.dens_mean 20 0.600 0.290 84.019 9.019500e+02
1668105 StemConduitDiameter_mean 20 0.600 0.290 49.704 5.853650e+02
1668105 StemDens_mean 20 0.600 0.290 0.407 1.400000e-02
1668105 StemDiam_mean 20 0.600 0.290 0.012 0.000000e+00
1668105 WoodFiberLength_mean 20 0.600 0.290 815.225 2.777286e+04
1668105 Wood.vessel.length_mean 20 0.600 0.290 449.217 2.443246e+04

Scatterplot comparing coverage of traits values across plots, when based on relative cover and when based on proportion of species richness

# Trait coverage by relative cover (x) against trait coverage by proportion of
# species (y); colour = log species richness, dashed line = 1:1
CWM %>%
  #all variables have the same coverage. Showcase with LDMC
  filter(variable == "LDMC_mean") %>%
  ggplot(aes(x = trait.coverage,
             y = prop.sp.with.trait,
             col = log(sp.richness))) +
  geom_point(pch = "+", alpha = 1/3) +
  geom_abline(intercept = 0, slope = 1, col = 2, lty = 2, lwd = .7) +
  xlim(c(0, 1)) +
  ylim(c(0, 1)) +
  scale_color_viridis() +
  theme_bw() +
  labs(x = "Trait coverage (Relative  cover)",
       y = "Trait coverage (Proportion of species)") +
  coord_equal()

Calculate summary statistics for trait coverage in plots

# Summary statistics of the two coverage metrics across plots.
# Coverage is identical for all traits, so a single trait (LDMC) is used.
# Note: the columns labelled q025 / q50 / q75 hold the 0.25, 0.50 and 0.75 quantiles
CWM.coverage <- CWM %>%
  filter(variable == "LDMC_mean") %>%
  summarize_at(
    .vars = vars(trait.coverage, prop.sp.with.trait),
    .funs = list(
      num.0s = ~sum(. == 0),
      min    = ~min(., na.rm = TRUE),
      q025   = ~quantile(., probs = 0.25, na.rm = TRUE),
      q50    = ~quantile(., probs = 0.50, na.rm = TRUE),
      q75    = ~quantile(., probs = .75, na.rm = TRUE),
      max    = ~max(., na.rm = TRUE),
      mean   = ~mean(., na.rm = TRUE),
      sd     = ~sd(., na.rm = TRUE)
    )
  ) %>%
  # reshape to one row per metric, one column per statistic
  gather(key = variable, value = value) %>%
  separate(variable, sep = "_", into = c("metric", "stat")) %>%
  mutate(stat = factor(stat,
                       levels = c("num.0s", "min", "q025", "q50",
                                  "q75", "max", "mean", "sd"))) %>%
  spread(key = stat, value = value)
Summary of plot-level coverage of CWM and CWVs
metric num.0s min q025 q50 q75 max mean sd
prop.sp.with.trait 12093 0 0.778 0.895 0.975 1 0.843 0.178
trait.coverage 12339 0 0.782 0.948 0.995 1 0.843 0.225

Calculate summary statistics for CWMs and CWVs

# Summary statistics of CWM and CWV for each trait across all plots.
# `variable` is renamed to `myvar` so that the name is free for the reshaping
# steps below.
# Note: the columns labelled q025 / q50 / q75 hold the 0.25, 0.50 and 0.75 quantiles
CWM.summary <- CWM %>%
  rename(myvar = variable) %>%
  group_by(myvar) %>%
  summarize_at(
    .vars = vars(CWM:CWV),
    .funs = list(
      min  = ~min(., na.rm = TRUE),
      q025 = ~quantile(., probs = 0.25, na.rm = TRUE),
      q50  = ~quantile(., probs = 0.50, na.rm = TRUE),
      q75  = ~quantile(., probs = .75, na.rm = TRUE),
      max  = ~max(., na.rm = TRUE),
      mean = ~mean(., na.rm = TRUE),
      sd   = ~sd(., na.rm = TRUE)
    )
  ) %>%
  # reshape to one row per trait x metric, one column per statistic
  gather(key = variable, value = value, -myvar) %>%
  separate(variable, sep = "_", into = c("metric", "stat")) %>%
  mutate(stat = factor(stat,
                       levels = c("min", "q025", "q50", "q75",
                                  "max", "mean", "sd"))) %>%
  spread(key = stat, value = value) %>%
  arrange(metric, myvar)
Summary of CWMs and CWVs across all plots
myvar metric min q025 q50 q75 max mean sd
Chromosome.cDNAcont_mean CWM 0.084 3.397 4.995 7.221000e+00 9.037800e+01 6.325000e+00 5.062000e+00
Chromosome.n_mean CWM 0.114 27.419 31.729 3.879600e+01 4.166200e+03 3.476300e+01 1.394100e+01
Disp.unit.leng_mean CWM 0.019 2.672 3.575 6.217000e+00 3.592797e+03 1.407900e+01 9.290000e+01
LDMC_mean CWM 0.014 0.242 0.283 3.330000e-01 9.680000e-01 2.860000e-01 6.900000e-02
LeafArea.leaflet.undef_mean CWM 0.001 507.128 1050.045 2.172641e+03 2.896005e+08 3.226204e+03 4.104034e+05
LeafArea.leaf.undef_mean CWM 0.007 483.761 1047.616 2.246039e+03 1.714323e+08 3.726654e+05 3.751092e+06
LeafArea.undef.undef_mean CWM 0.001 544.710 1187.908 2.191178e+03 2.239247e+07 2.033599e+03 2.517520e+04
LeafCN.ratio_mean CWM 0.986 20.056 23.395 2.795100e+01 8.780210e+02 2.557300e+01 1.485100e+01
LeafC.perdrymass_mean CWM 96.454 440.810 450.720 4.661430e+02 9.991090e+02 4.532800e+02 2.608000e+01
Leaf.delta.15N_mean CWM -12.462 -1.075 -0.126 7.990000e-01 4.310700e+01 -1.000000e-03 2.020000e+00
LeafDryMass.single_mean CWM 0.000 30.722 63.056 1.287740e+02 1.013838e+05 1.141800e+02 3.650770e+02
Leaffreshmass_mean CWM 0.000 0.123 0.248 4.820000e-01 1.641220e+02 4.560000e-01 9.660000e-01
LeafLength_mean CWM 0.013 48.237 74.735 1.025790e+02 6.731693e+04 8.650600e+01 2.036740e+02
LeafN_mean CWM 3.830 19.173 22.307 2.518600e+01 2.882780e+02 2.249900e+01 5.312000e+00
LeafNperArea_mean CWM 0.002 1.141 1.309 1.582000e+00 8.900100e+01 1.429000e+00 5.530000e-01
LeafP_mean CWM 0.014 1.530 1.836 2.204000e+00 6.392164e+04 1.198250e+02 1.329475e+03
LeafThickness_mean CWM 0.005 0.188 0.220 2.760000e-01 5.413100e+01 3.160000e-01 1.071000e+00
LeafWaterCont_mean CWM 0.387 3.479 4.436 5.455000e+00 4.260880e+02 5.551000e+00 9.423000e+00
LeafWidth_mean CWM 0.005 0.766 1.348 2.516000e+00 2.675318e+04 6.202000e+00 1.380740e+02
PlantHeight_mean CWM 0.005 0.334 0.600 4.256000e+00 6.994000e+01 3.017000e+00 4.592000e+00
RootingDepth_mean CWM 0.004 0.352 0.511 7.590000e-01 7.274138e+04 8.000000e-01 5.744000e+01
SeedGerminationRate_mean CWM 7.122 84.121 88.935 9.255000e+01 2.416300e+02 8.800300e+01 6.843000e+00
Seed.length_mean CWM 0.061 1.906 2.492 4.018000e+00 1.089329e+04 1.521500e+01 2.169090e+02
SeedMass_mean CWM 0.000 0.929 2.201 1.680400e+01 3.486900e+06 6.139560e+02 1.817473e+04
Seed.num.rep.unit_mean CWM 0.000 1048.206 3666.800 1.535384e+04 1.747089e+21 7.757966e+15 2.941781e+18
SLA_mean CWM 1.473 15.395 20.164 2.464200e+01 5.490787e+03 3.446600e+01 1.274150e+02
SpecificRootLength_mean CWM 0.000 4402.639 7311.159 1.206101e+04 3.071495e+09 4.833819e+06 6.390292e+07
Stem.cond.dens_mean CWM 0.013 72.161 97.866 1.396460e+02 4.878582e+06 2.509100e+02 8.898374e+03
StemConduitDiameter_mean CWM 0.001 33.009 41.673 5.289200e+01 1.664311e+07 9.358600e+01 2.039296e+04
StemDens_mean CWM 0.053 0.361 0.420 4.960000e-01 2.640000e+00 4.350000e-01 1.160000e-01
StemDiam_mean CWM 0.000 0.010 0.024 9.600000e-02 8.884200e+01 6.700000e-02 1.660000e-01
WoodFiberLength_mean CWM 116.018 721.171 810.432 9.265990e+02 2.204640e+06 1.344705e+03 1.068622e+04
Wood.vessel.length_mean CWM 31.679 366.970 430.588 5.216390e+02 4.298853e+05 4.660690e+02 5.613650e+02
Chromosome.cDNAcont_mean CWV 0.000 5.199 13.316 3.286200e+01 2.143228e+04 4.214700e+01 8.687900e+01
Chromosome.n_mean CWV 0.000 67.635 146.116 3.293860e+02 1.451823e+07 4.478920e+02 2.884220e+04
Disp.unit.leng_mean CWV 0.000 1.423 3.803 1.558700e+01 3.227013e+06 1.993783e+04 1.687258e+05
LDMC_mean CWV 0.000 0.002 0.004 6.000000e-03 1.570000e-01 5.000000e-03 5.000000e-03
LeafArea.leaflet.undef_mean CWV 0.000 293016.033 1450747.887 5.990616e+06 3.653300e+16 2.210802e+11 7.911358e+13
LeafArea.leaf.undef_mean CWV 0.000 273271.973 1254799.386 6.694363e+06 7.347254e+15 5.016013e+13 4.280158e+14
LeafArea.undef.undef_mean CWV 0.000 404276.456 1201028.080 4.885587e+06 1.809148e+15 1.479690e+10 3.237822e+12
LeafCN.ratio_mean CWV 0.000 14.700 34.954 8.120700e+01 6.171788e+05 4.188360e+02 4.284326e+03
LeafC.perdrymass_mean CWV 0.000 173.720 338.390 5.940410e+02 1.057395e+05 6.283870e+02 2.957775e+03
Leaf.delta.15N_mean CWV 0.000 1.674 2.964 4.795000e+00 6.184630e+02 6.734000e+00 2.510900e+01
LeafDryMass.single_mean CWV 0.000 1063.574 5482.871 2.761999e+04 7.538101e+09 3.614553e+05 3.418856e+07
Leaffreshmass_mean CWV 0.000 0.019 0.088 4.620000e-01 4.203363e+04 2.092000e+00 1.115460e+02
LeafLength_mean CWV 0.000 987.260 2310.379 5.021975e+03 1.874145e+10 2.481704e+05 2.770741e+07
LeafN_mean CWV 0.000 13.152 23.690 3.797600e+01 2.717296e+05 3.413700e+01 3.774750e+02
LeafNperArea_mean CWV 0.000 0.064 0.122 2.440000e-01 1.125946e+04 8.500000e-01 2.869200e+01
LeafP_mean CWV 0.000 0.144 0.278 5.170000e-01 1.021494e+09 4.016345e+06 4.197858e+07
LeafThickness_mean CWV 0.000 0.002 0.005 1.200000e-02 3.430838e+03 2.631000e+00 3.093600e+01
LeafWaterCont_mean CWV 0.000 1.439 2.856 4.859000e+00 4.497611e+04 3.115000e+02 2.596558e+03
LeafWidth_mean CWV 0.000 0.484 1.745 4.710000e+00 3.023911e+08 1.310863e+05 3.412980e+06
PlantHeight_mean CWV 0.000 0.021 0.094 2.408100e+01 8.370440e+02 2.190100e+01 4.601400e+01
RootingDepth_mean CWV 0.000 0.041 0.101 2.490000e-01 1.871033e+10 1.273138e+04 1.455314e+07
SeedGerminationRate_mean CWV 0.000 26.381 50.523 9.287700e+01 7.015952e+03 8.142800e+01 1.357510e+02
Seed.length_mean CWV 0.000 0.534 1.282 4.575000e+00 2.966463e+07 6.939632e+04 1.200802e+06
SeedMass_mean CWV 0.000 0.701 8.150 1.076868e+03 2.901391e+13 2.281830e+09 6.879029e+10
Seed.num.rep.unit_mean CWV 0.000 3535124.986 68066597.072 1.268117e+09 5.087198e+42 2.752675e+37 9.727445e+39
SLA_mean CWV 0.000 19.547 36.728 7.503500e+01 7.533142e+06 5.391573e+04 4.390782e+05
SpecificRootLength_mean CWV 0.000 10764853.760 34930690.443 7.815134e+07 2.358521e+18 6.700213e+15 9.603635e+16
Stem.cond.dens_mean CWV 0.000 1822.672 4341.569 1.031477e+04 3.534652e+13 1.494373e+09 8.989736e+10
StemConduitDiameter_mean CWV 0.000 216.546 492.647 1.049428e+03 1.065354e+15 2.249815e+09 1.361697e+12
StemDens_mean CWV 0.000 0.005 0.009 1.500000e-02 1.447000e+00 1.600000e-02 5.400000e-02
StemDiam_mean CWV 0.000 0.000 0.001 8.000000e-03 7.519691e+04 2.810000e-01 9.473500e+01
WoodFiberLength_mean CWV 0.000 27192.997 50141.675 8.514111e+04 1.868337e+13 1.962246e+08 2.403653e+10
Wood.vessel.length_mean CWV 0.000 13052.355 26281.704 5.234534e+04 7.096939e+11 1.573672e+06 9.069692e+08

2.2 Export CWM and species mean trait values

# Store the taxon-level trait means and the plot-level CWM table.
# Note: the same file is written again at the end of the script, with
# additional objects (sPlot.traits, trait.legend)
save(try.combined.means, CWM, file="../_output/Traits_CWMs_sPlot3.RData")

3 Classify plots in is.forest or is.non.forest based on species traits

sPlot has two independent systems for classifying plots to vegetation types. The first relies on the expert opinion of data contributors and classifies plots into broad habitat types. These broad habitat types are coded using 5, non-mutually exclusive dummy variables:
1) Forest
2) Grassland
3) Shrubland
4) Sparse vegetation
5) Wetland
A plot may belong to more than one formation, e.g. a Savannah is categorized as Forest + Grassland (FG). Unfortunately, this system is not consistently available across all plots: the large majority of classified plots are from Europe.
There is therefore the need to give at least some indication to the remaining unclassified plots. To achieve this, already from v2.1, sPlot started using a classification into forest and non-forest, based on the share of trees, and the layering of vegetation. Here, we derived the (mutually exclusive) is.forest and is.non.forest classification of plots.

3.1 Derive species level information on Growth Forms

We used different sources of information:
1) Data from the gap-filled trait matrix
2) Manual cleaning of the most common species for which growth trait info is not available
3) Data from TRY (public dataset only) on all species with growth form info (Trait ID = 42)
4) Cross-match with species assigned to tree layer in DT table.

Step 1: Attach growth form trait information to DT table. Growth form information derives from TRY

# Vascular plant records only, with growth form and mean plant height
# attached; both lookups are matched on the resolved species name
DT.gf <- DT2 %>%
  filter(Taxon_group == "Vascular plant") %>%
  # growth form: one row per resolved name (first record kept when duplicated)
  left_join(try.species.names %>%
              dplyr::select(Species = Name_short, GrowthForm) %>%
              distinct(Species, .keep_all = TRUE),
            by = "Species") %>%
  # mean plant height
  left_join(try.species.means %>%
              dplyr::select(Species = Name_short, PlantHeight_mean),
            by = "Species")
# number of records without Growth Form info
sum(is.na(DT.gf$GrowthForm))
## [1] 4998850

Step 2: Select most common species without growth-trait information to export and check manually

# Species lacking growth form information, ranked by number of records
top.gf.nas <- DT.gf %>% 
  filter(is.na(GrowthForm)) %>% 
  group_by(Species) %>% 
  summarize(n=n()) %>% 
  arrange(desc(n))
## `summarise()` ungrouping output (override with `.groups` argument)
# Export the species with more than 1000 records for manual classification.
# The output path is passed positionally: the `path` argument name is
# deprecated in readr >= 1.4.0 (renamed to `file`), while the positional form
# works with both old and new versions
write_csv(top.gf.nas %>% 
            filter(n>1000), 
          "../_derived/Species_missingGF.csv")

The first 47569 species account for 56.59% of the missing records. Assign growth forms manually, reimport and coalesce into DT.gf

# Import manually classified species - this info is also reported in Appendix 1
# The file is read with two character columns, `species` and `GrowthForm`
# (see the column specification printed by readr)
gf.manual <- read_csv("../_derived/Species_missingGF_complete.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   species = col_character(),
##   GrowthForm = col_character()
## )
# Fill missing growth forms with the manually assigned ones; existing values
# take precedence. The helper column is removed afterwards
DT.gf <- DT.gf %>%
  left_join(gf.manual %>%
              rename(Species = species, GrowthForm.m = GrowthForm),
            by = "Species") %>%
  mutate(GrowthForm = coalesce(GrowthForm, GrowthForm.m),
         GrowthForm.m = NULL)

After manual completion, the number of records without growth form information decreased to 2332314.

Step 3: Import additional data on growth-form from TRY (Accessed 10 March 2020).
All public data on growth form downloaded. First take care of unmatched quotation marks in the txt file. Do this from command line.

# escape all unmatched quotation marks. Run in Linux terminal
#sed 's/"/\\"/g' 8854.txt > 8854_test.csv
#sed "s/'/\\'/g" 8854.txt > 8854_test.csv

Information on growth form is not organized and has a myriad of levels. Extract and simplify to the set of few types used so far. In case a species is attributed to multiple growth forms use a majority vote.

# Raw TRY export (tab-delimited); only rows with TraitID 42 are used below
all.gf0 <- read_delim("../_input/TRY5.0_v1.1/8854_test.txt", delim="\t") 

# Reduce the free-text growth form descriptions (lower-cased) of the species
# occurring in sPlot to four classes: other, tree, shrub, herb.
# The replacements are applied in sequence, so a later match overrides an
# earlier one (herb > shrub > tree > other). Descriptions matching none of the
# patterns are dropped.
all.gf <- all.gf0 %>% 
  filter(TraitID==42) %>% 
  distinct(AccSpeciesName, OrigValueStr) %>% 
  rename(GrowthForm0=OrigValueStr) %>% 
  mutate(GrowthForm0=tolower(GrowthForm0)) %>%
  filter(AccSpeciesName %in% sPlot.species$Species) %>% 
  mutate(GrowthForm_simplified= GrowthForm0) %>% 
  # The pattern is assembled with paste() rather than written as one string
  # literal broken over two lines: the line break and indentation would
  # otherwise become part of the "hydrohalophyte" alternative, which could
  # then never match
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0,
                                                       paste(c("vine", "climber", "liana", "carnivore",
                                                               "epiphyte", "^succulent", "lichen", "parasite",
                                                               "hydrohalophyte", "aquatic", "cactous",
                                                               "parasitic", "hydrophytes", "carnivorous"),
                                                             collapse="|")), 
                                       "other")) %>%
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0, 
                                                       "tree|conifer|^woody$|palmoid|mangrove|gymnosperm"), 
                                       "tree")) %>% 
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0, "shrub|scrub|bamboo"), "shrub")) %>%
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0,
                                      "herb|sedge|graminoid|fern|forb|herbaceous|grass|chaemaephyte|geophyte|annual"),
                                       "herb")) %>%
  # anything that is not one of the four classes by now is discarded
  mutate(GrowthForm_simplified=ifelse(GrowthForm_simplified %in% c("other", "herb", "shrub", "tree"), 
                                      GrowthForm_simplified, NA)) %>% 
  filter(!is.na(GrowthForm_simplified)) 

#Some species have multiple attributions - use a majority vote.
# x: vector of class labels (character or factor).
# Returns the most frequent label, or "Unknown" when the two most frequent
# labels are tied. Missing values do not take part in the vote; if no
# non-missing label is left, NA is returned.
# (Previously a vector mixing NAs with a single label, or an empty vector,
# stopped with an error in the tie test.)
get.mode <- function(x){
  x <- x[!is.na(x)]
  if (length(x) == 0) {
    return(NA_character_)
  }
  counts <- sort(table(as.character(x)), decreasing = TRUE)
  if (length(counts) == 1) {
    return(names(counts)[1])
  }
  if (counts[[1]] != counts[[2]]) {
    names(counts)[1]
  } else {
    "Unknown"
  }
}

# One growth form per species by majority vote; species whose vote ends in a
# tie ("Unknown") are removed
all.gf <- all.gf %>%
  group_by(AccSpeciesName) %>%
  summarize(GrowthForm_simplified = get.mode(GrowthForm_simplified)) %>%
  filter(GrowthForm_simplified != "Unknown")

# number of species per growth form class
table(all.gf$GrowthForm_simplified, exclude = NULL)
## 
##  herb other shrub  tree 
## 21467  3429  7406  9194
# Fill the growth forms still missing in DT.gf with the simplified TRY
# classes; existing values take precedence
DT.gf <- DT.gf %>%
  left_join(all.gf %>%
              rename(Species = AccSpeciesName),
            by = "Species") %>%
  mutate(GrowthForm = coalesce(GrowthForm, GrowthForm_simplified),
         GrowthForm_simplified = NULL)

Step 4: Cross-match. Assign all species occurring in at least one relevé in the tree layer as tree. Conservatively, do this only when the record is at species level (exclude records at genus\family level)

# Species-level records found in Layer 1 that still lack a growth form
other.trees <- DT.gf %>%
  filter(Layer == 1 & is.na(GrowthForm),
         Rank_correct == "species") %>%
  pull(Species) %>%
  unique()

# Every record of those species is labelled "tree"
DT.gf <- DT.gf %>%
  mutate(GrowthForm = replace(GrowthForm,
                              list = Species %in% other.trees,
                              values = "tree"))

After cross-matching, the number of records without growth form information decreased to 1264966.

## `summarise()` ungrouping output (override with `.groups` argument)
Average height per growth form
GrowthForm Height
herb 0.520
herb/shrub 1.982
herb 1.522
herb/shrub/tree 5.241
other 4.550
shrub 2.351
shrub/tree 5.094
shrub 4.644
tree 13.077
NA 2.507

Classify species as tree or tall shrubs vs. other. Make a compact table of species growth forms and create fields is.tree.or.tall.shrub and is.not.tree.or.shrub.and.small.
Define a species as is.tree.or.tall.shrub when it is either defined as tree, OR has a height >=10
Define a species as is.not.tree.or.shrub.and.small when it has a height <10, as long as it’s not defined as a tree. When height is not available, it is sufficient that the species is classified as “herb” or “other”.

# Compact table of growth form, mean height and a logical flag
# is.tree.or.tall.shrub, one row per distinct combination of
# Species, GrowthForm and PlantHeight_mean found in DT.gf.
# The flag is built from two provisional fields that start as NA and are set
# to TRUE by the rules below; in the last steps each field is used to set the
# other one to FALSE. Rows for which neither rule applies stay NA.
GF <- DT.gf %>% 
  distinct(Species, GrowthForm, PlantHeight_mean) %>% 
  # merge the backslash and forward-slash spellings of the combined classes
  mutate(GrowthForm=fct_collapse(GrowthForm, 
                                 "herb/shrub"=c("herb\\shrub","herb/shrub"), 
                                 "shrub/tree"=c("shrub/tree", "shrub\\tree"))) %>% 
  ## define is.tree.or.tall
  mutate(is.tree.or.tall.shrub=NA) %>% 
  # TRUE when the growth form label contains "tree" ...
  mutate(is.tree.or.tall.shrub=replace(is.tree.or.tall.shrub, 
                                       list=str_detect(GrowthForm, "tree"), 
                                       T)) %>% 
  # ... or when the mean height is at least 10
  mutate(is.tree.or.tall.shrub=replace(is.tree.or.tall.shrub, 
                                       list=PlantHeight_mean>=10, 
                                       T)) %>% 
  ## define is.not.tree.or.shrub.and.small 
  mutate(is.not.tree.or.shrub.and.small=NA) %>% 
  # TRUE when the mean height is below 10 ...
  mutate(is.not.tree.or.shrub.and.small=replace(is.not.tree.or.shrub.and.small,
                                       list=PlantHeight_mean<10, 
                                       T)) %>% 
  # ... or, without height data, when the label contains "herb" or "other"
  mutate(is.not.tree.or.shrub.and.small=replace(is.not.tree.or.shrub.and.small,
                                       list=is.na(PlantHeight_mean) & str_detect(GrowthForm, "herb|other"), 
                                       T)) %>%   
  ## use each field in turn to define which of the records in the other is F
  # the tree flag is applied first, so it wins when both rules fired
  # (e.g. a label containing "tree" combined with a height below 10)
  mutate(is.not.tree.or.shrub.and.small=replace(is.not.tree.or.shrub.and.small,
                                       list= is.tree.or.tall.shrub==T,
                                       F)) %>% 
  mutate(is.tree.or.tall.shrub=replace(is.tree.or.tall.shrub,
                                       list= is.not.tree.or.shrub.and.small==T,
                                       F)) %>% 
  ## drop redundant field
  dplyr::select(-is.not.tree.or.shrub.and.small)
  

## cross-check classification: growth form (rows) against the
## is.tree.or.tall.shrub flag (columns); NAs are tabulated as well
table(GF$GrowthForm, GF$is.tree.or.tall.shrub, exclude=NULL)
##                  
##                   FALSE  TRUE  <NA>
##   herb            22430     2     0
##   herb/shrub         47     1     0
##   herb/shrub/tree     0     2     0
##   other            1646    42     0
##   shrub            5410    93  2323
##   shrub/tree          0   133     0
##   tree                0 13458     0
##   <NA>              818    50 26691
## List the species labelled "herb" that are nevertheless flagged as tree or tall shrub
dplyr::filter(GF, is.tree.or.tall.shrub, GrowthForm == "herb")
## # A tibble: 2 x 4
##   Species                   GrowthForm PlantHeight_mean is.tree.or.tall.shrub
##   <chr>                     <fct>                 <dbl> <lgl>                
## 1 Phyllostachys bambusoides herb                   16.6 TRUE                 
## 2 Bambusa vulgaris          herb                   14.2 TRUE

These are bamboo species and their heights seem reasonable.

3.2 Classify plots into forest\non-forest

Define a plot as forest if:
1) Has a total cover of the tree layer >=25% (from header)
2) Has a total combined cover in Layers 1 to 3 >=25% (from DT)
3) Has a total cover of tree or tall shrub species >=25% (from DT + TRY)
4) Has data on Basal area summing to more than 10 m2/ha

The first three criteria are adapted to define non-forest as follows:
1) Info on total cover of the tree layer is available and <25%
2) Info on total combined cover in Layers 1 to 3 is available and <25%
3) The relative cover of non tree species is >75%

Criteria 2 and 3 only apply to plots having cover data in percentage.
Reimport header file

# Load the saved header data; the `header` object is used by the
# plot classification steps below
load("../_output/header_sPlot3.0.RData")

Criterium 1

# Criterion 1: forest when the tree layer cover recorded in the header is at
# least 25%; NA when that cover is not available
plot.vegtype1 <- header %>%
  dplyr::select(PlotObservationID, Cover_trees = `Cover tree layer (%)`) %>%
  mutate(is.forest = Cover_trees >= 25)

table(plot.vegtype1 %>% dplyr::select(is.forest), exclude = NULL)
## 
##   FALSE    TRUE    <NA> 
##   26211  191834 1760634

Criterium 2

# IDs of the plots in which every abundance scale is percentage cover ("CoverPerc")
mysel <- DT.gf %>%
  distinct(PlotObservationID, Ab_scale) %>%
  group_by(PlotObservationID) %>%
  summarize(AllCovPer = all(Ab_scale == "CoverPerc")) %>%
  filter(AllCovPer) %>%
  pull(PlotObservationID)
## `summarise()` ungrouping output (override with `.groups` argument)
# Number of plots in header that are not selected
nrow(header) - length(mysel)
## [1] 294880
# Criterion 2: forest when the combined cover of layers 1, 2 and 3 is at
# least 25%. Only plots recorded entirely in percentage cover are used
plot.vegtype2 <- DT.gf %>%
  filter(PlotObservationID %in% mysel,
         Layer %in% 1:3) %>%
  # total cover of each layer = sum over the species recorded in it
  group_by(PlotObservationID, Layer) %>%
  summarize(Cover_perc = sum(Abundance)) %>%
  # layers are then merged, accounting for their overlap
  group_by(PlotObservationID) %>%
  summarize(Cover_perc = combine.cover(Cover_perc)) %>%
  mutate(is.forest = Cover_perc >= 25)
## `summarise()` regrouping output by 'PlotObservationID' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
# Tabulate the outcome of criterion 2. This call previously re-tabulated
# plot.vegtype1 (copy-paste slip), so the counts printed below are those of
# criterion 1 and must be regenerated
table(plot.vegtype2 %>% dplyr::select(is.forest), exclude=NULL)
## 
##   FALSE    TRUE    <NA> 
##   26211  191834 1760634

Criterium 3

# Criterion 3: classification from the cover of tree / tall-shrub species.
# Only plots recorded entirely in percentage cover are used
plot.vegtype3 <- DT.gf %>%
  filter(PlotObservationID %in% mysel) %>%
  # one cover value per species and plot, merged across layers
  group_by(PlotObservationID, Species) %>%
  summarize(cover_perc = combine.cover(Abundance)) %>%
  ungroup() %>%
  # growth form flag of each species
  left_join(GF, by = "Species") %>%
  # cover of tree, non-tree and unclassified species in each plot
  group_by(PlotObservationID) %>%
  summarize(
    cover_tree     = sum(cover_perc * is.tree.or.tall.shrub, na.rm = TRUE),
    cover_non_tree = sum(cover_perc * (!is.tree.or.tall.shrub), na.rm = TRUE),
    cover_unknown  = sum(cover_perc * is.na(is.tree.or.tall.shrub))
  ) %>%
  rowwise() %>%
  ## forest: tree cover of at least 25; non-forest: tree cover below 25 and
  ## non-tree species making up more than 75% of the total cover
  mutate(
    tot.cover     = sum(cover_tree, cover_non_tree, cover_unknown, na.rm = TRUE),
    is.forest     = cover_tree >= 25,
    is.non.forest = cover_tree < 25 & (cover_non_tree / tot.cover) > .75
  )
## `summarise()` regrouping output by 'PlotObservationID' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
table(plot.vegtype3 %>% dplyr::select(is.forest, is.non.forest), exclude = NULL)
##          is.non.forest
## is.forest   FALSE    TRUE    <NA>
##     FALSE   72105 1137092      10
##     TRUE   474592       0       0

Criterium 4

# Criterion 4: records with abundance scale "x_BA"; a plot is classified as
# forest when its summed abundance exceeds 10
plot.vegtype4 <- DT.gf %>%
  filter(Ab_scale == "x_BA") %>%
  group_by(PlotObservationID) %>%
  summarize(tot.ba = sum(Abundance)) %>%
  mutate(is.forest = tot.ba > 10)
## `summarise()` ungrouping output (override with `.groups` argument)
table(plot.vegtype4 %>% dplyr::select(is.forest), exclude = NULL)
## 
## FALSE  TRUE 
##  1358  5558

Combine classifications from the four criteria. Use majority vote to assign plots. In case of ties, a progressively lower priority is given from criterium 1 to criterium 4.

# Combine the four classifications into one is.forest / is.non.forest pair
# per plot, for all plots in header.
# NOTE(review): the four joined is.forest columns are not renamed explicitly;
# the names used below (is.forest.x, is.forest.y, is.forest.x.x,
# is.forest.y.y) are presumably the ones produced by dplyr's automatic
# suffixing of clashing column names, in the order criterion 1 to 4 - confirm
# with names() if the dplyr version changes.
plot.vegtype <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  # criterion 1
  left_join(plot.vegtype1 %>% 
              dplyr::select(PlotObservationID, is.forest), 
            by="PlotObservationID") %>% 
  # criterion 2
  left_join(plot.vegtype2 %>% 
              dplyr::select(PlotObservationID, is.forest), 
            by="PlotObservationID") %>% 
  # criterion 3 (the only one with its own is.non.forest field)
  left_join(plot.vegtype3 %>% 
              dplyr::select(PlotObservationID, is.forest, is.non.forest) %>% 
              rename(is.non.forest.x.x=is.non.forest), 
            by="PlotObservationID") %>% 
  # criterion 4
  left_join(plot.vegtype4 %>% 
              dplyr::select(PlotObservationID, is.forest), 
            by="PlotObservationID") %>% 
  ## assign vegtype based on majority vote. In case of ties use the order of criteria as ranking
  rowwise() %>% 
  # share of the available (non-NA) criteria voting for forest
  mutate(mean.forest=mean(c(is.forest.x, is.forest.y, is.forest.x.x, is.forest.y.y), na.rm=T)) %>% 
  # tie-breaker: the first non-NA vote, in the order the columns are listed
  mutate(mean.forest2=coalesce(is.forest.x, is.forest.y, is.forest.x.x, is.forest.y.y)) %>% 
  mutate(is.forest=ifelse(mean.forest==0.5, mean.forest2, mean.forest>0.5)) %>%  
  # same for is.non.forest
  # (the negated is.forest of criteria 1, 2 and 4 is used as their non-forest vote)
  mutate(mean.non.forest=mean(c( (!is.forest.x), (!is.forest.y), is.non.forest.x.x, (!is.forest.y.y)), na.rm=T)) %>% 
  mutate(mean.non.forest2=coalesce( (!is.forest.x), (!is.forest.y), is.non.forest.x.x, (!is.forest.y.y))) %>% 
  mutate(is.non.forest=ifelse(mean.non.forest==0.5, mean.non.forest2, mean.non.forest>0.5)) %>% 
  # when both is.forest & is.non.forest are F transform to NA
  # (an NA is.forest combined with is.non.forest == F is treated the same way)
  mutate(both.F=ifelse( ( (is.forest==F | is.na(is.forest)) & is.non.forest==F), T, F)) %>% 
  mutate(is.forest=replace(is.forest, list=both.F==T, values=NA)) %>% 
  mutate(is.non.forest=replace(is.non.forest, list=both.F==T, values=NA))

# cross-tabulate the two final flags, NAs included
table(plot.vegtype %>% dplyr::select(is.forest, is.non.forest), exclude=NULL)
##          is.non.forest
## is.forest   FALSE    TRUE    <NA>
##     FALSE       0 1161498       7
##     TRUE   468261       0       0
##     <NA>        0       0  348913

3.3 Cross-check and validate

Cross check with sPlot’s 5-class (incomplete) native classification deriving from data contributors. Build a Confusion matrix.

# Pair, for each plot, the label derived above (isfor_isnonfor: "Forest" or
# "Other") with the label from the native Forest field in header (veg_type)
cross.check <- header %>%
  dplyr::select(PlotObservationID, Forest) %>%
  left_join(
    plot.vegtype %>%
      dplyr::select(PlotObservationID, is.forest, is.non.forest) %>%
      rename(Forest = is.forest, Other = is.non.forest) %>%
      # long format, keeping only the label that is TRUE for each plot
      gather(isfor_isnonfor, value, -PlotObservationID) %>%
      filter(value == TRUE) %>%
      dplyr::select(-value),
    by = "PlotObservationID"
  ) %>%
  # native classification: everything that is not Forest counts as Other
  mutate(Other = (1 * Forest) != 1) %>%
  gather(veg_type, value, -PlotObservationID, -isfor_isnonfor) %>%
  filter(value == 1) %>%
  dplyr::select(-value)

# Confusion matrix of the derived labels (rows) against the native labels (columns).
# Both factors are built on the same level set `u`, so the table is square.
u <- union(cross.check$isfor_isnonfor, cross.check$veg_type)
t <- with(cross.check, 
          table(factor(isfor_isnonfor, levels=u), factor(veg_type, levels=u)))
confm <- caret::confusionMatrix(t)
Confusion matrix between sPlot’s native habitat classification (columns) and the classification derived from the four criteria on vegetation layers and growth forms (rows)
Forest Other
Forest 381412 25588
Other 28020 974213
Formulas of associated statistics are available on the help page of the caret package and associated references. The overall accuracy of the classification based on is.forest\is.non.forest, when tested against sPlot’s native habitat classification, is 0.96; the Kappa statistic is 0.91.
Associated statistics of confusion matrix by class
x
Sensitivity 0.9315637
Specificity 0.9744069
Pos Pred Value 0.9371302
Neg Pred Value 0.9720424
Precision 0.9371302
Recall 0.9315637
F1 0.9343387
Prevalence 0.2905353
Detection Rate 0.2706522
Detection Prevalence 0.2888096
Balanced Accuracy 0.9529853
## [1] TRUE

Through the process described above, we managed to classify 1629759 plots, of which 468261 are forest and 1161498 are non-forest.
The total number of plots with attribution to forest\non-forest (either coming from sPlot’s native classification, or from the process above) is: 1727546.

4 Export and update other objects

# Species-level table: sPlot.species sorted by Species, with the growth-form
# columns from GF and the columns of try.combined.means attached by species name.
sPlot.traits <- sPlot.species %>% 
  arrange(Species) %>% 
  left_join(dplyr::select(GF, Species, GrowthForm, is.tree.or.tall.shrub), 
            by="Species") %>% 
  # try.combined.means is keyed by Taxon_name; align the key name before joining
  left_join(rename(try.combined.means, Species=Taxon_name), 
            by="Species") %>% 
  dplyr::select(-Rank_correct)

# Write the trait objects to a single .RData file
save(try.combined.means, CWM, sPlot.traits, trait.legend, 
     file="../_output/Traits_CWMs_sPlot3.RData")

# Attach the derived forest / non-forest flags to header (matched on
# PlotObservationID) and place them right after the PlotObservationID:ESY block.
header <- left_join(header, 
                    dplyr::select(plot.vegtype, 
                                  PlotObservationID, is.forest, is.non.forest), 
                    by="PlotObservationID") %>% 
  dplyr::select(PlotObservationID:ESY, is.forest, is.non.forest, everything())

# Write the updated header to disk
save(header, file="../_output/header_sPlot3.0.RData")

APPENDIX

Appendix 1 - Growth forms of most common species

As assigned manually.

# Print the manually compiled growth-form table verbatim, one record per line
"../_derived/Species_missingGF_complete.csv" %>% 
  readLines() %>% 
  cat(sep = "\n")
species,GrowthForm
Taraxacum,herb
Quercus robur,tree
Corylus avellana,tree
Frangula alnus,shrub
Festuca ovina,herb
Vaccinium vitis-idaea,shrub
NA,NA
Rubus,shrub
Capsella bursa-pastoris,herb
Salix cinerea,tree
Solanum dulcamara,herb
Tripolium pannonicum,herb
Impatiens noli-tangere,herb
Ononis spinosa,shrub
Centaurea nigra,herb
Rubus ulmifolius,shrub
Alisma plantago-aquatica,herb
Spirodela polyrhiza,herb
Salix,NA
Helictochloa pratensis,herb
Ruscus aculeatus,shrub
Lophozonia,tree
Stachys recta,herb
Crataegus laevigata,shrub
Festuca rupicola,herb
Metrosideros diffusa,herb
Rhamnus cathartica,shrub\tree
Helianthemum oelandicum,herb
Dicksonia squarrosa,herb
Rosa,shrub
Carex viridula,herb
Podocarpus spinulosus,shrub
Pinus mugo,tree
Orthilia secunda,herb
Cyathea smithii,tree
Erica arborea,shrub\tree
Hippocrepis emerus,herb
Phillyrea latifolia,tree
Triglochin palustris,herb
Metrosideros fulgens,other
Apera spica-venti,herb
Crataegus,shrub
Blechnum discolor,herb
Blechnum novae-zelandiae,herb
Tragopogon pratensis,herb
Bellidiastrum michelii,herb
Sedum album,herb
Raphanus raphanistrum,herb
Quercus coccifera,tree
Quercus mongolica,tree
Hydrocharis morsus-ranae,herb
Camellia japonica,shrub\tree
Arbutus unedo,shrub\tree
Dactylorhiza majalis,herb
Trachelospermum asiaticum,other
Myosotis laxa,herb
Valeriana crispa,herb
Hieracium lachenalii,herb
Festuca drymeja,herb
Asplenium flaccidum,herb
Rubus australis,other
Adenostyles alpina,herb
Viola,herb
Hymenophyllum demissum,herb
Hieracium,herb
Senecio nemorensis,herb
Lemna,herb
Microsorum pustulatum,herb
Epilobium ciliatum,herb
Paederia foetida,herb
Ledum palustre,shrub
Arctostaphylos uva-ursi,shrub
Poaceae,herb
Epilobium,herb
Alchemilla,herb
Genista sagittalis,shrub
Blechnum nipponicum,herb
Biscutella laevigata,herb
Galeopsis,herb
Ribes uva-crispa,shrub
Prunus mahaleb,shrub\tree
Asparagus officinalis,shrub
Disporum smilacinum,herb
Brunella vulgaris,herb
Veronica anagallis-aquatica,herb
Rhododendron kaempferi,shrub
Festuca,herb
Lipandra polysperma,herb
Sedum rupestre,herb
Helictochloa versicolor,herb
Hymenophyllum nephrophyllum,herb
Cephalotaxus harringtonia,shrub\tree
Helleborus odorus,herb
Hyacinthoides non-scripta,herb
Artemisia maritima,shrub
Helictochloa bromoides,herb
Salix euxina,tree
Viburnum furcatum,shrub
Hymenophyllum multifidum,herb
Asplenium bulbiferum,herb
Cotinus coggygria,shrub
Juniperus phoenicea,shrub\tree
Artemisia indica,herb
Pieris japonica,shrub\tree
Genista scorpius,shrub
Viburnum wrightii,shrub
Ampelopsis glandulosa,other
Potentilla pusilla,herb
Blechnum fluviatile,herb
Rubus palmatus,shrub
Artemisia santonicum,herb\shrub
Senecio leucanthemifolius,herb
Thymus,herb
Solidago canadensis,herb
Echinops ritro,herb
Seseli elatum,herb
Cymbidium goeringii,herb
Pleioblastus argenteostriatus,herb
Reynoutria japonica,herb
Rubus angloserpens,shrub
Noccaea,herb
Smilax glauca,other
Polystichum spinulosum,herb
Scirpus maritimus,herb
Luzuriaga parviflora,herb
Bryonia cretica,other
Kadsura japonica,other
Betula,tree
Carex goodenoughii,herb
Thymus longicaulis,herb
Thelypteris limbosperma,herb
Callitriche,herb
Salix pentandra,tree
Chenopodiastrum murale,herb
Quercus,tree
Parthenocissus tricuspidata,other
Aria alnifolia,tree
Callicarpa mollis,shrub
Amaranthus hybridus,herb
Leptospermum scoparium,shrub\tree
Corylus sieboldiana,shrub
Pittosporum tobira,shrub\tree
Torilis arvensis,herb
Zanthoxylum bungeanum,shrub\tree
Crepis vesicaria,herb
Dioscorea tokoro,herb
Leptopteris superba,herb
Cyanus montanus,herb
Prunus cerasifera,shrub\tree
Salix appendiculata,shrub
Lathyrus laxiflorus,herb
Galeopsis ladanum,herb
Ericameria nauseosa,shrub
Cyclamen hederifolium,herb
Hymenophyllum revolutum,herb
Dendropanax trifidus,shrub\tree
Lastreopsis hispida,herb
Pilosella hoppeana,herb
Vandasina retusa,other
Oxybasis rubra,herb
Dianthus hyssopifolius,herb
Clinopodium nepeta,herb
Cardamine glanduligera,herb
Chamaesyce peplis,herb
Pueraria montana,other
Alyssum turkestanicum,herb
Minuartia sedoides,herb
Cyanus triumfettii,herb
Cyclosorus pozoi,herb
Cyclamen repandum,herb
Astilbe thunbergii,herb
Anthyllis montana,herb
Mitchella undulata,herb
Krascheninnikovia ceratoides,shrub
Dioscorea japonica,other
Sibbaldianthe bifurca,herb
Tripterospermum trinervium,NA
Cerasus jamasakura,tree
Hierochloe repens,herb
Festuca gautieri,herb
Salicornia perennans,herb
Salix atrocinerea,tree
Agrostis,herb
Oxybasis glauca,herb
Saxifraga exarata,herb
Hymenophyllum flabellatum,herb
Salix viminalis,shrub
Sasa borealis,herb\shrub
Puccinellia festuciformis,herb
Symplocos sawafutagi,shrub
Athyrium yokoscense,herb
Rubus buergeri,shrub
Prunus leveilleana,tree
Pertya scandens,shrub
Dioscorea quaternata,other
Cyathea dealbata,shrub\tree
Calamagrostis stricta,herb
Soldanella carpatica,herb
Selinum pyrenaeum,herb
Laurus nobilis,shrub\tree
Ononis natrix,shrub
Farfugium japonicum,herb
Cornus sanguinea,shrub
Vaccinium microcarpum,shrub
Limonium meyeri,herb
Vaccinium japonicum,shrub
Scandix pecten-veneris,herb
Lemmaphyllum microphyllum,herb
Amaranthus blitum,herb
Chimaphila maculata,herb
Euphorbia nicaeensis,herb\shrub
Dodonaea viscosa,shrub\tree
Coprosma microcarpa,shrub
Lomandra multiflora,herb
Microlaena stipoides,herb
Microstegium vimineum,herb
Pteretis struthiopteris,herb
Rumex scutatus,herb
Podospermum canum,herb
Ampelodesmos mauritanicus,herb
Tmesipteris tannensis,herb
Allium carinatum,herb
Hymenophyllum dilatatum,herb
Lindsaea trichomanoides,herb
Pilosella bauhini,herb
Hymenophyllum sanguinolentum,herb
Elaeagnus pungens,shrub
Vitis vinifera,other
Mespilus germanica,shrub\tree
Odontarrhena,NA
Myosotis,herb
Teucrium pyrenaicum,herb
Centaurea thuillieri,herb
Vaccinium smallii,shrub
Hymenophyllum,herb
Carex kitaibeliana,herb
Pogostemon stellatus,herb
Vicia,herb
Quercus dalechampii,tree
Sedum roseum,herb
Stauntonia hexaphylla,other
Pulmonaria affinis,herb
Vaccinium bracteatum,shrub\tree
Lonicera gracilipes,shrub
Dryopteris setosa,herb
Herniaria hirsuta,herb
Aralia elata,shrub\tree
Eurybia divaricata,herb
Hydrangea scandens,shrub
Mentha,herb
Lindera benzoin,shrub
Juniperus virginiana,tree
Ainsliaea acerifolia,herb
x Ammocalamagrostis,NA
Galium,herb
Ligustrum tschonoskii,shrub
Blechnum chambersii,herb
Ulex parviflorus,shrub
Artemisia gmelinii,herb
Paliurus spina-christi,shrub
Luzula,herb
Piper kadsura,other
Polygonum maritimum,herb
Ulmus,tree
Actinidia arguta,other
Chenopodiastrum hybridum,herb
Stemona lucida,other
Rubia tatarica,herb
Vaccinium hirtum,shrub
Rhododendron maximum,shrub
Anisocampium niponicum,herb
Sticherus cunninghamii,herb
Smilax sieboldii,other
Potentilla humifusa,herb
Cyathea colensoi,herb\shrub
Endiandra virens,tree
Polygonum equisetiforme,herb
Dryopteris lacera,herb
Hylodesmum podocarpum,herb
Rumex,herb
Aphananthe aspera,tree
Geranium solanderi,herb
Pseudopanax linearis,shrub
Sedum alpestre,herb
Lepisorus thunbergianus,herb
Aria japonica,tree
Elytrigia repens,herb
Ainsliaea apiculata,herb
Senecio,NA
Schisandra repanda,other
Cardamine,herb
Carex dolichostachya,herb
Potentilla supina,herb
Schizocodon soldanelloides,herb
Rhaphiolepis indica,shrub
Scilla lilio-hyacinthus,herb
Clinopodium menthifolium,herb
Aster,NA
Sasa palmata,herb
Brucea javanica,shrub
Anemone scherfelii,herb
Arundinella hirta,herb
Thymus nervosus,herb
Laportea bulbifera,herb
No suitable,NA
Potentilla montana,herb
Leptopteris hymenophylloides,herb
Solidago,herb
Compositae,NA
Pimpinella tragium,herb
Soldanella hungarica,herb
Leptorumohra mutica,herb
Artemisia pontica,herb
Verbascum,herb
Carex lenta,herb
Fraxinus chinensis,tree
Centranthus ruber,herb
Sesbania sesban,tree
Phormium colensoi,herb
Asparagus aphyllus,herb\shrub
Nasturtium,herb
Carex conica,herb
Lauraceae,NA
Dumasia truncata,other
Pilosella floribunda,herb
Goodenia geniculata,herb
Medicago intertexta,herb
Prunus,shrub\tree
Austrostipa scabra,herb
Juncus,herb
Sempervivum arachnoideum,herb
Thymus striatus,herb
Jasione crispa,herb
Echinochloa crusgalli,herb
Lindera glauca,shrub
Laburnum anagyroides,shrub
Oxalis pes-caprae,herb
Dianella nigra,herb
Jacobaea subalpina,herb
Campanula serrata,herb
Piptatherum coerulescens,herb
Carex pisiformis,herb
Geum sylvaticum,herb
Minuartia recurva,herb
Globularia repens,herb
Fraxinus,tree
Eucalyptus phaenophylla,tree
Osmorhiza aristata,herb
Leguminosae,NA
Helictochloa marginata,herb
Polygonatum lasianthum,herb
Rosa dumalis,shrub
Hymenophyllum scabrum,herb
Puccinellia gigantea,herb
Heloniopsis orientalis,herb
Anthemis cretica,herb
Styrax officinalis,shrub
Hosta sieboldiana,herb
Earina mucronata,herb
Calamagrostis hakonensis,herb
Tragopogon podolicus,herb
Thymus pulcherrimus,herb
Adenophora triphylla,herb
Aster ovatus,herb
Crepis lampsanoides,herb
Panicum boscii,herb
Pluchea dioscoridis,shrub
Amelanchier laevis,tree
Silene pusilla,herb
Eupatorium makinoi,herb
Polyphlebium venosum,herb
Uncinia,herb
Rubia argyi,other
Plagiogyria matsumureana,herb
Dryopteris,herb
Symphytum cordatum,herb
Ononis striata,herb
Allium,herb
Ruscus hypoglossum,shrub
Parathelypteris japonica,herb
Cyrtomium fortunei,herb
Ranunculus taisanensis,herb
Desmodium brachypodum,herb
Carex blepharicarpa,herb
Viburnum phlebotrichum,shrub
Atractylodes ovata,NA
Cichorium pumilum,herb
Ranunculus,herb
Cyperus gracilis,herb
Carex stenostachys,herb
Diplopterygium glaucum,herb
Sesleria rigida,herb
Centaurea,herb
Opuntia,other
Galium octonarium,herb
Pseudowintera axillaris,shrub\tree
Tricyrtis affinis,herb
Asplenium platyneuron,herb
Clematis terniflora,other
Parsonsia heterophylla,other
Raukaua edgerleyi,tree
Dianthus giganteiformis,herb
Viola sieheana,herb
Hosta sieboldii,herb
Sasa nipponica,herb
Cirsium,herb
Arachniodes standishii,NA
Paspalidium geminatum,herb
Alhagi graecorum,shrub
Cuscuta campestris,other
Allium saxatile,herb
Trifolium,herb
Persicaria longiseta,NA
Jacobaea maritima,NA
Acer shirasawanum,tree
Athyrium vidalii,herb
Centaurea nemoralis,herb
Circaea ×,herb
Dactylorhiza,herb
Xanthorrhoea acaulis,other
Cynoglossum,herb
Boehmeria silvestrii,herb\shrub
Serratula coronata,herb
Salix phylicifolia,shrub
Genista depressa,NA
Populus,tree
Phlegmariurus,NA
Atropa bella-donna,herb
Bignonia capreolata,other
Amelanchier,shrub\tree
Launaea nudicaulis,herb
Photinia glabra,tree
Suaeda acuminata,herb
Gonocarpus teucrioides,herb\shrub
Pulsatilla grandis,herb
Sesleria comosa,herb
Patzkea spadicea,herb
Koeleria nitidula,herb
Orobanche crenata,other
Achillea asiatica,herb
Paris tetraphylla,herb
Edraianthus graminifolius,herb
Clematis apiifolia,other
Thelypteris acuminata,herb
Patzkea paniculata,herb
Dichondra,herb
Dryopteris pseudomas,herb
Festuca hystrix,herb
Blechnum minus,herb
Maianthemum japonicum,herb
Millettia japonica,NA
Pteris cretica,herb
Leucanthemum rotundifolium,herb
Pyrrosia eleagnifolia,other
Elionurus citreus,herb
Ochlopoa supina,NA
Crocus veluchensis,herb
Galium maritimum,herb
Crepis albida,herb
Solidago curtisii,herb
Coptis trifolia,herb
Syneilesis palmata,herb
Chenopodium bonus-henricus,herb
Potentilla,herb
Artemisia lerchiana,herb
Lathyrus pisiformis,herb
Euphorbia plumerioides,NA
Ophiopogon planiscapus,herb
Ranunculus aduncus,herb
Scabiosa triniifolia,herb
Viola kusanoana,herb
Rytidosperma linkii,herb
Festuca dalmatica,herb
Berchemia racemosa,shrub
Lespedeza maximowiczii,shrub
Wisteria brachybotrys,NA
Quercus infectoria,shrub\tree
Asarum caucasicum,herb
Centaurea aspera,herb
Lechenaultia filiformis,NA
Tragopogon porrifolius,herb
Athyrium asplenioides,herb
Silene sericea,herb
Scrophularia alpestris,herb
Rhododendron pentandrum,NA
Thymus comosus,herb
Sanicula chinensis,herb
Inula oculus-christi,herb
Lamium,herb
Arachniodes aristata,NA
Onosma simplicissima,NA
Ranunculus pseudomontanus,herb
Corylus cornuta,shrub
Arachniodes sporadosora,NA
Orostachys spinosa,other
Olearia lacunosa,shrub\tree
Carthamus mitissimus,herb
Stewartia pseudocamellia,tree
Eucalyptus indurata,tree
Prosopis glandulosa,shrub\tree
Aurinia saxatilis,herb
Dampiera purpurea,herb\shrub
Cirsium nipponicum,NA
Patrinia villosa,NA
Galium pseudoaristatum,herb
Rhinanthus,herb
Leionema elatius,shrub
Arrhenatherum longifolium,herb
Limonium bellidifolium,herb
Brachiaria whiteana,herb
Adiantum capillus-veneris,herb
Vittadinia cuneata,herb
Carex rhizina,herb
Tephrosia,NA
Leontopodium nivale,herb
Crocus caeruleus,herb
Cuscuta,other
Pyrrosia lingua,herb
Ficaria fascicularis,herb
Pilosella peleteriana,herb
Dinebra decipiens,herb
Psychotria asiatica,shrub
Vicia pyrenaica,herb
Galax urceolata,herb
Aristolochia serpentaria,herb
Sedum brevifolium,herb
Impatiens atrosanguinea,herb
Dapsilanthus ramosus,herb
Nephrodium sabaei,herb
Silene rubella,herb
Blechnum procerum,herb
Phyllanthera grayi,tree
Lycopodium alpinum,herb
Codonopsis lanceolata,other
Persicaria senegalensis,herb
Bolboschoenus glaucus,herb
Clematis japonica,NA
Asplenium incisum,herb
Chrysothamnus,NA
Kunzea ericoides,shrub\tree
Elatostema involucratum,herb
Liriope minor,herb
Campanula spatulata,herb
Orobanche,other
Laserpitium krapffii,herb
Picrothamnus,NA
Thymus roegneri,herb
Achillea coarctata,herb
Cephalaria uralensis,herb
Artemisia nitrosa,herb
Ozothamnus tesselatus,NA
Sedum urvillei,herb
Lamium garganicum,herb
Pyrola asarifolia,herb
Orites lancifolius,shrub
Polygonatum falcatum,herb
Cerastium,herb
Gaultheria procumbens,herb
Keraudrenia hookeriana,NA
Polystichum polyblepharum,herb
Lindera sericea,NA
Paesia scaberula,herb
Litsea japonica,shrub
Crepis fraasii,herb
Hypecoum imberbe,herb
Plantago monosperma,herb
Quercus rosacea,tree
Halesia tetraptera,tree
Polystichum retrosopaleaceum,herb
Leptorumohra miqueliana,herb
Boehmeria spicata,shrub
lachenalii subsp.,NA
Amaranthus graecizans,herb
Cephalomanes obscurum,herb
Sedum amplexicaule,herb
Alectryon oleifolius,tree
Galium bungei,herb
Tmesipteris,NA
Blechnum filiforme,herb
Hieracium transylvanicum,herb
Viola orbiculata,herb
Spiraea crenata,shrub
Molinia japonica,herb
Actinidia polygama,other
Bursaria spinosa,shrub\tree
Acacia aneura,tree
Heterachne,NA
Oenanthe javanica,herb
Lemna aequinoctialis,herb
Calythrix,shrub
Senecio aegyptius,NA
Petasites frigidus,herb
Dalbergia densa,other
Carex morrowii,herb
Viola vaginata,herb
Alpinia intermedia,NA
Enkianthus campanulatus,NA
Leucopogon,NA
Menziesia ferruginea,shrub
Spiraea media,shrub
Dryopteris pacifica,herb
Minuartia setacea,herb
Salvia officinalis,herb
Coprosma dumosa,shrub
Bidens,NA
Aristida vagans,herb
Phragmites japonicus,herb
Lysimachia japonica,NA
Knautia arvernensis,herb
Ononis cristata,NA
Lamyropsis cynaroides,NA
Puccinellia tenuissima,NA
Burchardia congesta,herb
Galium trifidum,herb
Armeria canescens,herb
Minuartia laricifolia,herb
Carex reinii,herb
Picea,tree
Senna,NA
Asarum sieboldii,herb
Atriplex,NA
Pseudoraphis,NA
Symphyotrichum lateriflorum,herb
Panicum effusum,herb
Microlepia marginata,NA
Prunus apetala,shrub\tree
Alyssum obovatum,herb
Bromus,herb
Rubus pannosus,shrub
Sedobassia sedoides,herb
Alyssum hirsutum,herb
Astelia,NA
Prosartes lanuginosa,herb
Jacobaea adonidifolia,herb
Helleborus purpurascens,herb
Ulmus davidiana,tree
Campanula sparsa,herb
Gleichenia,NA
Veratrum maackii,NA
Sorghum virgatum,herb
Rhododendron lagopus,shrub
Blechnum nigrum,herb
Leucopogon muticus,shrub
Biscutella auriculata,herb
Geranium collinum,herb
Centranthus calcitrapae,herb
Oxalis griffithii,herb
Festuca pseudodalmatica,herb
Galatella angustissima,herb
Prenanthes,herb
Gaultheria myrsinoides,shrub
Sarcobatus baileyi,shrub
Vitis heyneana,other
Dioscorea gracillima,NA
Launaea fragilis,herb
Sonchus bulbosus,herb
Leptospermum polygalifolium,shrub
Digitaria,herb
Lycopodium volubile,herb
Aralia cordata,herb
Carex concinnoides,herb
Avenula pubescens,herb
Pleurospermum uralense,herb
Taraxacum hamatum,herb
Ranunculus reflexus,herb
Euphorbia subcordata,herb
Ferulago sylvatica,herb
Carthamus carduncellus,herb
Psychotria serpens,other
Sonchus,NA

SessionInfo

# Print the R version, platform, locale and package versions of this session
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.7 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] viridis_0.5.1     viridisLite_0.3.0 caret_6.0-84      lattice_0.20-41  
##  [5] kableExtra_1.3.1  knitr_1.30        data.table_1.13.2 forcats_0.5.0    
##  [9] stringr_1.4.0     dplyr_1.0.2       purrr_0.3.4       readr_1.4.0      
## [13] tidyr_1.1.2       tibble_3.0.1      ggplot2_3.3.0     tidyverse_1.3.0  
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2           jsonlite_1.7.1       splines_3.6.3       
##  [4] foreach_1.5.1        prodlim_2019.11.13   modelr_0.1.6        
##  [7] assertthat_0.2.1     highr_0.8            stats4_3.6.3        
## [10] cellranger_1.1.0     yaml_2.2.1           ipred_0.9-9         
## [13] pillar_1.4.3         backports_1.2.0      glue_1.4.2          
## [16] digest_0.6.25        rvest_0.3.6          colorspace_2.0-0    
## [19] recipes_0.1.15       htmltools_0.5.0      Matrix_1.2-18       
## [22] plyr_1.8.6           timeDate_3043.102    pkgconfig_2.0.3     
## [25] broom_0.7.0          haven_2.3.1          scales_1.1.1        
## [28] webshot_0.5.2        gower_0.2.2          lava_1.6.8.1        
## [31] generics_0.1.0       ellipsis_0.3.1       withr_2.3.0         
## [34] nnet_7.3-14          cli_2.2.0            survival_3.2-7      
## [37] magrittr_2.0.1       crayon_1.3.4         readxl_1.3.1        
## [40] evaluate_0.14        fs_1.5.0             fansi_0.4.1         
## [43] nlme_3.1-150         MASS_7.3-53          xml2_1.3.2          
## [46] class_7.3-17         tools_3.6.3          hms_0.5.3           
## [49] lifecycle_0.2.0      munsell_0.5.0        reprex_0.3.0        
## [52] e1071_1.7-4          compiler_3.6.3       rlang_0.4.9         
## [55] grid_3.6.3           iterators_1.0.13     rstudioapi_0.13     
## [58] rmarkdown_2.5        gtable_0.3.0         ModelMetrics_1.2.2.2
## [61] codetools_0.2-18     DBI_1.1.0            reshape2_1.4.4      
## [64] R6_2.5.0             gridExtra_2.3        lubridate_1.7.9.2   
## [67] utf8_1.1.4           stringi_1.5.3        Rcpp_1.0.5          
## [70] vctrs_0.3.5          rpart_4.1-15         dbplyr_2.0.0        
## [73] tidyselect_1.1.0     xfun_0.19