Timestamp: Wed Dec 2 01:10:20 2020
Drafted: Francesco Maria Sabatini
Revised:
version: 1.0

This report documents 1) the construction of Community Weighted Means (CWMs) and Variances (CWVs); and 2) the classification of plots into forest/non-forest based on species growth forms. It complements species composition data from sPlot 3.0 and gap-filled plant functional traits from TRY 5.0, as received from Jens Kattge on Jan 21, 2020.

Changes in version 1.1 - Standardized Growth form names in sPlot_traits.

library(tidyverse)
library(readr)
library(data.table)
library(knitr)
library(kableExtra)
library(stringr)
library(caret)
library(viridis)

1 Data import, preparation and cleaning

# Absolute paths on the original server, kept for reference only;
# the relative copies below are the ones actually loaded.
#load("/data/sPlot/releases/sPlot3.0/DT_sPlot3.0.RData")
#load("/data/sPlot/releases/sPlot3.0/Backbone3.0.RData")
# NOTE(review): these .RData files presumably provide `Backbone` and `DT2`,
# which are used further down without being created in this script - confirm.
load("../_output/Backbone3.0.RData")
load("../_output/DT_sPlot3.0.RData")

Import TRY data

# Taxonomic hierarchy of the TRY records (species, genus, family)
try.species <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/hierarchy.info.csv",
  locale = locale(encoding = "latin1")
)

# Original (not gap-filled) data, with species and trait labels.
# Column types: three doubles, then 5 + 84 character columns.
try.allinfo <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/input_data/traits_x_georef_wide_table.csv",
  col_types = paste0("dddccccc", strrep("c", 84)),
  locale = locale(encoding = "latin1")
)

# Individual-level gap-filled data - order as in try.allinfo
try.individuals0 <- read_csv(
  file = "../_input/TRY5.0_v1.1/TRY_5_GapFilledData_2020/gapfilled_data/mean_gap_filled_back_transformed.csv",
  locale = locale(encoding = "latin1")
)

There are 609355 individual observations from 52104 distinct (unresolved) species in 7960 distinct (unresolved) genera.

1.2 Attach resolved names from Backbone

# Pair every TRY record with its resolved name from the taxonomic Backbone.
# TRY's `Species` is matched directly against the Backbone's `Name_sPlot_TRY`,
# so the Backbone key does not need to be renamed before joining.
try.species.names <- left_join(
  dplyr::select(try.allinfo, Species, Genus, GrowthForm),
  dplyr::select(Backbone, Name_sPlot_TRY, Name_short),
  by = c("Species" = "Name_sPlot_TRY")
) %>%
  dplyr::select(Species, Name_short, Genus, GrowthForm)

After attaching resolved names, TRY data contains information on 50612 species.
Check how many of the species in sPlot have trait information available in TRY.

# Distinct species names occurring in the sPlot species table
sPlot.species <- distinct(DT2, Species)

# Subset of the sPlot species whose name equals a resolved TRY name
sPlot.in.TRY <- filter(
  sPlot.species,
  Species %in% unique(try.species.names$Name_short)
)

Out of the 76912 standardized species names in sPlot 3.0, 29519 (38.4%) also occur in TRY 5.0. This number does not account for matches at the genus level.

1.3 Create legend of trait names

# Build a legend of TRY traits: numeric trait code, full description,
# short name used in the rest of the script, and a flag telling whether
# gap-filled data exist for that trait.
# The full labels come from the `StdValue_*` column names of try.allinfo,
# with the prefix removed.
trait.legend <- data.frame(full=try.allinfo %>% 
                             dplyr::select(starts_with("StdValue_")) %>% 
                             colnames() %>% 
                             gsub("StdValue_", "", .) %>% 
                             sort()) %>%
  mutate(full=as.character(full)) %>% 
  # the numeric trait code is the number at the start of each label
  mutate(traitcode=parse_number(full)) %>% 
  arrange(traitcode) %>% 
  dplyr::select(traitcode, everything()) %>% 
  # drop the leading "<code>_" from the description
  mutate(full=gsub(pattern = "^[0-9]+_", replacement="", full)) %>% 
  # Short names are assigned by POSITION: the 41 entries below must stay in
  # ascending traitcode order, matching the arrange() above.
  mutate(short=c("StemDens", "RootingDepth","LeafC.perdrymass", "LeafN","LeafP",
                 "StemDiam","SeedMass", "Seed.length","LeafThickness","LDMC",
                 "LeafNperArea","LeafDryMass.single","Leaf.delta.15N","SeedGerminationRate",
                 "Seed.num.rep.unit","LeafLength","LeafWidth","LeafCN.ratio","Leaffreshmass",
                 "Stem.cond.dens","Chromosome.n","Chromosome.cDNAcont", 
                 "Disp.unit.leng","StemConduitDiameter","Wood.vessel.length",
                 "WoodFiberLength","SpecificRootLength.fine","SpecificRootLength",
                 "PlantHeight.veg","PlantHeight.generative","LeafArea.leaf.noPet",
                 "LeafArea.leaflet.noPet","LeafArea.leaf.wPet","LeafArea.leaflet.wPet",
                 "LeafArea.leaf.undef","LeafArea.leaflet.undef","LeafArea.undef.undef",
                 "SLA.noPet", "SLA.wPet","SLA.undef", "LeafWaterCont")) %>% 
  ## Add SLA missing from allinfo file
  bind_rows(data.frame(traitcode=11, 
                       full="Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA)",
                       short="SLA")) %>% 
  ## Add the combined plant height trait (code 18) in the same way
  bind_rows(data.frame(traitcode=18, 
                       full="Plant height (vegetative + generative)", 
                       short="PlantHeight")) %>%
  arrange(traitcode) %>% 
  #create a column to mark traits for which gap filled data is available.
  # (a trait counts as available when try.individuals0 has a column "X<traitcode>")
  mutate(available=paste0("X", traitcode) %in% colnames(try.individuals0))
Legend of traits from TRY
traitcode full short available
4 Stem specific density (SSD) or wood density (stem dry mass per stem fresh volume)_g/cm3 StemDens TRUE
6 Root rooting depth_m RootingDepth TRUE
11 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA) SLA TRUE
13 Leaf carbon (C) content per leaf dry mass_mg/g LeafC.perdrymass TRUE
14 Leaf nitrogen (N) content per leaf dry mass_mg/g LeafN TRUE
15 Leaf phosphorus (P) content per leaf dry mass_mg/g LeafP TRUE
18 Plant height (vegetative + generative) PlantHeight TRUE
21 Stem diameter_m StemDiam TRUE
26 Seed dry mass_mg SeedMass TRUE
27 Seed length_mm Seed.length TRUE
46 Leaf thickness_mm LeafThickness TRUE
47 Leaf dry mass per leaf fresh mass (leaf dry matter content, LDMC)_g g-1 LDMC TRUE
50 Leaf nitrogen (N) content per leaf area_g m-2 LeafNperArea TRUE
55 Leaf dry mass (single leaf)_mg LeafDryMass.single TRUE
78 Leaf nitrogen (N) isotope signature (delta 15N)_per mill Leaf.delta.15N TRUE
95 Seed germination rate (germination efficiency)_% SeedGerminationRate TRUE
138 Seed number per reproducton unit_number Seed.num.rep.unit TRUE
144 Leaf length_mm LeafLength TRUE
145 Leaf width_cm LeafWidth TRUE
146 Leaf carbon/nitrogen (C/N) ratio_g/cm3 LeafCN.ratio TRUE
163 Leaf fresh mass_g Leaffreshmass TRUE
169 Stem conduit density (vessels and tracheids)_mm-2 Stem.cond.dens TRUE
223 Species genotype: chromosome number_dimensionless Chromosome.n TRUE
224 Species genotype: chromosome cDNA content_pg Chromosome.cDNAcont TRUE
237 Dispersal unit length_mm Disp.unit.leng TRUE
281 Stem conduit diameter (vessels, tracheids)_micro m StemConduitDiameter TRUE
282 Wood vessel element length; stem conduit (vessel and tracheids) element length_micro m Wood.vessel.length TRUE
289 Wood fiber lengths_micro m WoodFiberLength TRUE
614 Fine root length per fine root dry mass (specific fine root length, SRL)_cm/g SpecificRootLength.fine FALSE
1080 Root length per root dry mass (specific root length, SRL)_cm/g SpecificRootLength TRUE
3106 Plant height vegetative_m PlantHeight.veg FALSE
3107 Plant height generative_m PlantHeight.generative FALSE
3108 Leaf area (in case of compound leaves: leaf, petiole excluded)_mm2 LeafArea.leaf.noPet FALSE
3109 Leaf area (in case of compound leaves: leaflet, petiole excluded)_mm2 LeafArea.leaflet.noPet FALSE
3110 Leaf area (in case of compound leaves: leaf, petiole included)_mm2 LeafArea.leaf.wPet FALSE
3111 Leaf area (in case of compound leaves: leaflet, petiole included)_mm2 LeafArea.leaflet.wPet FALSE
3112 Leaf area (in case of compound leaves: leaf, undefined if petiole in- or excluded)_mm2 LeafArea.leaf.undef TRUE
3113 Leaf area (in case of compound leaves: leaflet, undefined if petiole is in- or excluded)_mm2 LeafArea.leaflet.undef TRUE
3114 Leaf area (in case of compound leaves undefined if leaf or leaflet, undefined if petiole is in- or excluded)_mm2 LeafArea.undef.undef TRUE
3115 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded_mm2 mg-1 SLA.noPet FALSE
3116 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole included_mm2 mg-1 SLA.wPet FALSE
3117 Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded_mm2 mg-1 SLA.undef FALSE
3120 Leaf water content per leaf dry mass (not saturated)_g(W)/g(DM) LeafWaterCont TRUE

Use trait legend to change naming of try.individuals0 data.frame of traits

# Old ("X<traitcode>") and new (short) column names for the traits that have
# gap-filled data; both vectors are taken from the same legend rows, so they
# are aligned element by element.
col.to <- trait.legend$short[trait.legend$available]
col.from <- paste0("X", trait.legend$traitcode[trait.legend$available])

# Swap the coded column names for the short trait names
try.individuals <- rename_at(
  try.individuals0,
  col.from,
  .funs = function(x) col.to
)

1.3 Fix some known errors in the gap-filled matrix

Check traits at the individual level. There are some traits with unexpected negative entries:

# Count, for each trait, how many individual records have a negative value.
# Names and traits are put side by side by row position.
bind_cols(
  dplyr::select(try.species.names, Name_short),
  dplyr::select(try.individuals, -X1)
) %>%
  gather(key = "variable", value = "value", -Name_short) %>%
  filter(value < 0) %>%
  group_by(variable) %>%
  summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 2
##   variable                 n
##   <chr>                <int>
## 1 LDMC                   419
## 2 LeafC.perdrymass         9
## 3 Leaf.delta.15N      262283
## 4 SeedGerminationRate    120
## 5 StemDens               337

According to Jens Kattge, the entries for Leaf.delta.15N are legitimate, while in the other cases, it may be due to bad predictions. He suggested to delete these negative records.
Similarly, there are records with impossible values for height. Some species are incorrectly predicted to have a height >100 m, and some herbs are predicted to have a height >10 m.

# Prepend the resolved species name to the individual-level trait table
# (rows are paired by position).
try.individuals <- bind_cols(
  dplyr::select(try.species.names, Name_short),
  try.individuals
)

# Records with a negative value in any trait other than Leaf.delta.15N
toexclude <- try.individuals %>%
  gather(key = "variable", value = "value", -X1, -Name_short) %>%
  filter(variable != "Leaf.delta.15N", value < 0) %>%
  pull(X1)

# Records with PlantHeight above 100, unless they belong to one of the two
# species listed below
toexclude2 <- try.individuals %>%
  filter(
    PlantHeight > 100,
    !(Name_short %in% c("Pseudotsuga menziesii", "Sequoia sempervirens"))
  ) %>%
  pull(X1)

# Records flagged as herbs in try.allinfo with PlantHeight above 10
toexclude3 <- try.individuals %>%
  filter(
    X1 %in% (try.allinfo %>%
               filter(GrowthForm == "herb") %>%
               pull(X1)),
    PlantHeight > 10
  ) %>%
  pull(X1)

# Drop every flagged record, then the record identifier itself
try.individuals <- try.individuals %>%
  filter(!(X1 %in% c(toexclude, toexclude2, toexclude3))) %>%
  dplyr::select(-X1)

This results in the exclusion of 874 individuals. As a consequence, the total number of species included in TRY is reduced to 50404.

1.4 Calculate species and genus level trait means and sd

## Calculate species level trait means and sd, together with the number of
## observations (n) per species. sd is NA for species with one observation.
## The join is wrapped in braces on purpose: in a bare
## `left_join(x={.} %>% ..., y={.} %>% ...)` the dot only occurs inside nested
## calls, so magrittr would ALSO insert the grouped data as an extra first
## positional argument of left_join() (where it silently lands on `copy`).
## Column order (all *_mean first, then all *_sd) is relied upon downstream.
try.species.means <- try.individuals %>% 
  group_by(Name_short) %>% 
  {
    left_join(
      #Add a field to indicate the number of observations per taxon
      x = summarize(., n = n()),
      y = summarize_at(., .vars = vars(StemDens:LeafWaterCont),
                       .funs = list(mean = ~mean(.), sd = ~sd(.))),
      by = "Name_short"
    )
  } %>% 
  dplyr::select(Name_short, n, everything())
## `summarise()` ungrouping output (override with `.groups` argument)
## Calculate genus level trait means and sd, together with the number of
## observations (n) per genus. The genus is the first word of Name_short.
## The join is wrapped in braces on purpose: in a bare
## `left_join(x={.} %>% ..., y={.} %>% ...)` the dot only occurs inside nested
## calls, so magrittr would ALSO insert the grouped data as an extra first
## positional argument of left_join() (where it silently lands on `copy`).
## Column order (all *_mean first, then all *_sd) is relied upon downstream.
try.genus.means <- try.individuals %>% 
  mutate(Genus = word(Name_short, 1)) %>% 
  group_by(Genus) %>% 
  {
    left_join(
      # number of observations per genus
      x = summarize(., n = n()),
      y = summarize_at(., .vars = vars(StemDens:LeafWaterCont),
                       .funs = list(mean = ~mean(.), sd = ~sd(.))),
      by = "Genus"
    )
  } %>% 
  dplyr::select(Genus, n, everything())
## `summarise()` ungrouping output (override with `.groups` argument)

The average number of observations per species and genus is 12.1 and 81.5, respectively. As many as 17443 species have only one observation (1250 at the genus level).

Example of trait means for 15 randomly selected species
Name_short n StemDens_mean RootingDepth_mean SLA_mean LeafC.perdrymass_mean LeafN_mean LeafP_mean PlantHeight_mean StemDiam_mean SeedMass_mean Seed.length_mean LeafThickness_mean LDMC_mean LeafNperArea_mean LeafDryMass.single_mean Leaf.delta.15N_mean SeedGerminationRate_mean Seed.num.rep.unit_mean LeafLength_mean LeafWidth_mean LeafCN.ratio_mean Leaffreshmass_mean Stem.cond.dens_mean Chromosome.n_mean Chromosome.cDNAcont_mean Disp.unit.leng_mean StemConduitDiameter_mean Wood.vessel.length_mean WoodFiberLength_mean SpecificRootLength_mean LeafArea.leaf.undef_mean LeafArea.leaflet.undef_mean LeafArea.undef.undef_mean LeafWaterCont_mean StemDens_sd RootingDepth_sd SLA_sd LeafC.perdrymass_sd LeafN_sd LeafP_sd PlantHeight_sd StemDiam_sd SeedMass_sd Seed.length_sd LeafThickness_sd LDMC_sd LeafNperArea_sd LeafDryMass.single_sd Leaf.delta.15N_sd SeedGerminationRate_sd Seed.num.rep.unit_sd LeafLength_sd LeafWidth_sd LeafCN.ratio_sd Leaffreshmass_sd Stem.cond.dens_sd Chromosome.n_sd Chromosome.cDNAcont_sd Disp.unit.leng_sd StemConduitDiameter_sd Wood.vessel.length_sd WoodFiberLength_sd SpecificRootLength_sd LeafArea.leaf.undef_sd LeafArea.leaflet.undef_sd LeafArea.undef.undef_sd LeafWaterCont_sd
Couratari multiflora 75 0.466 2.107 11.552 478.132 24.695 0.787 21.011 0.206 112.877 9.198 0.210 0.390 2.125 212.822 -0.790 65.385 79.708 139.964 4.318 20.160 0.539 4.665 24.833 3.401 7.024 155.757 593.002 1508.993 369.418 2329.425 2788.096 2890.797 1.625 0.014 0.298 2.281 3.027 1.262 0.044 3.049 0.054 9.670 0.378 0.038 0.016 0.327 72.612 0.301 0.809 25.785 17.955 0.613 0.987 0.165 0.424 0.723 0.099 0.359 15.145 32.625 52.501 44.089 561.280 576.566 652.464 0.125
Nesaea linearis 5 0.570 0.286 12.468 440.975 14.839 1.140 0.887 0.086 0.041 0.581 0.280 0.244 1.467 69.572 -1.798 84.474 19263.370 38.534 0.675 37.406 0.283 41.880 35.007 2.414 0.686 56.533 697.825 1053.461 1751.765 679.727 522.866 711.183 3.683 0.006 0.021 0.251 1.784 0.281 0.033 0.148 0.006 0.003 0.033 0.004 0.003 0.026 18.779 0.142 1.156 6675.872 23.381 0.308 0.548 0.074 1.907 1.278 0.071 0.037 1.664 33.219 49.173 99.730 170.933 124.013 278.602 0.045
Lemna trisulca 30 0.427 0.080 41.847 372.945 27.720 4.066 0.015 0.001 1.110 1.100 0.369 0.125 0.792 0.395 -2.508 73.660 40.237 13.573 0.060 14.484 0.003 231.625 41.594 9.577 1.455 30.018 453.488 669.799 2253.613 17.458 8.855 25.559 10.108 0.011 0.017 14.771 4.303 2.864 0.441 0.012 0.000 0.184 0.115 0.069 0.036 0.259 0.141 0.178 0.623 11.223 1.756 0.010 1.462 0.001 13.480 1.251 0.269 0.139 1.690 11.082 16.495 276.511 3.507 1.379 4.563 1.556
Reseda decursiva 3 0.609 0.397 19.452 480.626 31.096 2.324 0.302 0.035 0.192 1.002 0.259 0.159 1.878 13.887 5.321 78.060 1782.339 71.644 1.977 19.423 0.069 62.991 25.926 1.228 1.047 91.276 285.407 927.942 15032.393 189.886 582.851 307.613 6.316 0.006 0.052 0.140 2.863 0.891 0.068 0.023 0.003 0.018 0.023 0.011 0.005 0.063 0.220 0.369 13.386 422.258 8.715 0.188 0.732 0.002 5.854 0.430 0.037 0.020 14.171 12.816 97.734 1012.177 5.729 12.525 2.543 0.176
Ocotea sandwithii 2 0.545 1.103 9.410 495.017 15.576 0.782 12.892 0.234 268.618 18.115 0.224 0.424 1.638 424.874 1.119 90.845 7.263 83.696 4.157 29.858 0.995 27.321 22.919 2.552 23.234 24.807 464.556 997.150 819.221 3908.415 3349.176 3426.879 2.345 0.005 0.030 0.117 0.718 0.134 0.003 1.006 0.014 3.814 0.045 0.006 0.001 0.001 48.722 0.039 1.284 0.717 7.259 0.399 0.415 0.104 0.294 0.326 0.054 0.037 0.931 1.083 11.644 29.829 376.740 364.881 454.050 0.016
Shorea venulosa 1 0.649 2.949 11.935 493.746 16.371 0.811 8.542 0.245 567.603 11.980 0.213 0.383 1.473 840.566 1.241 93.214 4.315 41.980 4.546 30.897 2.238 13.623 15.627 1.222 13.310 41.222 328.226 767.091 1004.587 7964.442 5250.595 7441.995 2.592 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Pourouma herrerensis 1 0.450 0.494 11.838 469.772 22.391 1.050 16.113 0.215 5.452 4.206 0.249 0.365 1.911 1042.891 2.563 78.956 484.593 138.362 9.058 21.494 3.145 9.816 22.802 2.331 2.982 48.726 785.919 1323.873 536.925 10422.042 10194.504 9641.649 2.717 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Melicope triphylla 1 0.389 0.887 16.193 457.216 22.710 1.002 2.908 0.063 10.042 3.771 0.215 0.323 1.477 497.682 0.714 94.044 593.240 76.059 3.913 23.196 1.653 40.829 43.108 6.718 5.871 30.557 743.927 925.028 1352.737 7594.671 1222.198 4028.875 3.650 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Rytidosperma nudiflorum 1 0.423 0.942 7.530 431.646 13.854 1.707 0.308 0.005 1.274 2.912 0.232 0.418 1.799 15.750 -2.627 99.876 115.261 18.343 0.102 31.557 0.039 59.339 30.823 5.499 4.188 30.334 292.607 646.847 7742.221 151.057 142.689 176.048 1.878 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Digitaria coenicola 9 0.525 0.838 26.554 433.317 19.010 1.306 0.474 0.010 0.963 2.545 0.139 0.249 0.777 13.766 3.130 84.644 310.804 43.523 0.247 22.844 0.058 21.575 36.978 2.483 2.783 82.198 338.822 978.217 7549.750 337.511 289.983 264.843 4.840 0.003 0.026 0.426 1.406 0.193 0.017 0.065 0.001 0.072 0.056 0.001 0.002 0.013 0.766 0.108 0.525 52.066 2.219 0.014 0.265 0.003 0.226 0.323 0.035 0.070 1.139 6.744 19.317 336.389 22.323 14.091 17.428 0.044
Solanum punctulatum 4 0.405 1.882 9.157 458.893 32.292 1.662 1.155 0.094 2.919 3.160 0.280 0.299 3.583 354.845 1.676 86.894 2438.998 56.376 4.379 18.802 1.209 138.249 36.098 2.935 5.237 39.829 440.758 751.470 3030.975 2794.876 2296.868 4145.300 4.116 0.003 0.061 0.362 0.596 1.082 0.035 0.029 0.004 0.085 0.078 0.004 0.004 0.121 23.776 0.078 0.428 139.945 1.841 0.232 0.627 0.083 3.895 0.417 0.037 0.136 0.949 8.641 11.091 169.634 210.123 150.505 310.314 0.096
Sisyrinchium hitchcockii 1 0.344 0.234 15.265 444.745 14.723 1.439 0.255 0.018 1.299 1.280 0.286 0.298 1.250 27.907 -0.104 80.421 4378.847 42.612 0.529 96.475 0.097 14.610 50.724 10.457 1.384 46.423 714.092 1436.412 1247.283 352.943 470.486 309.762 2.957 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Mallotus blumeanus 1 0.511 0.763 17.566 423.122 21.296 1.520 7.350 0.079 14.526 2.920 0.217 0.430 1.283 236.905 3.272 84.350 124.941 123.335 4.808 20.750 0.541 24.810 37.607 3.104 3.489 42.751 517.545 950.201 2737.598 3525.861 4658.093 3432.958 1.915 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Festuca heterophylla 57 0.330 0.504 18.733 442.526 27.717 1.877 0.595 0.005 1.841 3.456 0.215 0.297 1.535 13.196 -0.418 99.456 638.216 79.902 0.399 16.737 0.048 36.715 27.853 8.514 4.960 34.756 366.708 866.343 5129.804 280.263 147.315 329.573 3.349 0.011 0.092 5.774 2.597 2.324 0.176 0.163 0.001 0.454 0.497 0.026 0.041 0.320 4.618 0.151 0.505 135.751 9.162 0.044 1.203 0.011 2.516 0.657 0.188 0.979 2.150 11.100 17.899 606.944 42.357 17.716 47.129 0.617
Caryota monostachya 1 0.618 0.571 21.400 503.069 24.219 1.389 4.078 0.076 430.594 12.016 0.171 0.251 1.381 667.745 1.048 89.004 8.620 124.629 5.080 24.522 2.578 15.148 19.359 9.941 21.234 18.055 503.194 1271.516 1251.859 16703.067 2729.596 7456.041 4.519 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA

1.5 Match taxa based on species, if available, or Genus

Combine the trait means based on species and genera into a single object, and check how many of these taxa match the (resolved) species names in DT2.

# Stack genus-level and species-level means into one table, tagging each row
# with the taxonomic rank it refers to (genus rows first).
try.combined.means <- bind_rows(
  try.genus.means %>%
    rename(Taxon_name = Genus) %>%
    mutate(Rank_correct = "genus"),
  try.species.means %>%
    rename(Taxon_name = Name_short) %>%
    mutate(Rank_correct = "species")
) %>%
  dplyr::select(Taxon_name, Rank_correct, everything())

# Number of distinct sPlot taxa (name + rank) that find a trait record,
# using a non-missing SLA mean as the indicator of a successful match.
total.matches <- DT2 %>%
  distinct(Species, Rank_correct) %>%
  left_join(try.combined.means,
            by = c("Species" = "Taxon_name", "Rank_correct")) %>%
  filter(!is.na(SLA_mean)) %>%
  nrow()

The total number of matched taxa (either at species, or genus level) is 31822.

1.6 Calculate summary statistics for species- and genus-level mean traits

# Summary statistics (min, quartiles, max, mean, sd) of every trait mean and
# trait sd column, computed separately for species-level and genus-level rows.
# Note: the label `q025` denotes the first quartile (probability 0.25).
mysummary <- try.combined.means %>% 
               group_by(Rank_correct) %>% 
               summarize_at(.vars=vars(StemDens_mean:LeafWaterCont_sd),
                            .funs=list(min=~min(., na.rm=T),
                                       q025=~quantile(., 0.25, na.rm=T), 
                                       q50=~quantile(., 0.50, na.rm=T), 
                                       q75=~quantile(., .75, na.rm=T), 
                                       max=~max(., na.rm=T), 
                                       mean=~mean(., na.rm=T), 
                                       sd=~sd(., na.rm=T))) %>% 
  # long format: one row per rank x summarized column
  # NOTE(review): the "attributes are not identical" warning raised here is
  # presumably due to the names that quantile() attaches to its result - confirm.
  gather(variable, value, -Rank_correct) %>% 
  # split "<trait>_<mean|sd>_<stat>" on underscores; this relies on the short
  # trait names containing dots but no underscores
  separate(variable, sep="_", into=c("variable", "mean.sd", "stat")) %>% 
  # fix the column order of the statistics before spreading them out
  mutate(stat=factor(stat, levels=c("min", "q025", "q50", "q75", "max", "mean", "sd"))) %>% 
  spread(key=stat, value=value) %>% 
  # descending order puts "species" rows before "genus" rows
  arrange(desc(Rank_correct)) %>% 
  mutate_at(.vars=vars(min:sd),
            .funs=~round(.,3))
## Warning: attributes are not identical across measure variables;
## they will be dropped
Summary statistics for each trait, when summarized across species or genera
Rank_correct variable mean.sd min q025 q50 q75 max mean sd
species Chromosome.cDNAcont mean 0.006 2.150 2.992 4.595 1.067308e+03 4.344000e+00 8.248000e+00
species Chromosome.cDNAcont sd 0.000 0.026 0.047 0.090 1.761220e+02 9.500000e-02 1.127000e+00
species Chromosome.n mean 0.062 22.492 28.266 35.685 7.671771e+03 3.109500e+01 6.145900e+01
species Chromosome.n sd 0.000 0.242 0.400 0.692 1.621340e+02 6.730000e-01 1.571000e+00
species Disp.unit.leng mean 0.003 2.275 3.864 6.895 1.336988e+04 6.340000e+00 7.992200e+01
species Disp.unit.leng sd 0.000 0.069 0.150 0.308 9.470706e+03 5.790000e-01 5.220200e+01
species LDMC mean 0.012 0.243 0.321 0.383 1.372000e+00 3.140000e-01 9.700000e-02
species LDMC sd 0.000 0.003 0.004 0.008 2.250000e-01 8.000000e-03 1.000000e-02
species LeafArea.leaflet.undef mean 0.000 341.100 1039.636 2649.837 1.740828e+13 3.518983e+08 7.755323e+10
species LeafArea.leaflet.undef sd 0.000 20.713 76.627 245.085 2.199776e+13 6.776027e+08 1.211795e+11
species LeafArea.leaf.undef mean 0.000 343.283 1100.777 3115.948 1.341026e+11 2.667355e+06 5.973172e+08
species LeafArea.leaf.undef sd 0.000 22.934 87.305 292.521 3.408568e+09 1.475566e+05 2.038124e+07
species LeafArea.undef.undef mean 0.000 345.679 1175.095 3505.504 1.524769e+11 3.042582e+06 6.791606e+08
species LeafArea.undef.undef sd 0.000 24.273 99.143 369.860 4.133230e+09 2.504824e+05 3.205707e+07
species LeafCN.ratio mean 0.213 20.712 25.571 32.672 1.599229e+03 2.958100e+01 2.170200e+01
species LeafCN.ratio sd 0.000 0.233 0.406 0.817 2.016540e+02 7.940000e-01 2.033000e+00
species LeafC.perdrymass mean 93.949 441.375 456.739 474.192 1.131692e+03 4.565940e+02 2.970600e+01
species LeafC.perdrymass sd 0.000 0.804 1.231 1.880 7.672000e+01 1.737000e+00 2.240000e+00
species Leaf.delta.15N mean -31.434 -0.612 0.753 2.226 4.910200e+01 8.140000e-01 2.464000e+00
species Leaf.delta.15N sd 0.000 0.074 0.111 0.162 3.888400e+01 1.470000e-01 2.690000e-01
species LeafDryMass.single mean 0.000 27.531 88.304 280.901 1.023338e+17 2.030271e+12 4.558127e+14
species LeafDryMass.single sd 0.000 1.980 7.662 27.349 3.076084e+15 9.332895e+10 1.694330e+13
species Leaffreshmass mean 0.000 0.099 0.306 0.835 2.906497e+13 5.766402e+08 1.294605e+11
species Leaffreshmass sd 0.000 0.006 0.023 0.077 8.032411e+11 2.437070e+07 4.424312e+09
species LeafLength mean 0.000 29.485 57.719 97.435 3.089342e+08 6.260771e+03 1.376050e+06
species LeafLength sd 0.000 1.513 3.977 9.912 3.388372e+05 3.044500e+01 2.484372e+03
species LeafN mean 0.343 16.117 19.914 24.175 1.310526e+03 2.062200e+01 1.042300e+01
species LeafN sd 0.000 0.159 0.288 0.618 5.074300e+01 5.850000e-01 8.910000e-01
species LeafNperArea mean 0.002 1.196 1.489 1.878 4.016178e+04 2.427000e+00 1.788850e+02
species LeafNperArea sd 0.000 0.017 0.031 0.073 2.411833e+03 1.450000e-01 1.328600e+01
species LeafP mean 0.000 0.996 1.359 1.803 6.392164e+04 3.184000e+00 2.962370e+02
species LeafP sd 0.000 0.017 0.032 0.058 9.827822e+03 3.540000e-01 5.413300e+01
species LeafThickness mean 0.005 0.188 0.229 0.288 1.197200e+02 2.720000e-01 7.950000e-01
species LeafThickness sd 0.000 0.002 0.003 0.008 5.255759e+03 1.670000e-01 2.894900e+01
species LeafWaterCont mean 0.096 2.525 3.307 4.734 4.260880e+02 3.940000e+00 2.942000e+00
species LeafWaterCont sd 0.000 0.038 0.073 0.149 2.759500e+01 1.410000e-01 2.630000e-01
species LeafWidth mean 0.000 0.691 1.902 4.051 3.477895e+04 6.675000e+00 2.505850e+02
species LeafWidth sd 0.000 0.042 0.131 0.376 5.949567e+05 2.779000e+01 3.687968e+03
species PlantHeight mean 0.000 0.403 1.356 7.556 9.476600e+01 5.124000e+00 7.274000e+00
species PlantHeight sd 0.000 0.040 0.133 0.508 2.685300e+01 5.430000e-01 1.101000e+00
species RootingDepth mean 0.000 0.410 0.845 1.617 2.519319e+09 5.003361e+04 1.122150e+07
species RootingDepth sd 0.000 0.019 0.046 0.107 8.971563e+07 2.724074e+03 4.941605e+05
species SeedGerminationRate mean 5.441 83.595 88.845 93.286 2.416300e+02 8.756700e+01 1.031000e+01
species SeedGerminationRate sd 0.000 0.345 0.523 0.843 1.864950e+02 9.190000e-01 1.904000e+00
species Seed.length mean 0.004 1.817 3.029 5.599 1.089329e+04 4.931000e+00 5.310000e+01
species Seed.length sd 0.000 0.056 0.121 0.242 3.333800e+01 2.150000e-01 3.970000e-01
species SeedMass mean 0.000 0.730 4.336 36.238 9.521438e+08 1.938338e+04 4.241478e+06
species SeedMass sd 0.000 0.054 0.356 2.815 4.003387e+07 2.057742e+03 2.619802e+05
species Seed.num.rep.unit mean 0.000 42.927 180.778 791.571 2.225078e+25 4.416757e+20 9.910894e+22
species Seed.num.rep.unit sd 0.000 5.626 28.349 158.023 2.160083e+19 1.105285e+15 1.443121e+17
species SLA mean 0.000 10.691 14.520 18.798 5.490787e+03 1.576800e+01 2.873900e+01
species SLA sd 0.000 0.168 0.331 0.845 1.012120e+02 9.090000e-01 1.679000e+00
species SpecificRootLength mean 0.000 1337.375 2584.070 5187.719 3.071495e+09 2.477014e+05 2.296632e+07
species SpecificRootLength sd 0.000 48.689 111.593 254.706 8.360206e+08 3.145860e+04 4.669425e+06
species Stem.cond.dens mean 0.000 21.114 48.153 91.796 1.988212e+08 7.928559e+03 1.206849e+06
species Stem.cond.dens sd 0.000 0.689 1.873 4.707 1.210633e+07 4.337880e+02 6.759687e+04
species StemConduitDiameter mean 0.001 25.103 35.442 50.841 8.065496e+07 1.667555e+03 3.592592e+05
species StemConduitDiameter sd 0.000 0.549 1.006 1.853 1.181921e+03 2.191000e+00 9.768000e+00
species StemDens mean 0.007 0.433 0.535 0.637 2.640000e+00 5.410000e-01 1.480000e-01
species StemDens sd 0.000 0.003 0.005 0.009 1.253000e+00 1.000000e-02 1.700000e-02
species StemDiam mean 0.000 0.014 0.056 0.147 1.398249e+09 2.791541e+04 6.228172e+06
species StemDiam sd 0.000 0.001 0.003 0.011 5.904310e+08 1.939426e+04 3.263229e+06
species WoodFiberLength mean 12.143 668.313 834.580 1030.174 1.068162e+07 1.129441e+03 4.764112e+04
species WoodFiberLength sd 0.000 9.803 17.565 31.011 1.166290e+04 2.805900e+01 8.083800e+01
species Wood.vessel.length mean 3.460 313.875 413.753 537.522 2.082033e+06 5.573660e+02 1.082510e+04
species Wood.vessel.length sd 0.000 4.775 9.037 16.889 1.457577e+03 1.660500e+01 3.369800e+01
genus Chromosome.cDNAcont mean 0.006 2.332 3.178 4.971 1.067308e+03 4.910000e+00 1.834900e+01
genus Chromosome.cDNAcont sd 0.000 0.050 0.093 0.180 1.761220e+02 2.320000e-01 2.534000e+00
genus Chromosome.n mean 0.062 22.564 28.269 35.909 7.671771e+03 3.408700e+01 1.361450e+02
genus Chromosome.n sd 0.000 0.400 0.753 1.466 1.621340e+02 1.499000e+00 3.564000e+00
genus Disp.unit.leng mean 0.003 2.350 3.834 6.221 1.336988e+04 9.928000e+00 2.071760e+02
genus Disp.unit.leng sd 0.000 0.135 0.297 0.650 9.470706e+03 2.212000e+00 1.202130e+02
genus LDMC mean 0.021 0.234 0.307 0.365 1.372000e+00 3.020000e-01 1.000000e-01
genus LDMC sd 0.000 0.005 0.010 0.020 1.260000e-01 1.500000e-02 1.500000e-02
genus LeafArea.leaflet.undef mean 0.000 451.346 1047.617 2688.736 1.740828e+13 2.375967e+09 2.015193e+11
genus LeafArea.leaflet.undef sd 0.000 44.133 147.050 601.772 2.199776e+13 3.593650e+09 2.790665e+11
genus LeafArea.leaf.undef mean 0.000 404.628 1116.991 3187.225 1.341026e+11 1.799434e+07 1.552110e+09
genus LeafArea.leaf.undef sd 0.000 46.194 170.877 688.171 3.408568e+09 7.812932e+05 4.693424e+07
genus LeafArea.undef.undef mean 0.000 411.128 1167.756 3672.239 1.524769e+11 2.052324e+07 1.764777e+09
genus LeafArea.undef.undef sd 0.000 47.796 194.844 833.147 4.133230e+09 1.326702e+06 7.382015e+07
genus LeafCN.ratio mean 0.213 21.214 26.130 32.273 1.599229e+03 3.011200e+01 3.461300e+01
genus LeafCN.ratio sd 0.001 0.435 0.922 2.062 2.016540e+02 2.135000e+00 7.000000e+00
genus LeafC.perdrymass mean 95.729 440.581 454.527 471.238 1.131692e+03 4.545320e+02 3.514300e+01
genus LeafC.perdrymass sd 0.003 1.329 2.310 4.378 5.565700e+01 3.914000e+00 4.711000e+00
genus Leaf.delta.15N mean -31.434 -0.359 0.963 2.262 4.885800e+01 1.011000e+00 2.858000e+00
genus Leaf.delta.15N sd 0.000 0.122 0.212 0.379 1.000600e+01 3.190000e-01 3.540000e-01
genus LeafDryMass.single mean 0.000 30.226 93.617 274.186 1.023338e+17 1.370848e+13 1.184415e+15
genus LeafDryMass.single sd 0.000 4.095 15.886 59.613 3.076084e+15 4.949663e+11 3.901914e+13
genus Leaffreshmass mean 0.000 0.122 0.319 0.858 2.906497e+13 3.893499e+09 3.363992e+11
genus Leaffreshmass sd 0.000 0.013 0.050 0.178 8.032411e+11 1.292490e+08 1.018885e+10
genus LeafLength mean 0.000 31.480 56.829 92.781 3.089342e+08 4.167893e+04 3.575619e+06
genus LeafLength sd 0.000 2.623 6.764 18.839 3.388372e+05 1.278230e+02 5.721569e+03
genus LeafN mean 0.343 16.841 20.136 23.942 1.310526e+03 2.133700e+01 2.132200e+01
genus LeafN sd 0.001 0.299 0.660 1.682 5.074300e+01 1.283000e+00 1.636000e+00
genus LeafNperArea mean 0.002 1.224 1.492 1.843 4.016178e+04 7.062000e+00 4.648240e+02
genus LeafNperArea sd 0.000 0.032 0.074 0.217 2.411833e+03 5.490000e-01 3.059500e+01
genus LeafP mean 0.000 1.079 1.357 1.759 6.392164e+04 1.289300e+01 7.697160e+02
genus LeafP sd 0.000 0.031 0.063 0.138 9.827822e+03 1.717000e+00 1.246630e+02
genus LeafThickness mean 0.005 0.196 0.234 0.290 1.197200e+02 3.280000e-01 2.032000e+00
genus LeafThickness sd 0.000 0.004 0.009 0.021 5.255759e+03 8.640000e-01 6.666700e+01
genus LeafWaterCont mean 0.096 2.687 3.531 4.930 4.260880e+02 4.253000e+00 5.594000e+00
genus LeafWaterCont sd 0.000 0.077 0.163 0.373 2.759500e+01 3.060000e-01 5.300000e-01
genus LeafWidth mean 0.000 0.806 1.791 3.847 3.477895e+04 2.419400e+01 6.266720e+02
genus LeafWidth sd 0.000 0.078 0.238 0.769 5.949567e+05 1.464300e+02 8.492696e+03
genus PlantHeight mean 0.000 0.417 1.193 5.767 8.203000e+01 4.524000e+00 6.885000e+00
genus PlantHeight sd 0.000 0.067 0.217 1.027 2.685300e+01 1.021000e+00 1.858000e+00
genus RootingDepth mean 0.000 0.450 0.813 1.475 2.519319e+09 3.378072e+05 2.915871e+07
genus RootingDepth sd 0.000 0.037 0.086 0.198 8.971563e+07 1.444732e+04 1.138014e+06
genus SeedGerminationRate mean 6.898 84.389 88.783 92.519 2.396310e+02 8.772900e+01 1.137400e+01
genus SeedGerminationRate sd 0.001 0.539 0.988 2.031 4.806300e+01 1.785000e+00 2.319000e+00
genus Seed.length mean 0.004 1.856 3.002 5.078 1.089329e+04 6.656000e+00 1.375240e+02
genus Seed.length sd 0.000 0.106 0.234 0.508 3.333800e+01 4.780000e-01 9.910000e-01
genus SeedMass mean 0.000 0.878 4.279 28.605 9.521438e+08 1.301089e+05 1.102133e+07
genus SeedMass sd 0.000 0.135 0.869 5.768 4.003387e+07 1.084308e+04 6.032767e+05
genus Seed.num.rep.unit mean 0.000 59.107 249.776 963.874 2.225078e+25 2.981679e+21 2.575315e+23
genus Seed.num.rep.unit sd 0.000 14.011 65.788 373.790 9.602388e+20 1.585957e+17 1.218345e+19
genus SLA mean 0.000 11.537 15.069 18.147 5.490787e+03 1.731800e+01 7.069700e+01
genus SLA sd 0.000 0.339 0.793 2.677 4.479200e+01 1.953000e+00 2.726000e+00
genus SpecificRootLength mean 0.000 1401.747 2645.280 5204.620 3.071495e+09 1.620677e+06 5.964294e+07
genus SpecificRootLength sd 0.000 98.616 224.938 532.960 8.360206e+08 1.501950e+05 1.062790e+07
genus Stem.cond.dens mean 0.000 23.214 50.056 90.841 1.912493e+08 2.745431e+04 2.217954e+06
genus Stem.cond.dens sd 0.000 1.464 3.724 9.577 1.210633e+07 4.009839e+03 2.065772e+05
genus StemConduitDiameter mean 0.001 25.009 34.985 49.387 8.065496e+07 1.093829e+04 9.335138e+05
genus StemConduitDiameter sd 0.000 1.016 2.027 4.366 5.503930e+03 6.333000e+00 7.618700e+01
genus StemDens mean 0.015 0.444 0.539 0.627 2.640000e+00 5.420000e-01 1.480000e-01
genus StemDens sd 0.000 0.006 0.011 0.023 3.250000e-01 2.000000e-02 2.500000e-02
genus StemDiam mean 0.000 0.015 0.054 0.135 1.398249e+09 1.884852e+05 1.618370e+07
genus StemDiam sd 0.000 0.002 0.007 0.021 5.904310e+08 1.028566e+05 7.514889e+06
genus WoodFiberLength mean 12.143 706.995 848.135 1034.349 1.068162e+07 2.486524e+03 1.237817e+05
genus WoodFiberLength sd 0.000 17.855 34.226 64.310 4.830371e+03 5.568300e+01 1.081420e+02
genus Wood.vessel.length mean 3.460 331.407 428.826 540.696 2.082033e+06 9.565180e+02 2.618349e+04
genus Wood.vessel.length sd 0.000 8.980 18.009 37.710 2.797090e+03 3.549700e+01 7.404900e+01

2 Calculate CWMs and CWVs for each plot

Merge vegetation layers, where necessary. Combine cover values across layers

# Ancillary function
# Combine cover values accounting for layers.
# Values are folded left to right: each further value only adds cover to the
# share that is not covered yet, i.e. combined = a + (100 - a) * b / 100.
# The constant 100 means the formula treats `x` as percentages.
#   x: numeric vector of cover values (one per layer)
# Returns a vector of length one; `x` is returned unchanged when it holds
# zero or one value.
combine.cover <- function(x) {
  n.values <- length(x)
  if (n.values <= 1) {
    return(x)
  }
  running <- x[[1]]
  for (i in 2:n.values) {
    running <- running + (100 - running) * x[[i]] / 100
  }
  # store the result in the last slot so that its name (if any) is retained
  x[n.values] <- running
  x[n.values]
}

# One row per plot x taxon (x taxonomic rank), with cover merged across layers
# and then rescaled within each plot.
DT2.comb <- local({
  # merge the cover of a taxon recorded in more than one layer
  layers.merged <- DT2 %>%
    group_by(PlotObservationID, Species, Rank_correct) %>%
    summarize(Relative_cover = combine.cover(Relative_cover)) %>%
    ungroup()
  # total merged cover of each plot, used as denominator below
  plot.totals <- layers.merged %>%
    group_by(PlotObservationID) %>%
    summarize(Tot.cover = sum(Relative_cover))
  # re-normalize: after the division the covers of a plot sum to 1
  layers.merged %>%
    left_join(plot.totals, by = "PlotObservationID") %>%
    mutate(Relative_cover = Relative_cover / Tot.cover) %>%
    dplyr::select(-Tot.cover)
})
## `summarise()` regrouping output by 'PlotObservationID', 'Species' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

Calculate CWMs and CWV, as well as plot coverage statistics (proportion of total cover for which trait info exist, and proportion of species for which we have trait info). To avoid misleading results, CWM is calculated ONLY for plots for which we have some abundance information. All plots where Ab_scale==“pa” in ANY of the layers are therefore excluded.

# Tag plots where at least one layer has only p\a information
# (result: vector of the PlotObservationIDs that have "pa" among their
# abundance scales)
any_pa <- DT2 %>%
  distinct(PlotObservationID, Ab_scale) %>%
  group_by(PlotObservationID) %>%
  summarize(any.pa = "pa" %in% Ab_scale) %>%
  filter(any.pa) %>%
  pull(PlotObservationID)
## `summarise()` ungrouping output (override with `.groups` argument)
# number of plots tagged as having presence/absence data in some layer
length(any_pa)
## [1] 272981
# Exclude plots above and merge species data table with traits.
# Traits are matched on taxon name AND taxonomic rank; only the "*_mean"
# trait columns are carried over. Taxa without a match keep NA traits.
CWM0 <- left_join(
  DT2.comb %>%
    filter(!PlotObservationID %in% any_pa),
  try.combined.means %>%
    dplyr::select(Species = Taxon_name, Rank_correct, ends_with("_mean")),
  by = c("Species", "Rank_correct")
)
# Calculate CWM for each trait in each plot:
# mean of the taxon-level trait values weighted by relative cover; taxa with
# a missing value for a trait are left out for that trait (na.rm).
CWM1 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize(across(StemDens_mean:LeafWaterCont_mean,
                   ~ weighted.mean(.x, Relative_cover, na.rm = TRUE)),
            .groups = "drop") %>%
  # plot ID first, then the columns in alphabetical order
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  # long format: one row per plot x trait
  gather(key = variable, value = CWM, -PlotObservationID)
# Calculate coverage for each trait in each plot:
# sum of the relative cover of the taxa that have a value for the trait
# (taxa with a missing trait value contribute 0).
CWM2 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize(across(StemDens_mean:LeafWaterCont_mean,
                   ~ sum(if_else(is.na(.x), 0, 1) * Relative_cover,
                         na.rm = TRUE)),
            .groups = "drop") %>%
  # plot ID first, then the columns in alphabetical order
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  # long format: one row per plot x trait
  gather(key = variable, value = trait.coverage, -PlotObservationID)
# Calculate CWV
# Ancillary function
# Abundance-weighted variance of a trait (used for the CWV).
#   trait: numeric vector of trait values, may contain NA
#   abu:   abundances matching `trait` element by element
# Entries with a missing trait value are dropped and the remaining abundances
# are rescaled to sum to 1. Returns NA (double) when fewer than two trait
# values are left, since a variance needs more than one observation.
# For the calculation see
# http://r.789695.n4.nabble.com/Weighted-skewness-and-curtosis-td4709956.html
variance2.fun <- function(trait, abu) {
  has.value <- !is.na(trait)
  values <- trait[has.value]
  weights <- abu[has.value]
  weights <- weights / sum(weights)
  if (length(values) <= 1) {
    return(as.double(NA))
  }
  centre <- weighted.mean(values, weights)
  sum(weights * (values - centre)^2)
}

# Calculate CWV for each trait in each plot
# (cover-weighted variance, NA when fewer than two taxa have the trait)
CWM3 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  summarize(across(StemDens_mean:LeafWaterCont_mean,
                   ~ variance2.fun(.x, Relative_cover)),
            .groups = "drop") %>%
  # plot ID first, then the columns in alphabetical order
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  # long format: one row per plot x trait
  gather(key = variable, value = CWV, -PlotObservationID)
## Count the taxa having a value for each trait in each plot
## (turned into a proportion when the tables are joined together)
CWM4 <- CWM0 %>%
  group_by(PlotObservationID) %>%
  #distinct(PlotObservationID, species, .keep_all = T) %>% 
  summarize(across(StemDens_mean:LeafWaterCont_mean,
                   ~ sum(!is.na(.x))),
            .groups = "drop") %>%
  # plot ID first, then the columns in alphabetical order
  dplyr::select(PlotObservationID, order(colnames(.))) %>%
  # long format: one row per plot x trait
  gather(key = variable, value = n.sp.with.trait, -PlotObservationID)

# Join together
# CWM1-CWM4 share the key (plot, trait); plot richness is the number of
# rows (taxon records) of the plot in CWM0 and is attached by plot only.
CWM <- list(CWM1, CWM2, CWM3, CWM4) %>%
  purrr::reduce(left_join, by = c("PlotObservationID", "variable")) %>%
  left_join(CWM0 %>%
              count(PlotObservationID, name = "sp.richness"),
            by = c("PlotObservationID")) %>%
  # share of the plot's taxa that have a value for the trait
  mutate(prop.sp.with.trait = n.sp.with.trait / sp.richness) %>%
  dplyr::select(PlotObservationID, variable, sp.richness,
                prop.sp.with.trait, trait.coverage, CWM, CWV) %>%
  arrange(PlotObservationID)
## `summarise()` ungrouping output (override with `.groups` argument)

2.1 Explore CWM output

Community weighted means of 3 randomly selected plots
PlotObservationID variable sp.richness prop.sp.with.trait trait.coverage CWM CWV
163462 Chromosome.cDNAcont_mean 26 0.769 0.821 7.201 16.784
163462 Chromosome.n_mean 26 0.769 0.821 28.874 109.790
163462 Disp.unit.leng_mean 26 0.769 0.821 3.672 3.200
163462 LDMC_mean 26 0.769 0.821 0.311 0.008
163462 LeafArea.leaflet.undef_mean 26 0.769 0.821 550.851 607749.801
163462 LeafArea.leaf.undef_mean 26 0.769 0.821 488.308 392512.971
163462 LeafArea.undef.undef_mean 26 0.769 0.821 623.677 492726.384
163462 LeafCN.ratio_mean 26 0.769 0.821 29.529 57.689
163462 LeafC.perdrymass_mean 26 0.769 0.821 444.212 229.823
163462 Leaf.delta.15N_mean 26 0.769 0.821 -1.731 6.355
163462 LeafDryMass.single_mean 26 0.769 0.821 24.675 964.633
163462 Leaffreshmass_mean 26 0.769 0.821 0.111 0.023
163462 LeafLength_mean 26 0.769 0.821 61.494 766.254
163462 LeafN_mean 26 0.769 0.821 17.670 27.163
163462 LeafNperArea_mean 26 0.769 0.821 1.094 0.029
163462 LeafP_mean 26 0.769 0.821 1.350 0.280
163462 LeafThickness_mean 26 0.769 0.821 0.188 0.002
163462 LeafWaterCont_mean 26 0.769 0.821 4.435 7.492
163462 LeafWidth_mean 26 0.769 0.821 0.746 0.838
163462 PlantHeight_mean 26 0.769 0.821 0.227 0.006
163462 RootingDepth_mean 26 0.769 0.821 0.290 0.060
163462 SeedGerminationRate_mean 26 0.769 0.821 94.703 72.616
163462 Seed.length_mean 26 0.769 0.821 2.631 1.222
163462 SeedMass_mean 26 0.769 0.821 1.178 2.327
163462 Seed.num.rep.unit_mean 26 0.769 0.821 1025.731 38899159.288
163462 SLA_mean 26 0.769 0.821 16.963 21.099
163462 SpecificRootLength_mean 26 0.769 0.821 8694.110 56986511.831
163462 Stem.cond.dens_mean 26 0.769 0.821 86.490 1994.257
163462 StemConduitDiameter_mean 26 0.769 0.821 38.280 2338.941
163462 StemDens_mean 26 0.769 0.821 0.277 0.019
163462 StemDiam_mean 26 0.769 0.821 0.004 0.000
163462 WoodFiberLength_mean 26 0.769 0.821 690.885 27348.390
163462 Wood.vessel.length_mean 26 0.769 0.821 351.533 17944.756
1575246 Chromosome.cDNAcont_mean 43 0.907 0.706 5.474 45.688
1575246 Chromosome.n_mean 43 0.907 0.706 30.117 92.396
1575246 Disp.unit.leng_mean 43 0.907 0.706 5.809 13.991
1575246 LDMC_mean 43 0.907 0.706 0.311 0.003
1575246 LeafArea.leaflet.undef_mean 43 0.907 0.706 1342.295 2796141.926
1575246 LeafArea.leaf.undef_mean 43 0.907 0.706 1289.664 1887141.262
1575246 LeafArea.undef.undef_mean 43 0.907 0.706 1526.325 2900009.066
1575246 LeafCN.ratio_mean 43 0.907 0.706 22.621 51.006
1575246 LeafC.perdrymass_mean 43 0.907 0.706 460.391 445.854
1575246 Leaf.delta.15N_mean 43 0.907 0.706 -1.351 3.983
1575246 LeafDryMass.single_mean 43 0.907 0.706 66.413 5665.972
1575246 Leaffreshmass_mean 43 0.907 0.706 0.227 0.061
1575246 LeafLength_mean 43 0.907 0.706 76.131 2116.516
1575246 LeafN_mean 43 0.907 0.706 22.772 19.646
1575246 LeafNperArea_mean 43 0.907 0.706 1.218 0.075
1575246 LeafP_mean 43 0.907 0.706 1.936 0.517
1575246 LeafThickness_mean 43 0.907 0.706 0.174 0.001
1575246 LeafWaterCont_mean 43 0.907 0.706 3.672 2.453
1575246 LeafWidth_mean 43 0.907 0.706 2.539 6.637
1575246 PlantHeight_mean 43 0.907 0.706 3.212 24.902
1575246 RootingDepth_mean 43 0.907 0.706 0.895 4.258
1575246 SeedGerminationRate_mean 43 0.907 0.706 84.342 158.791
1575246 Seed.length_mean 43 0.907 0.706 3.798 4.443
1575246 SeedMass_mean 43 0.907 0.706 28.398 5250.632
1575246 Seed.num.rep.unit_mean 43 0.907 0.706 793.317 2149740.817
1575246 SLA_mean 43 0.907 0.706 20.626 41.653
1575246 SpecificRootLength_mean 43 0.907 0.706 6286.799 34225071.181
1575246 Stem.cond.dens_mean 43 0.907 0.706 99.247 5850.536
1575246 StemConduitDiameter_mean 43 0.907 0.706 39.811 633.278
1575246 StemDens_mean 43 0.907 0.706 0.441 0.019
1575246 StemDiam_mean 43 0.907 0.706 0.053 0.008
1575246 WoodFiberLength_mean 43 0.907 0.706 789.895 29495.315
1575246 Wood.vessel.length_mean 43 0.907 0.706 400.123 22482.312
1613637 Chromosome.cDNAcont_mean 17 1.000 1.000 7.057 11.186
1613637 Chromosome.n_mean 17 1.000 1.000 24.524 141.252
1613637 Disp.unit.leng_mean 17 1.000 1.000 2.595 3.758
1613637 LDMC_mean 17 1.000 1.000 0.248 0.003
1613637 LeafArea.leaflet.undef_mean 17 1.000 1.000 584.540 944440.565
1613637 LeafArea.leaf.undef_mean 17 1.000 1.000 791.009 3682102.010
1613637 LeafArea.undef.undef_mean 17 1.000 1.000 970.261 959623.457
1613637 LeafCN.ratio_mean 17 1.000 1.000 19.737 7.666
1613637 LeafC.perdrymass_mean 17 1.000 1.000 445.689 33.164
1613637 Leaf.delta.15N_mean 17 1.000 1.000 -0.522 4.061
1613637 LeafDryMass.single_mean 17 1.000 1.000 37.123 8911.952
1613637 Leaffreshmass_mean 17 1.000 1.000 0.179 0.158
1613637 LeafLength_mean 17 1.000 1.000 83.541 957.881
1613637 LeafN_mean 17 1.000 1.000 22.723 5.603
1613637 LeafNperArea_mean 17 1.000 1.000 0.936 0.064
1613637 LeafP_mean 17 1.000 1.000 2.333 0.338
1613637 LeafThickness_mean 17 1.000 1.000 0.166 0.006
1613637 LeafWaterCont_mean 17 1.000 1.000 5.134 5.039
1613637 LeafWidth_mean 17 1.000 1.000 0.924 0.486
1613637 PlantHeight_mean 17 1.000 1.000 0.329 0.010
1613637 RootingDepth_mean 17 1.000 1.000 0.288 0.023
1613637 SeedGerminationRate_mean 17 1.000 1.000 87.853 69.676
1613637 Seed.length_mean 17 1.000 1.000 2.125 1.643
1613637 SeedMass_mean 17 1.000 1.000 0.668 0.579
1613637 Seed.num.rep.unit_mean 17 1.000 1.000 3136.769 58555911.463
1613637 SLA_mean 17 1.000 1.000 25.955 24.270
1613637 SpecificRootLength_mean 17 1.000 1.000 12715.841 56249304.666
1613637 Stem.cond.dens_mean 17 1.000 1.000 71.400 1787.683
1613637 StemConduitDiameter_mean 17 1.000 1.000 36.966 894.034
1613637 StemDens_mean 17 1.000 1.000 0.378 0.005
1613637 StemDiam_mean 17 1.000 1.000 0.003 0.000
1613637 WoodFiberLength_mean 17 1.000 1.000 758.321 80825.535
1613637 Wood.vessel.length_mean 17 1.000 1.000 368.216 53161.504

Scatterplot comparing coverage of traits values across plots, when based on relative cover and when based on proportion of species richness

# Scatterplot of the two coverage measures, one point per plot, coloured by
# (log) plot richness.
# all variables have the same coverage. Showcase with LDMC
ggplot(data = filter(CWM, variable == "LDMC_mean"),
       mapping = aes(x = trait.coverage,
                     y = prop.sp.with.trait,
                     col = log(sp.richness))) +
  geom_point(pch = "+", alpha = 1/3) +
  # dashed 1:1 line, where both measures agree
  geom_abline(intercept = 0, slope = 1, col = 2, lty = 2, lwd = .7) +
  lims(x = c(0, 1), y = c(0, 1)) +
  scale_color_viridis() +
  labs(x = "Trait coverage (Relative  cover)",
       y = "Trait coverage (Proportion of species)") +
  coord_equal() +
  theme_bw()

Calculate summary statistics for trait coverage in plots

# Summary statistics of the two plot-level coverage measures, computed on the
# rows of a single trait (LDMC): one output row per measure, one column per
# statistic.
CWM.coverage <- CWM %>%
  filter(variable == "LDMC_mean") %>%
  summarize_at(.vars = vars(trait.coverage, prop.sp.with.trait),
               .funs = list(num.0s = ~ sum(. == 0),
                            min = ~ min(., na.rm = TRUE),
                            q025 = ~ quantile(., 0.25, na.rm = TRUE),
                            q50 = ~ quantile(., 0.50, na.rm = TRUE),
                            q75 = ~ quantile(., .75, na.rm = TRUE),
                            max = ~ max(., na.rm = TRUE),
                            mean = ~ mean(., na.rm = TRUE),
                            sd = ~ sd(., na.rm = TRUE))) %>%
  # column names are "<metric>_<stat>": split them while going to long format
  pivot_longer(cols = everything(),
               names_to = c("metric", "stat"),
               names_sep = "_",
               values_to = "value") %>%
  # factor levels fix the column order of the wide table
  mutate(stat = factor(stat, levels = c("num.0s", "min", "q025", "q50",
                                        "q75", "max", "mean", "sd"))) %>%
  spread(key = stat, value = value)
Summary of plot-level coverage of CWM and CWVs
metric num.0s min q025 q50 q75 max mean sd
prop.sp.with.trait 12089 0 0.778 0.895 0.975 1 0.843 0.178
trait.coverage 12335 0 0.782 0.948 0.995 1 0.843 0.224

Calculate summary statistics for CWMs and CWVs

# Summary statistics of CWM and CWV across plots: one output row per
# trait x metric (CWM or CWV), one column per statistic.
CWM.summary <- CWM %>%
  # free the name `variable` for the reshaping below
  rename(myvar = variable) %>%
  group_by(myvar) %>%
  summarize_at(.vars = vars(CWM:CWV),
               .funs = list(min = ~ min(., na.rm = TRUE),
                            q025 = ~ quantile(., 0.25, na.rm = TRUE),
                            q50 = ~ quantile(., 0.50, na.rm = TRUE),
                            q75 = ~ quantile(., .75, na.rm = TRUE),
                            max = ~ max(., na.rm = TRUE),
                            mean = ~ mean(., na.rm = TRUE),
                            sd = ~ sd(., na.rm = TRUE))) %>%
  # column names are "<metric>_<stat>": split them while going to long format
  pivot_longer(cols = -myvar,
               names_to = c("metric", "stat"),
               names_sep = "_",
               values_to = "value") %>%
  # factor levels fix the column order of the wide table
  mutate(stat = factor(stat, levels = c("min", "q025", "q50", "q75",
                                        "max", "mean", "sd"))) %>%
  spread(key = stat, value = value) %>%
  arrange(metric, myvar)
Summary of CWMs and CWVs across all plots
myvar metric min q025 q50 q75 max mean sd
Chromosome.cDNAcont_mean CWM 0.084 3.397 4.995 7.222000e+00 9.037800e+01 6.326000e+00 5.063000e+00
Chromosome.n_mean CWM 0.114 27.425 31.734 3.880200e+01 4.166200e+03 3.476800e+01 1.394200e+01
Disp.unit.leng_mean CWM 0.019 2.672 3.576 6.219000e+00 3.592797e+03 1.408500e+01 9.292800e+01
LDMC_mean CWM 0.014 0.242 0.283 3.340000e-01 9.680000e-01 2.860000e-01 6.900000e-02
LeafArea.leaflet.undef_mean CWM 0.001 507.341 1050.348 2.173208e+03 2.896005e+08 3.227542e+03 4.105272e+05
LeafArea.leaf.undef_mean CWM 0.007 483.984 1047.941 2.246854e+03 1.714323e+08 3.728897e+05 3.752212e+06
LeafArea.undef.undef_mean CWM 0.001 545.000 1188.443 2.191873e+03 2.239247e+07 2.034356e+03 2.518277e+04
LeafCN.ratio_mean CWM 0.986 20.054 23.391 2.794500e+01 8.780210e+02 2.557000e+01 1.485400e+01
LeafC.perdrymass_mean CWM 96.454 440.807 450.718 4.661500e+02 9.991090e+02 4.532810e+02 2.608700e+01
Leaf.delta.15N_mean CWM -12.462 -1.075 -0.127 7.980000e-01 4.310700e+01 -2.000000e-03 2.021000e+00
LeafDryMass.single_mean CWM 0.000 30.723 63.051 1.287540e+02 1.013838e+05 1.141710e+02 3.651690e+02
Leaffreshmass_mean CWM 0.000 0.123 0.248 4.820000e-01 1.641220e+02 4.560000e-01 9.660000e-01
LeafLength_mean CWM 0.013 48.259 74.755 1.025940e+02 6.731693e+04 8.652700e+01 2.037330e+02
LeafN_mean CWM 3.830 19.174 22.308 2.518800e+01 2.882780e+02 2.250100e+01 5.313000e+00
LeafNperArea_mean CWM 0.002 1.141 1.308 1.582000e+00 8.900100e+01 1.429000e+00 5.530000e-01
LeafP_mean CWM 0.014 1.530 1.836 2.205000e+00 6.392164e+04 1.198970e+02 1.329873e+03
LeafThickness_mean CWM 0.005 0.188 0.220 2.760000e-01 5.413100e+01 3.160000e-01 1.071000e+00
LeafWaterCont_mean CWM 0.387 3.479 4.436 5.454000e+00 4.260880e+02 5.551000e+00 9.426000e+00
LeafWidth_mean CWM 0.005 0.766 1.349 2.517000e+00 2.675318e+04 6.206000e+00 1.381160e+02
PlantHeight_mean CWM 0.005 0.334 0.601 4.262000e+00 6.994000e+01 3.019000e+00 4.593000e+00
RootingDepth_mean CWM 0.004 0.352 0.511 7.590000e-01 7.274138e+04 8.000000e-01 5.745700e+01
SeedGerminationRate_mean CWM 7.122 84.119 88.932 9.254900e+01 2.416300e+02 8.800100e+01 6.844000e+00
Seed.length_mean CWM 0.061 1.906 2.492 4.019000e+00 1.089329e+04 1.522300e+01 2.169740e+02
SeedMass_mean CWM 0.000 0.929 2.200 1.682700e+01 3.486900e+06 6.143220e+02 1.818021e+04
Seed.num.rep.unit_mean CWM 0.000 1050.522 3671.694 1.537319e+04 1.747089e+21 7.762646e+15 2.942668e+18
SLA_mean CWM 1.473 15.401 20.169 2.464600e+01 5.490787e+03 3.447800e+01 1.274530e+02
SpecificRootLength_mean CWM 0.000 4402.100 7314.035 1.206517e+04 3.071495e+09 4.836732e+06 6.392209e+07
Stem.cond.dens_mean CWM 0.013 72.172 97.881 1.396670e+02 4.878582e+06 2.510080e+02 8.901057e+03
StemConduitDiameter_mean CWM 0.001 33.015 41.678 5.290100e+01 1.664311e+07 9.362100e+01 2.039911e+04
StemDens_mean CWM 0.053 0.361 0.420 4.960000e-01 2.640000e+00 4.350000e-01 1.160000e-01
StemDiam_mean CWM 0.000 0.010 0.024 9.600000e-02 8.884200e+01 6.700000e-02 1.660000e-01
WoodFiberLength_mean CWM 116.018 721.210 810.507 9.266720e+02 2.204640e+06 1.345073e+03 1.068943e+04
Wood.vessel.length_mean CWM 31.679 367.037 430.626 5.217140e+02 4.298853e+05 4.661420e+02 5.615230e+02
Chromosome.cDNAcont_mean CWV 0.000 5.203 13.325 3.288400e+01 2.143228e+04 4.216700e+01 8.689900e+01
Chromosome.n_mean CWV 0.000 67.699 146.202 3.295600e+02 1.451823e+07 4.480960e+02 2.885079e+04
Disp.unit.leng_mean CWV 0.000 1.423 3.803 1.560100e+01 3.227013e+06 1.994971e+04 1.687753e+05
LDMC_mean CWV 0.000 0.002 0.004 6.000000e-03 1.570000e-01 5.000000e-03 5.000000e-03
LeafArea.leaflet.undef_mean CWV 0.000 293166.506 1450872.063 5.994004e+06 3.653300e+16 2.212119e+11 7.913715e+13
LeafArea.leaf.undef_mean CWV 0.000 273334.585 1254730.027 6.702791e+06 7.347254e+15 5.019003e+13 4.281416e+14
LeafArea.undef.undef_mean CWV 0.000 404467.753 1201634.610 4.891264e+06 1.809148e+15 1.480572e+10 3.238787e+12
LeafCN.ratio_mean CWV 0.000 14.692 34.930 8.115000e+01 6.171788e+05 4.190060e+02 4.285596e+03
LeafC.perdrymass_mean CWV 0.000 173.798 338.452 5.941330e+02 1.057395e+05 6.285710e+02 2.958634e+03
Leaf.delta.15N_mean CWV 0.000 1.675 2.965 4.796000e+00 6.184630e+02 6.737000e+00 2.511600e+01
LeafDryMass.single_mean CWV 0.000 1063.493 5479.479 2.758981e+04 7.538101e+09 3.616439e+05 3.419875e+07
Leaffreshmass_mean CWV 0.000 0.019 0.088 4.610000e-01 4.203363e+04 2.093000e+00 1.115800e+02
LeafLength_mean CWV 0.000 987.647 2310.747 5.022841e+03 1.874145e+10 2.483168e+05 2.771567e+07
LeafN_mean CWV 0.000 13.151 23.689 3.797500e+01 2.717296e+05 3.414000e+01 3.775880e+02
LeafNperArea_mean CWV 0.000 0.064 0.122 2.440000e-01 1.125946e+04 8.500000e-01 2.870100e+01
LeafP_mean CWV 0.000 0.144 0.278 5.170000e-01 1.021494e+09 4.018739e+06 4.199098e+07
LeafThickness_mean CWV 0.000 0.002 0.005 1.200000e-02 3.430838e+03 2.633000e+00 3.094500e+01
LeafWaterCont_mean CWV 0.000 1.439 2.856 4.859000e+00 4.497611e+04 3.116840e+02 2.597320e+03
LeafWidth_mean CWV 0.000 0.485 1.747 4.713000e+00 3.023911e+08 1.311644e+05 3.413996e+06
PlantHeight_mean CWV 0.000 0.021 0.094 2.413600e+01 8.370440e+02 2.191300e+01 4.602500e+01
RootingDepth_mean CWV 0.000 0.041 0.101 2.490000e-01 1.871033e+10 1.273896e+04 1.455747e+07
SeedGerminationRate_mean CWV 0.000 26.379 50.522 9.287800e+01 7.015952e+03 8.143800e+01 1.357860e+02
Seed.length_mean CWV 0.000 0.535 1.283 4.579000e+00 2.966463e+07 6.943769e+04 1.201159e+06
SeedMass_mean CWV 0.000 0.701 8.140 1.080944e+03 2.901391e+13 2.283191e+09 6.881077e+10
Seed.num.rep.unit_mean CWV 0.000 3551176.277 68274419.118 1.271837e+09 5.087198e+42 2.754316e+37 9.730343e+39
SLA_mean CWV 0.000 19.558 36.742 7.506600e+01 7.533142e+06 5.394785e+04 4.392071e+05
SpecificRootLength_mean CWV 0.000 10784534.016 34964920.672 7.818524e+07 2.358521e+18 6.704207e+15 9.606483e+16
Stem.cond.dens_mean CWV 0.000 1824.341 4343.688 1.031891e+04 3.534652e+13 1.495264e+09 8.992414e+10
StemConduitDiameter_mean CWV 0.000 216.711 492.938 1.050047e+03 1.065354e+15 2.251156e+09 1.362102e+12
StemDens_mean CWV 0.000 0.005 0.010 1.500000e-02 1.447000e+00 1.600000e-02 5.400000e-02
StemDiam_mean CWV 0.000 0.000 0.001 8.000000e-03 7.519691e+04 2.810000e-01 9.476300e+01
WoodFiberLength_mean CWV 0.000 27210.371 50165.281 8.517096e+04 1.868337e+13 1.963415e+08 2.404369e+10
Wood.vessel.length_mean CWV 0.000 13062.775 26296.712 5.237147e+04 7.096939e+11 1.574603e+06 9.072394e+08

3 Classify plots in is.forest or is.non.forest based on species traits

sPlot has two independent systems for classifying plots to vegetation types. The first relies on the expert opinion of data contributors and classifies plots into broad habitat types. These broad habitat types are coded using 5, non-mutually exclusive dummy variables:
1) Forest
2) Grassland
3) Shrubland
4) Sparse vegetation
5) Wetland
A plot may belong to more than one formation, e.g. a savanna is categorized as Forest + Grassland (FG). Unfortunately, this classification is not consistently available across all plots: the large majority of the classified plots are located in Europe.
There is therefore the need to give at least some indication to the remaining unclassified plots. To achieve this, already from v2.1, sPlot started using a classification into forest and non-forest, based on the share of trees, and the layering of vegetation. Here, we derived the (mutually exclusive) is.forest and is.non.forest classification of plots.

3.1 Derive species level information on Growth Forms

We used different sources of information:
1) Data from the gap-filled trait matrix
2) Manual cleaning of the most common species for which growth trait info is not available
3) Data from TRY (public dataset only) on all species with growth form info (Trait ID = 42)
4) Cross-match with species assigned to tree layer in DT table.

Step 1: Attach growth form trait information to DT table. Growth form information derives from TRY

# Vascular-plant records with growth form and mean plant height attached
DT.gf <- DT2 %>%
  filter(Taxon_group == "Vascular plant") %>%
  # join with try names, using resolved species names as key;
  # when a resolved name occurs more than once, its first row is used
  left_join(try.species.names %>%
              dplyr::select(Species = Name_short, GrowthForm) %>%
              distinct(Species, .keep_all = TRUE),
            by = "Species") %>%
  # species-level mean plant height, same key
  left_join(try.species.means %>%
              dplyr::select(Species = Name_short, PlantHeight_mean),
            by = "Species")
# number of records without Growth Form info
sum(is.na(DT.gf$GrowthForm))
## [1] 4997688

Step 2: Select most common species without growth-trait information to export and check manually

# Number of records per species among those still lacking a growth form,
# most frequent species first
top.gf.nas <- DT.gf %>%
  filter(is.na(GrowthForm)) %>%
  group_by(Species) %>%
  tally(sort = TRUE)
## `summarise()` ungrouping output (override with `.groups` argument)
# Export the species with more than 1000 records lacking a growth form, to be
# classified manually. The output file is passed by position: the argument
# was renamed from `path` to `file` in readr 1.4.0, and the positional form
# works with both names without a deprecation warning.
write_csv(top.gf.nas %>%
            filter(n > 1000),
          "../_derived/Species_missingGF.csv")

The first 47569 species account for 56.57% of the missing records. Assign growth forms manually, reimport and coalesce into DT.gf

# Import manually classified species - this info is also reported in Appendix 1
# (two character columns are read, `species` and `GrowthForm`; see the column
# specification echoed below)
gf.manual <- read_csv("../_derived/Species_missingGF_complete.csv")
## 
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   species = col_character(),
##   GrowthForm = col_character()
## )
# Fill the gaps in GrowthForm with the manual classification.
# The manual column enters the join as "GrowthForm.m" (suffix) and is dropped
# once coalesced; growth forms already present are never overwritten.
DT.gf <- DT.gf %>%
  left_join(gf.manual,
            by = c("Species" = "species"),
            suffix = c("", ".m")) %>%
  mutate(GrowthForm = coalesce(GrowthForm, GrowthForm.m)) %>%
  dplyr::select(-GrowthForm.m)

After manual completion, the number of records without growth form information decreased to 2331724.

Step 3: Import additional data on growth-form from TRY (Accessed 10 March 2020).
All public data on growth form downloaded. First take care of unmatched quotation marks in the txt file. Do this from command line.

# escape all unmatched quotation marks. Run in Linux terminal
#sed 's/"/\\"/g' 8854.txt > 8854_test.csv
#sed "s/'/\\'/g" 8854.txt > 8854_test.csv

Information on growth form is not organized and has a myriad of levels. Extract and simplify to the set of few types used so far. In case a species is attributed to multiple growth forms use a majority vote.

# Raw TRY export (tab-delimited)
all.gf0 <- read_delim("../_input/TRY5.0_v1.1/8854_test.txt", delim="\t") 

# Simplify the free-text growth forms (TraitID 42) of the species occurring in
# sPlot to four classes: "other", "tree", "shrub", "herb".
# Every rule tests the ORIGINAL lower-cased string and the rules are applied
# in sequence, so when a string matches several rules the LAST one wins
# (herb > shrub > tree > other). Strings matching no rule are discarded.
# Each pattern is assembled with paste(collapse = "|") rather than written as
# one string literal broken across lines: a line break inside the literal is
# part of the regular expression, which made the alternative that followed
# the break ("hydrohalophyte") impossible to match.
all.gf <- all.gf0 %>% 
  filter(TraitID==42) %>% 
  distinct(AccSpeciesName, OrigValueStr) %>% 
  rename(GrowthForm0=OrigValueStr) %>% 
  mutate(GrowthForm0=tolower(GrowthForm0)) %>%
  filter(AccSpeciesName %in% sPlot.species$Species) %>% 
  mutate(GrowthForm_simplified= GrowthForm0) %>% 
  # rule 1: climbers, epiphytes, succulents, parasites, aquatics, ... -> other
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0,
                                                       paste(c("vine", "climber", "liana", "carnivore",
                                                               "epiphyte", "^succulent", "lichen",
                                                               "parasite", "hydrohalophyte", "aquatic",
                                                               "cactous", "parasitic", "hydrophytes",
                                                               "carnivorous"), 
                                                             collapse="|")), 
                                       "other")) %>%
  # rule 2: trees
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0, 
                                                       paste(c("tree", "conifer", "^woody$", "palmoid",
                                                               "mangrove", "gymnosperm"), 
                                                             collapse="|")), 
                                       "tree")) %>% 
  # rule 3: shrubs
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0, 
                                                       paste(c("shrub", "scrub", "bamboo"), 
                                                             collapse="|")), 
                                       "shrub")) %>%
  # rule 4: herbs
  mutate(GrowthForm_simplified=replace(GrowthForm_simplified, 
                                       list=str_detect(GrowthForm0,
                                                       paste(c("herb", "sedge", "graminoid", "fern",
                                                               "forb", "herbaceous", "grass",
                                                               "chaemaephyte", "geophyte", "annual"), 
                                                             collapse="|")),
                                       "herb")) %>%
  # anything not mapped to one of the four classes is set to NA and removed
  mutate(GrowthForm_simplified=ifelse(GrowthForm_simplified %in% c("other", "herb", "shrub", "tree"), 
                                      GrowthForm_simplified, NA)) %>% 
  filter(!is.na(GrowthForm_simplified)) 

# Some species have multiple attributions - use a majority vote.
#   x: vector of growth-form labels attributed to one species
# Returns the most frequent label as a character string, or "Unknown" when
# the two most frequent labels are tied. If `x` holds a single distinct value
# that value is returned as is (NA included). Missing values are not counted
# as votes, so c("tree", NA) yields "tree"; an empty `x` yields NA.
get.mode <- function(x) {
  distinct.values <- unique(x)
  if (length(distinct.values) == 1) {
    return(as.character(distinct.values))
  }
  # table() ignores NA, so `votes` may hold fewer entries than `distinct.values`
  votes <- sort(table(x), decreasing = TRUE)
  if (length(votes) == 0) {
    return(NA_character_)
  }
  if (length(votes) == 1 || votes[1] != votes[2]) {
    return(names(votes)[1])
  }
  "Unknown"
}

# One growth form per species (majority vote); species whose vote ends in a
# tie ("Unknown") are removed
all.gf <- all.gf %>%
  group_by(AccSpeciesName) %>%
  summarise(GrowthForm_simplified = get.mode(GrowthForm_simplified),
            .groups = "drop") %>%
  filter(GrowthForm_simplified != "Unknown")

# number of species per simplified growth form (NA shown, if any)
table(all.gf$GrowthForm_simplified, exclude = NULL)
## 
##  herb other shrub  tree 
## 21467  3429  7406  9194
# coalesce this info into DT.gf: growth forms already present are kept,
# only missing ones are filled with the simplified TRY growth form
DT.gf <- DT.gf %>%
  left_join(all.gf, by = c("Species" = "AccSpeciesName")) %>%
  mutate(GrowthForm = coalesce(GrowthForm, GrowthForm_simplified)) %>%
  dplyr::select(-GrowthForm_simplified)

Step 4: Cross-match. Assign all species occurring in at least one relevé in the tree layer as tree. Conservatively, do this only when the record is at species level (exclude records at genus\family level)

# Species-level taxa recorded at least once in layer 1 and still lacking a
# growth form (names listed once, in order of first appearance)
other.trees <- DT.gf %>%
  filter(Layer == 1,
         is.na(GrowthForm),
         Rank_correct == "species") %>%
  distinct(Species) %>%
  pull(Species)

# Label every record of the species listed in other.trees as "tree"
DT.gf <- DT.gf %>%
  mutate(GrowthForm = if_else(Species %in% other.trees,
                              "tree",
                              GrowthForm))

After cross-matching, the number of records without growth form information decreased to 1264620.

## `summarise()` ungrouping output (override with `.groups` argument)
Average height per growth form
GrowthForm Height
herb 0.520
herb/shrub 1.982
herb 1.522
herb/shrub/tree 5.241
other 4.550
shrub 2.351
shrub/tree 5.094
shrub 4.644
tree 13.077
NA 2.507

Classify species as trees or tall shrubs vs. other. Make a compact table of species growth forms and create the fields is.tree.or.tall.shrub and is.not.tree.or.shrub.and.small.
Define a species as is.tree.or.tall.shrub when it is either defined as a tree, OR has a height >=10.
Define a species as is.not.tree.or.shrub.and.small when it has a height <10, as long as it is not defined as a tree. When height is not available, it is sufficient that the species is classified as “herb” or “other”.

# Compact species table with the logical field is.tree.or.tall.shrub
GF <- DT.gf %>%
  distinct(Species, GrowthForm, PlantHeight_mean) %>%
  # merge the two spellings ("\" and "/") of the mixed growth forms
  mutate(GrowthForm = fct_collapse(GrowthForm,
                                   "herb/shrub" = c("herb\\shrub", "herb/shrub"),
                                   "shrub/tree" = c("shrub/tree", "shrub\\tree"))) %>%
  # Rules are checked top to bottom and the first one that holds decides;
  # a rule whose condition is NA is skipped.
  mutate(is.tree.or.tall.shrub = case_when(
    # TRUE: any growth form mentioning "tree", or height >= 10
    str_detect(GrowthForm, "tree") ~ TRUE,
    PlantHeight_mean >= 10 ~ TRUE,
    # FALSE: not a tree and height < 10 ...
    PlantHeight_mean < 10 ~ FALSE,
    # ... or, without height data, a growth form mentioning herb or other
    is.na(PlantHeight_mean) &
      str_detect(GrowthForm, "herb|other") ~ FALSE,
    # otherwise undetermined (e.g. shrubs without height data)
    TRUE ~ NA
  ))


## cross-check classification  
table(GF$GrowthForm, GF$is.tree.or.tall.shrub, exclude = NULL)
##                  
##                   FALSE  TRUE  <NA>
##   herb            22430     2     0
##   herb/shrub         47     1     0
##   herb/shrub/tree     0     2     0
##   other            1646    42     0
##   shrub            5410    93  2323
##   shrub/tree          0   133     0
##   tree                0 13458     0
##   <NA>              818    50 26691
## Check for herb species classified as trees
GF %>%
  filter(is.tree.or.tall.shrub,
         GrowthForm == "herb")
## # A tibble: 2 x 4
##   Species                   GrowthForm PlantHeight_mean is.tree.or.tall.shrub
##   <chr>                     <fct>                 <dbl> <lgl>                
## 1 Phyllostachys bambusoides herb                   16.6 TRUE                 
## 2 Bambusa vulgaris          herb                   14.2 TRUE

These are bamboo species and their heights seem reasonable.

3.2 Classify plots into forest\non-forest

Define a plot as forest if:
1) Has a total cover of the tree layer >=25% (from header)
2) Has a total cover in Layer 1 >=25% (from DT)
3) Has a total cover of tree or tall shrub species >=25% (from DT + TRY)
4) Has data on basal area summing to more than 10 m2/ha

The first three criteria are adapted to define non-forest as follows:
1) Info on total cover of the tree layer is available and <25%
2) Info on total cover in Layer 1 is available and <25%
3) The relative cover of non tree species is >75%

Criteria 2 and 3 only apply to plots having cover data in percentage.
Reimport header file

# Restore the objects saved in the header file; the chunks below use `header`
load("../_output/header_sPlot3.0.RData")

Criterium 1

# Criterion 1: forest when the tree-layer cover reported in the header is
# at least 25%; NA when the header reports no tree-layer cover
plot.vegtype1 <- header %>%
  dplyr::select(PlotObservationID,
                Cover_trees = `Cover tree layer (%)`) %>%
  mutate(is.forest = Cover_trees >= 25)

table(plot.vegtype1 %>% dplyr::select(is.forest), exclude = NULL)
## 
##   FALSE    TRUE    <NA> 
##   26211  191833 1759593

Criterium 2

# Select only plots having cover data in percentage
# (IDs of the plots whose abundance scales are all "CoverPerc")
mysel <- DT.gf %>%
  distinct(PlotObservationID, Ab_scale) %>%
  group_by(PlotObservationID) %>%
  summarize(AllCovPer = all(Ab_scale %in% "CoverPerc")) %>%
  filter(AllCovPer) %>%
  pull(PlotObservationID)
## `summarise()` ungrouping output (override with `.groups` argument)
# Excluded plots: header rows minus the plots selected in mysel
nrow(header)-length(mysel)
## [1] 294880
# Criterion 2: forest when the combined cover of layers 1-3 is at least 25%
plot.vegtype2 <- DT.gf %>%
  filter(PlotObservationID %in% mysel) %>%
  filter(Layer %in% c(1, 2, 3)) %>%
  # first sum the cover of all species in a layer;
  # the result stays grouped by plot for the next step
  group_by(PlotObservationID, Layer) %>%
  summarize(Cover_perc = sum(Abundance), .groups = "drop_last") %>%
  # then combine cover across layers
  summarize(Cover_perc = combine.cover(Cover_perc), .groups = "drop") %>%
  mutate(is.forest = Cover_perc >= 25)
## `summarise()` regrouping output by 'PlotObservationID' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
# Tabulate the outcome of criterion 2. This call used to tabulate
# plot.vegtype1 again, so the counts echoed below are those of criterion 1
# and need to be regenerated.
table(plot.vegtype2 %>% dplyr::select(is.forest), exclude=NULL)
## 
##   FALSE    TRUE    <NA> 
##   26211  191833 1759593

Criterium 3

# Criterion 3: classify plots from the cover of tree / tall-shrub taxa
plot.vegtype3 <- DT.gf %>%
  # filter plots where all records are recorded as percentage cover
  filter(PlotObservationID %in% mysel) %>%
  # combine the cover of each taxon across layers
  group_by(PlotObservationID, Species) %>%
  summarize(cover_perc = combine.cover(Abundance), .groups = "drop") %>%
  # attach species Growth Form information
  left_join(GF, by = "Species") %>%
  # per plot: cover of trees / tall shrubs, of other taxa, and of taxa whose
  # status is unknown (NA)
  group_by(PlotObservationID) %>%
  summarize(cover_tree = sum(cover_perc * is.tree.or.tall.shrub, na.rm = TRUE),
            cover_non_tree = sum(cover_perc * (!is.tree.or.tall.shrub), na.rm = TRUE),
            cover_unknown = sum(cover_perc * is.na(is.tree.or.tall.shrub)),
            .groups = "drop") %>%
  rowwise() %>%
  ## classify plots based on cover of different growth forms
  mutate(tot.cover = sum(cover_tree, cover_non_tree, cover_unknown, na.rm = TRUE),
         # forest: at least 25% cover of trees / tall shrubs
         is.forest = cover_tree >= 25,
         # non-forest: below that threshold AND more than 75% of the total
         # cover belongs to taxa known not to be trees / tall shrubs
         is.non.forest = cover_tree < 25 & (cover_non_tree / tot.cover) > .75)
## `summarise()` regrouping output by 'PlotObservationID' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
table(plot.vegtype3 %>% dplyr::select(is.forest, is.non.forest), exclude=NULL)
##          is.non.forest
## is.forest   FALSE    TRUE    <NA>
##     FALSE   72087 1136069      10
##     TRUE   474591       0       0

Criterion 4

# Criterion 4: for plots with records on the "x_BA" abundance scale, add up
# the abundances per plot and flag the plot as forest when the total
# exceeds 10.
plot.vegtype4 <- DT.gf %>%
  filter(Ab_scale == "x_BA") %>%
  group_by(PlotObservationID) %>%
  summarize(tot.ba = sum(Abundance)) %>%
  mutate(is.forest = tot.ba > 10)
## `summarise()` ungrouping output (override with `.groups` argument)
table(dplyr::select(plot.vegtype4, is.forest), exclude = NULL)
## 
## FALSE  TRUE 
##  1358  5558

Combine classifications from the four criteria. Use majority vote to assign plots. In case of ties, a progressively lower priority is given from criterion 1 to criterion 4.

# Combine the four criteria into one forest / non-forest label per plot.
# Each criterion's vote gets an explicitly named column (.c1 - .c4), so the
# logic does not depend on the suffixes dplyr generates when joined tables
# share a column name.
plot.vegtype <- header %>% 
  dplyr::select(PlotObservationID) %>% 
  left_join(plot.vegtype1 %>% 
              dplyr::select(PlotObservationID, is.forest.c1=is.forest), 
            by="PlotObservationID") %>% 
  left_join(plot.vegtype2 %>% 
              dplyr::select(PlotObservationID, is.forest.c2=is.forest), 
            by="PlotObservationID") %>% 
  left_join(plot.vegtype3 %>% 
              dplyr::select(PlotObservationID, 
                            is.forest.c3=is.forest, 
                            is.non.forest.c3=is.non.forest), 
            by="PlotObservationID") %>% 
  left_join(plot.vegtype4 %>% 
              dplyr::select(PlotObservationID, is.forest.c4=is.forest), 
            by="PlotObservationID") %>% 
  ## assign vegtype based on majority vote. In case of ties use the order of criteria as ranking
  # share of available (non-NA) votes in favour of forest; NaN when no criterion applies
  mutate(mean.forest=rowMeans(cbind(is.forest.c1, is.forest.c2, 
                                    is.forest.c3, is.forest.c4), na.rm=TRUE)) %>% 
  # tie-breaker: the vote of the first criterion that is available
  mutate(mean.forest2=coalesce(is.forest.c1, is.forest.c2, is.forest.c3, is.forest.c4)) %>% 
  mutate(is.forest=ifelse(mean.forest==0.5, mean.forest2, mean.forest>0.5)) %>%  
  # same for is.non.forest; criteria 1, 2 and 4 vote "non forest" when they
  # say "not forest", criterion 3 has its own non-forest flag
  mutate(mean.non.forest=rowMeans(cbind(!is.forest.c1, !is.forest.c2, 
                                        is.non.forest.c3, !is.forest.c4), na.rm=TRUE)) %>% 
  mutate(mean.non.forest2=coalesce(!is.forest.c1, !is.forest.c2, 
                                   is.non.forest.c3, !is.forest.c4)) %>% 
  mutate(is.non.forest=ifelse(mean.non.forest==0.5, mean.non.forest2, mean.non.forest>0.5)) %>% 
  # when both is.forest & is.non.forest are FALSE transform to NA
  # (both.F stays NA where is.non.forest is NA; which() skips those rows)
  mutate(both.F=(is.forest==FALSE | is.na(is.forest)) & is.non.forest==FALSE) %>% 
  mutate(is.forest=replace(is.forest, which(both.F), NA)) %>% 
  mutate(is.non.forest=replace(is.non.forest, which(both.F), NA))

table(plot.vegtype %>% dplyr::select(is.forest, is.non.forest), exclude=NULL)
##          is.non.forest
## is.forest   FALSE    TRUE    <NA>
##     FALSE       0 1160476       7
##     TRUE   468259       0       0
##     <NA>        0       0  348895

3.3 Cross-check and validate

Cross check with sPlot’s 5-class (incomplete) native classification deriving from data contributors. Build a Confusion matrix.

# Pair the habitat flag stored in the header (`Forest`) with the label derived
# above, in long format: `isfor_isnonfor` holds the derived class and
# `veg_type` the header-based class, both coded as "Forest" / "Other".
cross.check <- header %>%
  dplyr::select(PlotObservationID, Forest) %>%
  left_join(
    plot.vegtype %>%
      dplyr::select(PlotObservationID, Forest = is.forest, Other = is.non.forest) %>%
      # one row per plot and derived class, kept only where the flag is TRUE
      gather(isfor_isnonfor, value, -PlotObservationID) %>%
      filter(value) %>%
      dplyr::select(-value),
    by = "PlotObservationID"
  ) %>%
  # header-based complement of Forest
  mutate(Other = (1 * Forest) != 1) %>%
  # same reshaping for the header-based classes
  gather(veg_type, value, -PlotObservationID, -isfor_isnonfor) %>%
  filter(value == 1) %>%
  dplyr::select(-value)

# Confusion matrix: derived labels in rows, header-based labels in columns.
# Both factors share one level set so the table is square.
u <- union(cross.check$isfor_isnonfor, cross.check$veg_type)
t <- table(factor(cross.check$isfor_isnonfor, levels = u),
           factor(cross.check$veg_type, levels = u))
confm <- caret::confusionMatrix(t)
Confusion matrix between sPlot’s native classification of habitats (columns), and classification based on four criteria based on vegetation layers and growth forms (rows)
Forest Other
Forest 381411 25588
Other 28020 973463
Formulas of associated statistics are available on the help page of the caret package and associated references. The overall accuracy of the classification based on is.forest\is.non.forest, when tested against sPlot’s native habitat classification, is 0.96; the Kappa statistic is 0.91.
Associated statistics of confusion matrix by class
x
Sensitivity 0.9315636
Specificity 0.9743877
Pos Pred Value 0.9371301
Neg Pred Value 0.9720215
Precision 0.9371301
Recall 0.9315636
F1 0.9343385
Prevalence 0.2906896
Detection Rate 0.2707958
Detection Prevalence 0.2889629
Balanced Accuracy 0.9529756
## [1] TRUE

Through the process described above, we managed to classify 1628735 plots, of which 468259 are forest and 1160476 are non-forest.
The total number of plots with attribution to forest\non-forest (either coming from sPlot’s native classification, or from the process above) is: 1726506.

4 Export and update other objects

# Build the species-level table: join growth-form information and TRY trait
# means onto sPlot.species, then keep a single row per Species.
sPlot.traits <- sPlot.species %>% 
  arrange(Species) %>% 
  # growth form and tree / tall-shrub flag
  left_join(GF %>% 
              dplyr::select(Species, GrowthForm, is.tree.or.tall.shrub), 
            by="Species") %>% 
  # trait means, matched on the taxon name
  left_join(try.combined.means %>% 
              rename(Species=Taxon_name), by="Species") %>% 
  ## some entries are duplicated (both at species and Genus level)
  ## Sorting by descending n and taking the first row per Species keeps the
  ## duplicate with the largest n.
  ## NOTE(review): the earlier note here read "Keep only genus-level
  ## averages" - confirm the genus-level row is always the one with larger n.
  group_by(Species) %>% 
  arrange(desc(n)) %>% 
  slice(1) %>% 
  ungroup() %>% 
  dplyr::select(-Rank_correct)
  
# Store the four trait objects together in one .RData file
save(try.combined.means, CWM, sPlot.traits, trait.legend, file="../_output/Traits_CWMs_sPlot3.RData")

# Attach the forest / non-forest flags to the header and place them right
# after the PlotObservationID:ESY block of columns.
# The result is saved over the same file the header was loaded from, so a
# re-run starts from a header that already carries is.forest / is.non.forest.
# Dropping any existing copies first keeps the join from producing
# suffixed duplicates (is.forest.x / is.forest.y) and the select from failing.
header <- header %>% 
  dplyr::select(-any_of(c("is.forest", "is.non.forest"))) %>% 
  left_join(plot.vegtype %>% 
              dplyr::select(PlotObservationID, is.forest, is.non.forest),
            by="PlotObservationID") %>% 
  dplyr::select(PlotObservationID:ESY, is.forest:is.non.forest, everything())

save(header, file="../_output/header_sPlot3.0.RData")

APPENDIX

Appendix 1 - Growth forms of most common species

As assigned manually.

# Print the growth-form assignment file verbatim, one line per record
cat(readLines("../_derived/Species_missingGF_complete.csv"), sep = '\n')
species,GrowthForm
Taraxacum,herb
Quercus robur,tree
Corylus avellana,tree
Frangula alnus,shrub
Festuca ovina,herb
Vaccinium vitis-idaea,shrub
NA,NA
Rubus,shrub
Capsella bursa-pastoris,herb
Salix cinerea,tree
Solanum dulcamara,herb
Tripolium pannonicum,herb
Impatiens noli-tangere,herb
Ononis spinosa,shrub
Centaurea nigra,herb
Rubus ulmifolius,shrub
Alisma plantago-aquatica,herb
Spirodela polyrhiza,herb
Salix,NA
Helictochloa pratensis,herb
Ruscus aculeatus,shrub
Lophozonia,tree
Stachys recta,herb
Crataegus laevigata,shrub
Festuca rupicola,herb
Metrosideros diffusa,herb
Rhamnus cathartica,shrub\tree
Helianthemum oelandicum,herb
Dicksonia squarrosa,herb
Rosa,shrub
Carex viridula,herb
Podocarpus spinulosus,shrub
Pinus mugo,tree
Orthilia secunda,herb
Cyathea smithii,tree
Erica arborea,shrub\tree
Hippocrepis emerus,herb
Phillyrea latifolia,tree
Triglochin palustris,herb
Metrosideros fulgens,other
Apera spica-venti,herb
Crataegus,shrub
Blechnum discolor,herb
Blechnum novae-zelandiae,herb
Tragopogon pratensis,herb
Bellidiastrum michelii,herb
Sedum album,herb
Raphanus raphanistrum,herb
Quercus coccifera,tree
Quercus mongolica,tree
Hydrocharis morsus-ranae,herb
Camellia japonica,shrub\tree
Arbutus unedo,shrub\tree
Dactylorhiza majalis,herb
Trachelospermum asiaticum,other
Myosotis laxa,herb
Valeriana crispa,herb
Hieracium lachenalii,herb
Festuca drymeja,herb
Asplenium flaccidum,herb
Rubus australis,other
Adenostyles alpina,herb
Viola,herb
Hymenophyllum demissum,herb
Hieracium,herb
Senecio nemorensis,herb
Lemna,herb
Microsorum pustulatum,herb
Epilobium ciliatum,herb
Paederia foetida,herb
Ledum palustre,shrub
Arctostaphylos uva-ursi,shrub
Poaceae,herb
Epilobium,herb
Alchemilla,herb
Genista sagittalis,shrub
Blechnum nipponicum,herb
Biscutella laevigata,herb
Galeopsis,herb
Ribes uva-crispa,shrub
Prunus mahaleb,shrub\tree
Asparagus officinalis,shrub
Disporum smilacinum,herb
Brunella vulgaris,herb
Veronica anagallis-aquatica,herb
Rhododendron kaempferi,shrub
Festuca,herb
Lipandra polysperma,herb
Sedum rupestre,herb
Helictochloa versicolor,herb
Hymenophyllum nephrophyllum,herb
Cephalotaxus harringtonia,shrub\tree
Helleborus odorus,herb
Hyacinthoides non-scripta,herb
Artemisia maritima,shrub
Helictochloa bromoides,herb
Salix euxina,tree
Viburnum furcatum,shrub
Hymenophyllum multifidum,herb
Asplenium bulbiferum,herb
Cotinus coggygria,shrub
Juniperus phoenicea,shrub\tree
Artemisia indica,herb
Pieris japonica,shrub\tree
Genista scorpius,shrub
Viburnum wrightii,shrub
Ampelopsis glandulosa,other
Potentilla pusilla,herb
Blechnum fluviatile,herb
Rubus palmatus,shrub
Artemisia santonicum,herb\shrub
Senecio leucanthemifolius,herb
Thymus,herb
Solidago canadensis,herb
Echinops ritro,herb
Seseli elatum,herb
Cymbidium goeringii,herb
Pleioblastus argenteostriatus,herb
Reynoutria japonica,herb
Rubus angloserpens,shrub
Noccaea,herb
Smilax glauca,other
Polystichum spinulosum,herb
Scirpus maritimus,herb
Luzuriaga parviflora,herb
Bryonia cretica,other
Kadsura japonica,other
Betula,tree
Carex goodenoughii,herb
Thymus longicaulis,herb
Thelypteris limbosperma,herb
Callitriche,herb
Salix pentandra,tree
Chenopodiastrum murale,herb
Quercus,tree
Parthenocissus tricuspidata,other
Aria alnifolia,tree
Callicarpa mollis,shrub
Amaranthus hybridus,herb
Leptospermum scoparium,shrub\tree
Corylus sieboldiana,shrub
Pittosporum tobira,shrub\tree
Torilis arvensis,herb
Zanthoxylum bungeanum,shrub\tree
Crepis vesicaria,herb
Dioscorea tokoro,herb
Leptopteris superba,herb
Cyanus montanus,herb
Prunus cerasifera,shrub\tree
Salix appendiculata,shrub
Lathyrus laxiflorus,herb
Galeopsis ladanum,herb
Ericameria nauseosa,shrub
Cyclamen hederifolium,herb
Hymenophyllum revolutum,herb
Dendropanax trifidus,shrub\tree
Lastreopsis hispida,herb
Pilosella hoppeana,herb
Vandasina retusa,other
Oxybasis rubra,herb
Dianthus hyssopifolius,herb
Clinopodium nepeta,herb
Cardamine glanduligera,herb
Chamaesyce peplis,herb
Pueraria montana,other
Alyssum turkestanicum,herb
Minuartia sedoides,herb
Cyanus triumfettii,herb
Cyclosorus pozoi,herb
Cyclamen repandum,herb
Astilbe thunbergii,herb
Anthyllis montana,herb
Mitchella undulata,herb
Krascheninnikovia ceratoides,shrub
Dioscorea japonica,other
Sibbaldianthe bifurca,herb
Tripterospermum trinervium,NA
Cerasus jamasakura,tree
Hierochloe repens,herb
Festuca gautieri,herb
Salicornia perennans,herb
Salix atrocinerea,tree
Agrostis,herb
Oxybasis glauca,herb
Saxifraga exarata,herb
Hymenophyllum flabellatum,herb
Salix viminalis,shrub
Sasa borealis,herb\shrub
Puccinellia festuciformis,herb
Symplocos sawafutagi,shrub
Athyrium yokoscense,herb
Rubus buergeri,shrub
Prunus leveilleana,tree
Pertya scandens,shrub
Dioscorea quaternata,other
Cyathea dealbata,shrub\tree
Calamagrostis stricta,herb
Soldanella carpatica,herb
Selinum pyrenaeum,herb
Laurus nobilis,shrub\tree
Ononis natrix,shrub
Farfugium japonicum,herb
Cornus sanguinea,shrub
Vaccinium microcarpum,shrub
Limonium meyeri,herb
Vaccinium japonicum,shrub
Scandix pecten-veneris,herb
Lemmaphyllum microphyllum,herb
Amaranthus blitum,herb
Chimaphila maculata,herb
Euphorbia nicaeensis,herb\shrub
Dodonaea viscosa,shrub\tree
Coprosma microcarpa,shrub
Lomandra multiflora,herb
Microlaena stipoides,herb
Microstegium vimineum,herb
Pteretis struthiopteris,herb
Rumex scutatus,herb
Podospermum canum,herb
Ampelodesmos mauritanicus,herb
Tmesipteris tannensis,herb
Allium carinatum,herb
Hymenophyllum dilatatum,herb
Lindsaea trichomanoides,herb
Pilosella bauhini,herb
Hymenophyllum sanguinolentum,herb
Elaeagnus pungens,shrub
Vitis vinifera,other
Mespilus germanica,shrub\tree
Odontarrhena,NA
Myosotis,herb
Teucrium pyrenaicum,herb
Centaurea thuillieri,herb
Vaccinium smallii,shrub
Hymenophyllum,herb
Carex kitaibeliana,herb
Pogostemon stellatus,herb
Vicia,herb
Quercus dalechampii,tree
Sedum roseum,herb
Stauntonia hexaphylla,other
Pulmonaria affinis,herb
Vaccinium bracteatum,shrub\tree
Lonicera gracilipes,shrub
Dryopteris setosa,herb
Herniaria hirsuta,herb
Aralia elata,shrub\tree
Eurybia divaricata,herb
Hydrangea scandens,shrub
Mentha,herb
Lindera benzoin,shrub
Juniperus virginiana,tree
Ainsliaea acerifolia,herb
x Ammocalamagrostis,NA
Galium,herb
Ligustrum tschonoskii,shrub
Blechnum chambersii,herb
Ulex parviflorus,shrub
Artemisia gmelinii,herb
Paliurus spina-christi,shrub
Luzula,herb
Piper kadsura,other
Polygonum maritimum,herb
Ulmus,tree
Actinidia arguta,other
Chenopodiastrum hybridum,herb
Stemona lucida,other
Rubia tatarica,herb
Vaccinium hirtum,shrub
Rhododendron maximum,shrub
Anisocampium niponicum,herb
Sticherus cunninghamii,herb
Smilax sieboldii,other
Potentilla humifusa,herb
Cyathea colensoi,herb\shrub
Endiandra virens,tree
Polygonum equisetiforme,herb
Dryopteris lacera,herb
Hylodesmum podocarpum,herb
Rumex,herb
Aphananthe aspera,tree
Geranium solanderi,herb
Pseudopanax linearis,shrub
Sedum alpestre,herb
Lepisorus thunbergianus,herb
Aria japonica,tree
Elytrigia repens,herb
Ainsliaea apiculata,herb
Senecio,NA
Schisandra repanda,other
Cardamine,herb
Carex dolichostachya,herb
Potentilla supina,herb
Schizocodon soldanelloides,herb
Rhaphiolepis indica,shrub
Scilla lilio-hyacinthus,herb
Clinopodium menthifolium,herb
Aster,NA
Sasa palmata,herb
Brucea javanica,shrub
Anemone scherfelii,herb
Arundinella hirta,herb
Thymus nervosus,herb
Laportea bulbifera,herb
No suitable,NA
Potentilla montana,herb
Leptopteris hymenophylloides,herb
Solidago,herb
Compositae,NA
Pimpinella tragium,herb
Soldanella hungarica,herb
Leptorumohra mutica,herb
Artemisia pontica,herb
Verbascum,herb
Carex lenta,herb
Fraxinus chinensis,tree
Centranthus ruber,herb
Sesbania sesban,tree
Phormium colensoi,herb
Asparagus aphyllus,herb\shrub
Nasturtium,herb
Carex conica,herb
Lauraceae,NA
Dumasia truncata,other
Pilosella floribunda,herb
Goodenia geniculata,herb
Medicago intertexta,herb
Prunus,shrub\tree
Austrostipa scabra,herb
Juncus,herb
Sempervivum arachnoideum,herb
Thymus striatus,herb
Jasione crispa,herb
Echinochloa crusgalli,herb
Lindera glauca,shrub
Laburnum anagyroides,shrub
Oxalis pes-caprae,herb
Dianella nigra,herb
Jacobaea subalpina,herb
Campanula serrata,herb
Piptatherum coerulescens,herb
Carex pisiformis,herb
Geum sylvaticum,herb
Minuartia recurva,herb
Globularia repens,herb
Fraxinus,tree
Eucalyptus phaenophylla,tree
Osmorhiza aristata,herb
Leguminosae,NA
Helictochloa marginata,herb
Polygonatum lasianthum,herb
Rosa dumalis,shrub
Hymenophyllum scabrum,herb
Puccinellia gigantea,herb
Heloniopsis orientalis,herb
Anthemis cretica,herb
Styrax officinalis,shrub
Hosta sieboldiana,herb
Earina mucronata,herb
Calamagrostis hakonensis,herb
Tragopogon podolicus,herb
Thymus pulcherrimus,herb
Adenophora triphylla,herb
Aster ovatus,herb
Crepis lampsanoides,herb
Panicum boscii,herb
Pluchea dioscoridis,shrub
Amelanchier laevis,tree
Silene pusilla,herb
Eupatorium makinoi,herb
Polyphlebium venosum,herb
Uncinia,herb
Rubia argyi,other
Plagiogyria matsumureana,herb
Dryopteris,herb
Symphytum cordatum,herb
Ononis striata,herb
Allium,herb
Ruscus hypoglossum,shrub
Parathelypteris japonica,herb
Cyrtomium fortunei,herb
Ranunculus taisanensis,herb
Desmodium brachypodum,herb
Carex blepharicarpa,herb
Viburnum phlebotrichum,shrub
Atractylodes ovata,NA
Cichorium pumilum,herb
Ranunculus,herb
Cyperus gracilis,herb
Carex stenostachys,herb
Diplopterygium glaucum,herb
Sesleria rigida,herb
Centaurea,herb
Opuntia,other
Galium octonarium,herb
Pseudowintera axillaris,shrub\tree
Tricyrtis affinis,herb
Asplenium platyneuron,herb
Clematis terniflora,other
Parsonsia heterophylla,other
Raukaua edgerleyi,tree
Dianthus giganteiformis,herb
Viola sieheana,herb
Hosta sieboldii,herb
Sasa nipponica,herb
Cirsium,herb
Arachniodes standishii,NA
Paspalidium geminatum,herb
Alhagi graecorum,shrub
Cuscuta campestris,other
Allium saxatile,herb
Trifolium,herb
Persicaria longiseta,NA
Jacobaea maritima,NA
Acer shirasawanum,tree
Athyrium vidalii,herb
Centaurea nemoralis,herb
Circaea ×,herb
Dactylorhiza,herb
Xanthorrhoea acaulis,other
Cynoglossum,herb
Boehmeria silvestrii,herb\shrub
Serratula coronata,herb
Salix phylicifolia,shrub
Genista depressa,NA
Populus,tree
Phlegmariurus,NA
Atropa bella-donna,herb
Bignonia capreolata,other
Amelanchier,shrub\tree
Launaea nudicaulis,herb
Photinia glabra,tree
Suaeda acuminata,herb
Gonocarpus teucrioides,herb\shrub
Pulsatilla grandis,herb
Sesleria comosa,herb
Patzkea spadicea,herb
Koeleria nitidula,herb
Orobanche crenata,other
Achillea asiatica,herb
Paris tetraphylla,herb
Edraianthus graminifolius,herb
Clematis apiifolia,other
Thelypteris acuminata,herb
Patzkea paniculata,herb
Dichondra,herb
Dryopteris pseudomas,herb
Festuca hystrix,herb
Blechnum minus,herb
Maianthemum japonicum,herb
Millettia japonica,NA
Pteris cretica,herb
Leucanthemum rotundifolium,herb
Pyrrosia eleagnifolia,other
Elionurus citreus,herb
Ochlopoa supina,NA
Crocus veluchensis,herb
Galium maritimum,herb
Crepis albida,herb
Solidago curtisii,herb
Coptis trifolia,herb
Syneilesis palmata,herb
Chenopodium bonus-henricus,herb
Potentilla,herb
Artemisia lerchiana,herb
Lathyrus pisiformis,herb
Euphorbia plumerioides,NA
Ophiopogon planiscapus,herb
Ranunculus aduncus,herb
Scabiosa triniifolia,herb
Viola kusanoana,herb
Rytidosperma linkii,herb
Festuca dalmatica,herb
Berchemia racemosa,shrub
Lespedeza maximowiczii,shrub
Wisteria brachybotrys,NA
Quercus infectoria,shrub\tree
Asarum caucasicum,herb
Centaurea aspera,herb
Lechenaultia filiformis,NA
Tragopogon porrifolius,herb
Athyrium asplenioides,herb
Silene sericea,herb
Scrophularia alpestris,herb
Rhododendron pentandrum,NA
Thymus comosus,herb
Sanicula chinensis,herb
Inula oculus-christi,herb
Lamium,herb
Arachniodes aristata,NA
Onosma simplicissima,NA
Ranunculus pseudomontanus,herb
Corylus cornuta,shrub
Arachniodes sporadosora,NA
Orostachys spinosa,other
Olearia lacunosa,shrub\tree
Carthamus mitissimus,herb
Stewartia pseudocamellia,tree
Eucalyptus indurata,tree
Prosopis glandulosa,shrub\tree
Aurinia saxatilis,herb
Dampiera purpurea,herb\shrub
Cirsium nipponicum,NA
Patrinia villosa,NA
Galium pseudoaristatum,herb
Rhinanthus,herb
Leionema elatius,shrub
Arrhenatherum longifolium,herb
Limonium bellidifolium,herb
Brachiaria whiteana,herb
Adiantum capillus-veneris,herb
Vittadinia cuneata,herb
Carex rhizina,herb
Tephrosia,NA
Leontopodium nivale,herb
Crocus caeruleus,herb
Cuscuta,other
Pyrrosia lingua,herb
Ficaria fascicularis,herb
Pilosella peleteriana,herb
Dinebra decipiens,herb
Psychotria asiatica,shrub
Vicia pyrenaica,herb
Galax urceolata,herb
Aristolochia serpentaria,herb
Sedum brevifolium,herb
Impatiens atrosanguinea,herb
Dapsilanthus ramosus,herb
Nephrodium sabaei,herb
Silene rubella,herb
Blechnum procerum,herb
Phyllanthera grayi,tree
Lycopodium alpinum,herb
Codonopsis lanceolata,other
Persicaria senegalensis,herb
Bolboschoenus glaucus,herb
Clematis japonica,NA
Asplenium incisum,herb
Chrysothamnus,NA
Kunzea ericoides,shrub\tree
Elatostema involucratum,herb
Liriope minor,herb
Campanula spatulata,herb
Orobanche,other
Laserpitium krapffii,herb
Picrothamnus,NA
Thymus roegneri,herb
Achillea coarctata,herb
Cephalaria uralensis,herb
Artemisia nitrosa,herb
Ozothamnus tesselatus,NA
Sedum urvillei,herb
Lamium garganicum,herb
Pyrola asarifolia,herb
Orites lancifolius,shrub
Polygonatum falcatum,herb
Cerastium,herb
Gaultheria procumbens,herb
Keraudrenia hookeriana,NA
Polystichum polyblepharum,herb
Lindera sericea,NA
Paesia scaberula,herb
Litsea japonica,shrub
Crepis fraasii,herb
Hypecoum imberbe,herb
Plantago monosperma,herb
Quercus rosacea,tree
Halesia tetraptera,tree
Polystichum retrosopaleaceum,herb
Leptorumohra miqueliana,herb
Boehmeria spicata,shrub
lachenalii subsp.,NA
Amaranthus graecizans,herb
Cephalomanes obscurum,herb
Sedum amplexicaule,herb
Alectryon oleifolius,tree
Galium bungei,herb
Tmesipteris,NA
Blechnum filiforme,herb
Hieracium transylvanicum,herb
Viola orbiculata,herb
Spiraea crenata,shrub
Molinia japonica,herb
Actinidia polygama,other
Bursaria spinosa,shrub\tree
Acacia aneura,tree
Heterachne,NA
Oenanthe javanica,herb
Lemna aequinoctialis,herb
Calythrix,shrub
Senecio aegyptius,NA
Petasites frigidus,herb
Dalbergia densa,other
Carex morrowii,herb
Viola vaginata,herb
Alpinia intermedia,NA
Enkianthus campanulatus,NA
Leucopogon,NA
Menziesia ferruginea,shrub
Spiraea media,shrub
Dryopteris pacifica,herb
Minuartia setacea,herb
Salvia officinalis,herb
Coprosma dumosa,shrub
Bidens,NA
Aristida vagans,herb
Phragmites japonicus,herb
Lysimachia japonica,NA
Knautia arvernensis,herb
Ononis cristata,NA
Lamyropsis cynaroides,NA
Puccinellia tenuissima,NA
Burchardia congesta,herb
Galium trifidum,herb
Armeria canescens,herb
Minuartia laricifolia,herb
Carex reinii,herb
Picea,tree
Senna,NA
Asarum sieboldii,herb
Atriplex,NA
Pseudoraphis,NA
Symphyotrichum lateriflorum,herb
Panicum effusum,herb
Microlepia marginata,NA
Prunus apetala,shrub\tree
Alyssum obovatum,herb
Bromus,herb
Rubus pannosus,shrub
Sedobassia sedoides,herb
Alyssum hirsutum,herb
Astelia,NA
Prosartes lanuginosa,herb
Jacobaea adonidifolia,herb
Helleborus purpurascens,herb
Ulmus davidiana,tree
Campanula sparsa,herb
Gleichenia,NA
Veratrum maackii,NA
Sorghum virgatum,herb
Rhododendron lagopus,shrub
Blechnum nigrum,herb
Leucopogon muticus,shrub
Biscutella auriculata,herb
Geranium collinum,herb
Centranthus calcitrapae,herb
Oxalis griffithii,herb
Festuca pseudodalmatica,herb
Galatella angustissima,herb
Prenanthes,herb
Gaultheria myrsinoides,shrub
Sarcobatus baileyi,shrub
Vitis heyneana,other
Dioscorea gracillima,NA
Launaea fragilis,herb
Sonchus bulbosus,herb
Leptospermum polygalifolium,shrub
Digitaria,herb
Lycopodium volubile,herb
Aralia cordata,herb
Carex concinnoides,herb
Avenula pubescens,herb
Pleurospermum uralense,herb
Taraxacum hamatum,herb
Ranunculus reflexus,herb
Euphorbia subcordata,herb
Ferulago sylvatica,herb
Carthamus carduncellus,herb
Psychotria serpens,other
Sonchus,NA

SessionInfo

# Record the R version, platform and package versions used for this run
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.7 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] viridis_0.5.1     viridisLite_0.3.0 caret_6.0-84      lattice_0.20-41  
##  [5] kableExtra_1.3.1  knitr_1.30        data.table_1.13.2 forcats_0.5.0    
##  [9] stringr_1.4.0     dplyr_1.0.2       purrr_0.3.4       readr_1.4.0      
## [13] tidyr_1.1.2       tibble_3.0.1      ggplot2_3.3.0     tidyverse_1.3.0  
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2           jsonlite_1.7.1       splines_3.6.3       
##  [4] foreach_1.5.1        prodlim_2019.11.13   modelr_0.1.6        
##  [7] assertthat_0.2.1     highr_0.8            stats4_3.6.3        
## [10] cellranger_1.1.0     yaml_2.2.1           ipred_0.9-9         
## [13] pillar_1.4.3         backports_1.2.0      glue_1.4.2          
## [16] digest_0.6.25        rvest_0.3.6          colorspace_2.0-0    
## [19] recipes_0.1.15       htmltools_0.5.0      Matrix_1.2-18       
## [22] plyr_1.8.6           timeDate_3043.102    pkgconfig_2.0.3     
## [25] broom_0.7.0          haven_2.3.1          scales_1.1.1        
## [28] webshot_0.5.2        gower_0.2.2          lava_1.6.8.1        
## [31] generics_0.1.0       ellipsis_0.3.1       withr_2.3.0         
## [34] nnet_7.3-14          cli_2.2.0            survival_3.2-7      
## [37] magrittr_2.0.1       crayon_1.3.4         readxl_1.3.1        
## [40] evaluate_0.14        ps_1.4.0             fs_1.5.0            
## [43] fansi_0.4.1          nlme_3.1-150         MASS_7.3-53         
## [46] xml2_1.3.2           class_7.3-17         tools_3.6.3         
## [49] hms_0.5.3            lifecycle_0.2.0      munsell_0.5.0       
## [52] reprex_0.3.0         e1071_1.7-4          compiler_3.6.3      
## [55] rlang_0.4.9          grid_3.6.3           iterators_1.0.13    
## [58] rstudioapi_0.13      rmarkdown_2.5        ModelMetrics_1.2.2.2
## [61] gtable_0.3.0         codetools_0.2-18     DBI_1.1.0           
## [64] reshape2_1.4.4       R6_2.5.0             gridExtra_2.3       
## [67] lubridate_1.7.9.2    utf8_1.1.4           stringi_1.5.3       
## [70] Rcpp_1.0.5           vctrs_0.3.5          rpart_4.1-15        
## [73] dbplyr_2.0.0         tidyselect_1.1.0     xfun_0.19