diff --git a/code/02_Compile_dataset.Rmd b/code/02_Compile_dataset.Rmd
index 133b3b969c3867f04e8d4f903275969f5d290d5b..cf37d86c9616f4684866ac854aefdb9df896ac9f 100644
--- a/code/02_Compile_dataset.Rmd
+++ b/code/02_Compile_dataset.Rmd
@@ -17,6 +17,17 @@ knitr::opts_chunk$set(echo = TRUE)
 5) Link to EUNIS cross-link table, and assign Faber-Langedon Formation (FMS)  
 6) Assign plot elevation using external sources (FMS) 
 7) Add GIVD codes  
+8) filter(`Taxon group`!="Mushroom") from DT0 (There are also other mushroom species not correctly tagged...e.g. all Mycena species in NL Dataset), exclude also species in these genera:
+
+mushroom <- c("Mycena", "Boletus", "Russula","Calocybe","Collybia","Amanita","Amanitopsis","Coprinus",
+  "Galerina","Geoglossum","Hebeloma","Hydnum","Lactarius","Leucocarpia","Naucoria","Otidea","Polyporus",
+  "Sarcodom","Sarcoscyphus","Scleroderma","Stropharia","Tylopilus","Typhula", "Calyptella", "Chrysopsora", "Lacrymaria", "Dermoloma", 
+  "Alnicola", "Amanitina", "Bovista", "Cheilymenia","Clavulinopsis", "Clitocybe", "Entoloma", "Geaster", "Inocybe",
+  "Laccaria", "Laetiporus", "Lepista", "Macrolepiota", "Macrolepis", "Marasmius", "Panaeolus", "Psathyrella", "Psilocybe", 
+  "Rickenella", "Sarcoscypha", "Vascellum")
+
+
+  
 
 
 plots to delete from: (GIVD / NA-CA-004) (see email from Laura Boisver)
diff --git a/code/03_TaxonomicBackbone.Rmd b/code/03_TaxonomicBackbone.Rmd
index ed3ea08197a28f04dd71629a70a4380ccec7cd2b..0250ac5c8174c8be81c548a8c2d96e4b7ee75205 100644
--- a/code/03_TaxonomicBackbone.Rmd
+++ b/code/03_TaxonomicBackbone.Rmd
@@ -48,9 +48,21 @@ library(vegdata)
 
 ## Read in taxon names from [sPlot](https://www.idiv.de/sdiv/working_groups/wg_pool/splot/splot_database.html) and [TRY](https://www.try-db.org/TryWeb/Home.php)
 
-```{r, echo=F, eval=F}
+
+```{r, echo=F}
+## Genus names used below to flag fungal taxa for exclusion (list is NOT complete)
+mushroom <- c("Mycena", "Boletus", "Russula","Calocybe","Collybia","Amanita","Amanitopsis","Coprinus",
+  "Galerina","Geoglossum","Hebeloma","Hydnum","Lactarius","Leucocarpia","Naucoria","Otidea","Polyporus",
+  "Sarcodom","Sarcoscyphus","Scleroderma","Stropharia","Tylopilus","Typhula", "Calyptella", "Chrysopsora", "Lacrymaria", "Dermoloma", 
+  "Alnicola", "Amanitina", "Bovista", "Cheilymenia","Clavulinopsis", "Clitocybe", "Entoloma", "Geaster", "Inocybe",
+  "Laccaria", "Laetiporus", "Lepista", "Macrolepiota", "Macrolepis", "Marasmius", "Panaeolus", "Psathyrella", "Psilocybe", 
+  "Rickenella", "Sarcoscypha", "Vascellum")
+```
+
+
+```{r, eval=F}
 #import and save splot names from DT table
-DT0 <- readr::read_delim("../sPlot_data_export/sPlot 3.0.1_species.csv", 
+DT0 <- readr::read_delim("../sPlot_data_export/sPlot_3_0_2_species.csv", 
                             delim="\t", 
                          col_type = cols(
                                 PlotObservationID = col_double(),
@@ -66,23 +78,36 @@ DT0 <- readr::read_delim("../sPlot_data_export/sPlot 3.0.1_species.csv",
                                 x_ = col_double()
                               )
                          ) 
+
+
+
+
+## Exclude fungi: rows tagged "Mushroom" plus taxa whose genus is listed in `mushroom`
 splot.species <- DT0 %>%
   rename(Species.original=`Turboveg2 concept`, Matched.concept=`Matched concept`) %>%
+  filter(is.na(`Taxon group`) | `Taxon group` != "Mushroom") %>%  # filter() drops NA conditions: keep untagged taxa
   dplyr::select(Species.original, Matched.concept) %>%
-  distinct()
-write_csv(splot.species, path = "../_derived/splot3.0.1.species.csv")
+  distinct() %>%
+  # The genus is taken as the first word of the matched concept.
+  # `%in%` is vectorised and never returns NA, so neither grouping
+  # nor a helper column is needed to drop the listed fungal genera.
+  filter(!word(Matched.concept, 1) %in% mushroom)
+
+# Save the de-duplicated original / matched name pairs (fungi removed)
+
+write_csv(splot.species, path = "../_derived/splot3.0.2.species.csv")
 ```
 
 !!! Should I use the column from TRY using the full species name, or the column with only the two words from name strings?
 
 ```{r, message=F}
-splot.species <- read_csv("../_derived/splot3.0.1.species.csv")
+splot.species <- read_csv("../_derived/splot3.0.2.species.csv")
 
 try.species <- readr::read_csv("../_input/AccSpecies_TRY5.csv", col_names = F, locale = locale(encoding = 'Latin1')) %>%
   dplyr::select(-X6, -X7) %>%
   rename(try.ID=X1, FullSpecies=X2, Species=X3, Genus=X4, Family=X5, GrowthForm=X8)
-# Sneak in  species from the Alpine database (Borja & Riccardo), as a courtesy to Project #18
 
+# Sneak in  species from the Alpine database (Borja & Riccardo), as a courtesy to Project #18
 alpine.species <- read_delim("../_input/new_alpine_species.txt", col_names = F, delim = "\t", locale = locale(encoding = 'Latin1')) %>% 
   rename(Species=X1)
 ```
@@ -100,7 +125,8 @@ spec.list.TRY.sPlot <- splot.species %>%
   rename(Species=Matched.concept) %>%
   mutate(Source="S") %>%
   bind_rows(try.species %>% 
-              dplyr::select(Species) %>%
+              dplyr::select(FullSpecies) %>% ##using the full name from TRY
+              rename(Species=FullSpecies) %>% 
               mutate(Source="T")) %>%
   bind_rows(alpine.species %>% 
               mutate(Source="A")) %>%
@@ -124,7 +150,7 @@ When constructing the backbone for sPlot 2.1, a list of 4,093 "weird" species na
 ## String manipulation routines
 Stripping unwanted characters as well as abbreviation (such as hybrid markers) which would prevent name matching:
 
-```{r, eval = F}
+```{r}
 spec.list.TRY.sPlot <- spec.list.TRY.sPlot %>%
   mutate(OriginalNames=Species) %>%
   dplyr::select(OriginalNames, Species, Source) %>%
@@ -156,116 +182,758 @@ spec.list.TRY.sPlot <- spec.list.TRY.sPlot %>%
 For all names, that have a number in their first word, and consist of $>$ 1 words, remove that word:
 
 
-```{r, eval = F}
+```{r}
 spec.list.TRY.sPlot <- spec.list.TRY.sPlot %>% 
   mutate(firstWordWithNumbers=grepl('[0-9]', word(Species, 1))) %>%
   mutate(numberOfWords= sapply(gregexpr("\\W+", Species), length) + 1) %>%
   mutate(Species=ifelse((firstWordWithNumbers & numberOfWords > 1), 
                         sapply(Species, function(x) substr(x, start=regexpr(pattern =' ', text=x)+1, stop=nchar(x))), 
                         Species))
-
 ```
+
 Correct some name abbreviations using `taxname.abbr` in `vegdata`:
+```{r}
+spec.list.TRY.sPlot <- spec.list.TRY.sPlot %>%
+  mutate(Species = taxname.abbr(Species)) %>%  # vegdata::taxname.abbr applied to the column itself
+  dplyr::select(OriginalNames, Species) %>%
+  distinct()
+```
 
+A total of `r nrow(spec.list.TRY.sPlot %>% filter(OriginalNames != Species))` species names were modified. Although substantially improved, the species list has still quite a lot of inconsistencies.
+The total list submitted to TNRS contains `r length(unique(spec.list.TRY.sPlot$Species))` species names.
 
-```{r, eval = F}
-spec.list.TRY.sPlot <- spec.list.TRY.sPlot %>% 
-  mutate(testout=taxname.abbr(spec.list.TRY.sPlot$Species))
 
-write.csv(spec.list.TRY.sPlot$Species,  file = "../_derived/TNRS_submit/tnrs_submit_all.csv")
+# Match names against Taxonomic Name Resolution Service ([TNRS](http://tnrs.iplantcollaborative.org))
+
+```{r }
+#Export species name list
+write_csv(spec.list.TRY.sPlot %>% dplyr::select(Species) %>% distinct() ,  
+          path = "../_derived/TNRS_submit/tnrs_submit_iter1.csv")
+```
 
-seq1 <- c(seq(from =1, to = nrow(spec.list.TRY.sPlot), 50000), nrow(spec.list.TRY.sPlot)+1)
-for(i in 2:length(seq1)){
-  write.csv(spec.list.TRY.sPlot$Species[seq1[i-1]:(seq1[i]-1)],
-  file = paste(paste("../_derived/TNRS_submit/tnrs_submit", seq1[i], sep = "_"), "csv", sep = "."))
+The csv-file of species names was submitted to Taxonomic Name Resolution Service web application (Boyle et al. 2013, iPlant Collaborative (2015)). TNRS version 4.0 was used, which became available in August 2015 (this version also included The Plant List version 1.1).
+
+
+## TNRS settings {#ID}
+The following settings were used for resolving names on TNRS.
+
+### Sources for name resolution {#ID}
+The initial TNRS name resolution run was based on the **five standard sources** that were **ranked according to preference** in the following order (default of TNRS):
+
+1. The Plant List ([TPL](http://www.theplantlist.org/))[@TPL2013]
+2. The Global Compositae Checklist ([GCC](http://compositae.landcareresearch.co.nz/))[@Flann2009]
+3. The International Legume Database and Information Service ([ILDIS](http://www.ildis.org/))[@ILDIS2006]
+4. [Tropicos](http://www.tropicos.org/) [@TROPICOS2013]
+5. PLANTS Database ([USDA](https://plants.usda.gov/java/))[@USDA2012]
+
+
+### Family Classification
+Resolved names were assigned to families based on the [APGIII classification](http://onlinelibrary.wiley.com/doi/10.1111/j.1095-8339.2009.00996.x/abstract) [@Chase2009], the same classification system used by Tropicos.
+
+## Retrieve results
+Once the matching process was finished, results were retrieved from TNRS using the `Detailed Download` option that included the full name information (parsed components, warnings, links to sources, etc.). We retrieved all the matches for each species, constrained by source (TNRS default): the name in the first source was selected as best match unless there was `no suitable match found` in that source, in which case the match from the next lower-ranked source was selected, and so on until all sources were exhausted.
+
+# Manually inspecting name matching results {#ID}
+Manually inspect the TNRS-results table in a spreadsheet application (e.g. LibreOffice or Excel), starting with the highest taxonomic rank considered (i.e. Family). For instance, if manual checking of the TNRS output reveals that all accepted names or synonyms that have accuracy scores >0.9 are correct taxon names, use the following selection procedure:
+
+* Name_matched_rank (==Family)
+* Taxonomic_status (==Accepted, Synonym)
+* Family_score (>0.9)
+
+Continue this selection procedure for entries that were matched at lower taxonomic ranks, i.e. genus, species, etc..
+
+
+
+
+
+## Iteration 1 - Read and combine TNRS result files
+
+Read the downloaded TNRS files into `R`.
+```{r }
+tnrs.res0 <- readr::read_delim("../_derived/TNRS_submit/tnrs_results_iter1.txt", delim="\t", locale = locale(encoding = 'UTF-8'),quote="",
+          col_type = cols(
+                .default = col_character(),
+                Name_number = col_double(),
+                Overall_score = col_double(),
+                Name_score = col_double(),
+                Author_score = col_double(),
+                Family_score = col_double(),
+                Genus_score = col_double(),
+                Specific_epithet_score = col_double(),
+                Infraspecific_epithet_score = col_double(),
+                Infraspecific_epithet_2_score = col_double(),
+                Selected = col_logical()
+              ))
+```
+
+
+## Select best match for each submitted name
+
+Best matches are selected in successive steps, depending on the taxonomic level at which each record was matched. Records were sorted based on decreasing match scores. Matches at low taxonomic level (variety, subspecies) were favoured over matches at high taxonomic levels (family, sections). When having exactly the same ranks, the records were ranked based on their source, as explained above.  
+For each name submitted, only the record having the highest rank was retained.
+
+
+```{r}
+tnrs.res <- tnrs.res0 %>%   # rank all TNRS candidate matches, then keep one record per submitted name
+  mutate(Name_matched_rank=factor(Name_matched_rank, 
+                                  levels=c("variety", "subspecies", "species", "genus", "family", "section", "supersection", "infraspecies", "forma", 
+                                           "race", "nothosubspecies", "proles", "monstr", "series"))) %>%
+  mutate(Source=factor(Source, levels=c("tpl",    #reorder priorities
+                                        "tpl;gcc",
+                                        "tpl;gcc;tropicos",
+                                        "tpl;gcc;tropicos;usda",
+                                        "tpl;gcc;usda",
+                                        "tpl;ildis",
+                                        "tpl;ildis;tropicos",
+                                        "tpl;ildis;usda",
+                                        "tpl;tropicos",
+                                        "tpl;tropicos;usda",
+                                        "tpl;usda",
+                                        "gcc",
+                                        "gcc;tropicos",
+                                        "gcc;tropicos;usda",
+                                        "gcc;usda", 
+                                        "ildis", 
+                                        "ildis;tropicos",
+                                        "ildis;tropicos;usda",
+                                        "ildis;usda",
+                                        "tropicos",
+                                        "tropicos;gcc",
+                                        "tropicos;usda",
+                                        "usda"  ))) %>%
+  mutate(Taxonomic_status=factor(Taxonomic_status, 
+                                levels=c("Accepted","Synonym", "No opinion","Invalid","Illegitimate","Misapplied","Rejected name"))) %>%
+  #filter(Taxonomic_status %in% c("Accepted", "Synonym")) %>%
+  arrange(Name_number,   # within each name: highest scores first, ties broken by Source factor order
+          desc(Infraspecific_epithet_2_score),
+          desc(Infraspecific_epithet_score), 
+          desc(Specific_epithet_score),
+          desc(Genus_score), 
+          desc(Family_score),
+          desc(Name_score),
+          desc(Overall_score), 
+          #Taxonomic_status,
+          Source) %>%
+  group_by(Name_submitted) %>%
+  slice(1) %>% ungroup()  # first (= best ranked) record per name; ungroup so later steps see a plain tibble
+```
+
+After this first step, there are `r sum(tnrs.res$Name_matched=="No suitable matches found.", na.rm = TRUE)` names for which no match was found. Another `r sum(tnrs.res$Overall_score<0.9, na.rm = TRUE)` names were unreliably matched (overall match score <0.9). 
+
+
+## Select correctly resolved names {#ID}
+### General procedure  {#ID}
+
+1. Open `tnrs.res` in a spreadsheet program and sort according to `Name_matched_rank`, `Taxonomic_status` and `Family_score`, and select thresholds for selection.
+2. Repeat selection for entries matched at lower taxonomic ranks, such as `Name_matched_rank` ==:
+
+	* forma
+	* genus
+	* infraspecies
+	* ... 
+3. Adjust accuracy score threshold values, e.g. use higher or lower values for infraspec., variety, ...
+
+### Family level {#ID}
+Manually inspect sorted table and select all entries at the highest hierarchical level (family). Manually identify the family accuracy score threshold value above which a name can be considered a correct name. In the following case, this corresponds to a score $>$0.88.
+
+```{r}
+index.family <- which(tnrs.res$Name_matched_rank == "family" &
+                               (tnrs.res$Taxonomic_status == "Accepted" |
+                                tnrs.res$Taxonomic_status == "Synonym") &
+                                tnrs.res$Family_score > 0.88)
+length(index.family)
+```
+
+
+### Genus level
+```{r}
+# Genus-rank records: Accepted/Synonym with Genus_score > 0.83, or "No opinion" with Genus_score >= 0.99
+index.genus <- which(tnrs.res$Name_matched_rank == "genus" &
+                       ((tnrs.res$Taxonomic_status %in% c("Synonym", "Accepted") &
+                           tnrs.res$Genus_score > 0.83) |   # `&` binds tighter than `|`: outer parentheses keep
+                          (tnrs.res$Taxonomic_status == "No opinion" &   # the rank test applied to both alternatives
+                             tnrs.res$Genus_score >= 0.99)))
+length(index.genus)
+```
+### Species level
+```{r }
+index.species  <- which(tnrs.res$Name_matched_rank == "species" &
+                            (     (tnrs.res$Taxonomic_status == "Accepted" |  #condition 1
+                                  tnrs.res$Taxonomic_status == "Synonym") &
+                                  tnrs.res$Genus_score > 0.78 &
+                                  tnrs.res$Name_score > 0.90) 
+                        |
+                            (     tnrs.res$Genus_score > 0.90 &         # condition 2 - effective for records with subspecies information. NOTE(review): `&` binds tighter than `|`, so condition 2 is NOT restricted to rank "species" - confirm this is intended
+                                  (tnrs.res$Specific_epithet_score > 0.90) 
+                        ))
+length(index.species)
+```
+
+### Subspecies level
+```{r, eval = T}
+index.subspec <- which( (tnrs.res$Name_matched_rank %in% c("infraspecies", "subspecies") |
+                                   is.na(tnrs.res$Name_matched_rank)) & # there are a few records at sub-species level which are not categorized
+                                (tnrs.res$Taxonomic_status == "Accepted" |
+                                 tnrs.res$Taxonomic_status == "Synonym"))
+length(index.subspec)
+index.variety <- which(tnrs.res$Name_matched_rank == "variety" &
+                                (tnrs.res$Taxonomic_status == "Accepted" |
+                                 tnrs.res$Taxonomic_status == "Synonym"))
+length(index.variety)
+
+index.infraspec <- which(tnrs.res$Name_matched_rank == "infraspecies")
+length(index.infraspec)
+
+index.forma <- which(tnrs.res$Name_matched_rank == "forma")
+length(index.forma)
+```
+
+### Identifying "non-matched" species that are spermatophyta
+```{r, eval = T}
+index.spermatophyt <- which(tnrs.res$Name_matched == "No suitable matches found."
+                                     & word(tnrs.res$Name_submitted, 1) == "Spermatophyta")
+length(index.spermatophyt)
+```
+
+## Select `certain` or `uncertain` names
+Select names that do not fulfill the search criteria, i.e. that were not selected as certain species, for further name matching.
+
+```{r, eval = T}
+index.tnrs <- unique(c(index.family, index.forma, index.genus, index.species, index.subspec,
+                       index.variety, index.spermatophyt))
+# unique(): a record may satisfy several of the criteria above but must be listed only once as certain
+tnrs.res.certain <- tnrs.res[index.tnrs,]
+dim(tnrs.res.certain)
+write.csv(tnrs.res.certain, file = "../_derived/TNRS_submit/tnrs.res.iter1.certain.csv")
+# setdiff() rather than negative indexing: `x[-integer(0), ]` would return zero rows instead of all rows
+tnrs.res.uncertain <- tnrs.res[setdiff(seq_len(nrow(tnrs.res)), index.tnrs),]
+dim(tnrs.res.uncertain)
+write.csv(tnrs.res.uncertain, file = "../_derived/TNRS_submit/tnrs.res.iter1.uncertain.csv")
+
+save(tnrs.res.certain, tnrs.res.uncertain, file="../_derived/TNRS_submit/tnrs.iter1.RData")
+
+```
+
+## Manual cleaning, delete subspecies information and rerun match in TNRS
+Many unmatched records do contain subspecies information which could not be retrieved in TNRS, although genus and species seem to be spelled correctly. Also, sometimes the mismatch derives from having the word 'species' or 'sp' at the end of the name. 
+```{r}
+# Ancillary function: upper-case the first character of each string, leaving the rest unchanged
+firstup <- function(x) {
+  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+  x
 }
+# Manual cleaning of the names left unresolved after iteration 1.
+# Names are first lower-cased and then given a capital first letter, so every pattern
+# below must be written with a capital first letter and otherwise in lower case.
+tnrs.submit.iter2 <- data.frame(old=tnrs.res.uncertain$Name_submitted) %>%
+  mutate(new=old) %>%
+  mutate(new=tolower(new)) %>%
+  mutate(new=firstup(new)) %>%
+  mutate(new=gsub(" [0-9]*$", "", new)) %>%
+  mutate(new=gsub(" sp.$", "", new)) %>%
+  mutate(new=gsub(" sp$", "", new)) %>%
+  mutate(new=gsub(" species$", "", new)) %>%
+  mutate(new=gsub(" *$", "", new)) %>%
+  # Spelling corrections for individual genera / binomials
+  mutate(new=gsub('^Agropyrum', 'Agropyron', new)) %>%
+  mutate(new=gsub('^Anno ', 'Annona ', new)) %>%
+  mutate(new=gsub('Adpdytes dimidiata', 'Apodytes dimidiata', new)) %>%
+  mutate(new=gsub('Adenostorna fasciculaturn', 'Adenostoma fasciculaturn', new)) %>%
+  mutate(new=gsub('Arctostapliylos glallca', 'Arctostaphylos glauca', new)) %>%
+  mutate(new=gsub('Bituminosa bituminosa', 'Bituminaria bituminosa', new)) %>%
+  mutate(new=gsub('Causurina equisitifolia', 'Causuarina equisetifolia', new)) %>%
+  mutate(new=gsub('Convulvus arvensis', 'Convolvulus arvensis', new)) %>%
+  mutate(new=gsub('Diospyrus dygina', 'Diospyros digyna', new)) %>%  # single rule; an earlier duplicate rewrote to 'dygina' and shadowed this one
+  mutate(new=gsub('^Dodoea', 'Dodonaea', new)) %>%
+  mutate(new=gsub('^Boheravia', 'Boerhavia', new)) %>%
+  mutate(new=gsub('Centaria maculosa', 'Centaurea maculosa', new)) %>%
+  mutate(new=gsub('Chamrenerium angustifolium', 'Chamaenerion angustifolium', new)) %>%
+  mutate(new=gsub('^Chicorium', 'Cichorium', new)) %>%
+  mutate(new=gsub('^Cirsiumum', 'Cirsium', new)) %>%
+  mutate(new=gsub('^Colubrium', 'Colubrina', new)) %>%
+  mutate(new=gsub('^Corymbium', 'Corymbia', new)) %>%
+  mutate(new=gsub('Cosmos bipinnata', 'Cosmos bipinnatus', new)) %>%
+  mutate(new=gsub('Diospyros egbert', 'Diospyros egbert-walkeri', new)) %>%
+  mutate(new=gsub('Dispyrus halesioides', 'Diospyros halesioides', new)) %>%
+  mutate(new=gsub('^Drymis', 'Drimys', new)) %>%
+  mutate(new=gsub('^Dysoxylon', 'Dysoxylum', new)) %>%
+  mutate(new=gsub('^Eleaegnus', 'Elaeagnus', new)) %>%
+  mutate(new=gsub('^Eleutherant', 'Eleutherantera', new)) %>%
+  mutate(new=gsub('^Echicea', 'Echinacea', new)) %>%
+  mutate(new=gsub('Gauteria foliolata', 'Gaultheria foliolosa', new)) %>%
+  mutate(new=gsub('^Geophylla', 'Geophyla', new)) %>%
+  mutate(new=gsub('Gloichidion insignis', 'Glochidion insigne', new)) %>%
+  mutate(new=gsub('^Glycium', 'Glycine', new)) %>%
+  mutate(new=gsub('^Hammalis', 'Hamamelis', new)) %>%
+  mutate(new=gsub('^Hippochoeris', 'Hypochaeris', new)) %>%
+  mutate(new=gsub('Ilix tephrohylla', 'Ilex tephrophylla', new)) %>%
+  mutate(new=gsub('^Jasininum', 'Jasminum', new)) %>%
+  mutate(new=gsub('Jenipa conjuta', 'Jenipa conjunta', new)) %>%
+  mutate(new=gsub('^Lechytis', 'Lecythis', new)) %>%
+  mutate(new=gsub('Lespedeza juncus', 'Lespedeza juncea', new)) %>%
+  mutate(new=gsub('Licania apelata', 'Licania apetala', new)) %>%
+  mutate(new=gsub('Limeum arenicola', 'Limeum arenicolum', new)) %>%
+  mutate(new=gsub('^Maniota', 'Manihot', new)) %>%
+  mutate(new=gsub('^Menta', 'Mentha', new)) %>%
+  mutate(new=gsub('Metophyum brownei', 'Metopium brownei', new)) %>%
+  mutate(new=gsub('Miliusa tomentosum', 'Miliusa tomentosa', new)) %>%
+  mutate(new=gsub('Mimululus ringens', 'Mimulus ringens', new)) %>%
+  mutate(new=gsub('Nardus strictus', 'Nardus stricta', new)) %>%
+  mutate(new=gsub('Neea glomeratha', 'Neea glomerata', new)) %>%
+  mutate(new=gsub('^Onopordon', 'Onopordum', new)) %>%
+  mutate(new=gsub('^Orbigynia', 'Orbignya', new)) %>%
+  mutate(new=gsub('Orites excelsa', 'Orites excelsus', new)) %>%
+  mutate(new=gsub('Paedorata lutea', 'Paederota lutea', new)) %>%
+  mutate(new=gsub('Palaquin ellipticum', 'Palaquium ellipticum', new)) %>%
+  mutate(new=gsub('Palmeria arfakensis', 'Palmeria arfakiana', new)) %>%
+  mutate(new=gsub('Petalostcmum purpureum', 'Petalostemum purpureum', new)) %>%
+  mutate(new=gsub('Petalostimum purpureum', 'Petalostemum purpureum', new)) %>%
+  mutate(new=gsub('^Petrosileum', 'Petroselinum', new)) %>%
+  mutate(new=gsub('Phlomis herba', 'Phlomis herba-venti', new)) %>%
+  mutate(new=gsub('^Phyllirea', 'Phillyrea', new)) %>%
+  mutate(new=gsub('Physilus pumula', 'Physalus pumila', new)) %>%
+  mutate(new=gsub('Picea maria\\b', 'Picea mariana', new)) %>%  # \\b: do not touch an already correct 'Picea mariana'
+  mutate(new=gsub('Picea retroxexa', 'Picea retroflexa', new)) %>%  # lower-case pattern: names were lower-cased above
+  mutate(new=gsub('Pilayella litoralis', 'Pilayella littoralis', new)) %>%
+  mutate(new=gsub('Placocarpus schaereri', 'Platecarpus schaerer', new)) %>%
+  mutate(new=gsub('Placocarpus schraereri', 'Platecarpus schaerer', new)) %>%
+  mutate(new=gsub('^Pulteea', 'Pultenaea', new)) %>%
+  mutate(new=gsub('Quercus rubrum', 'Quercus rubra', new)) %>%
+  mutate(new=gsub('Rubus fruticosa', 'Rubus fruticosus', new)) %>%
+  mutate(new=gsub('Rubus saxatile', 'Rubus saxatilis', new)) %>%
+  mutate(new=gsub('Rubus sylvatici', 'Rubus sylvaticus', new)) %>%
+  mutate(new=gsub('^Sanguiria', 'Sanguinaria', new)) %>%
+  mutate(new=gsub('Sarauja nepaulensis', 'Sarauja nepalensis', new)) %>%
+  mutate(new=gsub('^Sateria', 'Setaria', new)) %>%
+  mutate(new=gsub('Sauraiea nepulensis', 'Saurauia nepalensis', new)) %>%
+  mutate(new=gsub('Schneckia australis', 'Schenckia australis', new)) %>%
+  mutate(new=gsub('Smirnium oleastrum', 'Smyrnium olusatrum', new)) %>%
+  mutate(new=gsub('Solms laubachia', 'Solms-laubachia himalayensis', new)) %>%
+  mutate(new=gsub('Stellaria chamaejasme', 'Stellera chamaejasme', new)) %>%
+  mutate(new=gsub('Steraria parviflora', 'Setaria parviflora', new)) %>%
+  mutate(new=gsub('^Stuartia', 'Stewartia', new)) %>%
+  mutate(new=gsub('Sycops sinensis', 'Sycopsis sinensis', new)) %>%
+  mutate(new=gsub('Tacetum vulgare', 'Tanacetum vulgare', new)) %>%
+  mutate(new=gsub('Talinurn angustissimun', 'Talinun angustissimun', new)) %>%
+  mutate(new=gsub('Talloma hodgsoni', 'Talauma hodgsonii', new)) %>%
+  mutate(new=gsub('Taraxacum albo', 'Taraxacum album', new)) %>%
+  mutate(new=gsub('Tetragonia falcata', 'Tetragona falcata', new)) %>%
+  mutate(new=gsub('Trapogogon', 'Tragopogon', new)) %>%
+  mutate(new=gsub('Zyzyphus saeri', 'Zizyphus saeri', new)) %>%
+  mutate(new=gsub('^Helicrysum', 'Helichrysum', new)) %>%
+  mutate(new=gsub('^Diceropappus rhinocerotis', 'Elytropappus rhinocerotis', new)) %>%
+  mutate(new=gsub('^Euphorbiace ', 'Euphorbiacaea ', new)) %>%
+  mutate(new=gsub('^Gloecapsa', 'Gloeocapsa', new)) %>%
+  mutate(new=gsub('Glycirhiza', 'Glycyrrhiza', new)) %>%
+  mutate(new=gsub('Abiesnordmannia', 'Abies nordmannia', new)) %>%
+  mutate(new=gsub('Alnus inca\\b', 'Alnus incana', new)) %>%  # \\b: do not touch an already correct 'Alnus incana'
+  mutate(new=gsub('Amalencier alnifolia', 'Amalenchier alnifolia', new)) 
+
+# delete remaining records of mushroom species
+tnrs.submit.iter2 <- tnrs.submit.iter2 %>%
+  filter(!word(new,1) %in% mushroom)
+  
+# Extract family name for unidentified species
+tnrs.submit.iter2 <- tnrs.submit.iter2 %>%
+  mutate(family.lev=str_extract(word(new,1), pattern='([^\\s]+acea)')) %>%
+  mutate(new=ifelse(is.na(family.lev), new, family.lev)) %>%
+  dplyr::select(-family.lev)
+
+#Cut to the first 2 words in the name string
+tnrs.submit.iter2 <- tnrs.submit.iter2 %>%
+  # `word()` is vectorised, so no grouping is needed. Grouping by `new` made
+  # `word(new, c(1,2))` recycle wrongly whenever a cleaned name occurred 3+ times.
+  # word(new, 1, 2) is NA for one-word names; those keep their single word.
+  mutate(Name_binomial = coalesce(word(new, 1, 2), word(new, 1)))
+  
+#save species name list to be submitted to TNRS
+write_csv(tnrs.submit.iter2 %>% dplyr::select(Name_binomial), path="../_derived/TNRS_submit/tnrs.submit_iter2.csv")
+```
 
+## Iteration 2 - Reimport resolved species names from TNRS and mark solved
+
+```{r}
+tnrs.res.iter2.raw <- readr::read_delim("../_derived/TNRS_submit/tnrs_results_iter2.txt", delim="\t", locale = locale(encoding = 'UTF-8'),quote="",
+          col_type = cols(
+                .default = col_character(),
+                Name_number = col_double(),
+                Overall_score = col_double(),
+                Name_score = col_double(),
+                Author_score = col_double(),
+                Family_score = col_double(),
+                Genus_score = col_double(),
+                Specific_epithet_score = col_double(),
+                Infraspecific_epithet_score = col_double(),
+                Infraspecific_epithet_2_score = col_double(),
+                Selected = col_logical()
+              ))
+
+tnrs.res.iter2 <- tnrs.res.iter2.raw %>%   # rank candidate matches, then keep one record per submitted name
+  mutate(Name_matched_rank=factor(Name_matched_rank, 
+                                  levels=c("variety", "subspecies", "species", "genus", "family", "section", "supersection", "infraspecies", "forma", 
+                                           "race", "nothosubspecies", "proles", "monstr", "series"))) %>%
+  mutate(Source=factor(Source, levels=c("tpl",    #reorder priorities
+                                        "tpl;gcc", "tpl;gcc;tropicos", "tpl;gcc;tropicos;usda", "tpl;gcc;usda","tpl;ildis","tpl;ildis;tropicos",
+                                        "tpl;ildis;usda","tpl;tropicos","tpl;tropicos;usda","tpl;usda","gcc","gcc;tropicos","gcc;tropicos;usda",
+                                        "gcc;usda", "ildis", "ildis;tropicos","ildis;tropicos;usda","ildis;usda","tropicos","tropicos;gcc",
+                                        "tropicos;usda","usda"  ))) %>%
+  mutate(Taxonomic_status=factor(Taxonomic_status, 
+                                levels=c("Accepted","Synonym", "No opinion","Invalid","Illegitimate","Misapplied","Rejected name"))) %>%
+  arrange(Name_number,   # within each name: highest scores first, ties broken by Source factor order
+          desc(Infraspecific_epithet_2_score),
+          desc(Infraspecific_epithet_score), 
+          desc(Specific_epithet_score),
+          desc(Genus_score), 
+          desc(Family_score),
+          desc(Name_score),
+          desc(Overall_score), 
+          Source) %>%
+  group_by(Name_submitted) %>%
+  slice(1) %>% ungroup()  # first (= best ranked) record per name; ungroup so later steps see a plain tibble
 ```
 
 
-# Match names against Taxonomic Name Resolution Service ([TNRS](http://tnrs.iplantcollaborative.org))
+### Family level
+```{r, eval = T}
+index.family <- which(tnrs.res.iter2$Name_matched_rank == "family" &
+                               (tnrs.res.iter2$Taxonomic_status == "Accepted" |
+                                tnrs.res.iter2$Taxonomic_status == "Synonym") &
+                                tnrs.res.iter2$Family_score > 0.88)
+length(index.family)
 
-## Slice `CleanedNames` into chunks
-... of 1000 species, and query both TPL and TNRS
+```
 
+### Genus level
 ```{r, eval = F}
+index.genus <- which(tnrs.res.iter2$Name_matched_rank == "genus" &
+                                 (tnrs.res.iter2$Taxonomic_status %in% c("Accepted","Synonym") &
+                                 tnrs.res.iter2$Genus_score >= 0.90 &
+                                 tnrs.res.iter2$Name_score > 0.49))
+length(index.genus)
 
 
-for(i in 2:length(seq1)){
-  sel.species <- spec.list.TRY.sPlot$Species[seq1[i-1]:(seq1[i]-1)]
-  tpl.tmp <- TPL(sel.species)
-  if(!is.null(nrow(tpl.tmp))){
-    write_csv(tpl.tmp, path = paste("../_derived/TPL/tpl_out_", i, ".csv", sep=""))}
-  print(i)
-}
+```
 
-for(i in 2:length(seq1)){
-  sel.species <- spec.list.TRY.sPlot$Species[seq1[i-1]:(seq1[i]-1)]
-  tnrs.tmp <- gnr_resolve(sel.species)
-  if(!is.null(nrow(tnrs.tmp))){
-    write_csv(tnrs.tmp, path = paste("../_derived/TNRS/tnrs_out_", i, ".csv", sep=""))}
-  print(i)
-}
 
+### Species level
+```{r, eval = T}
+index.species <- which(tnrs.res.iter2$Name_matched_rank == "species" &
+                                 #(tnrs.res.iter2$Taxonomic_status == "Accepted" |
+                                 #  tnrs.res.iter2$Taxonomic_status == "Synonym") &
+                                  tnrs.res.iter2$Genus_score >= 0.80 &
+                                  tnrs.res.iter2$Specific_epithet_score > 0.90)
+length(index.species)
+```
+
+
+### Subspecies level
+```{r, eval = T}
+index.infraspec <- which(tnrs.res.iter2$Name_matched_rank == "infraspecies")
+length(index.infraspec)
+
+index.subspec <- which((tnrs.res.iter2$Name_matched_rank %in% c("infraspecies", "subspecies") |
+                                   is.na(tnrs.res.iter2$Name_matched_rank)) & # there are a few records at sub-species level which are not categorized
+                                (tnrs.res.iter2$Taxonomic_status == "Accepted" |
+                                 tnrs.res.iter2$Taxonomic_status == "Synonym"))
+length(index.subspec)
+
+index.variety <- which(tnrs.res.iter2$Name_matched_rank == "variety" &
+                                (tnrs.res.iter2$Taxonomic_status == "Accepted" |
+                                 tnrs.res.iter2$Taxonomic_status == "Synonym"))
+length(index.variety)
 
+index.forma <- which(tnrs.res.iter2$Name_matched_rank == "forma")
+length(index.forma)
+
+index.spermatophyt <- which(tnrs.res.iter2$Name_matched == "No suitable matches found."
+                                     & word(tnrs.res.iter2$Name_submitted, 1) == "Spermatophyta")
+length(index.spermatophyt)
 
 ```
 
-Compile output from TNRS, and sort matches
-```{r, message=F}
-myfiles.tnrs <- list.files("../_derived/TNRS/", full.names = T)
-myfiles.tpl <- list.files("../_derived/TPL/", full.names = T)
-
-tpl.out <- read_csv(myfiles.tpl[1])
-for(i in 2:length(myfiles.tpl)) {
-  tpl.out <- tpl.out %>%
-    bind_rows(read_csv(myfiles.tpl[i]))
-  print(i)
-}
 
-tnrs.out <- read_csv(myfiles.tnrs[1])
-for(i in 2:length(myfiles.tnrs)) {
-  tnrs.out <- tnrs.out %>%
-    bind_rows(read_csv(myfiles.tnrs[i]))
-  print(i)
-}
+```{r, eval = T}
+index.tnrs.iter2 <- unique(c(index.family, index.forma, index.genus, index.species, index.subspec,
+                             index.variety, index.spermatophyt))  # unique(): list each record only once
 
+tnrs.res.iter2.certain <- tnrs.res.iter2[index.tnrs.iter2,]
+dim(tnrs.res.iter2.certain)
+write.csv(tnrs.res.iter2.certain, file = "../_derived/TNRS_submit/tnrs.res.iter2.certain.csv")
 
+tnrs.res.iter2.uncertain <- tnrs.res.iter2[setdiff(seq_len(nrow(tnrs.res.iter2)), index.tnrs.iter2),]  # safe when the index is empty
+dim(tnrs.res.iter2.uncertain)
+write.csv(tnrs.res.iter2.uncertain, file = "../_derived/TNRS_submit/tnrs.res.iter2.uncertain.csv")
 
+save(tnrs.res.iter2.certain, tnrs.res.iter2.uncertain, 
+     tnrs.submit.iter2, file="../_derived/TNRS_submit/tnrs.iter2.RData")
 ```
 
 
+**Generate list of `uncertain` species that are still to be resolved on TNRS:**
+```{r, eval = T}
+# Export the second column only, kept as a one-column table (drop = FALSE).
+# NOTE(review): presumably column 2 is `Name_submitted` - confirm against the TNRS export.
+# The output file is passed positionally because readr renamed `path` to `file` in 1.4.0.
+write_csv(tnrs.res.iter2.uncertain[, 2, drop = FALSE], "../_derived/TNRS_submit/tnrs_submit_iter3.csv")
+```
 
 
+## Iteration 3 - Reimport resolved species names from `TNRS_NCBI` 
+In the last iteration, records were submitted to `TNRS NCBI`. 
+
+```{r}
+# Import the tab-delimited TNRS result file of iteration 3. Every column is read as
+# character except the score/number columns and `Selected`, which are typed explicitly.
+# The argument is spelled out as `col_types` instead of relying on partial matching of `col_type`.
+tnrs.res.iter3.raw <- readr::read_delim("../_derived/TNRS_submit/tnrs_results_iter3.txt", delim="\t", locale = locale(encoding = 'UTF-8'),quote="",
+          col_types = cols(
+                .default = col_character(),
+                Name_number = col_double(),
+                Overall_score = col_double(),
+                Name_score = col_double(),
+                Author_score = col_double(),
+                Family_score = col_double(),
+                Genus_score = col_double(),
+                Specific_epithet_score = col_double(),
+                Infraspecific_epithet_score = col_double(),
+                Infraspecific_epithet_2_score = col_double(),
+                Selected = col_logical()
+              ))
+
+# Keep the single best match per submitted name.
+# Rank, source and status are converted to factors; any value not listed as a level becomes NA.
+# The level order of `Source` defines the tie-break priority used in arrange() below.
+tnrs.ncbi <- tnrs.res.iter3.raw %>%
+  # "unknown" is a listed level so that the `Name_matched_rank == "unknown"` test in the
+  # variety-level selection can actually match (an unlisted value would have been turned into NA).
+  mutate(Name_matched_rank=factor(Name_matched_rank, 
+                                  levels=c("variety", "subspecies", "species", "genus", "family", "section", "supersection", "infraspecies", "forma", 
+                                           "race", "nothosubspecies", "proles", "monstr", "series", "unknown"))) %>%
+  # NOTE(review): results resolved against NCBI may carry `Source` values that are not listed
+  # here; those become NA and sort last - confirm this is intended.
+  mutate(Source=factor(Source, levels=c("tpl",    #reorder priorities
+                                        "tpl;gcc", "tpl;gcc;tropicos", "tpl;gcc;tropicos;usda",
+                                        "tpl;gcc;usda","tpl;ildis","tpl;ildis;tropicos",
+                                        "tpl;ildis;usda","tpl;tropicos","tpl;tropicos;usda",
+                                        "tpl;usda","gcc","gcc;tropicos","gcc;tropicos;usda",
+                                        "gcc;usda", "ildis","ildis;tropicos","ildis;tropicos;usda",
+                                        "ildis;usda","tropicos","tropicos;gcc","tropicos;usda","usda"  ))) %>%
+  mutate(Taxonomic_status=factor(Taxonomic_status, 
+                                levels=c("Accepted","Synonym", "No opinion","Invalid","Illegitimate","Misapplied","Rejected name"))) %>%
+  # Highest scores first, from the most specific epithet score down to the overall score;
+  # remaining ties are broken by the source priority.
+  arrange(Name_number, 
+          desc(Infraspecific_epithet_2_score),
+          desc(Infraspecific_epithet_score), 
+          desc(Specific_epithet_score),
+          desc(Genus_score), 
+          desc(Family_score),
+          desc(Name_score),
+          desc(Overall_score), 
+          Source) %>%
+  group_by(Name_submitted) %>%
+  slice(1) %>%
+  # Drop the grouping so that later steps work on a plain (ungrouped) table.
+  ungroup()
+```
 
-## TNRS settings {#ID}
-The following settings were used for resolving names on TNRS.
+### Family level
+```{r, eval = T}
+# Family-rank matches with status Accepted or Synonym and a family score above 0.85.
+index.family <- which(with(
+  tnrs.ncbi,
+  Family_score > 0.85 &
+    Name_matched_rank %in% "family" &
+    Taxonomic_status %in% c("Accepted", "Synonym")
+))
+length(index.family)
+```
+### Genus level
+```{r, eval = T}
+# Genus-rank matches with status Accepted, Synonym or No opinion that pass either score rule:
+#   - genus score > 0.89 together with name score > 0.49, or
+#   - genus score > 0.99 together with name score > 0.2.
+index.genus <- which(with(
+  tnrs.ncbi,
+  Name_matched_rank %in% "genus" &
+    Taxonomic_status %in% c("Accepted", "Synonym", "No opinion") &
+    ((Genus_score > 0.99 & Name_score > 0.2) |
+       (Genus_score > 0.89 & Name_score > 0.49))
+))
+length(index.genus)
+```
+### Species level
+```{r, eval = T}
+# Rule 1: accepted names/synonyms with a name score > 0.94 and an epithet score >= 0.67.
+index.species.1 <- which(with(
+  tnrs.ncbi,
+  Name_matched_rank %in% "species" &
+    Taxonomic_status %in% c("Accepted", "Synonym") &
+    Specific_epithet_score >= 0.67 &
+    Name_score > 0.94
+))
+length(index.species.1)
+
+# Rule 2: accepted names/synonyms with a lower name score (> 0.51), provided the genus
+# score is > 0.81 and the epithet score is >= 0.67.
+index.species.2 <- which(with(
+  tnrs.ncbi,
+  Name_matched_rank %in% "species" &
+    Taxonomic_status %in% c("Accepted", "Synonym") &
+    Specific_epithet_score >= 0.67 &
+    Name_score > 0.51 &
+    Genus_score > 0.81
+))
+length(index.species.2)
+
+# Rule 3: names with status "No opinion", genus score > 0.7 and epithet score > 0.75.
+index.species.3 <- which(with(
+  tnrs.ncbi,
+  Name_matched_rank %in% "species" &
+    Taxonomic_status %in% "No opinion" &
+    Specific_epithet_score > 0.75 &
+    Genus_score > 0.7
+))
+length(index.species.3)
+
+# Union of the three rules, each row counted once.
+index.species <- unique(c(index.species.1, index.species.2, index.species.3))
+length(index.species)
+```
+### Variety level
+```{r, eval = T}
+# Matches at subspecies, variety or "unknown" rank with status Accepted, No opinion or Synonym.
+# NOTE(review): `Name_matched_rank` is a factor at this point; "unknown" can only match if it
+# is one of the factor's levels - confirm against the level list used when building `tnrs.ncbi`.
+index.var <- which(with(
+  tnrs.ncbi,
+  Name_matched_rank %in% c("subspecies", "unknown", "variety") &
+    Taxonomic_status %in% c("Accepted", "No opinion", "Synonym")
+))
+length(index.var)
+```
 
-### Sources for name resolution {#ID}
-The initial TNRS name resolution run was based on the **five standard sources** that were **ranked according to preference** in the following order (default of TNRS):
+### Select `certain` or `uncertain` names
+```{r, eval = T}
+index.ncbi <- c(index.family, index.genus, index.species, index.var)
+length(index.ncbi)
 
-1. The Plant List ([TPL](http://www.theplantlist.org/))[@TPL2013]
-2. The Global Compositae Checklist ([GCC](http://compositae.landcareresearch.co.nz/))[@Flann2009]
-3. The International Legume Database and Information Service ([ILDIS](http://www.ildis.org/))[@ILDIS2006]
-4. [Tropicos](http://www.tropicos.org/) [@TROPICOS2013]
-5. PLANTS Database ([USDA](https://plants.usda.gov/java/))[@USDA2012]
+tnrs.ncbi.certain <- tnrs.ncbi[index.ncbi,]
+dim(tnrs.ncbi.certain)
+write_csv(tnrs.ncbi.certain, path = "../_derived/TNRS_submit/tnrs.ncbi.certain.csv")
 
-Because it is possible that the best match is found in lower ranked sources, see section [TNRS settings], two additional name resolution runs were realized in which the highest ranking was given to **(1) Tropicos**, or **(2)** the sixth source available in TNRS, **NCBI** (The National Center for Biotechnology Information's Taxonomy database; [@NCBI2003]), respectively, see section [TNRS settings].
+# Rows that were not selected as certain. The complement is taken with setdiff() on the row
+# numbers rather than with a negative index, because x[-integer(0), ] returns zero rows when
+# `index.ncbi` is empty.
+tnrs.ncbi.uncertain <- tnrs.ncbi[setdiff(seq_len(nrow(tnrs.ncbi)), index.ncbi),]
+dim(tnrs.ncbi.uncertain)
+write_csv(tnrs.ncbi.uncertain, path = "../_derived/TNRS_submit/tnrs.ncbi.uncertain.csv")
+# Store both tables together for the merge step.
+save(tnrs.ncbi.certain, tnrs.ncbi.uncertain, file="../_derived/TNRS_submit/tnrs.iter3.RData")
+```
 
-### Family Classification
-Resolved names were assigned to families based on the [APGIII classification](http://onlinelibrary.wiley.com/doi/10.1111/j.1095-8339.2009.00996.x/abstract) [@Chase2009], the same classification system used by Tropicos.
 
-## Retrieve results
-Once the  matching process was finished, results were retrieved from TNRS using the `Detailed Download` option that included the full name information (parsed components, warnings, links to sources, etc.).  We retrived the single best match for each species, constrained by source (TNRS default), where the name in the first source was selected as best match, unless there was `no suitable match found` in that source, the match from the next lower-ranked source was selected, until all resources where exhausted.
 
-# Manually inspecting name matching results {#ID}
-Manually inspect the TNRS-results table in a spreadsheat application (i.e. LibreOffice or Excel). Starting with the highest taxonomic rank considered (i.e. Family). For instance, if manual checking of the TRNS output reveals that all accepted names or synonyms that have accuracy scores >0.9 are correct taxon names, use the following selection procedure:
 
-* Name_matched_rank (==Family)
-* Taxonomic_status (==Accepted, Synomyn)
-* Family_score (>0.9)
 
-Continue this selection procedure for entries that were matched at lower taxonomic ranks, i.e. genus, species, etc..
+
+## Iteration 4 - Using `The Plant List` matching tools for unresolved names
+Generate names list from `tnrs.ncbi.uncertain` to be matched against `The Plant List`, using `Taxonstand::TPL`.  
+
+```{r, eval = F}
+# Names still unresolved after iteration 3, as a one-column table.
+tpl.submit <- tnrs.ncbi.uncertain %>% dplyr::select(Name_submitted)
+#write_csv(tpl.submit, path="../_derived/TPL/tpl.submit.csv")
+
+# TPL() expects a character vector of taxon names, so the name column is passed
+# rather than the one-column table itself.
+tpl.ncbi <- TPL(tpl.submit$Name_submitted)
+write_csv(tpl.ncbi, file = "../_derived/TPL/tpl_results_iter4.csv")
+```
+
+
+# Merge the resolved species lists
+## Read files
+
+```{r, eval = F}
+load("../_derived/TNRS_submit/tnrs.iter1.RData")
+load("../_derived/TNRS_submit/tnrs.iter2.RData")
+load("../_derived/TNRS_submit/tnrs.iter3.RData")
+```
+
+Read in the `tpl.ncbi` table:
+
+```{r, eval = T}
+tpl.ncbi <- read_csv("../_derived/TPL/tpl_results_iter4.csv", 
+                     col_types = cols(
+                       .default = col_character(),
+                      Hybrid.marker = col_logical(),
+                      Plant.Name.Index = col_logical(),
+                      TPL.version = col_double(),
+                      Typo = col_logical(),
+                      WFormat = col_logical(),
+                      Higher.level = col_logical(),
+                      Date = col_date(format = "")
+                    ))
+```
+
+
+Combine the `certain` data sets:
+```{r, eval = F}
+
+Backbone <- spec.list.TRY.sPlot %>%
+  rename(Name_submitted1=Species) %>%
+  left_join(tnrs.submit.iter2 %>%
+              dplyr::select(-new) %>%
+              rename(Name_submitted1=old, Name_submitted2=Name_binomial),
+            by="Name_submitted1") %>%
+  mutate(Name_submitted=ifelse(!is.na(Name_submitted2), Name_submitted2, Name_submitted1)) %>%
+  left_join(tnrs.res.certain %>% 
+              bind_rows(tnrs.res.iter2.certain) %>%
+              bind_rows(tnrs.ncbi.certain), 
+            by="Name_submitted")
+  
+  
+
+
+tnrs.tpl.all.trop.certain <- rbind(tnrs.tpl.certain, tnrs.trop.small.certain)
+dim(tnrs.tpl.all.trop.certain)
+```
+... and add the four additional columns:
+
+```{r, eval = F}
+names(tnrs.tpl.all.trop.certain)
+
+tnrs.tpl.all.trop.certain$Manual.matching <- NA
+tnrs.tpl.all.trop.certain$Status.correct <- NA
+tnrs.tpl.all.trop.certain$name.correct <- NA
+tnrs.tpl.all.trop.certain$rank.correct <- NA
+```
+
+### Pick the respective `NCBI` data sets
+... for the 8,177 certain species:
+```{r, eval = F}
+names(tnrs.ncbi.certain.comb)
+tnrs.ncbi.certain.comb$rank.correct <- NA
+```
+Combine them with the big list above:
+```{r, eval = F}
+tnrs.tpl.all.trop.certain.2 <- rbind(tnrs.tpl.all.trop.certain, tnrs.ncbi.certain.comb)
+dim(tnrs.tpl.all.trop.certain.2)
+names(tnrs.tpl.all.trop.certain.2)
+```
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+### Tag names that could not be resolved
+If names were not corrected (i.e. `Status.correct == ""`), fall back to the original `Taxonomic.status`:
+```{r, eval = F}
+# Rows without a manually entered status (Status.correct == "") inherit `Taxonomic.status`.
+# which() turns the comparison into row numbers and drops NA results; a logical index that
+# contains NA is an error in a subscripted assignment with more than one replacement value.
+index.status.empty <- which(ncbi.uncertain.corr.uncertain.2$Status.correct == "")
+ncbi.uncertain.corr.uncertain.2$Status.correct[index.status.empty] <-
+    ncbi.uncertain.corr.uncertain.2$Taxonomic.status[index.status.empty]
+
+summary(ncbi.uncertain.corr.uncertain.2$Status.correct)
+str(ncbi.uncertain.corr.uncertain.2$Status.correct)
+```
+
+... and assign `No suitable matches found.` to the remaining species:
+```{r, eval = F}
+# Convert to character first so that a new value can be assigned freely.
+ncbi.uncertain.corr.uncertain.2$Status.correct <-
+    as.character(ncbi.uncertain.corr.uncertain.2$Status.correct)
+# The subscript must start on the same line as the object: a line beginning with `[` is a
+# parse error in R.
+ncbi.uncertain.corr.uncertain.2$Status.correct[
+    is.na(ncbi.uncertain.corr.uncertain.2$Status.correct)] <- "No suitable matches found."
+```
+
+Add uncorrected names in column `X` to `name.correct`:
+```{r, eval = F}
+# Rows without a corrected genus (Genus.correct == "") take their name from column 41.
+# NOTE(review): presumably column 41 is the uncorrected-name column `X` - confirm, and
+# prefer selecting it by name.
+# which() drops NA comparisons, which would otherwise make the subscripted assignment fail.
+index.genus.empty <- which(ncbi.uncertain.corr.uncertain.2$Genus.correct == "")
+ncbi.uncertain.corr.uncertain.2$name.correct[index.genus.empty] <-
+    as.character(ncbi.uncertain.corr.uncertain.2[,41])[index.genus.empty]
+```
+
+Assign `No suitable matches found.` to remaining species in `name.correct` according to `Status.correct`.
+
+```{r, eval = F}
+# Propagate the "no match" tag from `Status.correct` to `name.correct`, then export the table.
+ncbi.uncertain.corr.uncertain.2$name.correct[
+    ncbi.uncertain.corr.uncertain.2$Status.correct == "No suitable matches found."
+] <- "No suitable matches found."
+
+write.csv(ncbi.uncertain.corr.uncertain.2, file = "ncbi.uncertain.corr.uncertain.2.csv")
+```
+Done! Use `ncbi.uncertain.corr.uncertain.2` for later merging wit