library(tidyverse) load("/data/sPlot/releases/sPlot2.1/DT2_20161025.RData") DT2.0 <- DT2 ##### Filter out all species entry of non-vascular plants ##### #use taxon groups from sPlot 3.0 load("/data/sPlot/releases/sPlot3.0/DT_sPlot3.0.RData") DT3.0 <- DT2 rm(DT2) dt3.species <- DT3.0 %>% dplyr::rename(Taxon.group=taxon_group) %>% distinct(species, species_original, Taxon.group) %>% filter(Taxon.group != "Unknown") %>% separate(species, sep=" ", into = c("genus", "species")) %>% dplyr::select(genus, Taxon.group) %>% mutate(Taxon.group=replace(Taxon.group, list=genus=="Friesodielsia", values="Vascular plant")) %>% mutate(Taxon.group=replace(Taxon.group, list=genus=="Lamprothamnus", values="Alga_Stonewort")) %>% filter(!is.na(genus)) %>% filter(!genus %in% c("Hepatica", "×")) %>% distinct() dt2.species <- DT2.0 %>% distinct(species) %>% mutate(species0 = species) %>% # mutate(Taxon.group=replace(Taxon.group, # list=Taxon.group=="Unknown", # values=NA)) %>% # mutate(Taxon.group=replace(Taxon.group, # list=Taxon.group %in% c("Alga", "Stonewort"), # values="Alga_Stonewort")) %>% separate(species, sep=" ", into = c("genus", "species")) %>% left_join(dt3.species, by="genus") %>% # mutate(Taxon.group=coalesce(Taxon.group.x, as.character(Taxon.group.y))) %>% dplyr::select(species0, Taxon.group) %>% rename(species=species0) DT2 <- DT2.0 %>% mutate(Taxon.group=replace(Taxon.group, list=Taxon.group=="Unknown", values=NA)) %>% mutate(Taxon.group=replace(Taxon.group, list=Taxon.group %in% c("Alga", "Stonewort"), values="Alga_Stonewort")) %>% left_join(dt2.species, by="species") %>% #coalesce prioritizing sPlot 3.0 mutate(Taxon.group=coalesce(as.character(Taxon.group.y), as.character(Taxon.group.x))) %>% dplyr::select(-Taxon.group.x, -Taxon.group.y) %>% filter(!Taxon.group %in% c("Alga_Stonewort", "Lichen", "Moss")) ### exclude all taxa matched at family or higher level #ending "...aceae or ...psida" DT2.out <- DT2 %>% filter(!grepl(pattern="[A-Za-z]*aceae$|[A-Za-z]*opsida$|^[A-Za-z]*ales$|^[A-Za-z]*phyta$", species)) ## filter(species %in% DT2 %>% # distinct(species) %>% # filter(!grepl(pattern="[A-Za-z]*aceae$", species)) %>% # filter(!grepl(pattern="[A-Za-z]*opsida$", species)) %>% # filter(!grepl(pattern="^[A-Za-z]*ales$", species)) %>% # filter(!grepl(pattern="^[A-Za-z]*phyta$", species)) %>% # pull(species)) DT2 <- DT2.out save(DT2, file = "../sPlot/_derived_data/DT2_20161025_filtered.RData") #### Add column tree/shrub #### load("../sPlot/_derived_data/DT2_20161025_filtered.RData") load("/data/sPlot2.0/TRY.all.mean.sd.3.by.genus.species.tree.Rdata") gf <- TRY.all.mean.sd.3.by.genus.species.tree %>% dplyr::select(species=StandSpeciesName,is.tree.or.tall, is.shrub) %>% filter(complete.cases(.)) ## only cover ~half of the species load("/data/sPlot/releases/sPlot3.0/Traits_CWMs_sPlot3.RData") gf2 <- sPlot.traits %>% mutate(is.shrub=NA) %>% mutate(is.shrub=replace(is.shrub, list=str_detect(GrowthForm, "shrub"), values=T)) %>% dplyr::select(species, is.shrub, is.tree.or.tall.shrub) %>% filter(complete.cases(.)) DT2 <- DT2 %>% left_join(gf, by="species") %>% left_join(gf2, by="species") %>% mutate(is.tree.or.tall=coalesce(is.tree.or.tall, is.tree.or.tall.shrub)) %>% mutate(is.shrub=coalesce(is.shrub.x, is.shrub.y)) %>% dplyr::select(-is.tree.or.tall.shrub, -is.shrub.x, -is.shrub.y) tmp <- DT2 %>% distinct(species, is.tree.or.tall, is.shrub) table(tmp$is.tree.or.tall, tmp$is.shrub, exclude=NULL) " FALSE TRUE <NA> FALSE 15222 3535 0 TRUE 6571 1498 0 <NA> 0 0 31062" save(DT2, file = "../sPlot/_derived_data/DT2_20161025_filtered.RData") ### use info from header. Assign to is.tree.or.tall all entries from plots ### where only woody species were sampled load("/data/sPlot/releases/sPlot2.1/sPlot_header_20161124.RData") source("A96_fixheaderPaper11.R") header <- fix.header11(header) plants.recorded <- header %>% dplyr::select(PlotObservationID, plants_recorded2) %>% filter(plants_recorded2 != "complete") gf.complement <- DT2 %>% filter(PlotObservationID %in% (plants.recorded %>% pull(PlotObservationID))) %>% left_join(plants.recorded, by="PlotObservationID") %>% dplyr::select(species, plants_recorded2) %>% distinct(species, .keep_all = T) %>% mutate(is.tree.or.tall2 = ifelse(plants_recorded2=="woody_large", T, NA)) %>% mutate(is.shrub2 = ifelse(plants_recorded2=="woody_large", F, NA)) %>% mutate(is.shrub2 = ifelse(plants_recorded2=="woody_all", T, is.shrub2)) DT2 <- DT2 %>% left_join(gf.complement, by="species") %>% mutate(is.tree.or.tall=coalesce(is.tree.or.tall, is.tree.or.tall2)) %>% mutate(is.shrub=coalesce(is.shrub, is.shrub2)) %>% dplyr::select(-is.tree.or.tall2, -is.shrub2) tmp <- DT2 %>% distinct(species, is.tree.or.tall, is.shrub) table(tmp$is.tree.or.tall, tmp$is.shrub, exclude=NULL) " FALSE TRUE <NA> FALSE 15222 3535 0 TRUE 9684 1498 0 <NA> 0 3852 24097" ### check the most species rich genera tmp2 <- tmp %>% filter(is.na(is.tree.or.tall) & is.na(is.shrub)) %>% separate(species, into = c("genus", "species")) %>% group_by(genus) %>% summarize(n=n()) %>% arrange(desc(n)) %>% slice(1:200) sum(tmp2$n) ## 10528 species in the 200 most speciose genera genera.gf <- c('Carex' = 'h','Astragalus' = 'h','Acacia' = 't','Euphorbia' = NA,'Silene' = 'h', 'Eucalyptus' = 't','Ranunculus' = 'h','Rubus' = 's','Senecio' = 'h','Viola' = 'h', 'Galium' = 'h','Erica' = 's','Festuca' = 'h','Ficus' = 't','Hypericum' = 'h', 'Hieracium' = 'h','Cyperus' = 'h','Centaurea' = 'h','Miconia' = 't','Salix' = 't', 'Solanum' = 'h','Allium' = 'h','Quercus' = 't','Potentilla' = 'h','Juncus' = 'h', 'Dianthus' = 'h','Veronica' = 'h','Indigofera' = 's','Syzygium' = 't','Artemisia' = 'h', 'Campanula' = 'h','Inga' = 't','Saxifraga' = 'h','Poa' = 'h','Taraxacum' = 'h', 'Eugenia' = 't','Trifolium' = 'h','Cirsium' = 'h','Eragrostis' = 'h','Diospyros' = 't', 'Alyssum' = 'h','Alchemilla' = 'h','Stipa' = 'h','Geranium' = 'h','Polygala' = 's', 'Pedicularis' = 'h','Psychotria' = 't','Ocotea' = 't','Piper' = 's','Panicum' = 'h', 'Aconitum' = 'h','Draba' = 'h','Ilex' = 't','Oxytropis' = 'h','Aristida' = 'h', 'Polygonum' = 'h','Cerastium' = 'h','Minuartia' = 'h','Erigeron' = 'h', 'Acantholimon' = 'h','Prunus' = 't','Asplenium' = 'h','Croton' = NA,'Limonium' = 'h', 'Plantago' = 'h','Thymus' = 'h','Rosa' = 's','Rumex' = 'h','Verbascum' = 'h', 'Astracantha' = 'h','Atriplex' = NA,'Restio' = NA,'Salvia' = 'h','Aspalathus' = 's', 'Crotalaria' = NA,'Helichrysum' = 'h','Pouteria' = 't','Sedum' = 'h','Crassula' = 'h', 'Vicia' = 'h','Rhynchospora' = 'h','Gentiana' = 'h','Elymus' = 'h','Oxalis' = 'h', 'Linum' = 'h','Erysimum' = 'h','Dryopteris' = 'h','Eleocharis' = 'h', 'Delphinium' = 'h','Ipomoea' = 'h','Melaleuca' = 't','Clematis' = 's', 'Epilobium' = 'h','Saussurea' = 'h','Crepis' = 'h','Lathyrus' = 'h','Ribes' = 's', 'Agrostis' = 'h','Pelargonium' = 'h','Tephrosia' = NA) genera.gf2 <- tmp2$genus genera.gf2 <- genera.gf2[which(!genera.gf2 %in% names(genera.gf))] #paste(paste("'", genera.gf2, "' = ''", sep=""), collapse=",") genera.gf2 <- c('Gypsophila' = 'h','Asperula' = 'h','Paronychia' = 'h','Arenaria' = 'h', 'Cliffortia' = 's','Thesium' = 's','Achillea' = 'h','Scutellaria' = 'h', 'Calamagrostis' = 'h','Elaphoglossum' = 'h','Stachys' = 'h','Huperzia' = 'h', 'Linaria' = 'h','Gentianella' = 'h','Valeriana' = 'h','Phylica' = 's', 'Stellaria' = 'h','Cardamine' = 'h','Ficinia' = 'h','Fimbristylis' = 'h', 'Lobelia' = 's','Ornithogalum' = 'h','Heliotropium' = 'h','Iris' = 'h', 'Diplostephium' = 't','Scorzonera' = 'h','Baccharis' = 's','Asparagus' = 's', 'Cheilanthes' = 'h','Solidago' = 'h','Thlaspi' = 'h','Lupinus' = 'h', 'Anemone' = 'h','Aster' = 'h','Genista' = 's','Peucedanum' = 'h', 'Selaginella' = 'h','Isatis' = 'h','Aethionema' = 'h','Agathosma' = 's', 'Erodium' = 'h','Parrya' = 'h','Polystichum' = 'h','Tragopogon' = 'h', 'Wahlenbergia' = 'h','Acanthophyllum' = 's','Anthemis' = 'h','Athyrium' = 'h', 'Corydalis' = 'h','Eryngium' = 'h','Hermannia' = 's','Hesperis' = 's', 'Justicia' = 's','Pentacalia' = NA,'Scleria' = 'h','Scrophularia' = 'h', 'Berberis' = 's','Bupleurum' = NA,'Castilleja' = 'h','Eremogone' = NA, 'Gnidia' = NA,'Goodenia' = 'h','Myosotis' = 'h','Papaver' = 'h', 'Pentaschistis' = 'h','Salsola' = 's','Tetraria' = 's','Teucrium' = 's', 'Crataegus' = 't','Dioscorea' = 'h','Lachemilla' = NA,'Othonna' = NA, 'Zygophyllum' = NA,'Carduus' = 'h','Colchicum' = 'h','Convolvulus' = 'h', 'Desmodium' = 'h','Moraea' = 'h','Muraltia' = 's','Nepeta' = 'h', 'Onosma' = 'h','Xyris' = 'h','Clinopodium' = 'h','Hibiscus' = NA, 'Hydrocotyle' = 'h','Knautia' = 'h','Leandra' = NA,'Luzula' = 'h', 'Packera' = 'h','Pteris' = 'h','Ruschia' = 'h','Androsace' = 'h', 'Centella' = 'h','Consolida' = 'h','Grevillea' = 't','Lampranthus' = 'h', 'Lilium' = 'h','Phyllanthus' = 'h','Puccinellia' = 'h','Rhamnus' = 't', 'Sporobolus' = 'h','Thalictrum' = 'h','Angelica' = 'h','Armeria' = 'h', 'Calceolaria' = 'h','Lactuca' = 'h','Lotus' = 'h','Nassella' = 'h', 'Platanthera' = 'h','Scabiosa' = 'h','Symphyotrichum' = 'h','Arabis' = 'h', 'Asclepias' = 'h','Barleria' = 'h','Begonia' = 'h','Crocus' = 'h', 'Eriocaulon' = 'h','Eupatorium' = 'h','Gynoxys' = NA,'Jurinea' = 'h', 'Lepidium' = 'h','Orobanche' = 'h','Paspalum' = 'h','Potamogeton' = 'h', 'Pterostylis' = 'h','Reseda' = 'h','Seseli' = 'h','Chusquea' = 's', 'Eriogonum' = 'h','Herniaria' = 'h','Melica' = 'h','Myrcia' = NA, 'Penstemon' = 'h','Plectranthus' = 'h','Selago' = 'h','Tanacetum' = 'h', 'Tillandsia' = 'h','Bomarea' = NA,'Boronia' = 's','Bromus' = 'h', 'Echinops' = 'h','Euphrasia' = 'h','Gagea' = 'h','Hymenophyllum' = 'h', 'Metalasia' = NA,'Phlox' = 'h','Primula' = 'h','Pultenaea' = 's', 'Scirpus' = 'h','Sida' = 's','Sideritis' = 'h','Swainsona' = NA, 'Brachyotum' = 's','Cleome' = NA,'Commelina' = 'h','Cuscuta' = 'h', 'Cynanchum' = 'h','Espeletia' = 's','Halenia' = 'h','Lonicera' = 'h', 'Muhlenbergia' = 'h','Puya' = NA,'Sesleria' = 'h','Trigonella' = NA, 'Ageratina' = 's','Bartsia' = 'h','Blechnum' = 'h','Chlorophytum' = 'h', 'Conyza' = 'h','Cousinia' = 'h','Dendrobium' = 'h','Ehrharta' = 'h', 'Epidendrum' = 'h','Epipactis' = 'h','Helianthemum' = 'h','Hibbertia' = 't', 'Inula' = 'h','Isoetes' = 'h','Lotononis' = NA,'Oenothera' = 'h', 'Rhododendron' = 's','Rhynchosia' = 'h','Rorippa' = 'h','Sorbus' = 't', 'Stylidium' = 'h','Symplocos' = 't','Valerianella' = 'h','Asarum' = 'h', 'Boechera' = 'h','Bulbophyllum' = 'h','Drimia' = 'h','Drosanthemum' = NA, 'Gnaphalium' = 'h','Liatris' = 'h') genera.gf <- c(genera.gf, genera.gf2) tmp3 <- tmp %>% mutate(species2=species) %>% separate(species2, into = c("genus", "sp")) %>% left_join(data.frame(genus=names(genera.gf), gf=genera.gf), by="genus") %>% mutate(is.tree.or.tall=ifelse(gf=="t", T, F)) %>% mutate(is.shrub=ifelse(gf=="s", T, F)) DT2 <- DT2 %>% left_join(tmp3 %>% dplyr::select(species, is.tree.or.tall, is.shrub), by="species") %>% mutate(is.tree.or.tall=coalesce(is.tree.or.tall.x, is.tree.or.tall.y)) %>% mutate(is.shrub=coalesce(is.shrub.x, is.shrub.y)) %>% dplyr::select(-is.tree.or.tall.x, -is.tree.or.tall.y, -is.shrub.x, -is.shrub.y, -plants_recorded2) tmp <- DT2 %>% distinct(species, is.tree.or.tall, is.shrub) table(tmp$is.tree.or.tall, tmp$is.shrub, exclude=NULL) " FALSE TRUE <NA> FALSE 24841 5155 0 TRUE 10242 1784 0 <NA> 0 3256 12610" save(DT2, file = "../sPlot/_derived_data/DT2_20161025_filtered2.RData") ###### calculate total matrix Mij on the whole database ###### #### resample by species (20 plots per species) to calculate Mij more quickly DT2 <- DT2 %>% dplyr::rename(SPECIES_NR=species, RELEVE_NR=PlotObservationID) length(unique(DT2$SPECIES_NR)) ### delete rare species (<=10 occurrences) comm.sp <- (DT2 %>% group_by(SPECIES_NR) %>% summarize(n.occurrences=n()) %>% filter(n.occurrences>5) %>% dplyr::select(SPECIES_NR))$SPECIES_NR DT.norare <- as.tbl(DT2) %>% filter(SPECIES_NR %in% comm.sp) #### subset DT.norare #set.seed(1984) source("../../Ancillary_Functions/stratified_fifer.R") system.time(resampling0 <- stratified(DT.norare %>% mutate(SPECIES_NR=factor(SPECIES_NR)), group="SPECIES_NR", size=20, replace=F)) ### ~300 secs sum(table(resampling0$SPECIES_NR)<20) ### 4887 species with less than 10 occurrences length(unique(resampling0$RELEVE_NR)) # 191540 ### unique plots to cast for MiJ DT.res <- DT.norare %>% filter(RELEVE_NR %in% unique(resampling0$RELEVE_NR)) %>% mutate(presabs=(COV_PERC>0)*1) %>% mutate(RELEVE_NR=factor(RELEVE_NR)) %>% mutate(SPECIES_NR=factor(SPECIES_NR)) nrow(DT.res) # 4963090 rows ## Much faster using sparsMatrix than cast system.time(DT.matrix.pa.res <- sparseMatrix(as.integer((DT.res$RELEVE_NR)), as.integer((DT.res$SPECIES_NR)), x = DT.res$presabs, dimnames=list(levels(DT.res$RELEVE_NR), levels(DT.res$SPECIES_NR))) ) #~2 secs!!! system.time(Mij.res <- crossprod(DT.matrix.pa.res, DT.matrix.pa.res)) ### ~ 7 secs system.time(Mij.res <- sweep(Mij.res, 2, diag(Mij.res), FUN="/")) ### 441.245 secs save(Mij.res, file="../sPlot/_derived_data/Mij_tot_AllPlots_sparse_filtered.rData") ##### Additional Figures and Tables ###### library(tidyverse) library(sp) library(rgdal) library(viridis) library(sf) library(rnaturalearth) library(dggridR) library(cowplot) load("/data/sPlot/releases/sPlot2.1/sPlot_header_20161124.RData") ### run Header.fix! source("A96_fixheaderPaper11.R") header.filtered <- fix.header11(header) header.filtered <- header.filtered %>% mutate(rel.area=cut(`Relevé area (m²)`, breaks=c(0,9,50,200,400,800,2000,Inf), right=T)) # load mydata load("../sPlot/_derived_data/Resample1/Mydata_global.RData") mydata <- mydata %>% #mutate(isforest=as.numeric(isforest)) %>% ##classification based on land cover mutate(isforest=ifelse(isforest, "for", "nonfor")) %>% mutate(isforest=factor(isforest)) %>% mutate(CONTINENT=factor(CONTINENT)) ### Relevées used in each iteration for BRTs nrows <- 99 set.seed(999) ## stratified by biome, forest, realm, releve area - each group is capped to 100 relevées rel.list <- lapply(1:nrows, function(x){mydata %>% group_by(sBiomeName, isforest, REALM, cut(Rel.area, c(0,150, 600, 1200, Inf))) %>% sample_frac(1) %>% slice(1:100) %>% #sample_n(100, replace=T) %>% ungroup() %>% dplyr::select(RELEVE_NR) %>% distinct() %>% pull(RELEVE_NR)}) res <- data.frame(PlotObservationID=unlist(rel.list)) %>% group_by(PlotObservationID) %>% summarize(n=n()) header.resampled <- header.filtered %>% filter(PlotObservationID %in% res$PlotObservationID) ###### graph of distribution of plot size across biomes ###### biome.order <- c('Polar and subpolar zone' ,'Alpine' ,'Boreal zone' ,'Temperate midlatitudes' , 'Dry midlatitudes' ,'Dry tropics and subtropics' ,'Subtrop. with year-round rain' , 'Subtropics with winter rain' ,'Tropics with summer rain' ,'Tropics with year-round rain') biome.labs <- c('Polar &\n subpolar' ,'Alpine' ,'Boreal zone' ,'Temperate\n midlatitudes' , 'Dry midlatitudes' ,'Dry tropics\n & subtropics' ,'Subtropics -\n year-round rain' , 'Subtropics -\n winter rain' ,'Tropics -\n summer rain' ,'Tropics -\n year-round rain') plotdistr <- mydata %>% mutate(sBiomeName=factor(sBiomeName, levels=biome.order, labels=biome.labs)) %>% mutate(plot_size=cut(Rel.area, c(0,150, 600, 1200, Inf), labels=c("(0, 150]", "(150, 600]", "(600, 1200]", ">1200"))) %>% group_by(isforest, sBiomeName, plot_size) %>% mutate(isforest=factor(isforest, levels=c("for", "nonfor"), labels=c("Forest", "Non forest"))) %>% summarize(n=n()) %>% complete(isforest, sBiomeName, plot_size, fill=list(n=NA)) ggplotsize <- ggplot(data=plotdistr) + geom_bar(aes(x=sBiomeName, y=n, group=plot_size, fill=plot_size), stat="identity", position ="dodge") + theme_classic() + theme(axis.text.x = element_text(angle=45, hjust=1, vjust = 1)) + facet_grid(isforest~.) + scale_y_log10(labels = function(x) format(x, scientific = F)) + xlab("Biome") + ylab("Number of plots") + scale_fill_discrete(name= bquote(Plot~size~(m^{2}))) ggsave(filename="../sPlot/_derived_data/Resample1/_pics/FigS3_plotsize.png", device = "png", dpi=400, width = 6, height=4, ggplotsize) ###### graph of distribution sampling completeness across biomes ###### completeness_distr <- mydata %>% mutate(plants_recorded=factor(plants_recorded, levels=c("woody_large", "woody_all", "complete"), labels=c("only trees", "trees & shrubs", "complete vegetation"))) %>% mutate(sBiomeName=factor(sBiomeName, levels=biome.order, labels=biome.labs)) %>% group_by(sBiomeName, plants_recorded, isforest) %>% mutate(isforest=factor(isforest, levels=c("for", "nonfor"), labels=c("Forest", "Non forest"))) %>% summarize(n=n()) %>% complete(isforest, sBiomeName, plants_recorded, fill=list(n=NA)) ggcompleteness <- ggplot(data=completeness_distr) + geom_bar(aes(x=sBiomeName, y=n, group=plants_recorded, fill=plants_recorded), stat="identity", position ="dodge") + theme_classic() + theme(axis.text.x = element_text(angle=45, hjust=1, vjust = 1), legend.position = "bottom") + facet_grid(isforest~.) + scale_y_log10(labels = function(x) format(x, scientific = F)) + xlab("Biome") + ylab("Number of plots") + scale_fill_discrete(name= "Plants recorded") ggsave(filename="../sPlot/_derived_data/Resample1/_pics/FigS4_completeness.png", device = "png", dpi=400, width = 7, height=5, ggcompleteness) ##### Table 1 ##### ### create table of databases with plot number and biblio #Import databases and create reference tags library(bib2df) bib.db <- bib2df("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/sPlot_References.bib") #Import database-level information databases <- read_csv("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv") # create citation tags that can be picked up by Manubot databases <- databases %>% left_join(bib.db %>% dplyr::select(BIBTEXKEY, DOI, URL), by="BIBTEXKEY") %>% dplyr::select(-DOI, -URL) # Create Table 1 databases21 <- fread("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out2.1.csv") header.resampled <- header.resampled %>% mutate(Dataset=replace(Dataset, list=Dataset %in% c("Argentina_Chaco_Espinal","Argentina_Cordoba"), "Argentina_Central")) %>% mutate(Dataset=replace(Dataset, list=Dataset=="SIVIM wetlands", "Spain_sivim_wetlands")) %>% left_join(databases21 %>% dplyr::rename(Dataset=`label`) %>% dplyr::select(`GIVD ID`, Dataset), by="Dataset") table1 <- header.resampled %>% group_by(`GIVD ID`) %>% summarize(contributed_plots=n(), .groups = 'drop') %>% left_join(databases %>% filter(`Still in sPlot`==T, Via!="Aggregator") %>% dplyr::select(-Via, -`Still in sPlot`) %>% distinct(), by="GIVD ID") %>% filter(!is.na(contributed_plots)) %>% dplyr::select(`GIVD ID`, `Dataset name`=`DB_name GIVD`, `Nr. of unique plots used` = contributed_plots, Ref=BIBTEXKEY) %>% distinct() %>% arrange(`GIVD ID`) %>% replace_na(list(Ref="")) write_csv(table1, "../sPlot/_output/TableS1_Databases.csv") ##### Fig S1 - PLOT distribution of resampled plots ############### #### Transform to spatial data.frame mycrs <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" ) header.filtered.sf <- SpatialPointsDataFrame(coords=header.filtered %>% dplyr::select(POINT_X, POINT_Y), proj4string = mycrs, data=header.filtered %>% dplyr::select(-POINT_X, -POINT_Y)) %>% st_as_sf() %>% st_transform("+proj=eck4") header.resampled.sf <- header.filtered.sf[which(header.filtered$PlotObservationID %in% res$PlotObservationID),] #### Get geographic data countries <- ne_countries(returnclass = "sf") %>% st_transform(crs = "+proj=eck4") %>% st_geometry() graticules <- ne_download(type = "graticules_15", category = "physical", returnclass = "sf") %>% st_transform(crs = "+proj=eck4") %>% st_geometry() bb <- ne_download(type = "wgs84_bounding_box", category = "physical", returnclass = "sf") %>% st_transform(crs = "+proj=eck4") %>% st_geometry() label_parse <- function(breaks) { parse(text = breaks) } ## basic graph of the world in Eckert projection w3a <- ggplot() + #geom_sf(data = graticules, col = "grey20", lwd = 0.1) + geom_sf(data = countries, fill = "grey90", col = NA, lwd = 0.3) + geom_sf(data = bb, col = "grey20", fill = NA) + coord_sf(crs = "+proj=eck4") + theme_minimal() + theme(axis.text = element_blank(), legend.title=element_text(size=12), legend.text=element_text(size=12), legend.background = element_rect(size=0.1, linetype="solid", colour = 1), legend.key.height = unit(1.1, "cm"), legend.key.width = unit(1.1, "cm")) + scale_fill_viridis(label=label_parse) ## plots all releves in header ggAll0 <- w3a + geom_sf(data=header.filtered.sf, aes(col=plants_recorded2), pch="+", cex=0.9) + scale_color_brewer(palette="Pastel1") + ggtitle("All filtered plots") ggsave(filename="../sPlot/_derived_data/Resample1/_pics/AllfilteredPlot.png", device = "png", dpi=400, width = 6, height=4, ggAll0) ## plots all releves with fornonfor info ggAll <- w3a + geom_sf(data=header.filtered.sf %>% filter(fornonfor!="out") %>% mutate(fornonf=factor(fornonfor)), aes(col=fornonfor), pch="+", cex=0.9) + scale_color_brewer(palette="Set1", direction=-1) + ggtitle("All classified plots") ggsave(filename="../sPlot/_derived_data/Resample1/_pics/AllClassifiedPlots.png", device = "png", dpi=400, width = 6, height=4, ggAll) ## resampled plots header.resampled.sf <- header.resampled.sf %>% mutate(fornonfor=fct_recode(fornonfor, Forest="for", `Non Forest`="nonfor")) ggRes.for <- w3a + geom_sf(data=header.resampled.sf %>% filter(fornonfor=="Forest"), aes(col=fornonfor), pch="+", cex=0.9, alpha=1/6) + scale_color_brewer(palette="Set1", direction=-1, name = "Vegetation type") + #guides(color = guide_legend(override.aes= list(alpha = 1, cex=3))) + theme(legend.position="none", legend.background = element_rect(color = NA), plot.title = element_text(hjust=0.5)) + ggtitle(paste("Forest (n = ", header.resampled.sf %>% filter(fornonfor=="Forest") %>% nrow(), ")", sep="")) #ggsave(filename="../sPlot/_derived_data/Resample1/_pics/AllResampledPlots.png", device = "png", dpi=600, # width = 6, height=3.5, ggRes) ggRes.nonfor <- w3a + geom_sf(data=header.resampled.sf %>% filter(fornonfor=="Non Forest"), aes(col=fornonfor), pch="+", cex=0.9, alpha=1/6) + scale_color_brewer(palette="Set1", direction=-1, name = "Vegetation type") + #guides(color = guide_legend(override.aes= list(alpha = 1, cex=3))) + theme(legend.position="none", legend.background = element_rect(color = NA), plot.title = element_text(hjust=0.5))+ ggtitle(paste("Non-Forest (n = ", header.resampled.sf %>% filter(fornonfor=="Non Forest") %>% nrow(), ")", sep="")) #ggRes.plants_rec <- w3a + # geom_sf(data=header.resampled.sf, aes(col=plants_recorded2), pch="+", cex=0.9) + # ggtitle("All Resampled plots") header.resampled.sf.res <- header.resampled.sf %>% left_join(res) ggRes.leverage <- w3a + geom_sf(data=header.resampled.sf.res, aes(col=n), pch="+", cex=0.9) + scale_color_viridis(name="Plot leverage (%)", limits=c(0,100), breaks=scales::pretty_breaks(n=5)) + theme(legend.position="bottom", legend.background = element_rect(color = NA), plot.title = element_text(hjust=0.5))+ ggtitle("Plot Leverage") #ggsave(filename="../sPlot/_derived_data/Resample1/_pics/AllResampledPlots_leverage.png", device = "png", dpi=600, # width = 6, height=4, ggRes.leverage) FigS1 <- plot_grid(ggRes.for, ggRes.nonfor, ggRes.leverage, nrow=3, labels = c("A", "B", "C"), rel_heights = c(1,1,1.4)) ggsave(filename="../sPlot/_derived_data/Resample1/_pics/FigS1_AllResampledPlots_and_leverage.png", device = "png", dpi=600, width = 6, height=9, FigS1) #### PLOT density of all used plots (for vs nonfor) in hexagons #### #### build hexagon grid dggs <- dgconstruct(spacing=200, metric=T, resround='down') #Get the corresponding grid cells for each earthquake epicenter (lat-long pair) header.resampled$cell <- dgGEO_to_SEQNUM(dggs, header.resampled$POINT_X, header.resampled$POINT_Y)$seqnum #Calculate mean metric for each cell header.resampled.out <- header.resampled %>% dplyr::select(cell, fornonfor) %>% group_by(cell) %>% summarise(count=log(n(),10), count.for=log(sum(fornonfor=="for"),10), count.nonfor=log(sum(fornonfor=="nonfor"),10)) #Get the grid cell boundaries for cells grid <- dgcellstogrid(dggs, header.resampled.out$cell, frame=F) %>% st_as_sf() %>% mutate(cell = header.resampled.out$cell) %>% mutate(value.out=header.resampled.out$count) %>% st_transform("+proj=eck4") %>% st_wrap_dateline(options = c("WRAPDATELINE=YES")) ## plotting ggCount <- w3a + geom_sf(data=grid, aes(fill=value.out),lwd=0, alpha=0.9) + geom_sf(data = countries, col = "grey10", fill=NA, lwd = 0.3) + labs(fill = "# of Plots\n(Log 10)") ggsave(filename="../sPlot/_derived_data/Resample1/_pics/FigSXXV_AllResampledPlots.png", device = "png", dpi=400, width = 6, height=3.5, ggCount) #### List of plots used by dataset, with publication #### db21 <- read_csv("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out2.1.csv") dblatest <- read_csv("/data/sPlot/users/Francesco/_sPlot_Management/Consortium/Databases.out.csv") dblist <- header.resampled %>% mutate(Dataset=replace(Dataset, list=Dataset %in% c("Argentina_Chaco_Espinal","Argentina_Cordoba"), "Argentina_Central")) %>% mutate(Dataset=replace(Dataset, list=Dataset=="SIVIM wetlands", "Spain_sivim_wetlands")) %>% group_by(Dataset) %>% summarize(n=n()) %>% left_join(db21 %>% dplyr::select(`GIVD ID`, Dataset=label), by="Dataset") %>% left_join(dblatest %>% dplyr::select(`GIVD ID`,`DB_name GIVD`, Citation), by="GIVD ID") ## get missing citations from givd givd.dois <- jsonlite::fromJSON('https://www.givd.info/api/givd/byMetaDB/splot') %>% as.data.frame %>% dplyr::select(id, doi) %>% filter(id %in% (dblist %>% dplyr::filter(is.na(Citation)) %>% pull(`GIVD ID`))) %>% filter(doi!="" & doi!="not") %>% mutate(doi=str_remove(doi, pattern=" http://hdl.handle.net/|https://doi.org/|doi.org/")) %>% mutate(url=paste0("https://doi.org/", doi)) ## download bibtex from URLS: ## from https://stackoverflow.com/questions/57340204/r-convert-list-of-dois-to-bibtex #library(curl) # urls <- paste0("https://doi.org/", givd.dois %>% pull(doi)) # bib.path <- "../sPlot/_derived_data/Resample1/doibibs.txt" # h <- new_handle() # handle_setheaders(h, "accept" = "application/x-bibtex") # #curl_download(urls[6], destfile = bib.path, handle = h, mode = "a") # walk(urls, ~ { # curl(., handle = h) %>% # readLines(warn = T) %>% # write(file = bib.path, append = TRUE) # }) dblist <- dblist %>% left_join(givd.dois %>% dplyr::select(`GIVD ID`=id, -doi, url)) %>% mutate(Citation=coalesce(Citation, url)) %>% dplyr::select(`GIVD ID`, `Dataset name`=`DB_name GIVD`, `No. Plots`=n, Reference=Citation) %>% arrange(`GIVD ID`) %>% mutate(Reference=ifelse(is.na(Reference), "", Reference)) write_csv(dblist, path = "../sPlot/_derived_data/Resample1/PlotList.csv") ## Test for mistakes in ecoregion attribution ##mydata.test <- mydata %>% ## sample_n(10000) %>% ## mutate(ECO_NAME=factor(ECO_NAME)) ##(test <- sample(levels(mydata.test$ECO_NAME), 1)) ##ggplot(data=mydata.test %>% ## mutate(key.eco=ECO_NAME == test), ## aes(x=POINT_X, y=POINT_Y, col=key.eco, cex=0.5+key.eco)) + ## geom_point(pch="+") ############# Not updated below load("/data/sPlot/releases/sPlot2.1/sPlot_header_20161124.RData") source("/data/sPlot/users/Francesco/_sPlot_Management/Fix.header.R") header <- fix.header(header, exclude.sophy = F) #### select only forest plots, having complete vegetation ## as in Paper #1 header2 <- as.tbl(header) %>% filter(!is.na(POINT_X)) %>% filter(!is.na(POINT_Y)) %>% #filter(!is.na(`Relevé area (m²)`) & `Relevé area (m²)` != -1) %>% mutate(`Relevé area (m²)`=replace(`Relevé area (m²)`, list=(`Relevé area (m²)` %in% c(0, -1)), value=NA)) %>% filter(`Location uncertainty (m)`<50000) %>% ##later addition filter(is.na(`Plants recorded`) | `Plants recorded` %in% c("all vascular plants", "All vascular plants", "All vascular plants and bryophytes", "All vascular plants and dominant cryptogams", "complete", "Complete vegetation", "Complete vegetation (including non-terricolous tax", "Dominant vascular plants", "#N/A")) %>% mutate(is.forest=ifelse(is.na(is.forest), F, is.forest)) %>% mutate(is.non.forest=ifelse(is.na(is.non.forest), F, is.non.forest)) %>% #mutate(releve.forest.sel=(`Relevé area (m²)`>=100 & `Relevé area (m²)`<=1000 & is.forest==T)) %>% #mutate(releve.nonfor.sel=(`Relevé area (m²)`>=5 & `Relevé area (m²)`<=100 & is.non.forest==T)) %>% mutate(fornonfor=factor(is.forest+(2*is.non.forest), labels = c("out", "for", "nonfor"))) %>% dplyr::select(PlotObservationID, Country, `Relevé area (m²)`, `Herbs identified (y/n)`, `Plants recorded`, `is.forest`, `is.non.forest`, `BiomeID`, POINT_X:fornonfor) xseq <- seq(-2, 4, by=1) FigS0 <- ggplot(data=header2 %>% filter(fornonfor != "out") %>% mutate(fornonfor=fct_drop(fornonfor)) %>% mutate(fornonfor=factor(fornonfor, levels=c("for", "nonfor"), labels=c("Forest", "Non Forest"))), ) + geom_histogram(aes(x=log10(`Relevé area (m²)`), fill=fornonfor), binwidth = 0.5) + facet_grid(fornonfor~.) + scale_fill_brewer(palette="Dark2") + scale_x_continuous(breaks=xseq, labels=10^xseq) + theme_bw() + theme(legend.position = "none", strip.background = element_blank(), axis.title.y = element_blank() ) ggsave(filename = "../sPlot/_pics/FigS0_hist_ReleveArea.png", width = 4, height = 6, dpi=300, FigS0)