diff --git a/Makefile b/Makefile index 3de32b7f73c6b4f66332a300444a3575bfce779b..341ef53168de3159440a6bba5dff77a720af1772 100644 --- a/Makefile +++ b/Makefile @@ -25,10 +25,12 @@ profile: ./runprofile.jl -o example_results container: + #TODO create a Singularity container echo "Not yet implemented (#43)" release: echo "Not yet implemented." install: - echo "Not relevant. Use `julia run.jl` to run Persefone." + #TODO install Julia and/or package dependencies? + echo "Not yet implemented." diff --git a/Manifest.toml b/Manifest.toml index 97c3a35f6a26d25a0f2325452100f0ce06f90709..4701490c43bf37648fa48a3db6c3b9024fd36546 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.9.3" manifest_format = "2.0" -project_hash = "95079802d452de8f9a12096a3facc5e629c3d6d3" +project_hash = "88b08cc01ff4cf4b3ac05aaa043f66221dec37b4" [[deps.AbstractFFTs]] deps = ["ChainRulesCore", "LinearAlgebra"] @@ -132,6 +132,11 @@ git-tree-sha1 = "5084cc1a28976dd1642c9f337b28a3cb03e0f7d2" uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" version = "0.10.7" +[[deps.Chain]] +git-tree-sha1 = "8c4920235f6c561e401dfe569beb8b924adad003" +uuid = "8be319e6-bccf-4806-a6f7-6fae938471bc" +version = "0.5.0" + [[deps.ChainRulesCore]] deps = ["Compat", "LinearAlgebra", "SparseArrays"] git-tree-sha1 = "e7ff6cadf743c098e08fca25c91103ee4303c9bb" @@ -229,10 +234,16 @@ uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.14.0" [[deps.DataFrames]] -deps = ["Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SnoopPrecompile", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "d4f69885afa5e6149d0cab3818491565cf41446d" +deps = ["Compat", "DataAPI", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SnoopPrecompile", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "aa51303df86f8626a962fccb878430cdb0a97eee" uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.4.4" +version = "1.5.0" + +[[deps.DataFramesMeta]] +deps = ["Chain", "DataFrames", "MacroTools", "OrderedCollections", "Reexport"] +git-tree-sha1 = "7f13b2f9fa5fc843a06596f1cc917ed1a3d6740b" +uuid = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" +version = "0.14.0" [[deps.DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] diff --git a/Project.toml b/Project.toml index 8c1f7e3aeefade823f98b74ab08d617dd5d173f3..91651ed6adbbe90e633fdad8d8b05045aa3f10a2 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,8 @@ version = "0.2.0" Agents = "46ada45e-f475-11e8-01d0-f70cc89e6671" ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" GeoArrays = "2fb1d81b-e6a0-5fc5-82e6-8e06903437ab" diff --git a/src/Persefone.jl b/src/Persefone.jl index b8a2ae13c1afcff7fe27ed53f01b37980b7a9054..dd56cdba309bb88de6c5929c3bfb6132ad11d83f 100644 --- a/src/Persefone.jl +++ b/src/Persefone.jl @@ -18,6 +18,8 @@ using ArgParse, CSV, Dates, + DataFrames, + DataFramesMeta, Distributed, GeoArrays, #XXX this is a big dependency - can we get rid of it? Logging, diff --git a/src/analysis/analyse_nature.R b/src/analysis/analyse_nature.R index bebe052b1cef996f8c421870ee93258281c1d9db..da9cba6f707582c1420b12c498bc94075afbb887 100755 --- a/src/analysis/analyse_nature.R +++ b/src/analysis/analyse_nature.R @@ -4,6 +4,8 @@ ### This file visualises the output of the nature model. ### +##TODO replace this with Julia code using Makie (issue #47) + library(tidyverse) library(ggplot2) library(ggsci) @@ -22,7 +24,7 @@ map_output_file = "landscape_map" populationTrends = function() { print("Plotting population trends over time.") - popdata = read.csv2(paste(datadir, popfile, sep="/")) %>% + popdata = read.csv(paste(datadir, popfile, sep="/")) %>% mutate(Date = as.POSIXct(strptime(Date,format="%Y-%m-%d"))) ggplot(data=popdata, aes(x=Date, y=Abundance, color=Species)) + geom_point() + @@ -35,7 +37,7 @@ populationTrends = function() { visualiseMap = function() { print("Visualising individuals on the landscape map.") landcover = rast(paste(datadir, mapfile, sep="/")) - inddata = read.csv2(paste(datadir, indfile, sep="/")) %>% select(Date,Species,X,Y) %>% + inddata = read.csv(paste(datadir, indfile, sep="/")) %>% select(Date,Species,X,Y) %>% mutate(Date = as.POSIXct(strptime(Date,format="%Y-%m-%d"))) for (d in unique(inddata$Date)) { ## somehow, d is changed into a number by the for loop, so we have to convert back diff --git a/src/core/output.jl b/src/core/output.jl index 18aab3a0b805963280d7bd34923e6c85a510ab8d..debff8c7dfc93e4b78b30120eff07034328d8253 100644 --- a/src/core/output.jl +++ b/src/core/output.jl @@ -122,39 +122,48 @@ end """ DataOutput -A struct for organising model output. This is designed for text-based data output -that is updated more or less regularly (e.g. population data in csv files). -Submodels can register their own output functions using [`newdataoutput!`](@ref). +A struct for organising model output. This is used to collect model data +in an in-memory dataframe or for CSV output. Submodels can register their +own output functions using [`newdataoutput!`](@ref). Struct fields: - - filename: the name of the file to be created in the user-specified output directory - - header: a string to be written to the start of the file as it is initialised - - outputfunction: a function that takes a model object and returns a string to write to file + - name: a string identifier for the data collection (used as file name) + - header: a list of column names + - outputfunction: a function that takes a model object and returns data values to record (formatted as a vector of vectors) - frequency: how often to call the output function (daily/monthly/yearly/end/never) """ struct DataOutput - filename::String - header::String + name::String + header::Vector{String} outputfunction::Function frequency::String end """ - newdataoutput!(model, filename, header, outputfunction, frequency) + newdataoutput!(model, name, header, outputfunction, frequency) Create and register a new data output. This function must be called by all submodels that want to have their output functions called regularly. """ -function newdataoutput!(model::AgentBasedModel, filename::String, header::String, +function newdataoutput!(model::AgentBasedModel, name::String, header::Vector{String}, outputfunction::Function, frequency::String) if !(frequency in ("daily", "monthly", "yearly", "end", "never")) - Base.error("Invalid frequency '$frequency' for $filename.") #TODO replace with exception + Base.error("Invalid frequency '$frequency' for $name.") #TODO replace with exception end - ndo = DataOutput(filename, header, outputfunction, frequency) + ndo = DataOutput(name, header, outputfunction, frequency) append!(model.dataoutputs, [ndo]) if frequency != "never" - open(joinpath(@param(core.outdir), filename), "w") do f - println(f, header) + if @param(core.csvoutput) + open(joinpath(@param(core.outdir), name*".csv"), "w") do f + println(f, join(header, ",")) + end + end + if @param(core.storedata) + df = DataFrame() + for h in header + df[!,h] = Any[] #XXX allow specifying types? + end + model.datatables[name] = df end end end @@ -166,7 +175,7 @@ Cycle through all registered data outputs and activate them according to their configured frequency. """ function outputdata(model::AgentBasedModel) - #TODO enable output every X days + #XXX enable output every X days, or weekly? #XXX all output functions except for "end" are run on the first update # -> should they all be run on the last update, too? startdate = @param(core.startdate) @@ -179,11 +188,19 @@ function outputdata(model::AgentBasedModel) (output.frequency == "monthly" && isnextmonth(model.date)) || (output.frequency == "yearly" && isnextyear(model.date)) || (output.frequency == "end" && model.date == @param(core.enddate)) - open(joinpath(@param(core.outdir), output.filename), "a") do f - outstring = output.outputfunction(model) - (outstring[end] != '\n') && (outstring *= '\n') - print(f, outstring) - end + data = output.outputfunction(model) + if @param(core.csvoutput) + open(joinpath(@param(core.outdir), output.name*".csv"), "a") do f + for row in data + println(f, join(row, ",")) + end + end + end + if @param(core.storedata) + for row in data + push!(model.datatables[output.name], row) + end + end end end end diff --git a/src/core/simulation.jl b/src/core/simulation.jl index 34eec0a5b53c65c7092f22be6a876b9a2b567bbc..ea8a26e73a0dea6c370a21628ad69c6b633d8c6f 100644 --- a/src/core/simulation.jl +++ b/src/core/simulation.jl @@ -64,6 +64,7 @@ function initmodel(settings::Dict{String, Any}) with_logger(logger) do events = Vector{FarmEvent}() dataoutputs = Vector{DataOutput}() + datatables = Dict{String, DataFrame}() landscape = initlandscape(settings["world.landcovermap"], settings["world.farmfieldsmap"]) weather = initweather(settings["world.weatherfile"], @@ -79,6 +80,7 @@ function initmodel(settings::Dict{String, Any}) :weather=>weather, :crops=>crops, :dataoutputs=>dataoutputs, + :datatables=>datatables, :events=>events) model = AgentBasedModel(Union{Farmer,Animal,FarmPlot}, space, properties=properties, rng=StableRNG(settings["core.seed"]), warn=false) diff --git a/src/crop/farmplot.jl b/src/crop/farmplot.jl index af15dc55a1e0b7be963fb5aa8c5c188e5b3c56ab..9389bfab4c1dcf9d73c89de2319bc31acb3fe4ef 100644 --- a/src/crop/farmplot.jl +++ b/src/crop/farmplot.jl @@ -47,7 +47,7 @@ function initfields!(model::AgentBasedModel) #XXX does this phase calculation work out? month(model.date) < 3 ? phase = janfirst : phase = marchfirst fp = add_agent!((x,y), FarmPlot, model, [(x,y)], - model.crops["natural grass"], phase, false, + model.crops["natural grass"], phase, 0.0, 0.0, 0.0, 0.0, Vector{EventType}()) model.landscape[x,y].fieldid = fp.id convertid[rawid] = fp.id diff --git a/src/nature/ecologicaldata.jl b/src/nature/ecologicaldata.jl index f1721a73d631332587c9792f6ac3c8f874764e0e..445fc7cc155712b05fe807c24dcbd67e35c768c2 100644 --- a/src/nature/ecologicaldata.jl +++ b/src/nature/ecologicaldata.jl @@ -3,8 +3,8 @@ ### This file includes the functions for collecting and saving ecological output data. ### -const POPFILE = "populations.csv" -const INDFILE = "individuals.csv" +const POPTABLE = "populations" +const INDDATA = "individuals" """ initecologicaldata() @@ -12,9 +12,9 @@ const INDFILE = "individuals.csv" Create output files for each data group collected by the nature model. """ function initecologicaldata(model::AgentBasedModel) - newdataoutput!(model, POPFILE, "Date;Species;Abundance", + newdataoutput!(model, POPTABLE, ["Date", "Species", "Abundance"], savepopulationdata, @param(nature.popoutfreq)) - newdataoutput!(model, INDFILE, "Date;ID;X;Y;Species;Sex;Age", + newdataoutput!(model, INDDATA, ["Date","ID","X","Y","Species","Sex","Age"], saveindividualdata, @param(nature.indoutfreq)) end @@ -32,9 +32,9 @@ function savepopulationdata(model::AgentBasedModel) (typeof(a) != Animal) && continue pops[a.traits["name"]] += 1 end - data = "" + data = [] for p in keys(pops) - data *= join([model.date, p, pops[p]], ";")*"\n" + push!(data, [model.date, p, pops[p]]) end data end @@ -48,11 +48,10 @@ monthly, yearly, or at the end of a simulation, depending on the parameter `nature.indoutfreq`. WARNING: Produces very big files! """ function saveindividualdata(model::AgentBasedModel) - data = "" + data = [] for a in allagents(model) (typeof(a) != Animal) && continue - entry = join([model.date,a.id,a.pos[1],a.pos[2],a.traits["name"],a.sex,a.age], ";") - data *= entry*"\n" + push!(data, [model.date,a.id,a.pos[1],a.pos[2],a.traits["name"],a.sex,a.age]) end data end diff --git a/src/parameters.toml b/src/parameters.toml index 90bd53b5000958020dd84cf3fd66e0fd9a0ffae5..99cea9792669c6a99e81c3e6289d89db47620825 100644 --- a/src/parameters.toml +++ b/src/parameters.toml @@ -10,6 +10,8 @@ configfile = "src/parameters.toml" # location of the configuration file outdir = "results" # location and name of the output folder overwrite = "ask" # overwrite the output directory? (true/false/"ask") +csvoutput = true # save collected data in CSV files +storedata = true # keep collected data in memory loglevel = "debug" # verbosity level: "debug", "info", "warn" processors = 2 # number of processors to use on parallel runs seed = 2 # seed value for the RNG (0 -> random value) diff --git a/test/io_tests.jl b/test/io_tests.jl index d9073c3d061ac4ebf66cb05402adfe297691b5ec..42d1fa089afadb76874739a0e360109f635824f3 100644 --- a/test/io_tests.jl +++ b/test/io_tests.jl @@ -58,4 +58,5 @@ end @test isfile(joinpath(outdir, "end.csv")) @test countlines(joinpath(outdir, "end.csv")) == 2 rm(outdir, force=true, recursive=true) + #TODO test dataframe output end diff --git a/test/simulation_tests.jl b/test/simulation_tests.jl index 013c38268bdab64476c2afd99862c2e03b921111..51de43adf8f8d5a05e7486c2b85e4a6693c84bb0 100644 --- a/test/simulation_tests.jl +++ b/test/simulation_tests.jl @@ -48,7 +48,7 @@ end rand1 = rand() Random.seed!(1) model = initialise(TESTPARAMETERS, 218) - #XXX upstream problem with ArgParse (https://github.com/carlobaldassi/ArgParse.jl/issues/121) + #XXX upstream problem with ArgParse (https://github.com/carlobaldassi/ArgParse.jl/issues/121) - should work again with Julia 1.10 @test_broken rand() == rand1 Random.seed!(1) @test @param(core.seed) == 218 diff --git a/test/test_parameters.toml b/test/test_parameters.toml index ac63e7c5cae100bd89e8279ae7f9390d41b09a7e..65f0d8123f91a50ead4345583034055dda97f0ee 100644 --- a/test/test_parameters.toml +++ b/test/test_parameters.toml @@ -9,6 +9,8 @@ configfile = "test_parameters.toml" # location of the configuration file outdir = "results_testsuite" # location and name of the output folder overwrite = true # overwrite the output directory? (true/false/"ask") +csvoutput = true # save collected data in CSV files +storedata = true # keep collected data in memory loglevel = "warn" # verbosity level: "debug", "info", "warn" processors = 6 # number of processors to use on parallel runs seed = 1 # seed value for the RNG (0 -> random value)