Runtime data now collected in dataframes

closes #64

Runtime data now collected in dataframes
fddae638 · xo30xoqa · c3964585 · fddae638 · fddae638 · fddae638
Commit fddae638 authored 1 year ago by xo30xoqa
--- a/Makefile
+++ b/Makefile
@@ -25,10 +25,12 @@ profile:
 	./runprofile.jl -o example_results

 container:
+	#TODO create a Singularity container
 	echo "Not yet implemented (#43)"

 release:
 	echo "Not yet implemented."

 install:
-	echo "Not relevant. Use `julia run.jl` to run Persefone."
+	#TODO install Julia and/or package dependencies?
+	echo "Not yet implemented."
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -2,7 +2,7 @@

 julia_version = "1.9.3"
 manifest_format = "2.0"
-project_hash = "95079802d452de8f9a12096a3facc5e629c3d6d3"
+project_hash = "88b08cc01ff4cf4b3ac05aaa043f66221dec37b4"

 [[deps.AbstractFFTs]]
 deps = ["ChainRulesCore", "LinearAlgebra"]
@@ -132,6 +132,11 @@ git-tree-sha1 = "5084cc1a28976dd1642c9f337b28a3cb03e0f7d2"
 uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 version = "0.10.7"

+[[deps.Chain]]
+git-tree-sha1 = "8c4920235f6c561e401dfe569beb8b924adad003"
+uuid = "8be319e6-bccf-4806-a6f7-6fae938471bc"
+version = "0.5.0"
+
 [[deps.ChainRulesCore]]
 deps = ["Compat", "LinearAlgebra", "SparseArrays"]
 git-tree-sha1 = "e7ff6cadf743c098e08fca25c91103ee4303c9bb"
@@ -229,10 +234,16 @@ uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
 version = "1.14.0"

 [[deps.DataFrames]]
-deps = ["Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SnoopPrecompile", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
-git-tree-sha1 = "d4f69885afa5e6149d0cab3818491565cf41446d"
+deps = ["Compat", "DataAPI", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SnoopPrecompile", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
+git-tree-sha1 = "aa51303df86f8626a962fccb878430cdb0a97eee"
 uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-version = "1.4.4"
+version = "1.5.0"
+
+[[deps.DataFramesMeta]]
+deps = ["Chain", "DataFrames", "MacroTools", "OrderedCollections", "Reexport"]
+git-tree-sha1 = "7f13b2f9fa5fc843a06596f1cc917ed1a3d6740b"
+uuid = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
+version = "0.14.0"

 [[deps.DataStructures]]
 deps = ["Compat", "InteractiveUtils", "OrderedCollections"]

--- a/Project.toml
+++ b/Project.toml
@@ -7,6 +7,8 @@ version = "0.2.0"
 Agents = "46ada45e-f475-11e8-01d0-f70cc89e6671"
 ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 GeoArrays = "2fb1d81b-e6a0-5fc5-82e6-8e06903437ab"

--- a/src/Persefone.jl
+++ b/src/Persefone.jl
@@ -18,6 +18,8 @@ using
    ArgParse,
    CSV,
    Dates,
+    DataFrames,
+    DataFramesMeta,
    Distributed,
    GeoArrays, #XXX this is a big dependency - can we get rid of it?
    Logging,

--- a/src/analysis/analyse_nature.R
+++ b/src/analysis/analyse_nature.R
@@ -4,6 +4,8 @@
 ### This file visualises the output of the nature model.
 ###

+##TODO replace this with Julia code using Makie (issue #47)
+
 library(tidyverse)
 library(ggplot2)
 library(ggsci)
@@ -22,7 +24,7 @@ map_output_file = "landscape_map"

 populationTrends = function() {
    print("Plotting population trends over time.")
-    popdata = read.csv2(paste(datadir, popfile, sep="/")) %>%
+    popdata = read.csv(paste(datadir, popfile, sep="/")) %>%
        mutate(Date = as.POSIXct(strptime(Date,format="%Y-%m-%d")))
    ggplot(data=popdata, aes(x=Date, y=Abundance, color=Species)) +
        geom_point() +
@@ -35,7 +37,7 @@ populationTrends = function() {
 visualiseMap = function() {
    print("Visualising individuals on the landscape map.")
    landcover = rast(paste(datadir, mapfile, sep="/"))
-    inddata = read.csv2(paste(datadir, indfile, sep="/")) %>% select(Date,Species,X,Y) %>%
+    inddata = read.csv(paste(datadir, indfile, sep="/")) %>% select(Date,Species,X,Y) %>%
        mutate(Date = as.POSIXct(strptime(Date,format="%Y-%m-%d")))
    for (d in unique(inddata$Date)) {
        ## somehow, d is changed into a number by the for loop, so we have to convert back

--- a/src/core/output.jl
+++ b/src/core/output.jl
@@ -122,39 +122,48 @@ end
 """
    DataOutput

-A struct for organising model output. This is designed for text-based data output
-that is updated more or less regularly (e.g. population data in csv files).
-Submodels can register their own output functions using [`newdataoutput!`](@ref).
+A struct for organising model output. It describes one data collection, whose
+records are kept in an in-memory dataframe (`core.storedata`) and/or written to a
+CSV file (`core.csvoutput`). Submodels can register their own output functions using [`newdataoutput!`](@ref).

 Struct fields:
-    - filename: the name of the file to be created in the user-specified output directory
-    - header: a string to be written to the start of the file as it is initialised
-    - outputfunction: a function that takes a model object and returns a string to write to file
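+    - name: a string identifier for the data collection (used as the CSV file name, with ".csv" appended, and as the key in `model.datatables`)
+    - header: a list of column names, in the order in which the values appear in each record
+    - outputfunction: a function that takes a model object and returns the data to record as a vector of vectors (one inner vector per row)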
    - frequency: how often to call the output function (daily/monthly/yearly/end/never)
 """
 struct DataOutput
-    filename::String
-    header::String
+    name::String
+    header::Vector{String}
    outputfunction::Function
    frequency::String
 end

 """
-    newdataoutput!(model, filename, header, outputfunction, frequency)
+    newdataoutput!(model, name, header, outputfunction, frequency)

 Create and register a new data output. This function must be called by all submodels
 that want to have their output functions called regularly.
 """
-function newdataoutput!(model::AgentBasedModel, filename::String, header::String,
+function newdataoutput!(model::AgentBasedModel, name::String, header::Vector{String},
                        outputfunction::Function, frequency::String)
    if !(frequency in ("daily", "monthly", "yearly", "end", "never"))
-        Base.error("Invalid frequency '$frequency' for $filename.") #TODO replace with exception
+        Base.error("Invalid frequency '$frequency' for $name.") #TODO replace with exception
    end
-    ndo = DataOutput(filename, header, outputfunction, frequency)
+    ndo = DataOutput(name, header, outputfunction, frequency)
    append!(model.dataoutputs, [ndo])
    if frequency != "never"
-        open(joinpath(@param(core.outdir), filename), "w") do f
-            println(f, header)
+        if @param(core.csvoutput)
+            open(joinpath(@param(core.outdir), name*".csv"), "w") do f
+                println(f, join(header, ","))
+            end
+        end
+        if @param(core.storedata)
+            df = DataFrame()
+            for h in header
+                df[!,h] = Any[] #XXX allow specifying types?
+            end
+            model.datatables[name] = df
        end
    end
 end
@@ -166,7 +175,7 @@ Cycle through all registered data outputs and activate them according to their
 configured frequency.
 """
 function outputdata(model::AgentBasedModel)
-    #TODO enable output every X days
+    #XXX enable output every X days, or weekly?
    #XXX all output functions except for "end" are run on the first update
    # -> should they all be run on the last update, too?
    startdate = @param(core.startdate)
@@ -179,11 +188,19 @@ function outputdata(model::AgentBasedModel)
            (output.frequency == "monthly" && isnextmonth(model.date)) ||
            (output.frequency == "yearly" && isnextyear(model.date)) ||
            (output.frequency == "end" && model.date == @param(core.enddate))
-            open(joinpath(@param(core.outdir), output.filename), "a") do f
-                outstring = output.outputfunction(model)
-                (outstring[end] != '\n') && (outstring *= '\n')
-                print(f, outstring)
-            end                
+            data = output.outputfunction(model)
+            if @param(core.csvoutput)
+                open(joinpath(@param(core.outdir), output.name*".csv"), "a") do f
+                    for row in data
+                        println(f, join(row, ","))
+                    end
+                end                
+            end
+            if @param(core.storedata)
+                for row in data
+                    push!(model.datatables[output.name], row)
+                end
+            end
        end
    end
 end
--- a/src/core/simulation.jl
+++ b/src/core/simulation.jl
@@ -64,6 +64,7 @@ function initmodel(settings::Dict{String, Any})
    with_logger(logger) do
        events = Vector{FarmEvent}()
        dataoutputs = Vector{DataOutput}()
+        datatables = Dict{String, DataFrame}()
        landscape = initlandscape(settings["world.landcovermap"],
                                  settings["world.farmfieldsmap"])
        weather = initweather(settings["world.weatherfile"],
@@ -79,6 +80,7 @@ function initmodel(settings::Dict{String, Any})
                                      :weather=>weather,
                                      :crops=>crops,
                                      :dataoutputs=>dataoutputs,
+                                      :datatables=>datatables,
                                      :events=>events)
        model = AgentBasedModel(Union{Farmer,Animal,FarmPlot}, space, properties=properties,
                                rng=StableRNG(settings["core.seed"]), warn=false)

--- a/src/crop/farmplot.jl
+++ b/src/crop/farmplot.jl
@@ -47,7 +47,7 @@ function initfields!(model::AgentBasedModel)
                #XXX does this phase calculation work out?
                month(model.date) < 3 ? phase = janfirst : phase = marchfirst
                fp = add_agent!((x,y), FarmPlot, model, [(x,y)],
-                                model.crops["natural grass"], phase, false,
+                                model.crops["natural grass"], phase,
                                0.0, 0.0, 0.0, 0.0, Vector{EventType}())
                model.landscape[x,y].fieldid = fp.id
                convertid[rawid] = fp.id

--- a/src/nature/ecologicaldata.jl
+++ b/src/nature/ecologicaldata.jl
@@ -3,8 +3,8 @@
 ### This file includes the functions for collecting and saving ecological output data.
 ###

-const POPFILE = "populations.csv"
-const INDFILE = "individuals.csv"
+const POPTABLE = "populations"
+const INDDATA = "individuals"

 """
    initecologicaldata()
@@ -12,9 +12,9 @@ const INDFILE = "individuals.csv"
 Create output files for each data group collected by the nature model.
 """
 function initecologicaldata(model::AgentBasedModel)
-    newdataoutput!(model, POPFILE, "Date;Species;Abundance",
+    newdataoutput!(model, POPTABLE, ["Date", "Species", "Abundance"],
                   savepopulationdata, @param(nature.popoutfreq))
-    newdataoutput!(model, INDFILE, "Date;ID;X;Y;Species;Sex;Age",
+    newdataoutput!(model, INDDATA, ["Date","ID","X","Y","Species","Sex","Age"],
                   saveindividualdata, @param(nature.indoutfreq))
 end

@@ -32,9 +32,9 @@ function savepopulationdata(model::AgentBasedModel)
        (typeof(a) != Animal) && continue
        pops[a.traits["name"]] += 1
    end
-    data = ""
+    data = []
    for p in keys(pops)
-        data *= join([model.date, p, pops[p]], ";")*"\n"
+        push!(data, [model.date, p, pops[p]])
    end
    data
 end
@@ -48,11 +48,10 @@ monthly, yearly, or at the end of a simulation, depending on the parameter
 `nature.indoutfreq`. WARNING: Produces very big files!
 """
 function saveindividualdata(model::AgentBasedModel)
-    data = ""
+    data = []
    for a in allagents(model)
        (typeof(a) != Animal) && continue
-        entry = join([model.date,a.id,a.pos[1],a.pos[2],a.traits["name"],a.sex,a.age], ";")
-        data *= entry*"\n"
+        push!(data, [model.date,a.id,a.pos[1],a.pos[2],a.traits["name"],a.sex,a.age])
    end
    data
 end

--- a/src/parameters.toml
+++ b/src/parameters.toml
@@ -10,6 +10,8 @@
 configfile = "src/parameters.toml" # location of the configuration file
 outdir = "results" # location and name of the output folder
 overwrite = "ask" # overwrite the output directory? (true/false/"ask")
+csvoutput = true # save collected data in CSV files
+storedata = true # keep collected data in memory
 loglevel = "debug" # verbosity level: "debug", "info", "warn"
 processors = 2 # number of processors to use on parallel runs
 seed = 2 # seed value for the RNG (0 -> random value)

--- a/test/io_tests.jl
+++ b/test/io_tests.jl
@@ -58,4 +58,5 @@ end
    @test isfile(joinpath(outdir, "end.csv"))
    @test countlines(joinpath(outdir, "end.csv")) == 2
    rm(outdir, force=true, recursive=true)
+    #TODO test dataframe output
 end
--- a/test/simulation_tests.jl
+++ b/test/simulation_tests.jl
@@ -48,7 +48,7 @@ end
    rand1 = rand()
    Random.seed!(1)
    model = initialise(TESTPARAMETERS, 218)
-    #XXX upstream problem with ArgParse (https://github.com/carlobaldassi/ArgParse.jl/issues/121)
+    #XXX upstream problem with ArgParse (https://github.com/carlobaldassi/ArgParse.jl/issues/121) - should work again with Julia 1.10
    @test_broken rand() == rand1 
    Random.seed!(1)
    @test @param(core.seed) == 218

--- a/test/test_parameters.toml
+++ b/test/test_parameters.toml
@@ -9,6 +9,8 @@
 configfile = "test_parameters.toml" # location of the configuration file
 outdir = "results_testsuite" # location and name of the output folder
 overwrite = true # overwrite the output directory? (true/false/"ask")
+csvoutput = true # save collected data in CSV files
+storedata = true # keep collected data in memory
 loglevel = "warn" # verbosity level: "debug", "info", "warn"
 processors = 6 # number of processors to use on parallel runs
 seed = 1 # seed value for the RNG (0 -> random value)