From a20d431f580e9a6950560a6e14d064b5073ccd45 Mon Sep 17 00:00:00 2001
From: Daniel Vedder <daniel.vedder@idiv.de>
Date: Sun, 22 Jan 2023 03:03:31 +0100
Subject: [PATCH] Implemented parameter scanning

This is a major commit with quite far-reaching changes in the code
base. It enables the user to specify multiple values for a model
parameter, which will then be combined into a full-factorial
simulation experiment. Everything seems to work as intended, although
some refactoring would be good, and we still need to add parallelisation
for issue #42.

And yes, I coded all of this between 23:30 and 03:00 on a Saturday
night :sweat_smile: :see_no_evil:
---
 src/core/input.jl        |  69 +++++++++++++++++-------
 src/core/output.jl       |  19 ++++++-
 src/core/simulation.jl   | 113 +++++++++++++++++++++++++++++----------
 test/io_tests.jl         |   7 +--
 test/landscape_tests.jl  |   8 +--
 test/paramscan.toml      |  28 ++++++++++
 test/runtests.jl         |   4 +-
 test/simulation_tests.jl |  22 +++++++-
 8 files changed, 214 insertions(+), 56 deletions(-)
 create mode 100644 test/paramscan.toml

diff --git a/src/core/input.jl b/src/core/input.jl
index 2f5687e..a049e71 100644
--- a/src/core/input.jl
+++ b/src/core/input.jl
@@ -6,6 +6,13 @@
 ## Note: much of this code was adapted from the GeMM model by Leidinger et al.
 ## (https://github.com/CCTB-Ecomods/gemm/blob/master/src/input.jl)
 
+"""
+Path of the TOML file that stores all default parameters (inside the package directory).
+"""
+const PARAMFILE = joinpath(pkgdir(Persephone), "src/parameters.toml")
+## (DO NOT CHANGE THIS VALUE! Instead, specify simulation-specific configuration files
+## by using the "--configfile" commandline argument, or when invoking simulate().)
+
 """
     @param(domainparam)
 
@@ -21,7 +28,25 @@ object is available.
 macro param(domainparam)
     domain = String(domainparam.args[1])
     paramname = String(domainparam.args[2].value)
-    :($(esc(:model)).settings[$domain][$paramname])
+    :($(esc(:model)).settings[$(domain*"."*paramname)])
+end
+
+"""
+    flattenTOML(tomldict)
+
+An internal utility function to convert the nested (two-level) dict returned
+by `TOML.parsefile()` into a flat dict with "domain.param" keys, so that instead
+of writing `settings["domain"]["param"]` one can use `settings["domain.param"]`.
+Can be reversed with `expandTOML()`.
+"""
+function flattenTOML(tomldict)
+    flatdict = Dict{String, Any}()
+    for domain in keys(tomldict)
+        for param in keys(tomldict[domain])
+            flatdict[domain*"."*param] = tomldict[domain][param]
+        end
+    end
+    flatdict
 end
 
 """
@@ -32,39 +57,47 @@ Precedence: commandline parameters - user config file - default values
 """
 function getsettings(configfile::String, seed::Union{Int64,Nothing}=nothing)
     # read in and merge configurations from the commandline, the default config file
-    # and a user-supplied config file
-    defaults::Dict{String, Dict{String, Any}} = TOML.parsefile(configfile)
+    # and a config file supplied by the user (--configfile option or `configfile` argument)
+    defaults::Dict{String, Any} = TOML.parsefile(PARAMFILE) |> flattenTOML
     commandline = parsecommandline()
+    scanparams = Vector{String}()
     if haskey(commandline, "configfile") && isfile(commandline["configfile"])
-        configs = TOML.parsefile(commandline["configfile"])
+        configs = TOML.parsefile(commandline["configfile"]) |> flattenTOML
+    elseif configfile != PARAMFILE && isfile(configfile)
+        configs = TOML.parsefile(configfile) |> flattenTOML
     else
         configs = nothing
     end
     settings = deepcopy(defaults)
-    for domain in keys(defaults)
-        for param in keys(defaults[domain])
-            if param in keys(commandline)
-                settings[domain][param] = commandline[param]
-            elseif !isnothing(configs) && param in keys(configs[domain])
-                settings[domain][param] = configs[domain][param]
+    for param in keys(defaults)
+        if split(param, ".")[2] in keys(commandline)
+            settings[param] = commandline[split(param, ".")[2]]
+        elseif !isnothing(configs) && param in keys(configs)
+            settings[param] = configs[param]
+            # scan if a list replaces a scalar default, or a list of lists a list default
+            vectup = Union{Vector,Tuple}
+            if isa(configs[param], vectup) && (!isa(defaults[param], vectup) ||
+                (!isempty(configs[param]) && all(v -> isa(v, vectup), configs[param])))
+                push!(scanparams, param)
             end
         end
     end
     # pre-process certain parameters
     if !isnothing(seed)
-        settings["core"]["seed"] = seed
+        settings["core.seed"] = seed
     end
-    if settings["core"]["seed"] == 0
-        settings["core"]["seed"] = abs(rand(RandomDevice(), Int32))
+    if settings["core.seed"] == 0
+        settings["core.seed"] = abs(rand(RandomDevice(), Int32))
     end
-    defaultoutdir = defaults["core"]["outdir"]
-    if settings["core"]["outdir"] == defaultoutdir
-        outdir = defaultoutdir*"_"*string(Dates.today())*"_s"*string(settings["core"]["seed"])
-        settings["core"]["outdir"] = outdir
+    defaultoutdir = defaults["core.outdir"]
+    if settings["core.outdir"] == defaultoutdir
+        outdir = defaultoutdir*"_"*string(Dates.today())*"_s"*string(settings["core.seed"])
+        settings["core.outdir"] = outdir
     end
-    if settings["core"]["startdate"] > settings["core"]["enddate"]
+    if settings["core.startdate"] > settings["core.enddate"]
         Base.error("Enddate is earlier than startdate.")
     end
+    settings["internal.scanparams"] = scanparams
     settings
 end
 
diff --git a/src/core/output.jl b/src/core/output.jl
index c9deaac..38c09f0 100644
--- a/src/core/output.jl
+++ b/src/core/output.jl
@@ -57,7 +57,7 @@ function setupdatadir(model::AgentBasedModel)
             println(f, "# WARNING: Your repository contains uncommitted changes. This may")
             println(f, "#          compromise the reproducibility of this simulation run.\n")
         end
-        TOML.print(f, model.settings)
+        TOML.print(f, expandTOML(model.settings))
     end
     # Copy the map files to the output folder
     lcmap = @param(core.landcovermap)
@@ -68,6 +68,23 @@ function setupdatadir(model::AgentBasedModel)
     cp(ffmap, joinpath(@param(core.outdir), basename(ffmap)), force = true)
 end
 
+
+"""
+    expandTOML(settingsdict)
+
+An internal utility function to re-convert the flat ("domain.param") dict created
+by `flattenTOML()` into the nested (two-level) dict needed by `TOML.print()`.
+"""
+function expandTOML(settingsdict)
+    fulldict = Dict{String, Dict{String, Any}}()
+    for parameter in keys(settingsdict)
+        domain, param = split(parameter, ".")
+        !(domain in keys(fulldict)) && (fulldict[domain] = Dict{String,Any}())
+        fulldict[domain][param] = settingsdict[parameter]
+    end
+    fulldict
+end
+
 """
     DataOutput
 
diff --git a/src/core/simulation.jl b/src/core/simulation.jl
index ca13ff2..352640b 100644
--- a/src/core/simulation.jl
+++ b/src/core/simulation.jl
@@ -3,40 +3,90 @@
 ### This file includes the core functions for initialising and running simulations.
 ###
 
+#XXX With the parameter scanning, code execution has become rather difficult to follow.
+# Can I refactor this into two clear, separate paths - one for the normal case (single
+# run) and one for parameter scanning?
+
+"""
+    initmodels(settingsdicts)
+
+Initialise one model object (and its output directory) per ready-made settings dict
+and return them as a vector. This is a helper function for `initialise()`.
+"""
+function initmodels(settingsdicts::Vector{Dict{String, Any}})
+    #TODO parallelise model initialisation
+    @debug "Initialising model object(s)."
+    # build one model per settings dict, each with its own landscape, RNG and data dir
+    models = Vector{AgentBasedModel}()
+    for settings in settingsdicts
+        events = Vector{FarmEvent}()
+        dataoutputs = Vector{DataOutput}()
+        landscape = initlandscape(settings["core.landcovermap"], settings["core.farmfieldsmap"])
+        space = GridSpace(size(landscape), periodic=false)
+        properties = Dict{Symbol,Any}(:settings=>settings,
+                                      :date=>settings["core.startdate"],
+                                      :landscape=>landscape,
+                                      :dataoutputs=>dataoutputs,
+                                      :events=>events)
+        model = AgentBasedModel(Union{Farmer,Animal,FarmPlot}, space, properties=properties,
+                                rng=StableRNG(settings["core.seed"]), warn=false)
+        setupdatadir(model)
+        initfarms!(model)
+        initfields!(model)
+        initnature!(model)
+        push!(models, model)
+    end
+    models
+end
+
 """
-The file that stores all default parameters.
+    paramscan(settings, scanparams)
+
+Create a list of settings dicts covering all possible combinations of the values given
+for the parameters in `scanparams` (i.e. a full-factorial experiment), each with its
+own output directory. Leaves `scanparams` unchanged. Helper function for `initialise()`.
 """
-const PARAMFILE = "src/parameters.toml"
-## (DO NOT CHANGE THIS VALUE! Instead, specify simulation-specific configuration files
-## by using the "--configfile" commandline argument, or when invoking simulate().) 
+function paramscan(settings::Dict{String,Any}, scanparams::Vector{String})
+    # recursively generate a set of settings dicts covering all combinations
+    function generatecombinations(params::Vector{String})
+        (length(params) == 0) && return [settings]
+        param = pop!(params)
+        combinations = Vector{Dict{String,Any}}()
+        for comb in generatecombinations(params)
+            for value in settings[param]
+                newcombination = deepcopy(comb)
+                newcombination[param] = value
+                if comb["core.outdir"] == settings["core.outdir"]
+                    outdir = joinpath(comb["core.outdir"], "$(split(param, ".")[2])_$(value)")
+                else
+                    outdir = "$(comb["core.outdir"])_$(split(param, ".")[2])_$(value)"
+                end
+                newcombination["core.outdir"] = outdir
+                push!(combinations, newcombination)
+            end
+        end
+        combinations
+    end
+    generatecombinations(copy(scanparams)) # copy: the recursion consumes its argument
+end
 
 """
     initialise(config=PARAMFILE, seed=nothing)
 
 Initialise the model: read in parameters, create the output data directory,
-and instantiate the AgentBasedModel object. Optionally allows specifying the
-configuration file and overriding the `seed` parameter.
+and instantiate the AgentBasedModel object(s). Optionally allows specifying the
+configuration file and overriding the `seed` parameter. Returns a single model
+object unless the config file gives multiple values for one or more parameters;
+in that case a full-factorial simulation experiment is set up and a vector of
+model objects (one per parameter combination) is returned.
 """
 function initialise(config::String=PARAMFILE, seed::Union{Int64,Nothing}=nothing)
     @info "Simulation run started at $(Dates.now())."
     settings = getsettings(config, seed)
-    events = Vector{FarmEvent}()
-    dataoutputs = Vector{DataOutput}()
-    landscape = initlandscape(settings["core"]["landcovermap"], settings["core"]["farmfieldsmap"])
-    space = GridSpace(size(landscape), periodic=false)
-    properties = Dict{Symbol,Any}(:settings=>settings,
-                                  :date=>settings["core"]["startdate"],
-                                  :landscape=>landscape,
-                                  :dataoutputs=>dataoutputs,
-                                  :events=>events)
-    @debug "Setting up model."
-    model = AgentBasedModel(Union{Farmer,Animal,FarmPlot}, space, properties=properties,
-                            rng=StableRNG(settings["core"]["seed"]), warn=false)
-    setupdatadir(model)
-    initfarms!(model)
-    initfields!(model)
-    initnature!(model)
-    model
+    scanparams = settings["internal.scanparams"]
+    delete!(settings, "internal.scanparams")
+    models = initmodels(paramscan(settings, scanparams))
+    (length(models) == 1) ? models[1] : models
 end
 
 """
@@ -85,10 +135,19 @@ end
 """
     simulate(config=PARAMFILE, seed=nothing)
 
-Initialise a model object and carry out a complete simulation run, optionally
-specifying a configuration file and a seed for the RNG.
+Initialise one or more model objects and carry out a full simulation experiment,
+optionally specifying a configuration file and a seed for the RNG. If a parameter
+scan produced several models, they are simulated one after another (not in parallel).
+This is the default way to run a Persephone simulation.
 """
 function simulate(config::String=PARAMFILE, seed::Union{Int64,Nothing}=nothing)
-    model = initialise(config, seed)
-    simulate!(model)
+    models = initialise(config, seed)
+    if isa(models, Vector)
+        for m in models
+            @info "Executing run $(m.settings["core.outdir"])"
+            simulate!(m) #TODO parallelise
+        end
+    else
+        simulate!(models)
+    end
 end
diff --git a/test/io_tests.jl b/test/io_tests.jl
index 5884d9d..2ee9b64 100644
--- a/test/io_tests.jl
+++ b/test/io_tests.jl
@@ -9,7 +9,7 @@
     space = GridSpace((10,10), periodic=false)
     model = AgentBasedModel(Animal, space, properties=properties, warn=false)
 
-    @test @param(core.configfile) == TESTPARAMETERS
+    @test @param(core.configfile) == basename(TESTPARAMETERS)
     @test @param(core.startdate) == Date(2022, 2, 1)
     @test @param(nature.targetspecies) == ["Wolpertinger", "Wyvern"]
     @param(core.enddate) = Date(2022,1,3)
@@ -24,8 +24,9 @@ end
     model = AgentBasedModel(Animal, space, properties=properties, warn=false)
     # test that the output directory is created with all files
     outdir = @param(core.outdir)
-    logstring = "Setting up output directory results_testsuite_$(Dates.today())_s1."
-    @test_logs (:debug, logstring) min_level=Logging.Debug Ps.setupdatadir(model)
+    @test_logs((:debug, "Setting up output directory results_testsuite."),
+               min_level=Logging.Debug, match_mode=:any,
+               Ps.setupdatadir(model))
     @test isdir(outdir)
     @test isfile(joinpath(outdir, @param(core.landcovermap)))
     @test isfile(joinpath(outdir, @param(core.farmfieldsmap)))
diff --git a/test/landscape_tests.jl b/test/landscape_tests.jl
index ab8dac4..ddf5eb0 100644
--- a/test/landscape_tests.jl
+++ b/test/landscape_tests.jl
@@ -25,19 +25,19 @@ function smalltestlandscape(agenttype::Type=Animal)
     end
     landscape[6,4] = Pixel(Ps.water, 0, [])
     space = GridSpace(size(landscape), periodic=false)
-    properties = Dict{Symbol,Any}(:date=>TESTSETTINGS["core"]["startdate"],
+    properties = Dict{Symbol,Any}(:date=>TESTSETTINGS["core.startdate"],
                                   :landscape=>landscape,
                                   :events=>Vector{FarmEvent}(),
                                   :dataoutputs=>Vector{DataOutput}(),
                                   :settings=>TESTSETTINGS)
     return AgentBasedModel(agenttype, space, properties=properties,
-                           rng=StableRNG(TESTSETTINGS["core"]["seed"]), warn=false)
+                           rng=StableRNG(TESTSETTINGS["core.seed"]), warn=false)
 end
 
 @testset "Landscape initialisation" begin
     # initialise the landscape part of the model
-    landscape = Ps.initlandscape(TESTSETTINGS["core"]["landcovermap"],
-                                 TESTSETTINGS["core"]["farmfieldsmap"])
+    landscape = Ps.initlandscape(TESTSETTINGS["core.landcovermap"],
+                                 TESTSETTINGS["core.farmfieldsmap"])
     space = GridSpace(size(landscape), periodic=false)
     properties = Dict{Symbol,Any}(:landscape=>landscape, :settings=>TESTSETTINGS)
     model = AgentBasedModel(FarmPlot, space, properties=properties, warn=false)
diff --git a/test/paramscan.toml b/test/paramscan.toml
new file mode 100644
index 0000000..d17505e
--- /dev/null
+++ b/test/paramscan.toml
@@ -0,0 +1,28 @@
+### Persephone - a socio-economic-ecological model of European agricultural landscapes.
+###
+### This configuration file is intended to test the parameter scanning feature.
+###
+
+[core]
+configfile = "test/paramscan.toml" # location of the configuration file
+landcovermap = "landcover_jena.tif" # location of the landcover map
+farmfieldsmap = "fields_jena.tif" # location of the field geometry map
+outdir = "results_test_paramscan" # location and name of the output folder
+overwrite = ["ask",true] # overwrite the output directory? (true/false/"ask")
+loglevel = ["debug", "info"] # verbosity level: "debug", "info", "quiet"
+seed = [1,2,3] # seed value for the RNG (0 -> random value)
+# dates to start and end the simulation
+startdate = 2022-01-01
+enddate = 2022-01-02
+
+[farm]
+farmmodel = "FieldManager" # which version of the farm model to use (not yet implemented)
+
+[nature]
+targetspecies = ["Wolpertinger", "Wyvern"] # list of target species to simulate
+popoutfreq = "daily" # output frequency population-level data, daily/monthly/yearly/end/never
+indoutfreq = "end" # output frequency individual-level data, daily/monthly/yearly/end/never
+	
+[crop]
+cropmodel = "linear" # crop growth model to use, "linear" or "aquacrop" (not yet implemented)
+
diff --git a/test/runtests.jl b/test/runtests.jl
index 69bd24d..45b93bf 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -16,7 +16,7 @@ using Test
 
 const Ps = Persephone
 
-const TESTPARAMETERS = "test_parameters.toml"
+const TESTPARAMETERS = joinpath(pkgdir(Persephone), "test/test_parameters.toml")
 const TESTSETTINGS = Ps.getsettings(TESTPARAMETERS)
 
 @testset "Persephone Tests" begin
@@ -34,5 +34,5 @@ const TESTSETTINGS = Ps.getsettings(TESTPARAMETERS)
     @testset "Farm model" begin
         include("farm_tests.jl")
     end
-    rm(TESTSETTINGS["core"]["outdir"], force=true, recursive=true)
+    rm(TESTSETTINGS["core.outdir"], force=true, recursive=true)
 end
diff --git a/test/simulation_tests.jl b/test/simulation_tests.jl
index 1be977a..d9766bb 100644
--- a/test/simulation_tests.jl
+++ b/test/simulation_tests.jl
@@ -5,7 +5,7 @@
 
 @testset "Model initialisation" begin
     model = initialise(TESTPARAMETERS)
-    @test typeof(model.settings) == Dict{String, Dict{String, Any}}
+    @test typeof(model.settings) == Dict{String, Any}
     @test model.date == Date(2022,2,1)
     @test typeof(model.landscape) == Matrix{Pixel}
     @test typeof(model.dataoutputs) == Vector{DataOutput}
@@ -14,6 +14,26 @@
     @test nagents(model) == 2092+10+28
 end
 
+@testset "Parameter scanning" begin
+    config = "paramscan.toml"
+    testdirs = ["results_test_paramscan/seed_1_loglevel_debug_overwrite_ask",
+                "results_test_paramscan/seed_1_loglevel_debug_overwrite_true",
+                "results_test_paramscan/seed_2_loglevel_info_overwrite_true"]
+    settings = Ps.getsettings(config)
+    scanparams = settings["internal.scanparams"]
+    @test length(scanparams) == 3
+    @test sort(["core.overwrite", "core.loglevel", "core.seed"]) == sort(scanparams)
+    scan = Ps.paramscan(settings, scanparams)
+    outdirs = (s["core.outdir"] for s in scan)
+    @test length(outdirs) == 12
+    @test length(initialise(config)) == 12 #XXX slow: this initialises all 12 models
+    for dir in testdirs
+        @test dir in outdirs
+        @test isdir(dir)
+    end
+    rm("results_test_paramscan", force=true, recursive=true)
+end
+
 @testset "Model simulation" begin
     # The primary reason for this testset is to make sure that a complete
     # simulation will run through without errors. Thus, there are few tests.
-- 
GitLab