diff --git a/Makefile b/Makefile
index 3de32b7f73c6b4f66332a300444a3575bfce779b..341ef53168de3159440a6bba5dff77a720af1772 100644
--- a/Makefile
+++ b/Makefile
@@ -25,10 +25,12 @@ profile:
 	./runprofile.jl -o example_results
 
 container:
+	#TODO create a Singularity container
 	echo "Not yet implemented (#43)"
 
 release:
 	echo "Not yet implemented."
 
 install:
-	echo "Not relevant. Use `julia run.jl` to run Persefone."
+	#TODO install Julia and/or package dependencies?
+	echo "Not yet implemented."
diff --git a/Manifest.toml b/Manifest.toml
index 97c3a35f6a26d25a0f2325452100f0ce06f90709..4701490c43bf37648fa48a3db6c3b9024fd36546 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -2,7 +2,7 @@
 
 julia_version = "1.9.3"
 manifest_format = "2.0"
-project_hash = "95079802d452de8f9a12096a3facc5e629c3d6d3"
+project_hash = "88b08cc01ff4cf4b3ac05aaa043f66221dec37b4"
 
 [[deps.AbstractFFTs]]
 deps = ["ChainRulesCore", "LinearAlgebra"]
@@ -132,6 +132,11 @@ git-tree-sha1 = "5084cc1a28976dd1642c9f337b28a3cb03e0f7d2"
 uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 version = "0.10.7"
 
+[[deps.Chain]]
+git-tree-sha1 = "8c4920235f6c561e401dfe569beb8b924adad003"
+uuid = "8be319e6-bccf-4806-a6f7-6fae938471bc"
+version = "0.5.0"
+
 [[deps.ChainRulesCore]]
 deps = ["Compat", "LinearAlgebra", "SparseArrays"]
 git-tree-sha1 = "e7ff6cadf743c098e08fca25c91103ee4303c9bb"
@@ -229,10 +234,16 @@ uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
 version = "1.14.0"
 
 [[deps.DataFrames]]
-deps = ["Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SnoopPrecompile", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
-git-tree-sha1 = "d4f69885afa5e6149d0cab3818491565cf41446d"
+deps = ["Compat", "DataAPI", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SnoopPrecompile", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
+git-tree-sha1 = "aa51303df86f8626a962fccb878430cdb0a97eee"
 uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-version = "1.4.4"
+version = "1.5.0"
+
+[[deps.DataFramesMeta]]
+deps = ["Chain", "DataFrames", "MacroTools", "OrderedCollections", "Reexport"]
+git-tree-sha1 = "7f13b2f9fa5fc843a06596f1cc917ed1a3d6740b"
+uuid = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
+version = "0.14.0"
 
 [[deps.DataStructures]]
 deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
diff --git a/Project.toml b/Project.toml
index 8c1f7e3aeefade823f98b74ab08d617dd5d173f3..91651ed6adbbe90e633fdad8d8b05045aa3f10a2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,6 +7,8 @@ version = "0.2.0"
 Agents = "46ada45e-f475-11e8-01d0-f70cc89e6671"
 ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 GeoArrays = "2fb1d81b-e6a0-5fc5-82e6-8e06903437ab"
diff --git a/src/Persefone.jl b/src/Persefone.jl
index b8a2ae13c1afcff7fe27ed53f01b37980b7a9054..dd56cdba309bb88de6c5929c3bfb6132ad11d83f 100644
--- a/src/Persefone.jl
+++ b/src/Persefone.jl
@@ -18,6 +18,8 @@ using
     ArgParse,
     CSV,
     Dates,
+    DataFrames,
+    DataFramesMeta,
     Distributed,
     GeoArrays, #XXX this is a big dependency - can we get rid of it?
     Logging,
diff --git a/src/analysis/analyse_nature.R b/src/analysis/analyse_nature.R
index bebe052b1cef996f8c421870ee93258281c1d9db..da9cba6f707582c1420b12c498bc94075afbb887 100755
--- a/src/analysis/analyse_nature.R
+++ b/src/analysis/analyse_nature.R
@@ -4,6 +4,8 @@
 ### This file visualises the output of the nature model.
 ###
 
+##TODO replace this with Julia code using Makie (issue #47)
+
 library(tidyverse)
 library(ggplot2)
 library(ggsci)
@@ -22,7 +24,7 @@ map_output_file = "landscape_map"
 
 populationTrends = function() {
     print("Plotting population trends over time.")
-    popdata = read.csv2(paste(datadir, popfile, sep="/")) %>%
+    popdata = read.csv(paste(datadir, popfile, sep="/")) %>%
         mutate(Date = as.POSIXct(strptime(Date,format="%Y-%m-%d")))
     ggplot(data=popdata, aes(x=Date, y=Abundance, color=Species)) +
         geom_point() +
@@ -35,7 +37,7 @@ populationTrends = function() {
 visualiseMap = function() {
     print("Visualising individuals on the landscape map.")
     landcover = rast(paste(datadir, mapfile, sep="/"))
-    inddata = read.csv2(paste(datadir, indfile, sep="/")) %>% select(Date,Species,X,Y) %>%
+    inddata = read.csv(paste(datadir, indfile, sep="/")) %>% select(Date,Species,X,Y) %>%
         mutate(Date = as.POSIXct(strptime(Date,format="%Y-%m-%d")))
     for (d in unique(inddata$Date)) {
         ## somehow, d is changed into a number by the for loop, so we have to convert back
diff --git a/src/core/output.jl b/src/core/output.jl
index 18aab3a0b805963280d7bd34923e6c85a510ab8d..debff8c7dfc93e4b78b30120eff07034328d8253 100644
--- a/src/core/output.jl
+++ b/src/core/output.jl
@@ -122,39 +122,48 @@ end
 """
     DataOutput
 
-A struct for organising model output. This is designed for text-based data output
-that is updated more or less regularly (e.g. population data in csv files).
-Submodels can register their own output functions using [`newdataoutput!`](@ref).
+A struct for organising model output. This is used to collect model data
+in an in-memory dataframe or for CSV output. Submodels can register their
+own output functions using [`newdataoutput!`](@ref).
 
 Struct fields:
-    - filename: the name of the file to be created in the user-specified output directory
-    - header: a string to be written to the start of the file as it is initialised
-    - outputfunction: a function that takes a model object and returns a string to write to file
+    - name: a string identifier for the data collection (used as file name)
+    - header: a list of column names
+    - outputfunction: a function that takes a model object and returns data values to record (formatted as a vector of vectors)
     - frequency: how often to call the output function (daily/monthly/yearly/end/never)
 """
 struct DataOutput
-    filename::String
-    header::String
+    name::String
+    header::Vector{String}
     outputfunction::Function
     frequency::String
 end
 
 """
-    newdataoutput!(model, filename, header, outputfunction, frequency)
+    newdataoutput!(model, name, header, outputfunction, frequency)
 
 Create and register a new data output. This function must be called by all submodels
 that want to have their output functions called regularly.
 """
-function newdataoutput!(model::AgentBasedModel, filename::String, header::String,
+function newdataoutput!(model::AgentBasedModel, name::String, header::Vector{String},
                         outputfunction::Function, frequency::String)
     if !(frequency in ("daily", "monthly", "yearly", "end", "never"))
-        Base.error("Invalid frequency '$frequency' for $filename.") #TODO replace with exception
+        Base.error("Invalid frequency '$frequency' for $name.") #TODO replace with exception
     end
-    ndo = DataOutput(filename, header, outputfunction, frequency)
+    ndo = DataOutput(name, header, outputfunction, frequency)
     append!(model.dataoutputs, [ndo])
     if frequency != "never"
-        open(joinpath(@param(core.outdir), filename), "w") do f
-            println(f, header)
+        if @param(core.csvoutput)
+            open(joinpath(@param(core.outdir), name*".csv"), "w") do f
+                println(f, join(header, ","))
+            end
+        end
+        if @param(core.storedata)
+            df = DataFrame()
+            for h in header
+                df[!,h] = Any[] #XXX allow specifying types?
+            end
+            model.datatables[name] = df
         end
     end
 end
@@ -166,7 +175,7 @@ Cycle through all registered data outputs and activate them according to their
 configured frequency.
 """
 function outputdata(model::AgentBasedModel)
-    #TODO enable output every X days
+    #XXX enable output every X days, or weekly?
     #XXX all output functions except for "end" are run on the first update
     # -> should they all be run on the last update, too?
     startdate = @param(core.startdate)
@@ -179,11 +188,19 @@ function outputdata(model::AgentBasedModel)
             (output.frequency == "monthly" && isnextmonth(model.date)) ||
             (output.frequency == "yearly" && isnextyear(model.date)) ||
             (output.frequency == "end" && model.date == @param(core.enddate))
-            open(joinpath(@param(core.outdir), output.filename), "a") do f
-                outstring = output.outputfunction(model)
-                (outstring[end] != '\n') && (outstring *= '\n')
-                print(f, outstring)
-            end                
+            data = output.outputfunction(model)
+            if @param(core.csvoutput)
+                open(joinpath(@param(core.outdir), output.name*".csv"), "a") do f
+                    for row in data
+                        println(f, join(row, ","))
+                    end
+                end                
+            end
+            if @param(core.storedata)
+                for row in data
+                    push!(model.datatables[output.name], row)
+                end
+            end
         end
     end
 end
diff --git a/src/core/simulation.jl b/src/core/simulation.jl
index 34eec0a5b53c65c7092f22be6a876b9a2b567bbc..ea8a26e73a0dea6c370a21628ad69c6b633d8c6f 100644
--- a/src/core/simulation.jl
+++ b/src/core/simulation.jl
@@ -64,6 +64,7 @@ function initmodel(settings::Dict{String, Any})
     with_logger(logger) do
         events = Vector{FarmEvent}()
         dataoutputs = Vector{DataOutput}()
+        datatables = Dict{String, DataFrame}()
         landscape = initlandscape(settings["world.landcovermap"],
                                   settings["world.farmfieldsmap"])
         weather = initweather(settings["world.weatherfile"],
@@ -79,6 +80,7 @@ function initmodel(settings::Dict{String, Any})
                                       :weather=>weather,
                                       :crops=>crops,
                                       :dataoutputs=>dataoutputs,
+                                      :datatables=>datatables,
                                       :events=>events)
         model = AgentBasedModel(Union{Farmer,Animal,FarmPlot}, space, properties=properties,
                                 rng=StableRNG(settings["core.seed"]), warn=false)
diff --git a/src/crop/farmplot.jl b/src/crop/farmplot.jl
index af15dc55a1e0b7be963fb5aa8c5c188e5b3c56ab..9389bfab4c1dcf9d73c89de2319bc31acb3fe4ef 100644
--- a/src/crop/farmplot.jl
+++ b/src/crop/farmplot.jl
@@ -47,7 +47,7 @@ function initfields!(model::AgentBasedModel)
                 #XXX does this phase calculation work out?
                 month(model.date) < 3 ? phase = janfirst : phase = marchfirst
                 fp = add_agent!((x,y), FarmPlot, model, [(x,y)],
-                                model.crops["natural grass"], phase, false,
+                                model.crops["natural grass"], phase,
                                 0.0, 0.0, 0.0, 0.0, Vector{EventType}())
                 model.landscape[x,y].fieldid = fp.id
                 convertid[rawid] = fp.id
diff --git a/src/nature/ecologicaldata.jl b/src/nature/ecologicaldata.jl
index f1721a73d631332587c9792f6ac3c8f874764e0e..445fc7cc155712b05fe807c24dcbd67e35c768c2 100644
--- a/src/nature/ecologicaldata.jl
+++ b/src/nature/ecologicaldata.jl
@@ -3,8 +3,8 @@
 ### This file includes the functions for collecting and saving ecological output data.
 ###
 
-const POPFILE = "populations.csv"
-const INDFILE = "individuals.csv"
+const POPTABLE = "populations"
+const INDDATA = "individuals"
 
 """
     initecologicaldata()
@@ -12,9 +12,9 @@ const INDFILE = "individuals.csv"
 Create output files for each data group collected by the nature model.
 """
 function initecologicaldata(model::AgentBasedModel)
-    newdataoutput!(model, POPFILE, "Date;Species;Abundance",
+    newdataoutput!(model, POPTABLE, ["Date", "Species", "Abundance"],
                    savepopulationdata, @param(nature.popoutfreq))
-    newdataoutput!(model, INDFILE, "Date;ID;X;Y;Species;Sex;Age",
+    newdataoutput!(model, INDDATA, ["Date","ID","X","Y","Species","Sex","Age"],
                    saveindividualdata, @param(nature.indoutfreq))
 end
 
@@ -32,9 +32,9 @@ function savepopulationdata(model::AgentBasedModel)
         (typeof(a) != Animal) && continue
         pops[a.traits["name"]] += 1
     end
-    data = ""
+    data = []
     for p in keys(pops)
-        data *= join([model.date, p, pops[p]], ";")*"\n"
+        push!(data, [model.date, p, pops[p]])
     end
     data
 end
@@ -48,11 +48,10 @@ monthly, yearly, or at the end of a simulation, depending on the parameter
 `nature.indoutfreq`. WARNING: Produces very big files!
 """
 function saveindividualdata(model::AgentBasedModel)
-    data = ""
+    data = []
     for a in allagents(model)
         (typeof(a) != Animal) && continue
-        entry = join([model.date,a.id,a.pos[1],a.pos[2],a.traits["name"],a.sex,a.age], ";")
-        data *= entry*"\n"
+        push!(data, [model.date,a.id,a.pos[1],a.pos[2],a.traits["name"],a.sex,a.age])
     end
     data
 end
diff --git a/src/parameters.toml b/src/parameters.toml
index 90bd53b5000958020dd84cf3fd66e0fd9a0ffae5..99cea9792669c6a99e81c3e6289d89db47620825 100644
--- a/src/parameters.toml
+++ b/src/parameters.toml
@@ -10,6 +10,8 @@
 configfile = "src/parameters.toml" # location of the configuration file
 outdir = "results" # location and name of the output folder
 overwrite = "ask" # overwrite the output directory? (true/false/"ask")
+csvoutput = true # save collected data in CSV files
+storedata = true # keep collected data in memory
 loglevel = "debug" # verbosity level: "debug", "info", "warn"
 processors = 2 # number of processors to use on parallel runs
 seed = 2 # seed value for the RNG (0 -> random value)
diff --git a/test/io_tests.jl b/test/io_tests.jl
index d9073c3d061ac4ebf66cb05402adfe297691b5ec..42d1fa089afadb76874739a0e360109f635824f3 100644
--- a/test/io_tests.jl
+++ b/test/io_tests.jl
@@ -58,4 +58,5 @@ end
     @test isfile(joinpath(outdir, "end.csv"))
     @test countlines(joinpath(outdir, "end.csv")) == 2
     rm(outdir, force=true, recursive=true)
+    #TODO test dataframe output
 end
diff --git a/test/simulation_tests.jl b/test/simulation_tests.jl
index 013c38268bdab64476c2afd99862c2e03b921111..51de43adf8f8d5a05e7486c2b85e4a6693c84bb0 100644
--- a/test/simulation_tests.jl
+++ b/test/simulation_tests.jl
@@ -48,7 +48,7 @@ end
     rand1 = rand()
     Random.seed!(1)
     model = initialise(TESTPARAMETERS, 218)
-    #XXX upstream problem with ArgParse (https://github.com/carlobaldassi/ArgParse.jl/issues/121)
+    #XXX upstream problem with ArgParse (https://github.com/carlobaldassi/ArgParse.jl/issues/121) - should work again with Julia 1.10
     @test_broken rand() == rand1 
     Random.seed!(1)
     @test @param(core.seed) == 218
diff --git a/test/test_parameters.toml b/test/test_parameters.toml
index ac63e7c5cae100bd89e8279ae7f9390d41b09a7e..65f0d8123f91a50ead4345583034055dda97f0ee 100644
--- a/test/test_parameters.toml
+++ b/test/test_parameters.toml
@@ -9,6 +9,8 @@
 configfile = "test_parameters.toml" # location of the configuration file
 outdir = "results_testsuite" # location and name of the output folder
 overwrite = true # overwrite the output directory? (true/false/"ask")
+csvoutput = true # save collected data in CSV files
+storedata = true # keep collected data in memory
 loglevel = "warn" # verbosity level: "debug", "info", "warn"
 processors = 6 # number of processors to use on parallel runs
 seed = 1 # seed value for the RNG (0 -> random value)