Skip to content
Snippets Groups Projects
Commit 99a32318 authored by Marco Matthies's avatar Marco Matthies
Browse files

Fix weather data errors and extract weather R script

parent b7a3886f
No related branches found
No related tags found
No related merge requests found
TODO.md
docs/planning
*results*/
profile*txt
\ No newline at end of file
profile*txt
# R stuff
.Rhistory
.Rprofile
renv/
# temporary downloaded files from DWD
/data/regions/auxiliary/DWDdata/
/data/regions/auxiliary/daily_kl_historical_tageswerte_*.zip
/data/regions/auxiliary/soil_daily_historical_*.txt
/data/regions/auxiliary/soil_daily_historical_*.txt.gz
......@@ -15,77 +15,117 @@ library(rdwd)
#rdwd::updateRdwd() # run this now and again to make sure we have the latest file index
## SELECTION PARAMETERS
# These three values configure the whole script run: which region's station
# is used and which date window is kept.
region = "Oberrhein" # select from `stationid` list below
startdate = as.Date("1990-01-01") # earliest date to include (if available)
enddate = as.Date("2024-12-31") # latest date to include (if available)
## DOWNLOAD DATA
### observed climate data (these include most of our parameters)
# Lookup table from region name to the station id passed to selectDWD().
stationid = c("Jena" = 2444,
"Eichsfeld" = 2925,
"Thüringer Becken" = 896,
"Hohenlohe" = 3761,
"Bodensee" = 6263,
"Oberrhein" = 5275)
# Select the daily, historical "kl" data set for the chosen station.
observed_url = selectDWD(id = stationid[region],
res = "daily",
per = "historical",
var = "kl")
# Fetch the selected data (dir = getwd() is handed to dataDWD as its
# directory) and convert the result to a tibble.
climdata = dataDWD(observed_url, dir = getwd()) %>% as_tibble
### derived agrometeorological variables (needed for the potential evapotranspiration)
### (see https://bookdown.org/brry/rdwd/use-case-derived-data.html)
# The derived data live under a different base URL, so an index for the
# soil/daily folder is built here and passed to selectDWD() via `findex`.
deriv_base = "ftp://opendata.dwd.de/climate_environment/CDC/derived_germany"
soil_index = indexFTP(folder="soil/daily", base=deriv_base) %>%
createIndex(base=deriv_base)
colnames(soil_index)[1:2] = c("var", "res") # inverted column order in this folder
# Same station and resolution as above, but selecting the "soil" variable
# from the derived-data index.
derived_url = selectDWD(id = stationid[region],
res = "daily",
per = "historical",
var = "soil",
base = deriv_base,
findex = soil_index)
soildata = dataDWD(derived_url, base = deriv_base, dir = getwd())
## PROCESS DATA
weather = climdata %>%
## drop values outside of the specified date range
# Lookup table: region name -> station id handed to selectDWD().
# Names are the region labels accepted by download_data() and by `--all`.
region_to_stationid = c(
  Jena               = 2444,
  Eichsfeld          = 2925,
  "Thüringer Becken" = 896,
  Hohenlohe          = 3761,
  Bodensee           = 6263,
  Oberrhein          = 5275
)
# Download and assemble the daily weather record for one region.
#
# Fetches the station's daily historical "kl" observations and the derived
# daily "soil" data set through rdwd, keeps the requested date window, gives
# the retained columns descriptive names, and joins the potential
# evapotranspiration (column VPGFAO of the "v2" soil table) by date.
#
# Arguments:
#   region    - a single region name from `region_to_stationid`, e.g. "Jena"
#   startdate - earliest date to include (if available)
#   enddate   - latest date to include (if available)
#
# Returns a tibble with one row per calendar day between the first and the
# last observed date in the window; days or variables without data are NA.
#
# Stops with an error if `region` is unknown, if no observations fall inside
# the date window, or if the soil download contains no "v2" table.
download_data = function(
    region, # e.g. "Jena"
    startdate = as.Date("1990-01-01"), # earliest date to include (if available)
    enddate = as.Date("2024-12-31") # latest date to include (if available)
) {
  ## VALIDATE INPUT
  regions = names(region_to_stationid)
  if (!is.character(region) || length(region) != 1 || !(region %in% regions)) {
    stop("Region '", paste(region, collapse = ", "),
         "' not found in available regions: ",
         paste(regions, collapse = ", "),
         call. = FALSE)
  }
  stationid = region_to_stationid[[region]]

  ## DOWNLOAD DATA
  ### observed climate data (these include most of our parameters)
  observed_url = selectDWD(id = stationid,
                           res = "daily",
                           per = "historical",
                           var = "kl")
  climdata = dataDWD(observed_url, dir = getwd()) %>% as_tibble()

  ### derived agrometeorological variables (needed for the potential evapotranspiration)
  ### (see https://bookdown.org/brry/rdwd/use-case-derived-data.html)
  deriv_base = "ftp://opendata.dwd.de/climate_environment/CDC/derived_germany"
  soil_index = indexFTP(folder = "soil/daily", base = deriv_base) %>%
    createIndex(base = deriv_base)
  colnames(soil_index)[1:2] = c("var", "res") # inverted column order in this folder
  derived_url = selectDWD(id = stationid,
                          res = "daily",
                          per = "historical",
                          var = "soil",
                          base = deriv_base,
                          findex = soil_index)
  soildata = dataDWD(derived_url, base = deriv_base, dir = getwd())

  ## PROCESS DATA
  weather = climdata %>%
    ## drop values outside of the specified date range
    filter(MESS_DATUM >= startdate, MESS_DATUM <= enddate) %>%
    ## keep only the variables used downstream, under descriptive names
    ## NOTE(review): nothing here converts place-holder values to NA;
    ## presumably rdwd already does that when reading -- confirm.
    select(date = MESS_DATUM,
           mean_windspeed = FM,
           precipitation = RSK,
           sunshine_hours = SDK,
           mean_cloud_cover = NM,
           mean_humidity = UPM,
           mean_temperature = TMK,
           max_temperature = TXK,
           min_temperature = TNK)
  if (nrow(weather) == 0) {
    # min()/max() on an empty column would yield +/-Inf and break seq.Date()
    stop("No observations for region '", region, "' between ",
         format(startdate), " and ", format(enddate), ".",
         call. = FALSE)
  }

  ## one row per calendar day, so gaps in either data set show up as NA rows
  firstdate = min(weather$date, na.rm = TRUE)
  lastdate = max(weather$date, na.rm = TRUE)
  all_dates = tibble(date = seq.Date(firstdate, lastdate, by = "day"))

  ## the soil download is expected to be a named list of tables; use the
  ## first one whose name contains "v2"
  v2_tables = grep("v2", names(soildata))
  if (length(v2_tables) == 0) {
    stop("No 'v2' table found in the soil data downloaded for region '",
         region, "'.", call. = FALSE)
  }
  ETo = soildata[[v2_tables[1]]] %>%
    as_tibble() %>%
    select(date = Datum, potential_evapotranspiration = VPGFAO) %>%
    filter(date >= firstdate, date <= lastdate)

  ## align both data sets by date instead of by row position
  weather = all_dates %>%
    left_join(weather, by = "date") %>%
    left_join(ETo, by = "date")
  return(weather)
}
# Command-line driver.
#
# Arguments:
#   args        - character vector of command-line arguments: region names,
#                 or "--all" for every entry of `region_to_stationid`,
#                 or "-h" / "--help" for the usage text
#   script_name - name shown in the usage text
#
# Writes one file "weather_<region>.csv" (spaces in the region name replaced
# by underscores) per processed region. Returns 0 on success or after
# printing help, and -1 when no arguments were given.
main = function(args, script_name = "extract_weather_data.R") {
  show_usage = function() {
    cat(paste("usage:", script_name), "region1 [region2] ...\n")
    cat(paste(" ", script_name), "--all\n")
  }

  # guard clauses: nothing to do, or help requested
  if (length(args) == 0) {
    cat("error: not enough arguments\n")
    show_usage()
    return(-1)
  }
  if (any(c("-h", "--help") %in% args)) {
    show_usage()
    return(0)
  }

  regions = if ("--all" %in% args) names(region_to_stationid) else args
  for (region in regions) {
    out_file = paste0("weather_", gsub(" ", "_", region), ".csv")
    write.csv(download_data(region), file = out_file, row.names = FALSE)
  }
  0
}
# Script entry point: when this file is executed directly (no calling frames),
# echo the requested regions, run main() and exit with its status.
if (sys.nframe() == 0) {
  cli_args = commandArgs(trailingOnly = TRUE)
  cat("\nRegions = ", cli_args, "\n\n")
  quit(status = main(cli_args))
}
Source diff could not be displayed: it is too large. Options to address this: view the blob.
This diff is collapsed.
......@@ -23,7 +23,7 @@ enddate = 2023-12-31 # last day of the simulation
#enddate = 2022-01-02 # last day of the simulation (test value)
[world]
mapdirectory = "data/regions/jena-small" # the directory in which all geographic data are stored
mapdirectory = "data/regions/jena" # the directory in which all geographic data are stored
mapresolution = 10 # map resolution in meters
landcovermap = "landcover.tif" # name of the landcover map in the map directory
farmfieldsmap = "fields.tif" # name of the field geometry map in the map directory
......
......@@ -28,24 +28,27 @@ Load a weather file, extract the values that are relevant to this model run
(specified by start and end dates), and return a dictionary of Weather objects
mapped to dates.
**Note:** This requires a weather file in the format produced by `data/extract_weather_data.R`.
**Note:** This requires a weather file in the format produced by
`data/regions/auxiliary/extract_weather_data.R`.
"""
function initweather(weatherfile::String, startdate::Date, enddate::Date)
    @debug "Initialising weather"
    # Expected layout: a `yyyy-mm-dd` date column followed by nine numeric
    # columns (the eight observed variables plus potential evapotranspiration);
    # "NA" entries are read as `missing`.
    data = CSV.File(weatherfile, missingstring="NA", dateformat="yyyy-mm-dd",
                    types=[Date, Float64, Float64, Float64, Float64, Float64,
                           Float64, Float64, Float64, Float64])
    weather = Dict{Date,Weather}()
    for row in data
        # only keep the days that fall inside the simulation period
        if startdate <= row.date <= enddate
            weather[row.date] = Weather(row.mean_windspeed, row.precipitation,
                                        row.sunshine_hours, row.mean_cloud_cover,
                                        row.mean_humidity, row.mean_temperature,
                                        row.max_temperature, row.min_temperature,
                                        row.potential_evapotranspiration)
        end
    end
    # both endpoints are inclusive, hence the +1
    expecteddays = Dates.value(enddate - startdate) + 1
    if length(weather) < expecteddays
        @warn ("There are missing days in the weather input file:"
               * " expected $(expecteddays), got $(length(weather)).")
    end
    weather
end
......
......@@ -60,10 +60,6 @@ end
@testset "Weather initialisation" begin
# these tests are specific to the Jena weather file
model = inittestmodel()
@test_logs((:warn, "There are missing days in the weather input file."),
Ps.initweather(joinpath(TESTSETTINGS["world.mapdirectory"],
TESTSETTINGS["world.weatherfile"]),
Date("2022-01-01"), Date("2023-12-31")))
@test length(keys(model.weather)) == 59
@test ismissing(Ps.windspeed(model))
@test Ps.precipitation(model) == 1.3
......@@ -73,5 +69,5 @@ end
@test Ps.maxtemp(model) == 7.5
@test Ps.mintemp(model) == 0.1
stepsimulation!(model)
@test Ps.vapourpressure(model) == 7.7
@test Ps.evapotranspiration(model) == 0.6
end
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment