diff --git a/data/regions/auxiliary/extract_weather_data.R b/data/regions/auxiliary/extract_weather_data.R index 1463984e97741f1ee7e9881fd3b1e5438885b562..57f737911bbbe5ad29fdbba3547596423b6c3153 100755 --- a/data/regions/auxiliary/extract_weather_data.R +++ b/data/regions/auxiliary/extract_weather_data.R @@ -1,31 +1,91 @@ #!/usr/bin/Rscript ### -### Extract the relevant data from DWD weather files. (See the HTML documentation -### for instructions on how to obtain the data files.) +### Download and extract the required weather data from the DWD archives. ### -### Daniel Vedder, 27/07/2023 +### Daniel Vedder, original 27/07/2023, revised 11/2/2025 ### library(tidyverse) +library(rdwd) -## replace this with the correct file name -weatherfile = "produkt_klima_tag_19490701_20231231_00896.txt" +## See here for rdwd docs: +## https://bookdown.org/brry/rdwd/ +## https://www.rdocumentation.org/packages/rdwd/versions/1.8.0 +## https://www.dwd.de/DE/leistungen/cdc/cdc_ueberblick-klimadaten.html?nn=16102 -data = read.table(weatherfile, sep=";", header=T) +#rdwd::updateRdwd() # run this now and again to make sure we have the latest file index -weather = data %>% - ## drop values before 1990 to save space - filter(MESS_DATUM>=19900101) %>% +## SELECTION PARAMETERS + +region = "Oberrhein" # select from `stationid` list below + +startdate = as.Date("1990-01-01") # earliest date to include (if available) +enddate = as.Date("2024-12-31") # latest date to include (if available) + +## DOWNLOAD DATA + +### observed climate data (these include most of our parameters) + +stationid = c("Jena" = 2444, + "Eichsfeld" = 2925, + "Thüringer Becken" = 896, + "Hohenlohe" = 3761, + "Bodensee" = 6263, + "Oberrhein" = 5275) + +observed_url = selectDWD(id = stationid[region], + res = "daily", + per = "historical", + var = "kl") + +climdata = dataDWD(observed_url, dir = getwd()) %>% as_tibble + +### derived agrometeorological variables (needed for the potential evapotranspiration) +### (see https://bookdown.org/brry/rdwd/use-case-derived-data.html) + +deriv_base = "ftp://opendata.dwd.de/climate_environment/CDC/derived_germany" +soil_index = indexFTP(folder="soil/daily", base=deriv_base) %>% + createIndex(base=deriv_base) +colnames(soil_index)[1:2] = c("var", "res") # inverted column order in this folder + +derived_url = selectDWD(id = stationid[region], + res = "daily", + per = "historical", + var = "soil", + base = deriv_base, + findex = soil_index) + +soildata = dataDWD(derived_url, base = deriv_base, dir = getwd()) + +## PROCESS DATA + +weather = climdata %>% + ## drop values outside of the specified date range + filter(MESS_DATUM >= startdate, MESS_DATUM <= enddate) %>% ## select relevant variables and convert place-holder values to NA - select(MESS_DATUM, FM, RSK, SDK, VPM, TMK, TXK, TNK) %>% + select(MESS_DATUM, FM, RSK, SDK, NM, TMK, TXK, TNK, UPM) %>% mutate(date=MESS_DATUM, MESS_DATUM=NULL, - mean_windspeed=na_if(FM, -999), FM=NULL, - precipitation=na_if(RSK, -999), RSK=NULL, - sunshine_hours=na_if(SDK, -999), SDK=NULL, - mean_vapour_pressure=na_if(VPM, -999), VPM=NULL, - mean_temperature=na_if(TMK, -999), TMK=NULL, - max_temperature=na_if(TXK, -999), TXK=NULL, - min_temperature=na_if(TNK, -999), TNK=NULL) - -## replace with the desired file name -write.csv(weather, file="weather.csv", row.names=FALSE) + mean_windspeed=FM, FM=NULL, + precipitation=RSK, RSK=NULL, + sunshine_hours=SDK, SDK=NULL, + mean_cloud_cover=NM, NM=NULL, + mean_humidity=UPM, UPM=NULL, + mean_temperature=TMK, TMK=NULL, + max_temperature=TXK, TXK=NULL, + min_temperature=TNK, TNK=NULL) + +firstdate = weather$date[1] +lastdate = weather$date[nrow(weather)] + +ETo = soildata[names(soildata)[grep("v2", names(soildata))]][[1]] %>% + as_tibble %>% select(Datum, VPGFAO) %>% + filter(Datum >= firstdate, Datum <= lastdate) %>% + mutate(potential_evapotranspiration=VPGFAO, VPGFAO=NULL) + +# align and combine the two data sets timewise +gapbefore = rep(NA, as.numeric(ETo$Datum[1] - firstdate)) +gapafter = rep(NA, as.numeric(lastdate - ETo$Datum[nrow(ETo)])) +potevap = c(gapbefore, ETo$potential_evapotranspiration, gapafter) +weather = weather %>% mutate(potential_evapotranspiration = potevap) + +write.csv(weather, file=paste0(region, "_weather.csv"), row.names=FALSE)