Skip to content
Snippets Groups Projects
Commit 1adea55f authored by xo30xoqa's avatar xo30xoqa
Browse files

Rewrote extract_weather_data.R to use rdwd

This almost fully automates downloading and processing the weather data
parent c034fec6
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/Rscript #!/usr/bin/Rscript
### ###
### Extract the relevant data from DWD weather files. (See the HTML documentation ### Download and extract the required weather data from the DWD archives.
### for instructions on how to obtain the data files.)
### ###
### Daniel Vedder, 27/07/2023 ### Daniel Vedder, original 27/07/2023, revised 11/2/2025
### ###
library(tidyverse) library(tidyverse)
library(rdwd)
## replace this with the correct file name ## See here for rdwd docs:
weatherfile = "produkt_klima_tag_19490701_20231231_00896.txt" ## https://bookdown.org/brry/rdwd/
## https://www.rdocumentation.org/packages/rdwd/versions/1.8.0
## https://www.dwd.de/DE/leistungen/cdc/cdc_ueberblick-klimadaten.html?nn=16102
data = read.table(weatherfile, sep=";", header=T) #rdwd::updateRdwd() # run this now and again to make sure we have the latest file index
weather = data %>% ## SELECTION PARAMETERS
## drop values before 1990 to save space
filter(MESS_DATUM>=19900101) %>% region = "Oberrhein" # select from `stationid` list below
startdate = as.Date("1990-01-01") # earliest date to include (if available)
enddate = as.Date("2024-12-31") # latest date to include (if available)
## DOWNLOAD DATA
### observed climate data (these include most of our parameters)
stationid = c("Jena" = 2444,
"Eichsfeld" = 2925,
"Thüringer Becken" = 896,
"Hohenlohe" = 3761,
"Bodensee" = 6263,
"Oberrhein" = 5275)
observed_url = selectDWD(id = stationid[region],
res = "daily",
per = "historical",
var = "kl")
climdata = dataDWD(observed_url, dir = getwd()) %>% as_tibble
### derived agrometeorological variables (needed for the potential evapotranspiration)
### (see https://bookdown.org/brry/rdwd/use-case-derived-data.html)
deriv_base = "ftp://opendata.dwd.de/climate_environment/CDC/derived_germany"
soil_index = indexFTP(folder="soil/daily", base=deriv_base) %>%
createIndex(base=deriv_base)
colnames(soil_index)[1:2] = c("var", "res") # inverted column order in this folder
derived_url = selectDWD(id = stationid[region],
res = "daily",
per = "historical",
var = "soil",
base = deriv_base,
findex = soil_index)
soildata = dataDWD(derived_url, base = deriv_base, dir = getwd())
## PROCESS DATA
weather = climdata %>%
## drop values outside of the specified date range
filter(MESS_DATUM >= startdate, MESS_DATUM <= enddate) %>%
## select relevant variables and convert place-holder values to NA ## select relevant variables and convert place-holder values to NA
select(MESS_DATUM, FM, RSK, SDK, VPM, TMK, TXK, TNK) %>% select(MESS_DATUM, FM, RSK, SDK, NM, TMK, TXK, TNK, UPM) %>%
mutate(date=MESS_DATUM, MESS_DATUM=NULL, mutate(date=MESS_DATUM, MESS_DATUM=NULL,
mean_windspeed=na_if(FM, -999), FM=NULL, mean_windspeed=FM, FM=NULL,
precipitation=na_if(RSK, -999), RSK=NULL, precipitation=RSK, RSK=NULL,
sunshine_hours=na_if(SDK, -999), SDK=NULL, sunshine_hours=SDK, SDK=NULL,
mean_vapour_pressure=na_if(VPM, -999), VPM=NULL, mean_cloud_cover=NM, NM=NULL,
mean_temperature=na_if(TMK, -999), TMK=NULL, mean_humidity=UPM, UPM=NULL,
max_temperature=na_if(TXK, -999), TXK=NULL, mean_temperature=TMK, TMK=NULL,
min_temperature=na_if(TNK, -999), TNK=NULL) max_temperature=TXK, TXK=NULL,
min_temperature=TNK, TNK=NULL)
## replace with the desired file name
write.csv(weather, file="weather.csv", row.names=FALSE) firstdate = weather$date[1]
lastdate = weather$date[nrow(weather)]
ETo = soildata[names(soildata)[grep("v2", names(soildata))]][[1]] %>%
as_tibble %>% select(Datum, VPGFAO) %>%
filter(Datum >= firstdate, Datum <= lastdate) %>%
mutate(potential_evapotranspiration=VPGFAO, VPGFAO=NULL)
# align and combine the two data sets timewise
gapbefore = rep(NA, as.numeric(ETo$Datum[1] - firstdate))
gapafter = rep(NA, as.numeric(lastdate - ETo$Datum[nrow(ETo)]))
potevap = c(gapbefore, ETo$potential_evapotranspiration, gapafter)
weather = weather %>% mutate(potential_evapotranspiration = potevap)
write.csv(weather, file=paste0(region, "_weather.csv"), row.names=FALSE)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment