#!/usr/bin/Rscript
###
### Extract the relevant data from DWD weather files. (See the HTML documentation
### for instructions on how to obtain the data files.)
###
### Daniel Vedder, 27/07/2023
### 

library(tidyverse)

## Input and output files: replace these with the correct / desired file names
weatherfile <- "daily_weather_jena_dwd/produkt_klima_tag_18210101_20221231_02444.txt"
outfile <- "weather_jena.csv"

## place-holder value that marks a missing measurement in the input file
missing_value <- -999

## fail early with a clear message if the input file cannot be found
if (!file.exists(weatherfile)) {
    stop("Weather file not found: ", weatherfile, call. = FALSE)
}

## the input is read as a semicolon-separated table with a header line
data <- read.table(weatherfile, sep = ";", header = TRUE)

weather <- data %>%
    ## drop values before 2000 to save space
    ## (MESS_DATUM is compared numerically, presumably in the form YYYYMMDD)
    filter(MESS_DATUM >= 20000101) %>%
    ## select the relevant variables and give them descriptive names;
    ## the order given here is the column order of the output file
    select(date = MESS_DATUM,
           mean_windspeed = FM,
           precipitation = RSK,
           sunshine_hours = SDK,
           mean_vapour_pressure = VPM,
           mean_temperature = TMK,
           max_temperature = TXK,
           min_temperature = TNK) %>%
    ## convert place-holder values to NA in every measurement column
    ## (the date column is deliberately left untouched)
    mutate(across(-date, ~ na_if(.x, missing_value)))

## write the extracted data as CSV, without a row-number column
write.csv(weather, file = outfile, row.names = FALSE)