Backtesting

Row

Reality (black) vs. forecast

Row

Table of forecasting errors

RMSE MDAE MASE MAPE SMAPE MAAPE
THI_uniformula 7.441 5.312 1.991 19.867 16.835 18.840
THI_multiforec 7.318 5.144 1.948 19.480 16.530 18.488

Components

Row

Trend component

Extra regressors

Row

Yearly seasonality

Weekly seasonality

Forecasts (ggplotly)

Row

Temperature

Row

Humidity

Row

Temperature-Humidity Index

Forecasts (dygraphs)

Row

Temperature (°C)

Row

Humidity (%)

Row

Temperature-Humidity Index

---
title: "Temperature-Humidity Index @ Kaunas"
output: 
  flexdashboard::flex_dashboard:
    theme: journal
    orientation: rows
    source_code: embed
    vertical_layout: fill
---

```{r setup, include=FALSE}

# Install / load required R libraries
if (!"pacman" %in% rownames(installed.packages())) install.packages("pacman")
pacman::p_load(xts, prophet, TSrepr, plotly, dygraphs)

# Helper function to estimate various forecasting errors
forecastingErrors <- function(realVals,foreVals,inSamp) {
  RMSE <- TSrepr::rmse(realVals,foreVals)
  MDAE <- TSrepr::mdae(realVals,foreVals)
  MASE <- TSrepr::mae(realVals,foreVals) / inSamp
  MAPE <- TSrepr::mape(realVals,foreVals)
  SMAPE <- TSrepr::smape(realVals,foreVals)
  MAAPE <- TSrepr::maape(realVals,foreVals)
  data.frame(RMSE,MDAE,MASE,MAPE,SMAPE,MAAPE)
}

# Forecasting horizon length
forecastingHorizon <- 28

# Load prepared xts data
myDataFull <- readRDS('waqi-covid19-airqualitydata.rds')
myDataTrain <- myDataFull[1:(nrow(myDataFull)-forecastingHorizon-1)]

```

```{r backtesting, include=FALSE}

# Denominator for MASE forecasting error calculations
inSampleMAE <- mean(abs(diff(myDataTrain$THI)), na.rm=T)

# Univariate forecasting of temperature for myDataTrain
df <- data.frame(ds=as.Date(time(myDataTrain)), y=as.numeric(myDataTrain[,'temperature']))
tempModel <- prophet(df, yearly.seasonality=T, weekly.seasonality=T, daily.seasonality=F)
myFuture <- make_future_dataframe(tempModel, periods = forecastingHorizon, freq='day')
tempForecast <- predict(tempModel, myFuture)

# Univariate forecasting of humidity for myDataTrain
df <- data.frame(ds=as.Date(time(myDataTrain)), y=as.numeric(myDataTrain[,'humidity']))
humidModel <- prophet(df, yearly.seasonality=T, weekly.seasonality=T, daily.seasonality=F)
myFuture <- make_future_dataframe(humidModel, periods = forecastingHorizon, freq='day')
humidForecast <- predict(humidModel, myFuture)

# Multivariate forecasting of THI for myDataTrain
df <- data.frame(ds=as.Date(time(myDataTrain)), y=as.numeric(myDataTrain[,'THI']),
                 myDataTrain[,'temperature'], myDataTrain[,'humidity'])
indexModel <- prophet(yearly.seasonality=T, weekly.seasonality=T, daily.seasonality=F)
indexModel <- add_regressor(indexModel, 'temperature')
indexModel <- add_regressor(indexModel, 'humidity')
indexModel <- fit.prophet(indexModel, df)
myFuture <- make_future_dataframe(indexModel, periods = forecastingHorizon, freq='day')
myFuture <- cbind(myFuture, temperature=tempForecast$yhat, humidity=humidForecast$yhat)
indexForecast <- predict(indexModel, myFuture)

# Calculate forecasting errors for THI on testing data (last observations)
tempPred <- tail(tempForecast$yhat, forecastingHorizon)
humidPred <- tail(humidForecast$yhat, forecastingHorizon)
THI_uniformula <- c(rep(NA, 2*forecastingHorizon), 0.8*tempPred + (humidPred/100)*(tempPred-14.4) + 46.4)
THI_multiforec <- c(rep(NA, 2*forecastingHorizon), tail(indexForecast$yhat, forecastingHorizon))
forecastedTHI <- cbind(tail(myDataFull$THI, 3*forecastingHorizon), THI_uniformula, THI_multiforec)
idx <- 1:forecastingHorizon + 2*forecastingHorizon
errorsTable <- rbind(THI_uniformula=forecastingErrors(forecastedTHI[idx,1], forecastedTHI[idx,2], inSampleMAE),
                     THI_multiforec=forecastingErrors(forecastedTHI[idx,1], forecastedTHI[idx,3], inSampleMAE))

```

```{r forecasting, include=FALSE}

# Univariate forecasting of temperature for myDataFull
df <- data.frame(ds=as.Date(time(myDataFull)), y=as.numeric(myDataFull[,'temperature']))
tempModel <- prophet(df, yearly.seasonality=T, weekly.seasonality=T, daily.seasonality=F)
myFuture <- make_future_dataframe(tempModel, periods = forecastingHorizon, freq='day')
tempForecast <- predict(tempModel, myFuture)

# Univariate forecasting of humidity for myDataFull
df <- data.frame(ds=as.Date(time(myDataFull)), y=as.numeric(myDataFull[,'humidity']))
humidModel <- prophet(df, yearly.seasonality=T, weekly.seasonality=T, daily.seasonality=F)
myFuture <- make_future_dataframe(humidModel, periods = forecastingHorizon, freq='day')
humidForecast <- predict(humidModel, myFuture)

# Multivariate forecasting of THI for myDataFull
df <- data.frame(ds=as.Date(time(myDataFull)), y=as.numeric(myDataFull[,'THI']),
                 myDataFull[,'temperature'], myDataFull[,'humidity'])
indexModel <- prophet(yearly.seasonality=T, weekly.seasonality=T, daily.seasonality=F)
indexModel <- add_regressor(indexModel, 'temperature')
indexModel <- add_regressor(indexModel, 'humidity')
indexModel <- fit.prophet(indexModel, df)
myFuture <- make_future_dataframe(indexModel, periods = forecastingHorizon, freq='day')
myFuture <- cbind(myFuture, temperature=tempForecast$yhat, humidity=humidForecast$yhat)
indexForecast <- predict(indexModel, myFuture)

```

Backtesting
=======================================================================

Row
-------------------------------------

### **Reality (black) vs. forecast**

```{r plot.xts, fig.width = 10}

gg <- xts::plot.xts(forecastedTHI)
xts::addLegend(legend.names = names(forecastedTHI), lty=1, col=1:ncol(forecastedTHI))

```

Row {data-height=110}
-------------------------------------

### **Table of forecasting errors**

```{r table}

knitr::kable(errorsTable, digits=3)

```

Components
=======================================================================

```{r prophet, fig.show='hide'}

gg <- prophet_plot_components(indexModel, indexForecast)

```

Row {.tabset .tabset-fade}
-------------------------------------

### **Trend component**

```{r prophet1}

ggplotly(gg[[1]])

```

### **Extra regressors**

```{r prophet4}

ggplotly(gg[[4]])

```

Row {.tabset .tabset-fade}
-------------------------------------

### **Yearly seasonality**

```{r prophet3}

ggplotly(gg[[3]])

```

### **Weekly seasonality**

```{r prophet2}

ggplotly(gg[[2]])

```

Forecasts (ggplotly)
=======================================================================

Row
-------------------------------------

### **Temperature**

```{r ggplotly-temp}

ggplotly(plot(tempModel, tempForecast, ylabel='Celsius (°C)', xlabel='Time (daily)'))

```

Row
-------------------------------------

### **Humidity**

```{r ggplotly-humid}

ggplotly(plot(humidModel, humidForecast, ylabel='Percentage (%)', xlabel='Time (daily)'))

```

Row
-------------------------------------

### **Temperature-Humidity Index**

```{r ggplotly-THI}

ggplotly(plot(indexModel, indexForecast, ylabel='THI', xlabel='Time (daily)'))

```

Forecasts (dygraphs)
=======================================================================

```{r dygraphs, include=FALSE}

timeStamps <- tail(time(myDataFull),1) + 1:forecastingHorizon

```

Row
-------------------------------------

### **Temperature (°C)**

```{r dygraphs-temp}

df <- tempForecast[tempForecast$ds %in% timeStamps, c('yhat_lower','yhat','yhat_upper')]
names(df) <- c('Lo.80', 'Point.Forecast', 'Hi.80')
myFore <- xts(df, order.by = timeStamps)
plotData <- cbind(myDataFull[,'temperature'], myFore)
dygraph(plotData, group = "Air-Data") %>%
  dySeries(c('temperature'), label = 'Temperature') %>%
  dySeries(c("Lo.80", "Point.Forecast", "Hi.80"), label = "Forecast") %>%
  dyCrosshair(direction = "vertical")

```

Row
-------------------------------------

### **Humidity (%)**

```{r dygraphs-humid}

df <- humidForecast[humidForecast$ds %in% timeStamps, c('yhat_lower','yhat','yhat_upper')]
names(df) <- c('Lo.80', 'Point.Forecast', 'Hi.80')
myFore <- xts(df, order.by = timeStamps)
plotData <- cbind(myDataFull[,'humidity'], myFore)
dygraph(plotData, group = "Air-Data") %>%
  dySeries(c('humidity'), label = 'Humidity') %>%
  dySeries(c("Lo.80", "Point.Forecast", "Hi.80"), label = "Forecast") %>%
  dyCrosshair(direction = "vertical")

```

Row
-------------------------------------

### **Temperature-Humidity Index**

```{r dygraphs-THI}

df <- indexForecast[indexForecast$ds %in% timeStamps, c('yhat_lower','yhat','yhat_upper')]
names(df) <- c('Lo.80', 'Point.Forecast', 'Hi.80')
myFore <- xts(df, order.by = timeStamps)
plotData <- cbind(myDataFull[,'THI'], myFore)
dygraph(plotData, group = "Air-Data") %>%
  dySeries(c('THI'), label = 'THI') %>%
  dySeries(c("Lo.80", "Point.Forecast", "Hi.80"), label = "Forecast") %>%
  dyRangeSelector() %>% dyCrosshair(direction = "vertical")

```