Hi Mitch,
Thanks for the reply. I am having difficulties in understanding! A reproducible 'By-Hand' numerical example based on data in fpp3 may help:
Let's say there are 2 models: arima210
and arima013
, each forecasting 5 steps ahead out-of-sample (OOS).
AIM: I want to combine equally weighted.
#===============================
options(pillar.print_max = Inf)
options(pillar.width = Inf)
options(digits = 12)
options(pillar.signif = 12)
library(fpp3)
library(cowplot)
library(distributional)
train <- global_economy %>%
filter(Code == "CAF") %>%
filter(Year <= 2012)
fit <- train %>%
model(arima210 = ARIMA(Exports ~ pdq(2,1,0)),
arima013 = ARIMA(Exports ~ pdq(0,1,3)))
fit %>% select('arima210') %>% report()
fit %>% select('arima210') %>% tidy()
fit %>% select('arima210') %>% glance()
fit %>% select('arima013') %>% report()
fit %>% select('arima013') %>% tidy()
fit %>% select('arima013') %>% glance()
f_arima210 <- fit %>%
forecast(h=5) %>%
filter(.model=='arima210')
f_arima210
f_arima210$.mean
f_arima210.sigma <- rep(0, 5)
for (i in 1:5) {
f_arima210.sigma[i] <- t(matrix(unlist(f_arima210[[4]][i])))[2]
}
f_arima210.sigma
f_arima013 <- fit %>%
forecast(h=5) %>%
filter(.model=='arima013')
f_arima013
f_arima013$.mean
f_arima013.sigma <- rep(0, 5)
for (i in 1:5) {
f_arima013.sigma[i] <- t(matrix(unlist(f_arima013[[4]][i])))[2]
}
f_arima013.sigma
test <- global_economy %>%
filter(Code == "CAF") %>%
filter(Year > 2012)
test$Exports
p_arima210 <- f_arima210 %>%
autoplot(bind_rows(train, test))
p_arima013 <- f_arima013 %>%
autoplot(bind_rows(train, test))
plot_grid(p_arima210, p_arima013, labels = c('f_arima210', 'f_arima013'), ncol = 1)
fc.mean <- 0.5*f_arima210$.mean + 0.5*f_arima013$.mean
fc.mean
fe_arima210 <- f_arima210$.mean - test$Exports
fe_arima013 <- f_arima013$.mean - test$Exports
fe <- data.frame(fe_arima210,fe_arima013)
COV <- cov(fe)
COV
COR <- cov2cor(COV)
COR
#Var(aX + bY) = a^2*Var(X) + b^2*Var(Y) + 2*a*b*COR(X,Y)*sqrt(Var(X))*sqrt(Var(Y))
a <- 0.5
b <- 0.5
fc.sigma <- sqrt( a^2*f_arima210.sigma^2 + b^2*f_arima013.sigma^2 + 2*a*b*COR[1,2]*f_arima210.sigma*f_arima013.sigma )
fc.sigma
f_arima210.sigma
f_arima013.sigma
COR
fc.sigma
#PI's
L80 <- fc.mean + qnorm(p=.1)*fc.sigma
U80 <- fc.mean + qnorm(p=.9)*fc.sigma
L95 <- fc.mean + qnorm(p=.025)*fc.sigma
U95 <- fc.mean + qnorm(p=.975)*fc.sigma
L80
U80
L95
U95
#===============================
#dynamic forecasts ARIMA, Combination
f_D5 <- train %>%
model(arima210 = ARIMA(Exports ~ pdq(2,1,0)),
arima013 = ARIMA(Exports ~ pdq(0,1,3))
) %>%
mutate(Combination = (arima210 + arima013)/2) %>%
forecast(h = 5, point_forecast = lst(.mean = mean))
f_D5
f_D5 %>%
hilo()
f_D5 %>%
autoplot()
fcc.sigma <- rep(0, 15)
for (i in 1:15) {
fcc.sigma[i] <- t(matrix(unlist(f_D5[[4]][i])))[2]
}
fcc.sigma[11:15]
#dynamic forecasts Ensemble
ensemble <- f_D5 %>%
filter(.model %in% c("arima210", "arima013")) %>%
summarise(
Exports = dist_mixture(
Exports[1], Exports[2],
weights = c(0.5, 0.5)),
.mean = mean(Exports),
.sd = sqrt(variance(Exports))
) %>%
mutate(.model = "Ensemble") %>%
as_tibble() %>%
select(.model,Year,Exports,.mean,.sd)
ensemble
I have calculated the combined forecast means: fc.mean
, and both the the covariance and correlation matrices:
A combined summation variance including weighting a and b is:
Var(aX + bY) = a^2Var(X) + b^2Var(Y) + 2ab*COR(X,Y)*sqrt(Var(X))*sqrt(Var(Y)).
The results from 'By-Hand'
fc.sigma: [1] 2.66196163315 2.99746982500 3.33905502046 3.92779455129 4.39688110002
and from mutate(Combination = (arima210 + arima013)/2)
fcc.sigma[11:15]: [1] 2.63371832249 2.96566439305 3.30365549204 3.88632914027 4.35073920269
are different!
However, from fabletools:::forecast.model_combination()
, there is the matrix calculation:
fc_cov <- map_dbl(fc_sd, function(sigma) (diag(sigma) %*% fc_cov %*% t(diag(sigma)))[1, 2])
Although I'm not certain how this is related to Var(aX + bY)?
I would be grateful for guidance.
Cheers,
Amarjit