Hi.
I have a problem when refitting and forecasting a model that uses a log transformation. When I use an ETS model, the forecast values I get back are not automatically back-transformed. My data and code:
library(tidyverse)
library(fable)
library(tsibble)
# Example series: 83 rows with a `month` index (class "yearmonth", regular
# 1-month interval) and a numeric `value` column, no key variables.
# The inner structure() literal looks like dput() output of an existing tsibble.
# NOTE(review): the literal already carries class "tbl_ts"; confirm that the
# outer tsibble() call is needed and rebuilds the object as intended.
data <- tsibble(
structure(
list(month = structure(c(17167, 17198, 17226, 17257, 17287, 17318, 17348,
17379, 17410, 17440, 17471, 17501, 17532, 17563,
17591, 17622, 17652, 17683, 17713, 17744, 17775,
17805, 17836, 17866, 17897, 17928, 17956, 17987,
18017, 18048, 18078, 18109, 18140, 18170, 18201,
18231, 18262, 18293, 18322, 18353, 18383, 18414,
18444, 18475, 18506, 18536, 18567, 18597, 18628,
18659, 18687, 18718, 18748, 18779, 18809, 18840,
18871, 18901, 18932, 18962, 18993, 19024, 19052,
19083, 19113, 19144, 19174, 19205, 19236, 19266,
19297, 19327, 19358, 19389, 19417, 19448, 19478,
19509, 19539, 19570, 19601, 19631, 19662),
class = c("yearmonth", "vctrs_vctr")),
value = c(6172527.08, 6278120.18, 6973437.67, 5941742.44, 6415414.72,
7134907.23, 7445743.23, 8837412.12, 8171847.1, 8749329.27,
9773831.06, 15278919.05, 9738849.92, 9137987.69, 10151452.82,
8922484.91, 9993093.83, 9896496.87, 10263955.39, 12990312.4,
11285538.08, 12311065.04, 13600448.11, 16150351.86, 10488556.39,
9666922.03, 10962254.61, 10726475.04, 10338934.12, 9007254.09,
11490401.27, 12237168.07, 10960711.85, 11437661.59, 11868812.49,
17699210.56, 10071808.96, 9686972.65, 9239713.14, 5857380.29,
7291757.13, 8603353.53, 9918336.83, 11568844.24, 11352571.86,
11897875.22, 11940052.97, 16144488.94, 8139411.95, 9976848.7,
10872662.72, 9927701.01, 10454437.36, 10301583.61, 11628598.76,
15274281.06, 13926966.29, 16781025.09, 18954174.13, 24819775.19,
14844829.9, 14914718.28, 9144095.4, 12329146.38, 15453918.68,
16500534.43, 16615146.82, 23951715.69, 21142575.18, 24326462.9,
26426176.4, 36349408.53, 21010510.06, 20223290.62, 27817532.26,
20604363.23, 29664737.32, 24873837.66, 23208638.22, 31670247.77,
24799038.42, 30370313.52, 36115921.76)),
class = c("tbl_ts", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -83L),
key = structure(list( .rows = structure(list(1:83), ptype = integer(0),
class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -1L)),
index = structure("month", ordered = TRUE), index2 = "month",
interval = structure(list( year = 0, quarter = 0, month = 1, week = 0, day = 0, hour = 0,
minute = 0, second = 0, millisecond = 0, microsecond = 0,
nanosecond = 0, unit = 0), .regular = TRUE,
class = c("interval", "vctrs_rcrd", "vctrs_vctr"))))
# Model-selection workflow:
#   1. hold out the last `forecast_horizon` months as a test set,
#   2. fit four candidate models on the remaining months,
#   3. rank them by test-set MAPE,
#   4. refit the best one on the full series and forecast ahead.
forecast_horizon <- 4

# Train/test split. The series is `data`, defined earlier in this script; the
# name `b2b_data` used here previously is not defined anywhere in it.
data_test <- data %>%
  slice_tail(n = forecast_horizon)
data_train <- data %>%
  anti_join(data_test, by = "month")

# Candidate models: ARIMA and ETS, each on the raw and on the log(value + 1)
# scale.
model_df <- data_train %>%
  model(
    arima = ARIMA(value),
    arima_log = ARIMA(log(value + 1)),
    ets = ETS(value),
    ets_log = ETS(log(value + 1))
  )

# Forecast over the held-out months.
data_fc <- model_df %>%
  forecast(data_test)

# Accuracy of each candidate against the full series, sorted by MAPE.
fc_acc <- data_fc %>%
  accuracy(data) %>%
  select(.model, RMSE, MAPE, ME) %>%
  arrange(MAPE)

# Keep exactly one winner, even when MAPE is tied.
best_model <- fc_acc %>%
  slice_min(MAPE, with_ties = FALSE)

# Select the winning model column by name and refit it on the full series.
final_model <- model_df %>%
  select(!!best_model$.model) %>%
  refit(data)

# Forecast `forecast_horizon` months beyond the end of the data.
prediction <- final_model %>%
  forecast(h = forecast_horizon)
My result:
> prediction
# A fable: 4 x 4 [1M]
# Key: .model [1]
.model month value .mean
<chr> <mth> <dist> <dbl>
1 ets_log 2023 Гру t(N(2.9, 5.5e-05)) 17.7
2 ets_log 2024 Січ t(N(2.9, 6.5e-05)) 17.1
3 ets_log 2024 Лют t(N(2.9, 7.6e-05)) 17.1
4 ets_log 2024 Бер t(N(2.9, 8.7e-05)) 17.2
When I choose the 'arima_log' model instead, I do receive back-transformed forecast values:
> final_model <- model_df %>%
+ select(arima_log) %>%
+ refit(data)
> prediction <- final_model %>%
+ forecast(h = forecast_horizon)
> prediction
# A fable: 4 x 4 [1M]
# Key: .model [1]
.model month value .mean
<chr> <mth> <dist> <dbl>
1 arima_log 2023 Гру t(N(18, 0.02)) 46964704.
2 arima_log 2024 Січ t(N(17, 0.027)) 26696181.
3 arima_log 2024 Лют t(N(17, 0.034)) 28135074.
4 arima_log 2024 Бер t(N(17, 0.04)) 24254807.
Referred here by Forecasting: Principles and Practice, by Rob J Hyndman and George Athanasopoulos