My data set is weekly and contains two variables, Production and Shipment. Production is the independent variable and Shipment is the dependent variable. First, I am trying to forecast the Production values and then use them as a regressor to forecast the Shipment variable.
If I run the ARIMA model using the training set date range from "2018-12-31" to "2021-11-22",
the model runs within 10 minutes and I can see the model values.
Using the same model, if I extend the training set date range so that it runs from "2018-12-31" to "2021-12-27",
the model runs for so long that it never finishes executing, and I cannot view the model output.
Could you please help me with this problem?
Thank you for the support
# Weekly input data, hard-coded as a 160-row data.frame (row.names = c(NA, 160L)).
#   YearWeek   - character week code, "201901" through "202203"; parsed further
#                down as a 4-digit year followed by a 2-digit week number.
#   Shipment   - numeric weekly shipment values (the dependent variable).
#   Production - numeric weekly production values (the independent variable).
Original.df<-structure(list(YearWeek = c("201901", "201902", "201903", "201904",
"201905", "201906", "201907", "201908", "201909", "201910", "201911",
"201912", "201913", "201914", "201915", "201916", "201917", "201918",
"201919", "201920", "201921", "201922", "201923", "201924", "201925",
"201926", "201927", "201928", "201929", "201930", "201931", "201932",
"201933", "201934", "201935", "201936", "201937", "201938", "201939",
"201940", "201941", "201942", "201943", "201944", "201945", "201946",
"201947", "201948", "201949", "201950", "201951", "201952", "202001",
"202002", "202003", "202004", "202005", "202006", "202007", "202008",
"202009", "202010", "202011", "202012", "202013", "202014", "202015",
"202016", "202017", "202018", "202019", "202020", "202021", "202022",
"202023", "202024", "202025", "202026", "202027", "202028", "202029",
"202030", "202031", "202032", "202033", "202034", "202035", "202036",
"202037", "202038", "202039", "202040", "202041", "202042", "202043",
"202044", "202045", "202046", "202047", "202048", "202049", "202050",
"202051", "202052", "202053", "202101", "202102", "202103", "202104",
"202105", "202106", "202107", "202108", "202109", "202110", "202111",
"202112", "202113", "202114", "202115", "202116", "202117", "202118",
"202119", "202120", "202121", "202122", "202123", "202124", "202125",
"202126", "202127", "202128", "202129", "202130", "202131", "202132",
"202133", "202134", "202135", "202136", "202137", "202138", "202139",
"202140", "202141", "202142", "202143", "202144", "202145", "202146",
"202147", "202148", "202149", "202150", "202151", "202152", "202201",
"202202", "202203"), Shipment = c(399, 1336, 1018, 1126, 1098,
1235, 1130, 1258, 897, 1333, 1221, 1294, 1628, 1611, 1484, 1238,
1645, 1936, 1664, 1482, 2060, 1964, 1875, 1645, 2039, 1640, 733,
1764, 1639, 1968, 1692, 1677, 1542, 1299, 1328, 1130, 1741, 1929,
1843, 1427, 1467, 1450, 1041, 1238, 1721, 1757, 1813, 1001, 1208,
1916, 1435, 540, 681, 1436, 1170, 938, 1206, 1648, 1169, 1311,
1772, 1333, 1534, 1365, 1124, 846, 732, 753, 1266, 1652, 1772,
1814, 1649, 1191, 1298, 986, 1296, 1066, 777, 1041, 1388, 1289,
1097, 1356, 1238, 1732, 1109, 1104, 1155, 1334, 1094, 770, 1411,
1304, 1269, 1093, 1096, 1121, 943, 695, 1792, 2033, 1586, 768,
685, 993, 1406, 1246, 1746, 1740, 938, 160, 1641, 1373, 1023,
1173, 1611, 928, 1038, 1009, 1274, 1369, 1231, 1053, 1163, 880,
870, 1131, 882, 1143, 632, 394, 510, 543, 535, 824, 874, 591,
512, 448, 247, 452, 470, 747, 545, 639, 326, 414, 604, 640, 458,
272, 524, 589, 666, 217, 215, 348, 537, 466), Production = c(794,
1400, 1505, 1055, 1396, 1331, 1461, 1623, 1513, 1667, 1737, 1264,
1722, 1587, 2094, 1363, 2007, 1899, 1749, 1693, 1748, 1455, 2078,
1702, 1736, 1885, 860, 1372, 1716, 1290, 1347, 1451, 1347, 1409,
1203, 1235, 1397, 1557, 1406, 1451, 1704, 670, 1442, 1336, 1611,
1401, 1749, 744, 1558, 1665, 1317, 41, 441, 1351, 1392, 1180,
1447, 1265, 1485, 1494, 1543, 1581, 1575, 1597, 1191, 1386, 889,
1002, 1573, 1380, 1346, 1243, 1009, 965, 1051, 905, 1094, 1194,
891, 1033, 921, 880, 1135, 1058, 1171, 1022, 956, 880, 902, 983,
1014, 945, 1021, 1058, 1191, 1139, 1292, 573, 1173, 514, 1292,
1310, 1239, 41, 41, 1182, 1028, 1028, 1196, 1214, 1045, 256, 1451,
1344, 1352, 1257, 1444, 786, 1369, 1185, 1262, 1025, 949, 1051,
941, 727, 911, 951, 987, 1136, 884, 770, 959, 1102, 1109, 1098,
988, 983, 1002, 904, 1147, 1149, 919, 1058, 1112, 479, 1028,
1154, 1126, 1155, 1208, 536, 839, 1178, 1225, 539, 41, 862, 839,
873)), row.names = c(NA, 160L), class = "data.frame")
# Add two helper columns to Original.df:
#   isoweek - YearWeek rewritten as an ISO week string "YYYY-Www-1"
#             (weekday 1 of that week), e.g. "201901" -> "2019-W01-1".
#   date    - the calendar Date that ISOweek2date() returns for that string.
# Going through ISO weeks lets codes such as "202053" map to a valid date.
Original.df <- mutate(
  Original.df,
  isoweek = stringr::str_replace(
    YearWeek,
    "^(\\d{4})(\\d{2})$",
    "\\1-W\\2-1"
  ),
  date = ISOweek::ISOweek2date(isoweek)
)
# Case 1: train on the weeks from 2018-12-31 up to and including 2021-11-22
# (WK47); hold out 2021-11-29 through 2021-12-27 for testing.
Original.train.df <- filter(
  Original.df,
  date >= "2018-12-31" & date <= "2021-11-22"
)
Original.test.df <- filter(
  Original.df,
  date >= "2021-11-29" & date <= "2021-12-27"
)

# Hold-out frame with the week code, Production and the date helpers removed.
Shipment.Test.df <- as_tibble(
  dplyr::select(Original.test.df, -c(YearWeek, Production, date, isoweek))
)

# Training tsibble: index it by a yearweek column (Week.1) built from the
# date, then drop the week code and the date helper columns.
Total.train.df <- Original.train.df %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-c(YearWeek, date, isoweek)) %>%
  as_tsibble(index = Week.1)
# Model 1: ARIMA with Fourier terms for Production, fitted on the training
# data that ends at WK47 (2021-11-22).
# The Box-Cox lambda is taken from the guerrero feature of Production.
lambda_production <- pull(
  features(Total.train.df, Production, features = guerrero),
  lambda_guerrero
)

# Try K = 1..25 Fourier pairs and keep the fit with the lowest AICc.
bestfit.Prod.1.AICc <- Inf
for (K in seq_len(25)) {
  fit.Prod.1 <- model(
    Total.train.df,
    ARIMA(
      box_cox(Production, lambda_production) ~ fourier(K = K),
      stepwise = FALSE,
      approximation = FALSE
    )
  )
  if (bestfit.Prod.1.AICc > purrr::pluck(glance(fit.Prod.1), "AICc")) {
    bestfit.Prod.1.AICc <- purrr::pluck(glance(fit.Prod.1), "AICc")
    bestfit.Prod.1 <- fit.Prod.1
    bestK.Prod.1 <- K
  }
}

# Report the selected K and the summary of the winning model.
bestK.Prod.1
glance(bestfit.Prod.1)
# Case 2: train on the weeks from 2018-12-31 up to and including 2021-12-27
# (WK52); hold out 2022-01-03 through 2022-01-17 for testing.
Original.train.df_2 <- filter(
  Original.df,
  date >= "2018-12-31" & date <= "2021-12-27"
)
Original.test.df_2 <- filter(
  Original.df,
  date >= "2022-01-03" & date <= "2022-01-17"
)

# Hold-out frame with the week code, Production and the date helpers removed.
Shipment.Test.df_2 <- as_tibble(
  dplyr::select(Original.test.df_2, -c(YearWeek, Production, date, isoweek))
)

# Training tsibble: index it by a yearweek column (Week.1) built from the
# date, then drop the week code and the date helper columns.
Total.train.df_2 <- Original.train.df_2 %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-c(YearWeek, date, isoweek)) %>%
  as_tsibble(index = Week.1)
# Model 2: ARIMA with Fourier terms for Production, fitted on the training
# data that ends at WK52 (2021-12-27).
#
# Every object used here is the "_2" / ".2" variant. Fitting on
# Total.train.df, or comparing glance(fit.Prod.1) with bestfit.Prod.1.AICc,
# would re-use model 1's data and results and leave bestfit.Prod.2 and
# bestK.Prod.2 unassigned.
#
# NOTE(review): stepwise = FALSE together with approximation = FALSE asks
# ARIMA() for a full, non-approximated order search, and that search is
# repeated for each of the 25 values of K, so this loop can run for a long
# time. Confirm against the fable ARIMA documentation before widening it.

# The Box-Cox lambda is taken from the guerrero feature of Production.
lambda_production_2 <- Total.train.df_2 %>%
  features(Production, features = guerrero) %>%
  pull(lambda_guerrero)

# Trackers for the best model found so far.
bestfit.Prod.2.AICc <- Inf
bestfit.Prod.2 <- NULL
bestK.Prod.2 <- NA_integer_

# Try K = 1..25 Fourier pairs and keep the fit with the lowest AICc.
for (K in seq_len(25)) {
  fit.Prod.2 <- Total.train.df_2 %>%
    model(
      ARIMA(
        box_cox(Production, lambda_production_2) ~ fourier(K = K),
        stepwise = FALSE,
        approximation = FALSE
      )
    )
  aicc.Prod.2 <- purrr::pluck(glance(fit.Prod.2), "AICc")

  # Skip a K whose fit yields no usable AICc (column absent or NA); comparing
  # such a value inside if () would stop the whole loop with an error.
  if (length(aicc.Prod.2) != 1L || is.na(aicc.Prod.2)) {
    next
  }

  if (aicc.Prod.2 < bestfit.Prod.2.AICc) {
    bestfit.Prod.2.AICc <- aicc.Prod.2
    bestfit.Prod.2 <- fit.Prod.2
    bestK.Prod.2 <- K
  }
}

# Fail with a clear message instead of "object not found" when nothing fitted.
if (is.null(bestfit.Prod.2)) {
  stop("No ARIMA model with a valid AICc was found for K = 1..25.",
       call. = FALSE)
}

# Report the selected K and the summary of the winning model.
bestK.Prod.2
glance(bestfit.Prod.2)
Model 2 above never finished executing; it is still running.
As you can see above, model 1 and model 2 do not differ in anything other than the training data, so could you please let me know what I am missing here?
Thank you