ARIMA Modeling running time issue

My data set is weekly and contains two variables, Production and Shipment. Production is the independent variable and Shipment is the dependent variable. First I forecast the Production values, and then I use that forecast as a regressor to forecast the Shipment variable.

If I run the ARIMA model using the training set date range from "2018-12-31" to "2021-11-22",
the model finishes within 10 minutes and I can see the model values.

Using the same model, if I extend the training set date range to run from "2018-12-31" to "2021-12-27",
the model runs for so long that it never finishes, and I cannot view the model output.

Could you please help me with this query.
Thank you for the support

# Sample data for the question: a data.frame with 160 weekly rows and three
# columns.
#   YearWeek   - character key, four-digit year followed by a two-digit week
#                number ("201901" ... "202203"); 2020 runs to week 53.
#   Shipment   - numeric; the dependent variable to be forecast.
#   Production - numeric; the independent variable used as a regressor.
Original.df<-structure(list(YearWeek = c("201901", "201902", "201903", "201904", 
"201905", "201906", "201907", "201908", "201909", "201910", "201911", 
"201912", "201913", "201914", "201915", "201916", "201917", "201918", 
"201919", "201920", "201921", "201922", "201923", "201924", "201925", 
"201926", "201927", "201928", "201929", "201930", "201931", "201932", 
"201933", "201934", "201935", "201936", "201937", "201938", "201939", 
"201940", "201941", "201942", "201943", "201944", "201945", "201946", 
"201947", "201948", "201949", "201950", "201951", "201952", "202001", 
"202002", "202003", "202004", "202005", "202006", "202007", "202008", 
"202009", "202010", "202011", "202012", "202013", "202014", "202015", 
"202016", "202017", "202018", "202019", "202020", "202021", "202022", 
"202023", "202024", "202025", "202026", "202027", "202028", "202029", 
"202030", "202031", "202032", "202033", "202034", "202035", "202036", 
"202037", "202038", "202039", "202040", "202041", "202042", "202043", 
"202044", "202045", "202046", "202047", "202048", "202049", "202050", 
"202051", "202052", "202053", "202101", "202102", "202103", "202104", 
"202105", "202106", "202107", "202108", "202109", "202110", "202111", 
"202112", "202113", "202114", "202115", "202116", "202117", "202118", 
"202119", "202120", "202121", "202122", "202123", "202124", "202125", 
"202126", "202127", "202128", "202129", "202130", "202131", "202132", 
"202133", "202134", "202135", "202136", "202137", "202138", "202139", 
"202140", "202141", "202142", "202143", "202144", "202145", "202146", 
"202147", "202148", "202149", "202150", "202151", "202152", "202201", 
"202202", "202203"), Shipment = c(399, 1336, 1018, 1126, 1098, 
1235, 1130, 1258, 897, 1333, 1221, 1294, 1628, 1611, 1484, 1238, 
1645, 1936, 1664, 1482, 2060, 1964, 1875, 1645, 2039, 1640, 733, 
1764, 1639, 1968, 1692, 1677, 1542, 1299, 1328, 1130, 1741, 1929, 
1843, 1427, 1467, 1450, 1041, 1238, 1721, 1757, 1813, 1001, 1208, 
1916, 1435, 540, 681, 1436, 1170, 938, 1206, 1648, 1169, 1311, 
1772, 1333, 1534, 1365, 1124, 846, 732, 753, 1266, 1652, 1772, 
1814, 1649, 1191, 1298, 986, 1296, 1066, 777, 1041, 1388, 1289, 
1097, 1356, 1238, 1732, 1109, 1104, 1155, 1334, 1094, 770, 1411, 
1304, 1269, 1093, 1096, 1121, 943, 695, 1792, 2033, 1586, 768, 
685, 993, 1406, 1246, 1746, 1740, 938, 160, 1641, 1373, 1023, 
1173, 1611, 928, 1038, 1009, 1274, 1369, 1231, 1053, 1163, 880, 
870, 1131, 882, 1143, 632, 394, 510, 543, 535, 824, 874, 591, 
512, 448, 247, 452, 470, 747, 545, 639, 326, 414, 604, 640, 458, 
272, 524, 589, 666, 217, 215, 348, 537, 466), Production = c(794, 
1400, 1505, 1055, 1396, 1331, 1461, 1623, 1513, 1667, 1737, 1264, 
1722, 1587, 2094, 1363, 2007, 1899, 1749, 1693, 1748, 1455, 2078, 
1702, 1736, 1885, 860, 1372, 1716, 1290, 1347, 1451, 1347, 1409, 
1203, 1235, 1397, 1557, 1406, 1451, 1704, 670, 1442, 1336, 1611, 
1401, 1749, 744, 1558, 1665, 1317, 41, 441, 1351, 1392, 1180, 
1447, 1265, 1485, 1494, 1543, 1581, 1575, 1597, 1191, 1386, 889, 
1002, 1573, 1380, 1346, 1243, 1009, 965, 1051, 905, 1094, 1194, 
891, 1033, 921, 880, 1135, 1058, 1171, 1022, 956, 880, 902, 983, 
1014, 945, 1021, 1058, 1191, 1139, 1292, 573, 1173, 514, 1292, 
1310, 1239, 41, 41, 1182, 1028, 1028, 1196, 1214, 1045, 256, 1451, 
1344, 1352, 1257, 1444, 786, 1369, 1185, 1262, 1025, 949, 1051, 
941, 727, 911, 951, 987, 1136, 884, 770, 959, 1102, 1109, 1098, 
988, 983, 1002, 904, 1147, 1149, 919, 1058, 1112, 479, 1028, 
1154, 1126, 1155, 1208, 536, 839, 1178, 1225, 539, 41, 862, 839, 
873)), row.names = c(NA, 160L), class = "data.frame")

# Add a proper Date column so that 53-week ISO years (e.g. 2020) are handled
# correctly for the weekly observations:
#   1. rewrite YearWeek "YYYYWW" as the ISO week-date string "YYYY-Www-1"
#      (weekday 1 of that ISO week);
#   2. convert that string to a Date with ISOweek::ISOweek2date().
Original.df <- mutate(
  Original.df,
  isoweek = stringr::str_replace(
    YearWeek,
    "^(\\d{4})(\\d{2})$",
    "\\1-W\\2-1"
  ),
  date = ISOweek::ISOweek2date(isoweek)
)

# Case 1: train/test split with the training window ending at week 47
# ("2021-11-22").
Original.train.df <- filter(
  Original.df,
  date >= as.Date("2018-12-31") & date <= as.Date("2021-11-22")
)

# Hold-out weeks immediately after the training window.
Original.test.df <- filter(
  Original.df,
  date >= as.Date("2021-11-29") & date <= as.Date("2021-12-27")
)

# Test-period table with every column except Shipment dropped.
Shipment.Test.df <- as_tibble(
  dplyr::select(Original.test.df, -c(YearWeek, Production, date, isoweek))
)

# Training tsibble: keep only the weekly index (Week.1) plus the dependent
# (Shipment) and independent (Production) variables.
Total.train.df <- Original.train.df %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-c(YearWeek, date, isoweek)) %>%
  as_tsibble(index = Week.1)

# Model 1: ARIMA with Fourier terms for Production, fitted on the training
# data that ends at week 47 (2021-11-22).

# Box-Cox parameter for Production, taken from the Guerrero feature.
lambda_production <- Total.train.df %>%
  features(Production, features = guerrero) %>%
  pull(lambda_guerrero)

# Lowest AICc seen so far; Inf guarantees the first fit is accepted.
bestfit.Prod.1.AICc <- Inf

# Try K = 1..25 Fourier pairs and remember the fit with the smallest AICc.
for (K in 1:25) {
  fit.Prod.1 <- Total.train.df %>%
    model(
      ARIMA(
        box_cox(Production, lambda_production) ~ fourier(K = K),
        stepwise = FALSE,
        approximation = FALSE
      )
    )

  # Read the AICc once and reuse it for both the comparison and the update.
  candidate.Prod.1.AICc <- purrr::pluck(glance(fit.Prod.1), "AICc")

  if (candidate.Prod.1.AICc < bestfit.Prod.1.AICc) {
    bestfit.Prod.1.AICc <- candidate.Prod.1.AICc
    bestfit.Prod.1 <- fit.Prod.1
    bestK.Prod.1 <- K
  }
}

# Show the selected number of Fourier pairs and the winning model summary.
bestK.Prod.1
glance(bestfit.Prod.1)

# Case 2: train/test split with the training window extended to week 52
# ("2021-12-27").
Original.train.df_2 <- filter(
  Original.df,
  date >= as.Date("2018-12-31") & date <= as.Date("2021-12-27")
)

# Hold-out weeks immediately after the extended training window.
Original.test.df_2 <- filter(
  Original.df,
  date >= as.Date("2022-01-03") & date <= as.Date("2022-01-17")
)

# Test-period table with every column except Shipment dropped.
Shipment.Test.df_2 <- as_tibble(
  dplyr::select(Original.test.df_2, -c(YearWeek, Production, date, isoweek))
)

# Training tsibble: keep only the weekly index (Week.1) plus the dependent
# (Shipment) and independent (Production) variables.
Total.train.df_2 <- Original.train.df_2 %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-c(YearWeek, date, isoweek)) %>%
  as_tsibble(index = Week.1)


# Model 2: ARIMA with Fourier terms for Production, fitted on the training
# data that ends at week 52 (2021-12-27).

# Box-Cox parameter for Production, estimated on the case-2 training data.
lambda_production_2 <- Total.train.df_2 %>%
  features(Production, features = guerrero) %>%
  pull(lambda_guerrero)

# Lowest AICc seen so far; Inf guarantees the first valid fit is accepted.
bestfit.Prod.2.AICc <- Inf

# Try K = 1..25 Fourier pairs and keep the fit with the smallest AICc.
# NOTE(review): stepwise = FALSE and approximation = FALSE request the full,
# non-approximated model search, so each of the 25 fits can be slow; consider
# the defaults or a smaller K range if running time is a problem.
for (K in seq_len(25)) {
  # Fit on the case-2 training data (Total.train.df_2), matching the data
  # used for lambda_production_2.
  fit.Prod.2 <- Total.train.df_2 %>%
    model(
      ARIMA(
        box_cox(Production, lambda_production_2) ~ fourier(K = K),
        stepwise = FALSE,
        approximation = FALSE
      )
    )

  # AICc of this candidate; read once and reused below.
  aicc.Prod.2 <- purrr::pluck(glance(fit.Prod.2), "AICc")

  # Compare this model-2 candidate against the model-2 running best. Skip
  # candidates with no usable AICc (missing or NA) so the `if` cannot error.
  if (length(aicc.Prod.2) == 1 && !is.na(aicc.Prod.2) &&
      aicc.Prod.2 < bestfit.Prod.2.AICc) {
    bestfit.Prod.2.AICc <- aicc.Prod.2
    bestfit.Prod.2 <- fit.Prod.2
    bestK.Prod.2 <- K
  }
}

# Show the selected number of Fourier pairs and the winning model summary.
bestK.Prod.2
glance(bestfit.Prod.2)

In the code above, model 2 never finished executing; it is still running.

As you can see from the above, model 1 and model 2 do not differ in anything other than the training data, so could you please let me know what I am missing here?
Thank you

model(ARIMA

I'm not familiar with this. What package is it from?

It is from the fable package, which provides forecasting models (like those in the forecast package) that work with tidyverse functions.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.