Hello All,
I would like to clarify the difference between converting a regular data frame into a ts object versus a tsibble object.
For a ts object we specify the frequency explicitly, and we do not include the date column when converting to a ts object.
However, for a tsibble object we can include the date column directly by declaring it as the index, so the frequency is inferred automatically.
-
The class of a ts object is shown as "ts matrix", whereas the class of a tsibble object is shown as a "dataframe". Would forecasting with a tsibble object still give the same results as with a ts object? Also, since a tsibble is a data frame, how does fitting ARIMA or ARIMAX models on a tsibble affect performance compared with fitting them on a regular ts object?
-
The date column that I specify as the index when converting to a tsibble object is not visible when I call the View function on the converted tsibble. Is it possible to View the index column of a tsibble object?
Please see the example below, where I try to convert a data frame to a tsibble object. While doing so, I try to remove some of the columns so that only a univariate time series remains, but I get this error: "Error in manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, :
argument ".funs" is missing, with no default"
Please advise
library(tidyverse)
library(astsa)
library(vars)
library(fpp3)
# Original data frame: 148 weekly observations, one row per week.
#   YearWeek            - character week label in "YYYYWW" form ("201901" .. "202143")
#   Shipment            - numeric weekly value
#   Production          - numeric weekly value
#   Net.Production.Qty  - numeric weekly value
# NOTE(review): this looks like dput() output pasted in to make the example
# reproducible -- confirm against the real data source.
Original.df<- structure(list(YearWeek = c("201901", "201902", "201903", "201904",
"201905", "201906", "201907", "201908", "201909", "201910", "201911",
"201912", "201913", "201914", "201915", "201916", "201917", "201918",
"201919", "201920", "201921", "201922", "201923", "201924", "201925",
"201926", "201927", "201928", "201929", "201930", "201931", "201932",
"201933", "201934", "201935", "201936", "201937", "201938", "201939",
"201940", "201941", "201942", "201943", "201944", "201945", "201946",
"201947", "201948", "201949", "201950", "201951", "201952", "202001",
"202002", "202003", "202004", "202005", "202006", "202007", "202008",
"202009", "202010", "202011", "202012", "202013", "202014", "202015",
"202016", "202017", "202018", "202019", "202020", "202021", "202022",
"202023", "202024", "202025", "202026", "202027", "202028", "202029",
"202030", "202031", "202032", "202033", "202034", "202035", "202036",
"202037", "202038", "202039", "202040", "202041", "202042", "202043",
"202044", "202045", "202046", "202047", "202048", "202049", "202050",
"202051", "202052", "202053", "202101", "202102", "202103", "202104",
"202105", "202106", "202107", "202108", "202109", "202110", "202111",
"202112", "202113", "202114", "202115", "202116", "202117", "202118",
"202119", "202120", "202121", "202122", "202123", "202124", "202125",
"202126", "202127", "202128", "202129", "202130", "202131", "202132",
"202133", "202134", "202135", "202136", "202137", "202138", "202139",
"202140", "202141", "202142", "202143"), Shipment = c(418, 1442,
1115, 1203, 1192, 1353, 1191, 1411, 933, 1384, 1362, 1353, 1739,
1751, 1595, 1380, 1711, 2058, 1843, 1602, 2195, 2159, 2009, 1812,
2195, 1763, 821, 1892, 1781, 2071, 1789, 1789, 1732, 1384, 1435,
1247, 1839, 2034, 1963, 1599, 1596, 1548, 1084, 1350, 1856, 1882,
1979, 1021, 1311, 2031, 1547, 591, 724, 1535, 1268, 1021, 1269,
1763, 1275, 1411, 1847, 1379, 1606, 1473, 1180, 926, 800, 840,
1375, 1755, 1902, 1921, 1743, 1275, 1425, 1088, 1416, 1168, 842,
1185, 1570, 1435, 1209, 1470, 1368, 1926, 1233, 1189, 1245, 1465,
1226, 887, 1489, 1369, 1358, 1179, 1200, 1226, 1066, 823, 1913,
2308, 1842, 910, 794, 1098, 1557, 1417, 1851, 1876, 1010, 160,
1803, 1607, 1185, 1347, 1700, 981, 1191, 1058, 1464, 1513, 1333,
1169, 1294, 978, 962, 1254, 987, 1290, 758, 436, 579, 636, 614,
906, 982, 649, 564, 502, 274, 473, 506, 902, 639, 810, 398, 488
), Production = c(0, 198, 1436, 1055, 1396, 1330, 1460, 1628,
1513, 1673, 1737, 1274, 1726, 1591, 2094, 1411, 2009, 1909, 1759,
1693, 1748, 1455, 2078, 1717, 1737, 1886, 862, 1382, 1779, 1423,
1460, 1454, 1347, 1409, 1203, 1235, 1397, 1563, 1411, 1455, 1706,
688, 1446, 1336, 1618, 1404, 1759, 746, 1560, 1665, 1317, 0,
441, 1390, 1392, 1180, 1477, 1265, 1485, 1495, 1543, 1584, 1575,
1609, 1233, 1420, 908, 1008, 1586, 1392, 1385, 1259, 1010, 973,
1053, 905, 1101, 1196, 891, 1033, 925, 889, 1136, 1058, 1179,
1047, 967, 900, 904, 986, 1014, 945, 1030, 1066, 1191, 1143,
1292, 574, 1174, 515, 1296, 1315, 1241, 0, 0, 1182, 1052, 1107,
1207, 1254, 1055, 258, 1471, 1344, 1353, 1265, 1444, 791, 1397,
1186, 1264, 1032, 949, 1059, 954, 798, 956, 1074, 1136, 1209,
975, 833, 994, 1127, 1153, 1202, 1234, 1336, 1484, 1515, 1151,
1175, 976, 1135, 1272, 869, 1900, 1173), Net.Production.Qty = c(22,
188, 1428, 1031, 1382, 1368, 1456, 1578, 1463, 1583, 1699, 1318,
1582, 1537, 2118, 1567, 1961, 1897, 1767, 1603, 1666, 1419, 2186,
1621, 1677, 1840, 698, 1290, 1411, 927, 1754, 1222, 1411, 1549,
1491, 1359, 1179, 1945, 1463, 1465, 1764, 764, 810, 1308, 1830,
1542, 1695, 544, 1482, 1673, 1659, 0, 445, 1358, 1364, 1224,
1417, 1239, 1387, 1595, 1469, 1624, 1643, 1763, 1217, 1456, 568,
1290, 1666, 1428, 1327, 773, 1118, 1231, 1143, 921, 1083, 1124,
935, 903, 937, 849, 1132, 1032, 1143, 1081, 891, 886, 880, 1002,
1072, 969, 1000, 996, 1243, 1183, 1306, 650, 1226, 553, 1306,
1379, 1359, 0, 0, 1182, 988, 1099, 1173, 1244, 1039, 254, 1425,
1318, 1385, 1221, 1364, 739, 1397, 1112, 1160, 924, 971, 1015,
978, 828, 868, 994, 1090, 1165, 783, 887, 934, 1023, 1045, 1114,
1052, 1186, 1456, 1401, 1249, 779, 430, 1625, 1498, 883, 1860,
1101)), row.names = c(NA, 148L), class = "data.frame")
# Attach a real calendar date to every weekly observation.
# Step 1: rewrite the "YYYYWW" label as an ISO 8601 week-date string
#         "YYYY-Www-1" (the trailing 1 is the ISO weekday number).
# Step 2: convert that string to a Date with ISOweek2date(). Going through
#         ISO weeks lets labels such as "202053" (a 53-week year) resolve
#         to a valid date.
Original.df <- Original.df %>%
  mutate(
    isoweek = str_replace(YearWeek, "^(\\d{4})(\\d{2})$", "\\1-W\\2-1")
  ) %>%
  mutate(date = ISOweek::ISOweek2date(isoweek))

# Interactive inspection of the augmented data frame.
View(Original.df)
# Train/test split by calendar date (both bounds inclusive):
#   train: 2018-12-31 .. 2021-03-29
#   test : 2021-04-05 .. 2021-10-25
# Plain `[` subsetting is used so the original row names are retained.
Original.train.df <- Original.df[
  Original.df$date >= as.Date("2018-12-31") &
    Original.df$date <= as.Date("2021-03-29"),
]
Original.test.df <- Original.df[
  Original.df$date >= as.Date("2021-04-05") &
    Original.df$date <= as.Date("2021-10-25"),
]
# Build a univariate weekly tsibble of Net.Production.Qty from the training
# data.
#
# Fixes relative to the earlier version:
#  * mutate_at() applies functions to columns and therefore requires `.funs`;
#    calling it with only vars(...) raises
#    'argument ".funs" is missing, with no default'. Dropping columns is a
#    job for select(), not mutate_at().
#  * select() is called as dplyr::select() because library(vars) attaches
#    MASS, whose select() masks the dplyr verb.
#  * yearweek() accepts a Date directly, so the intermediate ISOweek(date)
#    call (from a package that is never attached with library()) is not
#    needed.
Net.Production.train.df <- Original.train.df %>%
  # Week.1 becomes the tsibble index (a yearweek built from the date column).
  mutate(Week.1 = yearweek(date)) %>%
  # Keep only the index and the single measured variable.
  dplyr::select(Week.1, Net.Production.Qty) %>%
  as_tsibble(index = Week.1)
Please advise
Thank you,
Vignesh