Thanks @robjhyndman!
I read your article which was really helpful in understanding this concept and now am trying to forecast these variables separately, but because many of these stop data input at different dates, I am struggling quite a bit to forecast them. I have created a new reprex to include example of variability in date inputs for different variables, but the original data is huge with several variables and observations.
Here I am trying to find the max date first before forecasting them and getting the error at filter(!is.na(value)) in the second last line of my code below :
Error: Problem with filter()
input ..1
. x object 'value' not found i Input ..1
is !is.na(value)
. i The error occured in group 1: ind_variables = "fx".
Am I going in the right direction or is there a better way to forecast them separately? Also, while forecasting these variables separately, I would still need my category as dependent variable, I believe. Please correct me if I am wrong.
Thanks for your help!
# Sample Data
df <- data.frame(
date = c("2018-01-01",
"2018-02-01",
"2018-03-01","2018-04-01",
"2018-05-01","2018-06-01",
"2018-07-01",
"2018-08-01","2018-09-01",
"2018-10-01","2018-11-01",
"2018-12-01","2019-01-01",
"2019-02-01",
"2019-03-01","2019-04-01",
"2019-05-01","2019-06-01",
"2019-07-01",
"2019-08-01","2019-09-01",
"2019-10-01","2019-11-01",
"2019-12-01","2020-01-01",
"2020-02-01",
"2020-03-01","2020-04-01",
"2020-05-01","2020-06-01",
"2020-07-01",
"2020-08-01","2020-09-01",
"2020-10-01","2020-11-01",
"2020-12-01"),
category1 = c(2347748,
2184392,2471622,2184902,
1517822,2613641,2060101,
2498327,2580796,
2186383,2606230,2669831,
2089040,2131080,2204262,
2050067,2226354,
2292815,2179322,2373398,
2382017,2497947,2782331,
2563736,2061292,
2087140,2136628,449335,
1105069,1535344,NA,NA,
NA,NA,NA,NA),
category2 = c(1284719,
825255,1028516,1125614,
1311307,1221256,
1109260.65,1196302,1018945,
1200231,1148146,913217,
1277256,980282,853458,
1007229.58,1246084,
1193005,1503203.64,
1451290,1343771,1582470,
1233360.75588,
1271090.30412,1337158,1024617,
969186,580039,745976,
588006,NA,NA,NA,NA,NA,
NA),
interest = c(7,6.75,6.5,
6.5,6.5,6.5,6.5,6.5,
6.5,6.5,6.5,6.5,
6.5,6.5,6.5,6.5,6.5,
6.5,6.5,6,5.5,5,5,
4.5,4.5,4.25,3.75,
3.75,3,2.25,2.25,2.25,
2.25,2.25,2.25,2.25),
inflation = c(0.0029,
0.0032,9e-04,0.0022,0.004,
0.0126,0.0033,-9e-04,
0.0048,0.0045,-0.0021,
0.0015,0.0032,0.0043,
0.0075,0.0057,0.0013,
0.00013234236394144,
0.0019,0.0011,-4e-04,
0.001,0.0051,0.0115,
0.0021,0.0025,7e-04,
-0.0031,-0.0038,0.0026,
NA,NA,NA,NA,NA,NA),
fx = c(3.2099,
3.2409,3.2786,3.4069,
3.6355,3.7726,3.8281,
3.9292,4.1159,3.7578,
3.786665,3.885055,
3.74168181818182,3.723625,
3.8459,3.8956,4.0009,
3.8582,3.7787,4.0194,
4.1209,4.0864,4.1547,
4.1089,4.1489,4.3404,4.88,
5.325,5.6429,5.196,
5.35104166666667,
5.23063333333333,5.254225,
5.29031666666667,
5.32640833333333,5.35),
gdp = c(555675.6,
528921.5,560120.6,
559332.8,547016.5,580697.8,
583000.6,582691.2,
550474.3,587272.8,590537.5,
601844.5,575506.6,
563908.6,574201,587270.1,
599428.1,593574,
609182.3,607961.9,570176.4,
613627.6,627545.9,
647015.4,608395.1,
595037.7,599988.2,546313.8,
555930.3,NA,NA,NA,NA,
NA,NA,NA),
unemp = c(12.2,12.6,
13.1,12.9,12.7,12.4,
12.3,12.1,11.9,11.7,
11.6,11.6,12,12.4,
12.7,12.5,12.3,12,11.8,
11.8,11.8,
11.552647894735,11.2,11,11.2,
11.6,12.2,12.6,12.9,NA,
NA,NA,NA,NA,NA,NA),
trade = c(2.824516098,
2.998715922,6.419975433,
5.920627359,
6.064274705,5.789203554,
3.873510234,2.774624983,
5.071292182,5.791589075,
4.07682386,6.428270414,
1.697146724,3.275887912,
4.561136714,5.794189064,
5.686735734,
5.020100064,2.093569536,
3.094216555,3.764325507,
2.555100864,3.515693495,
5.599341675,-1.67206317,
2.337179577,3.859122154,
6.063049758,
4.272492579,7.463275482,NA,NA,
NA,NA,NA,NA),
register = c(5295,4823,
6868,6963,6455,6466,
8298,8837,8022,9336,
8908,8868,8338,8118,
8956,9922,10572,8978,
10543,11211,10651,
11326,10514,10018,8299,
NA,NA,NA,NA,NA,NA,
NA,NA,NA,NA,NA)
)
# Assigning Present and Forecast Dates
present <- max(df$date[which(!is.na(df[, "category1"]))])
next_fc <- min(df$date[which(is.na(df[, "category1"]))])
last_fc <- max(df$date)
# Finding Max Date of each variable
ind_var <- df%>%
select(-category1, -category2)%>%
gather(key = "ind_variables", value = "Value", -date)%>%
group_by(ind_variables)%>%
filter(!is.na(value))%>%
summarize(max_date = max(date))