Hello,
I have a question: I am failing to write simple code with "initial_time_split" to do out-of-sample prediction for a basic machine-learning function, so let's keep it simple with an OLS regression of Q on P, as below:
For example, what I would like is:
* train the model on May 1 to May 3, then predict May 4
* train the model on May 1 to May 4, then predict May 5
* train the model on May 1 to May 5, then predict May 6
...
Any suggestions about which function to use, or some sample code?
Thank you,
* Here is my sample code, but the data are not balanced (the number of observations differs from day to day):
df_split <- initial_time_split(df, prop = ., lag = ?)
train_data <- training(df_split)
test_data <- testing(df_split)
* Here is some sample data with the date t (in dd-mm-yyyy format), the predictor P and the outcome Q:
dput(df)
structure(list(t = c("01-05-2021", "01-05-2021", "01-05-2021",
"01-05-2021", "01-05-2021", "01-05-2021", "01-05-2021", "01-05-2021",
"01-05-2021", "01-05-2021", "01-05-2021", "01-05-2021", "01-05-2021",
"01-05-2021", "03-05-2021", "03-05-2021", "03-05-2021", "03-05-2021",
"03-05-2021", "03-05-2021", "04-05-2021", "04-05-2021", "04-05-2021",
"04-05-2021", "04-05-2021", "04-05-2021", "04-05-2021", "04-05-2021",
"04-05-2021", "04-05-2021", "04-05-2021", "04-05-2021", "04-05-2021",
"04-05-2021", "04-05-2021", "05-05-2021", "05-05-2021", "05-05-2021",
"05-05-2021", "05-05-2021", "05-05-2021", "05-05-2021", "05-05-2021",
"05-05-2021", "05-05-2021", "05-05-2021", "06-05-2021", "06-05-2021",
"06-05-2021", "06-05-2021", "06-05-2021"), T = c("05-15-2021",
"05-15-2021", "05-15-2021", "06-19-2021", "06-19-2021", "06-19-2021",
"06-19-2021", "06-19-2021", "06-19-2021", "06-19-2021", "06-19-2021",
"07-17-2021", "07-17-2021", "07-17-2021", "07-17-2021", "07-17-2021",
"07-17-2021", "07-17-2021", "05-15-2021", "05-15-2021", "05-15-2021",
"05-15-2021", "05-15-2021", "05-15-2021", "05-15-2021", "05-15-2021",
"05-15-2021", "06-19-2021", "06-19-2021", "06-19-2021", "06-19-2021",
"06-19-2021", "06-19-2021", "07-17-2021", "07-17-2021", "07-17-2021",
"07-17-2021", "07-17-2021", "07-17-2021", "07-17-2021", "07-17-2021",
"07-17-2021", "07-17-2021", "07-17-2021", "07-17-2021", "07-17-2021",
"07-17-2021", "07-17-2021", "07-17-2021", "07-17-2021", "07-17-2021"
), P = c(0.307226768393632, 0.301688731220703, 0.396139887693312,
0.323209211730937, 0.319602633192871, 0.312321320233133, 0.30329073088301,
0.322303373168933, 0.103883903792236, 0.100133319383373, 0.110331003036399,
0.303272396369116, 0.373632906761169, 0.313616062927236, 0.331809311366162,
0.303132639693213, 0.397136088790893, 0.383877823336263, 0.236323307903033,
0.233372881698608, 0.231780329830088, 0.23386123036873, 0.233308869933082,
0.229373693777893, 0.0928117196083069, 0.0916033303396606, 0.0936313619063331,
0.396189322171021, 0.392973268331063, 0.389768168238667, 0.386373310360718,
0.387233363333833, 0.383031632908323, 0.363130308318273, 0.362333112682333,
0.339339339130839, 0.336762032999268, 0.333986731161336, 0.361321991729736,
0.333963620030893, 0.333963620030893, 0.333963620030893, 0.333963620030893,
0.333963620030893, 0.333963620030893, 0.333963620030893, 0.333963620030893,
0.333963620030893, 0.333963620030893, 0.333963620030893, 0.333963620030893
), Q = c(0.48332, 0.48332, 0.48332, 0.343432, 0.343432, 0.343432,
0.343432, 0.343432, 0.343432, 0.343432, 0.343432, 0.328632, 0.328632,
0.328632, 0.328632, 0.328632, 0.328632, 0.328632, 0.48332, 0.48332,
0.48332, 0.48332, 0.48332, 0.48332, 0.48332, 0.48332, 0.48332,
0.30832, 0.30832, 0.30832, 0.30832, 0.30832, 0.30832, 0.32372,
0.32372, 0.32372, 0.32372, 0.32372, 0.32372, 0.32372, 0.32373,
0.32374, 0.32375, 0.32376, 0.32377, 0.32378, 0.32371, 0.3238,
0.32381, 0.32389, 0.32383)), class = "data.frame", row.names = c(NA,
-51L))