I am trying to speed up my forecasting. In the example below, I use Holt-Winter method with the zoo
package to estimate my model. However, this takes 10 ours to run!
It would be wonderful if you might advise how I may improve this performance, and suggest what factors lead to my estimation to take so long
# Loading Libraries -------------------------------------------------------
library(zoo)
#> Warning: package 'zoo' was built under R version 3.5.1
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(forecast)
#> Warning: package 'forecast' was built under R version 3.5.1
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 3.5.1
library(xts)
#> Warning: package 'xts' was built under R version 3.5.1
library(fpp2)
#> Warning: package 'fpp2' was built under R version 3.5.1
#> Loading required package: fma
#> Warning: package 'fma' was built under R version 3.5.1
#> Loading required package: expsmooth
#> Warning: package 'expsmooth' was built under R version 3.5.1
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 3.5.1
#> Error: package or namespace load failed for 'tidyverse' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
#> there is no package called 'glue'
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:xts':
#>
#> first, last
library(dplyr)
#> Error: package or namespace load failed for 'dplyr' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
#> there is no package called 'glue'
library(readxl)
library(googlesheets)
#> Warning: package 'googlesheets' was built under R version 3.5.1
#> Error: package or namespace load failed for 'googlesheets' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
#> there is no package called 'glue'
#read two csv file from google drive.
D1103orgdata <-"https://docs.google.com/spreadsheets/d/e/2PACX-1vSDZwdXrLWmtglt-audp83HEDBvIqbJYOuk4FcsKLYCGCY7s3666xIWcsTd3crpmkh1zMN9jBDHpInr/pub?gid=1123672661&single=true&output=csv"
D1103interpolated <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vSUI423Hyq6Ulg7a2huyyWw10r73OFm4ybUMRBvn6zxTThtijD1wf9FF8qRmtsZrdPR5_VMoc13DPu0/pub?gid=522057281&single=true&output=csv"
#time diff= 1minute
to.minute <- function(x) as.POSIXct(trunc(as.POSIXct(x, origin = "1970-01-01"), "mins"))
z <- read.csv.zoo(url(D1103orgdata), FUN = to.minute, aggregate = function(x) tail(x, 1))
zz <- na.approx(as.zoo(as.ts(z)))
time(zz) <- as.POSIXct(time(zz), origin = "1970-01-01")
time(z) <- as.POSIXct(time(z), origin = "1970-01-01")
z.interpolated <- as.data.frame(merge(zz, zoo(, time(zz))))
df3 <- data.table::fread(D1103interpolated,blank.lines.skip = TRUE)
# Forecasting D1103-to test & train known data ------------------------------------------
start_time <- as.POSIXct("2015-10-27 19:50")
end_time <- as.POSIXct("2015-12-31 23:59")
mytsTT2 <- ts(zoo(
z.interpolated$meter_value,
order.by = seq.POSIXt(start_time, end_time, by = "mins"),
frequency = 10080))
# Plotting-forecasting for test & train data ------------------------------
ActualData2Plot <-ts(df3[1:171651,3]) # plot all existing data
# for Accuracy test -------------------------------------------------------------
ActualData <-ts(df3[93851:171651,3]) #1/1/2016 0:00 (refer to csv column number)
# HW-forecasting test and train data --------------------------------------
#Holt-winter
hw <- HoltWinters(mytsTT2, beta = TRUE, gamma = FALSE)
#fcHW <- forecast(hw, 77801)
# **forecasting HW takes 10hours to get the output**.
#output of fcHW can be view in this efile:
D1103Read <-"https://docs.google.com/spreadsheets/d/e/2PACX-1vSuJLKwLeqC2z-ha0AtD_Q6_WlXuOeJ2LWcqxZAHkDyDuSNE5S7fwgSP45mth9JoTia3_CWNqCXafzG/pub?output=csv"
D1103fcHW <- data.table::fread(D1103Read,blank.lines.skip = TRUE)
D1103fcHW
#> V1 Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
#> 1: 93851 187283.0 187283.0 187283.1 187283.04 187283.1
#> 2: 93852 187283.3 187283.3 187283.3 187283.28 187283.3
#> 3: 93853 187283.6 187283.5 187283.6 187283.53 187283.6
#> 4: 93854 187283.8 187283.8 187283.9 187283.77 187283.9
#> 5: 93855 187284.1 187284.0 187284.1 187284.00 187284.2
#> ---
#> 77797: 171647 207468.0 117082.5 297853.4 69235.30 345700.6
#> 77798: 171648 207468.2 117081.0 297855.4 69232.89 345703.5
#> 77799: 171649 207468.5 117079.5 297857.4 69230.49 345706.5
#> 77800: 171650 207468.7 117078.0 297859.4 69228.08 345709.4
#> 77801: 171651 207469.0 117076.5 297861.4 69225.68 345712.3
View(D1103fcHW)
# Plotting-forecasting with HoltWinter for test & train data ------------------------------
#fcHW %>% autoplot()+autolayer(ActualData2)+scale_y_continuous(labels = scales::format_format(scientific = FALSE)) +ylab("Meter_value")
#accuracy(as.numeric(fcHW$mean),as.numeric(ActualData))
# ME RMSE MAE MPE MAPE
# Test set -3889.856 4890.119 3890.256 -1.989017 1.98923
Created on 2018-11-06 by the reprex package (v0.2.1.9000)
**Is it possible to change the points of forecast "93851" to date and time format? **
Since my forecasting point starts from 01-Jan-2016 00:00:00
D1103fcHW <- data.table::fread(D1103Read,blank.lines.skip = TRUE)
D1103fcHW
#> V1 Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
#> 1: 93851 187283.0 187283.0 187283.1 187283.04 187283.1
#> 2: 93852 187283.3 187283.3 187283.3 187283.28 187283.3
#> 3: 93853 187283.6 187283.5 187283.6 187283.53 187283.6
#> 4: 93854 187283.8 187283.8 187283.9 187283.77 187283.9
#> 5: 93855 187284.1 187284.0 187284.1 187284.00 187284.2
#> ---
#> 77797: 171647 207468.0 117082.5 297853.4 69235.30 345700.6
#> 77798: 171648 207468.2 117081.0 297855.4 69232.89 345703.5
#> 77799: 171649 207468.5 117079.5 297857.4 69230.49 345706.5
#> 77800: 171650 207468.7 117078.0 297859.4 69228.08 345709.4
#> 77801: 171651 207469.0 117076.5 297861.4 69225.68 345712.3