Problem:
I have a data frame called FID (see below) that contains two columns for Year & Month, and Sighting_Frequency (counts of birds).
The data frame contains 3 years of observations between 2015-2017 , indicating I have 36 months of data. I have run a Bayesian time series analysis with MCMC using the bsts() function in the bsts package (see the R-code below) by following the tutorial below.
I want to produce a holdout Mean Absolute Percentage Error (MAPE) Plot as seen in the diagram below, which illustrates the actual vs predicted values with credible intervals for the holdout period using the package ggplot().
I am getting stuck when I am attempting to produce the d2 data frame (see the tutorial and R-code below) because I keep on experiencing this error message:-
Error in data.frame(c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn), :
arguments imply differing number of rows: 48, 32
I have been struggling to figure out the problem. If anyone can help me solve this issue, I would be deeply appreciative.
Many thanks in advance.
Tutorial
Plot I am trying to produce
R-code:
######################################
##Time Series Model using the bsts() function
#######################################
#Open packages for the time series analysis
library(lubridate)
library(bsts)
library(dplyr)
library(ggplot2)
##Create a time series object
myts2 <- ts(BSTS_Dataframe$Sightings_Frequency, start=c(2015, 1), end=c(2017, 12), frequency=12)
#upload the data into the windows() function
x <- window(myts2, start=c(2015, 01), end=c(2017, 12))
y <- log(x)
Run the bsts model
ss <- AddLocalLinearTrend(list(), y)
ss <- AddSeasonal(ss, y, nseasons = 3)
##Produce the bsts model using the bsts function
bsts.model <- bsts(y, state.specification = ss, family = "logit", niter = 100, ping = 0, seed = 123)
##Open plotting window
dev.new()
#plot the bsts.model
plot(bsts.model)
##Get a suggested number of burns
burn<-bsts::SuggestBurn(0.1, bsts.model)
p<-predict.bsts(bsts.model, horizon = 12, burn=burn, quantiles=c(.25, .975))
##Actual vs predicted
##Fitted vs predictions
##Acutal data and dates
d2 <- data.frame(
c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn),])+y), #fitted vs predictions
10^as.numeric(p$mean)),
as.numeric(BSTS_Dataframe$Sightings_Frequency), ## actual data and dates
as.Date(time(BSTS_Dataframe$Sightings_Frequency)))
######################################
This is the error message
######################################
Error in data.frame(c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn), :
arguments imply differing number of rows: 48, 32
######################################
##Name the d2 data frame columns
names(d2) <- c("Fitted", "Actual", "Date")
MAPE (mean absolute percentage error)
MAPE <- dplyr::filter(d2, year(Date)>2017) %>% dplyr::summarise(MAPE=mean(abs(Actual-Fitted)/Actual))
95% forecast credible interval
posterior.interval <- cbind.data.frame(
10^as.numeric(p$interval[1,]),
10^as.numeric(p$interval[2,]),
subset(d2, year(Date)>2017)$Date)
names(posterior.interval) <- c("LL", "UL", "Date")
Join intervals to the forecast
d3 <- left_join(d2, posterior.interval, by="Date")
Plot actual versus predicted with credible intervals for the holdout period
ggplot(data=d3, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
geom_vline(xintercept=as.numeric(as.Date("2017-12-01")), linetype=2) +
geom_ribbon(aes(ymin=LL, ymax=UL), fill="grey", alpha=0.5) +
ggtitle(paste0("BSTS -- Holdout MAPE = ", round(100*MAPE,2), "%")) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
**FID Data frame:**
structure(list(Year = structure(1:32, .Label = c("2015-01", "2015-02",
"2015-03", "2015-04", "2015-05", "2015-08", "2015-09", "2015-10",
"2015-11", "2015-12", "2016-01", "2016-02", "2016-03", "2016-04",
"2016-05", "2016-07", "2016-08", "2016-09", "2016-10", "2016-11",
"2016-12", "2017-01", "2017-02", "2017-03", "2017-04", "2017-05",
"2017-07", "2017-08", "2017-09", "2017-10", "2017-11", "2017-12"
), class = "factor"), Sightings_Frequency = c(36L, 28L, 39L,
46L, 5L, 22L, 10L, 15L, 8L, 33L, 33L, 29L, 31L, 23L, 8L, 9L,
40L, 41L, 40L, 30L, 30L, 44L, 37L, 41L, 42L, 20L, 7L, 27L, 35L,
27L, 43L, 38L)), class = "data.frame", row.names = c(NA, -32L
))