For Loop for RMSE in R

I am working with a data frame that holds data for different regions of the UK. I have fitted an ARIMA model to each column and would like to get the forecast error and the RMSE for every column/region. How can I do this with a for loop over all 14 columns so that I don't have to do it manually?

Data:

structure(list(Date = structure(c(289094400, 297043200, 304992000, 
312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305, 
4.75015422578655, 4.59363957597172, 3.15315315315317), YORKSANDTHEHUMBER = c(4.0121120363361, 
5.45851528384282, 9.52380952380951, 6.04914933837431, 3.03030303030299, 
5.42099192618225), NORTHWEST = c(6.57894736842105, 6.95256660168939, 
6.50060753341436, 5.5904164289789, 4.59211237169096, 4.70041322314051
), EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127, 
5.22388059701494, 5.61465721040189, 4.64465584778958), WESTMIDS = c(4.65838509316771, 
4.74777448071216, 8.66855524079319, 6.56934306569344, 3.22896281800389, 
3.17535545023698), EASTANGLIA = c(6.74525212835624, 8.58895705521476, 
8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835
), OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589, 
8.56512141280351, 3.82269215128102, 2.11515863689776), OUTERMET = c(4.54545454545458, 
6.58505698607005, 7.36633663366336, 7.08225746956843, 4.3747847054771, 
1.68316831683168), LONDON = c(8.11719500480309, 10.3065304309196, 
6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978
), SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407, 
9.45489628557649, 2.46804759806079, 2.19354838709679), WALES = c(6.09418282548476, 
8.35509138381203, 7.40963855421687, 7.01065619742007, 1.15303983228513, 
3.47150259067357), SCOTLAND = c(5.15222482435597, 4.12026726057908, 
5.40106951871658, 8.67579908675796, -0.280112044817908, 2.94943820224719
), NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145, 
2.96397628818967, 6.06731620903454, 0.0835073068893502), UK = c(5.76890543055322, 
7.20302836425676, 7.39543442582184, 7.22885986848197, 3.23472252213347, 
2.95766398929048)), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))

Code:

# Training window: observations dated before 1 March 2020.
in_sample <- pc %>% 
  dplyr::filter(Date < '2020-03-01')
# NOTE(review): `st()` is not a base R function; the reply below calls
# `str()` at this point, so this is presumably a typo -- confirm.
st(in_sample)

# Hold-out window: observations dated on or after 1 March 2020.
out_sample <-pc %>% 
  dplyr::filter(Date >= '2020-03-01')

st(out_sample)

# Keep only the value columns: `%<>%` pipes `ar_data` into select() and
# assigns the result back to `ar_data`.
ar_data = in_sample
ar_data %<>% dplyr::select(-Date)
# For each column (MARGIN = 2): fit arima() with order = c(1,0,0), then
# forecast 4 steps ahead with a 95% interval. Each forecast is wrapped in
# list(), so a single forecast is reached as ar_model4[[name]][[1]].
ar_model4=apply(ar_data,2,function(x){
  return(
    list(
      
      summary(arima(x, order = c(1,0,0))) %>% 
        forecast::forecast(h = 4, level = 0.95)
      
    ))   
  
} )
ar_model4

names(ar_data)

# Manual RMSE for one column: the error is the first four hold-out NORTH
# values minus the point forecasts (`mean`); RMSE is the square root of the
# mean squared error. This is the step the question wants repeated per column.
error <- out_sample$NORTH[1:4]-ar_model4[["NORTH"]][[1]][["mean"]]
sqrt(mean(error^2))

Hi @TalhaAsif,
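You can work with the list output and use the `accuracy()` function from the forecast package. Called as below, without the held-out observations, it reports training-set error measures for each column.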

suppressPackageStartupMessages(library(forecast))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(magrittr))

# Sample data rebuilt from the dput() output in the question: a tibble with a
# POSIXct `Date` column (UTC) and 14 numeric columns, six rows in total.
pc <- structure(list(Date = structure(c(289094400, 297043200, 304992000, 
312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305, 
4.75015422578655, 4.59363957597172, 3.15315315315317), YORKSANDTHEHUMBER = c(4.0121120363361, 
5.45851528384282, 9.52380952380951, 6.04914933837431, 3.03030303030299, 
5.42099192618225), NORTHWEST = c(6.57894736842105, 6.95256660168939, 
6.50060753341436, 5.5904164289789, 4.59211237169096, 4.70041322314051
), EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127, 
5.22388059701494, 5.61465721040189, 4.64465584778958), WESTMIDS = c(4.65838509316771, 
4.74777448071216, 8.66855524079319, 6.56934306569344, 3.22896281800389, 
3.17535545023698), EASTANGLIA = c(6.74525212835624, 8.58895705521476, 
8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835
), OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589, 
8.56512141280351, 3.82269215128102, 2.11515863689776), OUTERMET = c(4.54545454545458, 
6.58505698607005, 7.36633663366336, 7.08225746956843, 4.3747847054771, 
1.68316831683168), LONDON = c(8.11719500480309, 10.3065304309196, 
6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978
), SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407, 
9.45489628557649, 2.46804759806079, 2.19354838709679), WALES = c(6.09418282548476, 
8.35509138381203, 7.40963855421687, 7.01065619742007, 1.15303983228513, 
3.47150259067357), SCOTLAND = c(5.15222482435597, 4.12026726057908, 
5.40106951871658, 8.67579908675796, -0.280112044817908, 2.94943820224719
), NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145, 
2.96397628818967, 6.06731620903454, 0.0835073068893502), UK = c(5.76890543055322, 
7.20302836425676, 7.39543442582184, 7.22885986848197, 3.23472252213347, 
2.95766398929048)), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))

# Preview the first rows to confirm the structure was rebuilt correctly.
head(pc)
#> # A tibble: 6 × 15
#>   Date                NORTH YORKSANDTH…¹ NORTH…² EASTM…³ WESTM…⁴ EASTA…⁵ OUTER…⁶
#>   <dttm>              <dbl>        <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 1979-03-01 00:00:00  4.07         4.01    6.58    4.98    4.66    6.75    6.71
#> 2 1979-06-01 00:00:00  5.52         5.46    6.95    8.20    4.75    8.59    7.54
#> 3 1979-09-01 00:00:00  7.28         9.52    6.50    6.91    8.67    8.47    9.47
#> 4 1979-12-01 00:00:00  4.75         6.05    5.59    5.22    6.57   10.7     8.57
#> 5 1980-03-01 00:00:00  4.59         3.03    4.59    5.61    3.23    4.84    3.82
#> 6 1980-06-01 00:00:00  3.15         5.42    4.70    4.64    3.18    4.85    2.12
#> # … with 7 more variables: OUTERMET <dbl>, LONDON <dbl>, SOUTHWEST <dbl>,
#> #   WALES <dbl>, SCOTLAND <dbl>, NIRELAND <dbl>, UK <dbl>, and abbreviated
#> #   variable names ¹​YORKSANDTHEHUMBER, ²​NORTHWEST, ³​EASTMIDS, ⁴​WESTMIDS,
#> #   ⁵​EASTANGLIA, ⁶​OUTERSEAST
#> # ℹ Use `colnames()` to see all variable names

# Training window: every observation dated before 1 March 1980.
in_sample <- dplyr::filter(pc, Date < '1980-03-01')
str(in_sample)
#> tibble [4 × 15] (S3: tbl_df/tbl/data.frame)
#>  $ Date             : POSIXct[1:4], format: "1979-03-01" "1979-06-01" ...
#>  $ NORTH            : num [1:4] 4.07 5.52 7.28 4.75
#>  $ YORKSANDTHEHUMBER: num [1:4] 4.01 5.46 9.52 6.05
#>  $ NORTHWEST        : num [1:4] 6.58 6.95 6.5 5.59
#>  $ EASTMIDS         : num [1:4] 4.98 8.2 6.91 5.22
#>  $ WESTMIDS         : num [1:4] 4.66 4.75 8.67 6.57
#>  $ EASTANGLIA       : num [1:4] 6.75 8.59 8.47 10.73
#>  $ OUTERSEAST       : num [1:4] 6.71 7.54 9.47 8.57
#>  $ OUTERMET         : num [1:4] 4.55 6.59 7.37 7.08
#>  $ LONDON           : num [1:4] 8.12 10.31 6.32 7.65
#>  $ SOUTHWEST        : num [1:4] 6.18 7.72 7.63 9.45
#>  $ WALES            : num [1:4] 6.09 8.36 7.41 7.01
#>  $ SCOTLAND         : num [1:4] 5.15 4.12 5.4 8.68
#>  $ NIRELAND         : num [1:4] 4.55 4.95 4.43 2.96
#>  $ UK               : num [1:4] 5.77 7.2 7.4 7.23

# Latest observation date available in the sample data.
max(pc[["Date"]])
#> [1] "1980-06-01 UTC"

# Hold-out window: observations dated on or after 1 March 1980.
out_sample <- dplyr::filter(pc, Date >= '1980-03-01')

str(out_sample)
#> tibble [2 × 15] (S3: tbl_df/tbl/data.frame)
#>  $ Date             : POSIXct[1:2], format: "1980-03-01" "1980-06-01"
#>  $ NORTH            : num [1:2] 4.59 3.15
#>  $ YORKSANDTHEHUMBER: num [1:2] 3.03 5.42
#>  $ NORTHWEST        : num [1:2] 4.59 4.7
#>  $ EASTMIDS         : num [1:2] 5.61 4.64
#>  $ WESTMIDS         : num [1:2] 3.23 3.18
#>  $ EASTANGLIA       : num [1:2] 4.84 4.85
#>  $ OUTERSEAST       : num [1:2] 3.82 2.12
#>  $ OUTERMET         : num [1:2] 4.37 1.68
#>  $ LONDON           : num [1:2] 1.3 2.15
#>  $ SOUTHWEST        : num [1:2] 2.47 2.19
#>  $ WALES            : num [1:2] 1.15 3.47
#>  $ SCOTLAND         : num [1:2] -0.28 2.95
#>  $ NIRELAND         : num [1:2] 6.0673 0.0835
#>  $ UK               : num [1:2] 3.23 2.96

# Keep only the value columns: `%<>%` pipes `ar_data` into select() and
# assigns the result back to `ar_data`.
ar_data <- in_sample
ar_data %<>% dplyr::select(-Date)

# Convert the columns to a time-series matrix and, for each column
# (MARGIN = 2), fit arima() with order = c(1,0,0) and forecast 4 steps ahead
# with a 95% interval. Each forecast is wrapped in list(), which is why a
# result prints as `$NORTH[[1]]` and is reached as ar_model4[[name]][[1]].
# NOTE(review): h = 4, but out_sample in this sample data has only 2 rows,
# so at most two forecasts can be compared with observed values here.
ar_model4 <- apply(ts(ar_data), 2, function(x){
  return(
    list(
      
      summary(arima(x, order = c(1,0,0))) %>% 
        forecast::forecast(h = 4, level = 0.95)

    ))   
  
} )


# Single-bracket indexing keeps the outer list, so the column name is printed.
ar_model4["NORTH"]
#> $NORTH
#> $NORTH[[1]]
#>   Point Forecast    Lo 95    Hi 95
#> 5       5.777252 3.531516 8.022988
#> 6       5.462437 3.113578 7.811297
#> 7       5.558931 3.200615 7.917247
#> 8       5.529355 3.170152 7.888557

# Apply accuracy() to each element of `x` and return the results as a list
# that keeps the names and order of `x`.
ar_acc <- function(x) {
  lapply(X = x, FUN = function(fc) accuracy(fc))
}

# Run ar_acc() on every column's one-element forecast list. No held-out data
# is supplied, so the measures printed below are "Training set" rows only.
lapply(X = ar_model4, FUN = ar_acc)
#> $NORTH
#> $NORTH[[1]]
#>                       ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.09475076 1.145805 0.963584 -6.057918 17.99253 0.5036177
#>                     ACF1
#> Training set -0.09326252
#> 
#> 
#> $YORKSANDTHEHUMBER
#> $YORKSANDTHEHUMBER[[1]]
#>                       ME     RMSE      MAE       MPE    MAPE      MASE
#> Training set -0.05977668 2.011166 1.649155 -10.99287 27.9057 0.5505529
#>                     ACF1
#> Training set -0.03437068
#> 
#> 
#> $NORTHWEST
#> $NORTHWEST[[1]]
#>                        ME      RMSE       MAE        MPE     MAPE      MASE
#> Training set -0.007876762 0.4967319 0.3958455 -0.7715418 6.450142 0.6841556
#>                   ACF1
#> Training set 0.1031595
#> 
#> 
#> $EASTMIDS
#> $EASTMIDS[[1]]
#>                      ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.1555894 1.140082 1.114796 -5.936614 18.99868 0.5399309
#>                    ACF1
#> Training set -0.2483479
#> 
#> 
#> $WESTMIDS
#> $WESTMIDS[[1]]
#>                      ME    RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.0138041 1.63487 1.481729 -6.982903 24.82222 0.7275999
#>                     ACF1
#> Training set -0.01556635
#> 
#> 
#> $EASTANGLIA
#> $EASTANGLIA[[1]]
#>                      ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.0527908 1.407758 1.093829 -3.317547 13.02043 0.7789557
#>                   ACF1
#> Training set 0.0227916
#> 
#> 
#> $OUTERSEAST
#> $OUTERSEAST[[1]]
#>                      ME     RMSE       MAE        MPE     MAPE      MASE
#> Training set 0.07618138 1.019316 0.8206062 -0.6609494 10.26132 0.6707601
#>                    ACF1
#> Training set 0.04985799
#> 
#> 
#> $OUTERMET
#> $OUTERMET[[1]]
#>                     ME     RMSE       MAE       MPE     MAPE      MASE
#> Training set 0.1269083 1.079639 0.9800819 -1.510839 17.25898 0.9469508
#>                     ACF1
#> Training set -0.05624214
#> 
#> 
#> $LONDON
#> $LONDON[[1]]
#>                         ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.0007377823 1.330424 1.087997 -2.612724 13.16195 0.4351184
#>                    ACF1
#> Training set -0.1843688
#> 
#> 
#> $SOUTHWEST
#> $SOUTHWEST[[1]]
#>                       ME     RMSE       MAE       MPE     MAPE      MASE
#> Training set -0.03993769 1.158442 0.8906344 -2.793223 11.79064 0.7743371
#>                    ACF1
#> Training set 0.02184324
#> 
#> 
#> $WALES
#> $WALES[[1]]
#>                      ME      RMSE       MAE       MPE     MAPE      MASE
#> Training set -0.1344581 0.6000549 0.4963522 -2.535543 7.413846 0.4130138
#>                    ACF1
#> Training set -0.1390781
#> 
#> 
#> $SCOTLAND
#> $SCOTLAND[[1]]
#>                      ME     RMSE      MAE       MPE   MAPE      MASE      ACF1
#> Training set 0.02780535 1.696759 1.387155 -7.032724 23.342 0.7447826 0.0891379
#> 
#> 
#> $NIRELAND
#> $NIRELAND[[1]]
#>                      ME      RMSE       MAE      MPE     MAPE      MASE
#> Training set -0.0126476 0.7465817 0.6154223 -4.26247 16.92764 0.7739144
#>                    ACF1
#> Training set 0.08377092
#> 
#> 
#> $UK
#> $UK[[1]]
#>                       ME      RMSE       MAE      MPE     MAPE      MASE
#> Training set -0.01330888 0.6562075 0.5557926 -1.21634 8.648642 0.9298837
#>                   ACF1
#> Training set 0.0168258

Created on 2022-08-18 by the reprex package (v2.0.1)

Hope this is helpful.

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.