Hi All,
I use the below code to automate the forecast error checking process. So I have like 6 input Forecast files(F1,F2,F3,F4,F5,F6) and I have Error measurement levels file(contains different levels) and the code uses the information from the error measurement file to group the input file(F1,F2,..) to calculate the forecast accuracy. But the problem is I'm getting the same accuracy numbers at all levels.
Also I could be able to attach only 1 picture and I tried to fit in input file, error measurement file and output file in 1 picture. As you could see from the attached snapshot- output image shows accuracy numbers which are same across all levels as it shouldn't be.
If someone could please help me with this request. Thank you so much!
P.S.-I have edited my code above to reproduce the sample input data-(F1..F6) and Error measurement levels(used as a criteria in loop to group the input data) and the output data-after running the whole script.
Actually the script runs and it generates the output without any errors, but the problem is in the output itself, it supposed to show different accuracies at different levels instead it shows same accuracy at all levels. So I'm not sure on what I'm missing here to generate correct accuracy at all levels.
setwd("~/Level of Forecast/New folder")
library("openxlsx")
library("dplyr")
library("data.table")
library("tidyr")
#devtools::install_github("tidyverse/dtplyr")
#library("dtplyr")
levels = read.xlsx("Error Measurement Levels.xlsx", sheet = "EM_Levels")
outlist = list()
for (j in 1:3)
{
rdata = read.xlsx(paste0('F', j, '.xlsx'), sheet = paste0('F', j))
rdata[is.na(rdata)] <- 0
outdf = data.table(FL = paste0('F', j), EL = levels$NAME)
for (i in 1:nrow(levels))
{
level_i = c(unlist(strsplit(
levels$`ERROR.MEASUREMENT.LEVEL`[i], " - "
)), "Time")
mdata = rdata %>%
group_by_at(level_i) %>%
summarise(
ACTUALSALESUNITS = sum(ACTUALSALESUNITS),
FORECAST = sum(FORECAST)
)
mdata = setDT(mdata)
mdata[, ERROR := (FORECAST - ACTUALSALESUNITS)]
mdatasums = mdata[, lapply(.SD, sum), by = level_i, .SDcols = !"Time"]
acc = max(0, (1 - (
abs(sum(mdatasums$ERROR)) / sum(mdatasums$ACTUALSALESUNITS)
)))
outdf$ACCY[i] = acc
}
outdf = spread(outdf, EL, ACCY)
outlist[[paste0('F', j)]] = outdf
}
outdata <- do.call("rbind", outlist)
reorder = c("FL", paste0("E", 1:nrow(levels)))
setcolorder(outdata, reorder)
write.xlsx(outdata, "Level_of_Forecast_Results.xlsx")
Input.F1 <-
structure(
list(
`Prodhier2(MarketSegment)` = c(
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C"
),
`Prodhier6(Application)` = c(
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2"
),
`ProdHier10(Market.Family)` = c(
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y"
),
Product.ID = c(
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2"
),
Actual.delivery.location = c(
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2"
),
Sales.Territory.Realigned = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
Customer = c(
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2"
),
Time = c(
"Aug 2018",
"Sep 2018",
"Oct 2018",
"Nov 2018",
"Dec 2018",
"Jan 2019",
"Feb 2019",
"Mar 2019",
"Apr 2019",
"May 2019",
"Jun 2019",
"Jul 2019",
"Aug 2018",
"Sep 2018",
"Oct 2018",
"Nov 2018",
"Dec 2018",
"Jan 2019",
"Feb 2019",
"Mar 2019",
"Apr 2019",
"May 2019",
"Jun 2019",
"Jul 2019"
),
ACTUALSALESUNITS = c(1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1,
1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1),
FORECAST = c(
3.11248,
2.654922,
1.660692,
1.648912,
2.212118,
2.32017,
1.862171,
4.707202,
3.016398,
3.583942,
4.912196,
2.78406,
1.55624,
1.32746,
0.830346,
0.824457,
1.106058,
1.160085,
0.931085,
2.353601,
1.508199,
1.79197,
2.456098,
1.39203
)
),
row.names = c(NA,
24L),
class = "data.frame"
)
Input.F2 <-
structure(
list(
`Prodhier2(MarketSegment)` = c(
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C"
),
`Prodhier6(Application)` = c(
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2"
),
`ProdHier10(Market.Family)` = c(
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y"
),
Product.ID = c(
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2"
),
Actual.delivery.location = c(
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2"
),
Sales.Territory.Realigned = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
Customer = c(
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2"
),
Time = c(
"Aug 2018",
"Sep 2018",
"Oct 2018",
"Nov 2018",
"Dec 2018",
"Jan 2019",
"Feb 2019",
"Mar 2019",
"Apr 2019",
"May 2019",
"Jun 2019",
"Jul 2019",
"Aug 2018",
"Sep 2018",
"Oct 2018",
"Nov 2018",
"Dec 2018",
"Jan 2019",
"Feb 2019",
"Mar 2019",
"Apr 2019",
"May 2019",
"Jun 2019",
"Jul 2019"
),
ACTUALSALESUNITS = c(1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1,
1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1),
FORECAST = c(
0.685876,
0.708762,
0.586104,
1.23953,
1.29325,
0.664536,
1.755134,
1.523486,
0.984382,
1.728112,
0.822219,
0.607892,
0.342938,
0.35438,
0.293051,
0.619764,
0.646625,
0.332268,
0.877567,
0.761743,
0.492191,
0.864056,
0.411109,
0.303946
)
),
row.names = c(NA,
24L),
class = "data.frame"
)
Input.F3 <-
structure(
list(
`Prodhier2(MarketSegment)` = c(
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C",
"C"
),
`Prodhier6(Application)` = c(
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C1",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2",
"C2"
),
`ProdHier10(Market.Family)` = c(
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1X",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y",
"C1Y"
),
Product.ID = c(
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x1",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2",
"x2"
),
Actual.delivery.location = c(
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A1",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2",
"A2"
),
Sales.Territory.Realigned = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
Customer = c(
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer1",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2",
"customer2"
),
Time = c(
"Aug 2018",
"Sep 2018",
"Oct 2018",
"Nov 2018",
"Dec 2018",
"Jan 2019",
"Feb 2019",
"Mar 2019",
"Apr 2019",
"May 2019",
"Jun 2019",
"Jul 2019",
"Aug 2018",
"Sep 2018",
"Oct 2018",
"Nov 2018",
"Dec 2018",
"Jan 2019",
"Feb 2019",
"Mar 2019",
"Apr 2019",
"May 2019",
"Jun 2019",
"Jul 2019"
),
ACTUALSALESUNITS = c(1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1,
1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1),
FORECAST = c(
0.97302,
0.866456,
0.896104,
0.80321,
0.71581,
0.646458,
0.59278,
0.615512,
0.838848,
0.92426,
0.947328,
0.843384,
0.486515,
0.433232,
0.448052,
0.40161,
0.35791,
0.323232,
0.296395,
0.30776,
0.419424,
0.462135,
0.473668,
0.421696
)
),
row.names = c(NA,
24L),
class = "data.frame"
)
Error.Measurement.Level <-
structure(
list(
ERROR.MEASUREMENT.LEVEL = c(
"Prodhier2(MarketSegment)",
"Prodhier6(Application)",
"ProdHier10(Market.Family)",
"Product.ID",
"Prodhier2(MarketSegment) - Actual.delivery.location",
"Prodhier2(MarketSegment) - Sales.Territory.Realigned",
"Prodhier2(MarketSegment) - Customer",
"Prodhier2(MarketSegment) - Actual.delivery.location - Customer",
"Prodhier2(MarketSegment) - Actual.delivery.location - Sales.Territory.Realigned",
"Prodhier2(MarketSegment) - Sales.Territory.Realigned - Customer",
"Prodhier2(MarketSegment) - Actual.delivery.location - Sales.Territory.Realigned - Customer",
"Prodhier6(Application) - Actual.delivery.location",
"Prodhier6(Application) - Sales.Territory.Realigned",
"Prodhier6(Application) - Customer",
"Prodhier6(Application) - Actual.delivery.location - Customer",
"Prodhier6(Application) - Actual.delivery.location - Sales.Territory.Realigned",
"Prodhier6(Application) - Sales.Territory.Realigned - Customer",
"Prodhier6(Application) - Actual.delivery.location - Sales.Territory.Realigned - Customer",
"ProdHier10(Market.Family) - Actual.delivery.location",
"ProdHier10(Market.Family) - Sales.Territory.Realigned",
"ProdHier10(Market.Family) - Customer",
"ProdHier10(Market.Family) - Actual.delivery.location - Customer",
"ProdHier10(Market.Family) - Actual.delivery.location - Sales.Territory.Realigned",
"ProdHier10(Market.Family) - Sales.Territory.Realigned - Customer",
"ProdHier10(Market.Family) - Actual.delivery.location - Sales.Territory.Realigned - Customer",
"Product.ID - Actual.delivery.location",
"Product.ID - Sales.Territory.Realigned",
"Product.ID - Customer",
"Product.ID - Actual.delivery.location - Customer",
"Product.ID - Actual.delivery.location - Sales.Territory.Realigned",
"Product.ID - Sales.Territory.Realigned - Customer",
"Product.ID - Actual.delivery.location - Sales.Territory.Realigned - Customer"
),
NAME = c(
"E1",
"E2",
"E3",
"E4",
"E5",
"E6",
"E7",
"E8",
"E9",
"E10",
"E11",
"E12",
"E13",
"E14",
"E15",
"E16",
"E17",
"E18",
"E19",
"E20",
"E21",
"E22",
"E23",
"E24",
"E25",
"E26",
"E27",
"E28",
"E29",
"E30",
"E31",
"E32"
)
),
row.names = c(NA, 32L),
class = "data.frame"
)
Output.Level.of.forecast.results <-
structure(
list(
FL = c("F1", "F2", "F3"),
E1 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E2 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E3 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E4 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E5 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E6 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E7 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E8 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E9 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E10 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E11 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E12 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E13 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E14 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E15 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E16 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E17 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E18 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E19 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E20 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E21 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E22 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E23 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E24 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E25 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E26 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E27 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E28 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E29 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E30 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E31 = c(0.331842193548387,
0.609642612903226, 0.467574161290323),
E32 = c(0.331842193548387,
0.609642612903226, 0.467574161290323)),
row.names = c(NA,-3L),
class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x000001bcbfe31ef0>)