Hi All,
Need help with achieving right values on lagged data please. In this sample data, A_2 Code uses previous values from A_1, A_3 from A_2, A_4 from A_3 and New Code from last category A_4. If we had all the values in Sales column, lag works fine. However, because each category is dependent upon previous to fill NAs, the existing NA cause issues and can't pick up the right value here.
Any help would be greatly appreciated here!!
Sample Data with Incorrect results below
library(dplyr)
df <- data.frame(
stringsAsFactors = FALSE,
check.names = FALSE,
ACCTDT = c(202309L,202309L,
202309L,202309L,202310L,202310L,202310L,202310L,
202311L,202311L,202311L,202311L,202309L,
202309L,202309L,202310L,202310L,202310L,202311L,
202311L,202311L),
`ACT/FC` = c("ACT","ACT","ACT",
"ACT","FC","FC","FC","FC","FC","FC","FC",
"FC","ACT","ACT","ACT","FC","FC","FC","FC",
"FC","FC"),
Country = c("CA","CA","CA",
"CA","CA","CA","CA","CA","CA","CA","CA","CA",
"CA","CA","CA","CA","CA","CA","CA","CA",
"CA"),
Type = c("A","A","A","A",
"A","A","A","A","A","A","A","A","B","B",
"B","B","B","B","B","B","B"),
Code = c("A_1","A_2","A_3",
"A_4","A_1","A_2","A_3","A_4","A_1","A_2",
"A_3","A_4","A_1","A_2","A_3","A_1","A_2",
"A_3","A_1","A_2","A_3"),
Sales = c(100L,90L,80L,
120L,110L,NA,NA,NA,200L,NA,NA,NA,89L,95L,100L,
125L,NA,NA,225L,NA,NA)
)
# A_2 uses previous values from A_1, A_3 from A_2, A_4 from A_3 and New Code from last category A_4
df1 <- df %>%
# Applying Lead to keep categories right while using lags on each category
mutate(Code = lead(Code)) %>%
mutate(Code = recode(Code, "A_1" = "New code")) %>%
group_by(Country, Type, Code) %>%
# Applying Lag on Sales as value of A_2 depends on A_1, value of A_3 on A_2 and so on.
#A_1 was renamed as A-2 and similar approach for remaining categories within Code using lead to perform lag on right category
mutate(Sale2 = case_when(
`ACT/FC` == "ACT" ~ NA,
`ACT/FC` == "FC" ~ lag(Sales),
TRUE ~ Sales
)
)
# Works fine for 202310, ACT/FC as FC with Country CA, Type A and all categories in Code
# But for the rest of the data, it only grabs values for A-2 Code as initial values in Sales were NA for A_3 & A_4
Correct Result below in Sales2 Column
# Correct Result below in Sales2 Column
df_ans <- data.frame(
stringsAsFactors = FALSE,
check.names = FALSE,
ACCTDT = c(202309L,
202309L,202309L,202309L,202310L,202310L,
202310L,202310L,202310L,202311L,202311L,
202311L,202311L,202311L,202309L,202309L,202309L,
202310L,202310L,202310L,202310L,202311L,
202311L,202311L),
`ACT/FC` = c("ACT",
"ACT","ACT","ACT","FC","FC","FC","FC","FC",
"FC","FC","FC","FC","FC","ACT","ACT",
"ACT","FC","FC","FC","FC","FC","FC","FC"),
Country = c("CA","CA",
"CA","CA","CA","CA","CA","CA","CA","CA",
"CA","CA","CA","CA","CA","CA","CA",
"CA","CA","CA","CA","CA","CA","CA"),
Type = c("A","A",
"A","A","A","A","A","A","A","A","A","A",
"A","A","B","B","B","B","B","B","B",
"B","B","B"),
Code = c("A_1",
"A_2","A_3","A_4","A_1","A_2","A_3","A_4",
"New Code","A_1","A_2","A_3","A_4",
"New Code","A_1","A_2","A_3","A_1","A_2","A_3",
"New Code","A_1","A_2","A_3"),
Sales = c(100L,90L,
80L,120L,110L,NA,NA,NA,NA,200L,NA,NA,
NA,NA,89L,95L,100L,125L,NA,NA,NA,225L,
NA,NA),
Sales2 = c(NA,NA,NA,
NA,NA,100L,90L,80L,120L,NA,110L,100L,
90L,80L,NA,NA,NA,NA,89L,95L,100L,NA,
125L,89L)
)