Hi All,
Looking to get results per group in a better way where some of the columns have same starting names, end with sequential numbers & depend on previous column. The number of such columns can vary per group.
In below example, columns starting with Code & ending in sequential manner depend on previous Code number. Currently, working solution is manual per group in a list. Looking for a better way where it applies to either each list of dataframe OR can be applied in original long data without splitting into groups
Thanks for your help!!
# Sample Data
library(tidyverse)
df <- data.frame(
stringsAsFactors = FALSE,
check.names = FALSE,
ACCTDT = c(202309L,202309L,
202309L,202309L,202310L,202310L,202310L,202310L,
202311L,202311L,202311L,202311L,202309L,
202309L,202309L,202310L,202310L,202310L,202311L,
202311L,202311L),
`ACT/FC` = c("ACT","ACT","ACT",
"ACT","FC","FC","FC","FC","FC","FC","FC",
"FC","ACT","ACT","ACT","FC","FC","FC","FC",
"FC","FC"),
Country = c("CA","CA","CA",
"CA","CA","CA","CA","CA","CA","CA","CA","CA",
"CA","CA","CA","CA","CA","CA","CA","CA",
"CA"),
Type = c("A","A","A","A",
"A","A","A","A","A","A","A","A","B","B",
"B","B","B","B","B","B","B"),
Code = c("Code_1","Code_2","Code_3",
"Code_4","Code_1","Code_2","Code_3","Code_4","Code_1","Code_2",
"Code_3","Code_4","Code_1","Code_2","Code_3","Code_1","Code_2",
"Code_3","Code_1","Code_2","Code_3"),
Sales = c(100L,90L,80L,
120L,110L,NA,NA,NA,200L,NA,NA,NA,89L,95L,100L,
125L,NA,NA,225L,NA,NA),
Rates = c(0.1, 0.12,0.13,
0.14, 0.15, 0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23,0.24,0.25,
0.26,0.27,0.28,0.29,0.3,0.31)
)
# Need values for all Codes based on previous values of Codes & Rates
# Code_2 uses previous values from Code_1, Code_3 from Code_2, Code_4 from Code_3 and Code_New from last category Code_4
# Tried doing it in long grouped data structure with no success. Thus, making data wide instead and splitting data into groups as below
# Creating List by Type
df_list <- df %>%
group_by(Type) %>%
group_split()
df_list <- map(df_list, function(df){
df <- df %>%
pivot_wider(names_from = Code, values_from = Sales) %>%
mutate("Code_New" = NA)
return(df)
})
# Working solution - Manual and to be repeated per list of dataframes
# Getting values for Columns starting with Code and each of these column depend on previous column or category of Code if data was still in long format
for(i in which(df_list[[1]]$`ACT/FC` == "FC")){
df_list[[1]]$Code_2[i] = round(df_list[[1]]$Code_1[i-1] * (1 - df_list[[1]]$Rates[i-1]), 0)
df_list[[1]]$Code_3[i] = round(df_list[[1]]$Code_2[i-1] * (1 - df_list[[1]]$Rates[i-1]), 0)
df_list[[1]]$Code_4[i] = round(df_list[[1]]$Code_3[i-1] * (1 - df_list[[1]]$Rates[i-1]), 0)
df_list[[1]]$Code_New[i] = round(df_list[[1]]$Code_4[i-1] * (1 - df_list[[1]]$Rates[i-1]), 0)
}
# Looking to get the results such that calculations can be applied to all list of dataframes.