Hi! I would like to better understand arules and association rules (market basket analysis).
What exactly is this output telling me? What is a good range of confidence values to use?
How can I make my reporting more understandable?
# Install the packages needed to perform Market Basket Analysis.
# Data was downloaded from DART with timeframe 1/1/2019 - 4/30/2019.
# Note: ddply() is a function provided by plyr, not a package of its own,
# so it must not appear in the list of packages to install.
required_packages <- c(
  "arules", "arulesViz", "tidyverse", "readxl", "plyr", "ggplot2", "knitr",
  "lubridate"
)
# Install only what is missing, so re-running the script does not reinstall
# every package each time.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Read the Excel workbook into an R data frame.
library(readxl)
Customer_Retail_Data <- read_excel("C:/Users/avu/Desktop/Marketing Directives/Customer Retail Data 2019 YTD 4302019.xlsx")
# View the data (opens the interactive data viewer).
View(Customer_Retail_Data)
# Use ddply to group the rows by CustomerID and Date and collapse each group
# into a single row: all CategoryID values bought by that customer on that
# date, joined into one string separated by ",".
library(plyr)
transactionData <- ddply(
  Customer_Retail_Data, c("CustomerID", "Date"),
  # unique() drops categories repeated within one basket; arules does not
  # accept duplicated items inside a transaction when the file is read back.
  function(df1) paste(unique(df1$CategoryID), collapse = ",")
)
# View the grouped data.
View(transactionData)
# CustomerID and Date are not used in rule mining, so drop both columns and
# keep only the collapsed item string.
transactionData$CustomerID <- NULL
transactionData$Date <- NULL
# Rename the remaining column to "items".
colnames(transactionData) <- c("items")
# Show transactionData. This one-row-per-basket layout is the "basket" format.
View(transactionData)
# Write the baskets to disk. quote = FALSE keeps the commas as item
# separators; write.csv still writes the column name "items" as the first
# line of the file.
write.csv(transactionData, "C:/Users/avu/Desktop/Marketing Directives/CustomerRetailData2019YTD.csv", quote = FALSE, row.names = FALSE)
# Load the basket file into an object of class "transactions".
library(arules)
ctr <- read.transactions(
  "C:/Users/avu/Desktop/Marketing Directives/CustomerRetailData2019YTD.csv",
  format = "basket",
  sep = ",",
  # The file was produced by write.csv, so its first line is the column
  # header ("items"). Skip it; otherwise it is read as a one-item transaction
  # and "items" shows up as a product in the frequency plots and rules.
  skip = 1
)
# Summarise the transactions: counts, most frequent items and basket sizes.
# (Author's observation: the basket-size distribution is right skewed.)
summary(ctr)
# Create item frequency plots for the top 20 items.
# Install the RColorBrewer colour package only when it is not yet available.
# requireNamespace() checks availability without attaching the package.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
# Attach RColorBrewer so brewer.pal() is available; library() raises an error
# if the package still cannot be loaded.
library(RColorBrewer)
# Absolute plot: number of transactions that contain each item.
itemFrequencyPlot(ctr, topN = 20, type = "absolute", col = brewer.pal(8, "Pastel2"), main = "Absolute Item Frequency Plot")
# Relative plot: share of all transactions that contain each item.
itemFrequencyPlot(ctr, topN = 20, type = "relative", col = brewer.pal(8, "Pastel2"), main = "Relative Item Frequency Plot")
# Mine association rules with minimum support 0.001 and minimum confidence 0.6.
# minlen = 1 also permits rules with an empty left-hand side ({} => item);
# use minlen = 2 if only rules with at least one antecedent item are wanted.
association.rules <- apriori(ctr, parameter = list(supp = 0.001, conf = 0.6, minlen = 1))
# Overview of the mined rules: rule count, length distribution and quality
# measures (support, confidence, lift).
summary(association.rules)
# Show up to the first 285 rules. The index is capped at the number of rules
# actually found, so this does not fail with a subscript error when fewer
# rules are mined.
rules_to_show <- seq_len(min(285, length(association.rules)))
inspect(association.rules[rules_to_show])