Hello,
I do not know how Amazon does such a great job of keeping track of EACH customer's shopping habits and giving every customer individual suggestions. I do not think I am ready to take on that kind of task yet. If you know a method or system for doing this, please recommend a source or a book that I can read up on.
library(tidyverse)
library(dplyr)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(reprex)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#>
#> hour, isoweek, mday, minute, month, quarter, second, wday,
#> week, yday, year
#> The following object is masked from 'package:base':
#>
#> date
# Reprex sample data: 50 rows x 6 columns, carrying the class vector of a
# grouped tibble keyed by CUSTOMER_NUMBER and ITEM_CATEGORY_DESCR.
# Rows are sorted by those two keys, so both columns are written here as
# run-length encodings (value + repeat count) instead of 50 literals each.
#
# NOTE(review): the `vars` / `indices` / `group_sizes` / `labels` attributes
# look like the grouped_df layout written by dplyr before 0.8 -- confirm
# that your dplyr version accepts it, or rebuild the grouping with
# group_by() after loading.
data <- structure(
  list(
    # 4 customers contributing 5, 28, 11 and 6 rows respectively.
    CUSTOMER_NUMBER = rep(
      c(0L, 138990000L, 209020998L, 209100072L),
      times = c(5L, 28L, 11L, 6L)
    ),
    # One entry per (customer, category) group, repeated by group size.
    ITEM_CATEGORY_DESCR = rep(
      c(
        "BARRIER COVERS",
        "FACEMASKS",
        "GLOVES LATEX",
        "GLOVES NITRILE",
        "GLOVES LATEX",
        "GLOVES NITRILE",
        "SURFACE DISINFECTANT WIPES",
        "BITE REGISTRATION MATERIAL",
        "ENDODONTIC HAND FILES",
        "FACEMASKS",
        "GLOVES LATEX",
        "GLOVES NITRILE",
        "IMPRESSION MATERIAL VINYL POLYSILOXANE",
        "SPONGES",
        "ANGLES PROPHY DISPOSABLE"
      ),
      times = c(1L, 2L, 1L, 1L, 1L, 18L, 9L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 6L)
    ),
    # Date column stored as numeric day counts (10 rows per line).
    month = structure(
      c(
        17471, 17410, 17622, 17198, 17410, 17106, 16922, 17045, 17075, 17106,
        17198, 17226, 17257, 17257, 17287, 17318, 17379, 17410, 17440, 17471,
        17501, 17532, 17591, 17622, 16953, 17136, 17198, 17379, 17410, 17440,
        17471, 17501, 17563, 16922, 17106, 17226, 17106, 17410, 17014, 17410,
        17106, 17318, 17379, 17106, 16983, 17106, 17198, 17287, 17379, 17440
      ),
      class = "Date"
    ),
    Count = c(
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
      1L, 2L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 3L,
      3L, 4L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
      1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
      2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L
    ),
    `PRIVATE LABEL` = c(
      0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
      1, 11, 0, 7, 1, 1, 1, 2, 1, 12,
      5, 5, 8, 4, 5, 2, 3, 0, 0, 5,
      0, 2, 3, 0, 0, 0, 5, 0, 0, 0,
      10, 0, 3, 2, 0, 0, 0, 0, 0, 0
    ),
    SUNDRY = c(
      3, 5, 24, 30, 1, 2, 1, 2, 2, 0,
      0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 2, 3, 0,
      1, 0, 0, 1, 2, 3, 0, 1, 10, 10,
      0, 1, 0, 0, 3, 3, 3, 3, 2, 3
    )
  ),
  row.names = c(NA, -50L),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  vars = c("CUSTOMER_NUMBER", "ITEM_CATEGORY_DESCR"),
  drop = TRUE,
  # 0-based row positions of each of the 15 groups.
  indices = list(
    0L, 1:2, 3L, 4L, 5L, 6:23, 24:32, 33L,
    34:35, 36:37, 38:39, 40L, 41:42, 43L, 44:49
  ),
  group_sizes = c(1L, 2L, 1L, 1L, 1L, 18L, 9L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 6L),
  biggest_group_size = 18L,
  # One row per group: the key values that identify it.
  labels = structure(
    list(
      CUSTOMER_NUMBER = rep(
        c(0L, 138990000L, 209020998L, 209100072L),
        times = c(4L, 3L, 7L, 1L)
      ),
      ITEM_CATEGORY_DESCR = c(
        "BARRIER COVERS",
        "FACEMASKS",
        "GLOVES LATEX",
        "GLOVES NITRILE",
        "GLOVES LATEX",
        "GLOVES NITRILE",
        "SURFACE DISINFECTANT WIPES",
        "BITE REGISTRATION MATERIAL",
        "ENDODONTIC HAND FILES",
        "FACEMASKS",
        "GLOVES LATEX",
        "GLOVES NITRILE",
        "IMPRESSION MATERIAL VINYL POLYSILOXANE",
        "SPONGES",
        "ANGLES PROPHY DISPOSABLE"
      )
    ),
    row.names = c(NA, -15L),
    class = "data.frame",
    vars = c("CUSTOMER_NUMBER", "ITEM_CATEGORY_DESCR"),
    drop = TRUE
  )
)
Created on 2018-07-26 by the reprex
package (v0.2.0).
My goal is simple: I want to see whether each unique CUSTOMER_NUMBER
keeps buying
(i.e. repeats the purchase of) the same item over time, or whether he or she tries my house brand one month and switches to the branded one the next. The scope here is limited to each ITEM_CATEGORY_DESCR
that has a Private Label product: I want to see whether customers keep buying it over the months or whether at some point they just stop!
This may seem like a lot to ask, but if you have any suggestions on how to do this, that would be great.
Thanks, all!