I am trying to calculate the total sales per each category in this picture.
For instance, I want to know the total sales in the "0%" category which has 28% of customers who never buy product type X (just an example).
library(tidyverse)
library(scales)
#>
#> Attaching package: 'scales'
#> The following object is masked from 'package:purrr':
#>
#> discard
#> The following object is masked from 'package:readr':
#>
#> col_factor
library(forcats)
library(dplyr)
library(ggplot2)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(reprex)
library(cowplot)
#>
#> Attaching package: 'cowplot'
#> The following object is masked from 'package:ggplot2':
#>
#> ggsave
mydata = structure(list(CUSTOMER_NUMBER = c(763420229L, 763599479L, 763666659L,
209200247L, 220251006L, 220437298L, 230184233L, 230200334L, 236200563L,
230463752L, 230466880L, 230466880L, 230466880L, 230469588L, 230469588L,
230469588L, 230479567L, 230510950L, 230543888L, 230544743L, 230570530L,
230572134L, 240464708L, 240466127L, 240467476L, 240474105L, 240485068L,
240486091L, 240486091L, 240501296L, 240507813L, 240522378L, 240522378L,
240522378L, 240529459L, 236200563L, 236823615L, 240208537L, 240436982L,
240595281L, 246106706L, 301200230L, 301563787L, 304201229L, 328186705L,
328200045L, 341200204L, 342200014L, 230572134L, 230572134L, 230573124L,
230600067L, 230600067L, 230601356L, 230601356L, 230601375L, 230604671L,
230617650L, 230634825L, 230654949L, 230663394L, 240529459L, 240529459L,
240573686L, 240573686L, 240574566L, 240574566L, 240596586L, 240604202L,
240604202L, 240604202L, 240607185L, 240609376L, 240609376L, 240609376L,
240609376L, 240659645L, 240659645L, 240667986L, 240667986L, 240667986L,
240681719L, 240681719L, 240681719L, 240692717L, 240692717L, 240694632L,
345200233L, 370200190L, 373200660L, 373200898L, 373201028L, 373295333L,
373521838L, 408136882L, 408176824L, 410200742L, 410264679L, 422885464L,
448200196L, 230670568L, 230683250L, 230683250L, 230727285L, 230783963L,
230784521L, 230784521L, 230797923L, 230797923L, 230805368L, 230868831L,
230868831L, 230868831L, 240702261L, 240702261L, 240710402L, 240710402L,
240719797L, 240719797L, 240735777L, 240736658L, 240760283L, 240760283L,
240770209L, 240781099L, 240794754L, 240800496L, 240800496L, 240800496L,
240812071L, 240828640L, 240846280L, 240846280L, 240846280L, 240852741L,
240852741L, 240860714L, 240865473L, 246116767L, 236106572L, 236106572L,
236130384L, 236165055L, 236165120L, 236175438L, 236200098L, 236200216L,
236200216L, 236200260L, 236200260L, 236200280L, 236200280L, 236200281L,
236200281L, 236200356L, 236200430L, 236200430L, 236200481L, 236200497L,
236200620L, 236200658L, 236200664L, 236200675L, 236203632L, 236218371L,
246125771L, 246143936L, 246143936L, 246172812L, 246172812L, 246186630L,
246193305L, 246200037L, 246200037L, 246200039L, 246200172L, 246200193L,
246200193L, 246200222L, 236222693L, 236232873L, 236241351L, 236265643L,
236283022L, 236284975L, 236284975L, 236284975L, 236287761L, 236287761L,
236317551L, 236341958L, 236363175L, 456200747L, 456248560L, 610201135L,
610201176L, 610421956L, 620200254L, 704202681L, 704792324L, 710231150L
), Cust_Segments = structure(c(5L, 5L, 6L, 5L, 1L, 5L, 5L, 3L,
4L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 1L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 5L, 4L, 3L, 5L, 3L, 5L,
5L, 3L, 3L, 1L, 3L, 1L, 5L, 5L, 5L, 5L, 1L, 5L, 5L, 5L, 5L, 5L,
3L, 5L, 5L, 3L, 5L, 5L, 5L, 3L, 3L, 1L, 1L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
3L, 1L, 3L, 4L, 3L, 5L, 5L, 5L, 3L, 3L, 5L, 5L, 6L, 5L, 5L, 5L,
5L, 5L, 5L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
3L, 1L, 1L, 5L, 3L, 3L, 5L, 5L, 5L, 1L, 5L, 1L, 1L, 1L, 3L, 3L,
5L, 3L, 2L, 1L, 1L, 5L, 5L, 3L, 3L, 1L, 5L, 5L, 5L, 5L, 5L, 5L,
1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 6L, 5L, 2L, 5L, 3L, 5L, 1L, 1L,
3L, 3L, 5L, 5L, 3L, 3L, 3L, 5L, 1L, 1L, 5L, 6L, 5L, 5L, 5L, 1L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 5L, 2L, 4L, 3L, 1L, 5L, 2L,
5L), .Label = c("0", "1", "2", "3", "4", "5"), class = "factor"),
QtySold = c(1L, 1L, 1L, 1L, 2L, -1L, 1L, 1L, 1L, 7L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 27L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, -1L, -1L, 1L, 1L,
1L, 4L, 1L, 0L, 2L, 2L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 2L, 3L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 3L, 2L, 4L, 6L, 2L, 1L, 1L, 4L, 6L, 3L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 0L, -1L, 2L, 1L, -1L, 0L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 2L, 2L, 2L,
1L, 1L, 6L, 6L, 3L, 3L, 1L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 2L,
3L, 5L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, 3L, 2L, 3L, 1L, 1L,
2L, 1L, 6L, 3L, 3L, 2L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 4L, 1L, 1L, 1L, 1L, 1L, 3L, 4L, 3L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L,
3L, 2L, 2L, 1L, 1L, 2L, 1L, -1L, 1L, 3L, 1L, 1L, 1L), PRODUCT_SUB_LINE_DESCR = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("PRIVATE LABEL", "SUNDRY"
), class = "factor"), Sales = c(165.95, 165.95, 165.95, 80.58,
0, -84.33, 0, 93.7, 0, 385, 67.25, 134.5, 135.2, 49.95, 49.95,
54.95, 67.25, 67.25, 118.78, 56.95, 1825.2, 67.25, 103.42,
100.72, 134.5, 67.25, 134.5, 125.02, 62.51, 67.6, 67.25,
97.88, 97.88, 97.88, 122.3, -94.85, -93.7, 89.01, 93.7, 79.64,
374.8, 79.64, 0, 159.28, 187.4, 83.47, 84.33, 523.56, 67.6,
70.5, 61.15, 0, 67.6, 67.25, 67.6, 67.6, 67.25, 109.3, 67.25,
135.2, 202.8, 122.3, 122.3, 67.25, 67.25, 50.36, 108.16,
70.5, 63.87, 63.87, 63.87, 187.53, 134.5, 269, 404.2, 135.2,
63.19, 63.52, 250.04, 375.06, 188.52, 62.74, 62.74, 63.06,
134.5, 141, 134.5, 87, 93.7, 70.28, 0, -67, 149.7, 82.52,
-94.85, 0, 93.7, 0, 147.22, 189.7, 68.95, 61.5, 132, 58.61,
46.74, 67.25, 70.5, 201.75, 201.75, 67.25, 121.04, 121.04,
121.68, 67.25, 70.5, 377.04, 393.3, 201.75, 201.75, 64.12,
170.85, 54.08, 56.4, 201.75, 134.5, 68.95, 118.6, 118.6,
178.8, 259.8, 201.75, 50.36, 50.36, 54.08, 68.95, 71.95,
194.85, 129.12, 211.5, 127.78, 192.66, 66.97, 70.5, 113.9,
69.3, 363.12, 182.04, 189.96, 117.8, 122.32, 62.05, 249.48,
58.61, 63.45, 128.44, 31.01, 31.01, 62.02, 58.18, 275.8,
62.5, 71.95, 62.51, 56.95, 65.95, 201.75, 244.6, 178.97,
67.25, 67.6, 67.6, 67.25, 64.95, 134.5, 61.15, 70.5, 134.5,
67.25, 134.5, 71.95, 64.95, 65.83, 67.6, 336.25, 64.56, 64.56,
64.9, 67.6, 202.8, 103.42, 129.9, 67.69, 69.74, 179.9, 0,
-87.26, 93.7, 224.88, 67, 93.7, 93.7), MarginDollars = c(57.85,
57.85, 57.85, 34.1, -94.36, -37.85, -46.48, 47.22, -47.18,
93.8, 25.65, 51.3, 50.76, 8.35, 8.35, 10.82, 25.65, 25.65,
35.58, 15.35, 685.26, 25.65, 20.22, 22.72, 51.3, 25.65, 51.3,
41.82, 20.91, 26, 25.65, 14.68, 14.68, 13.44, 39.1, -47.43,
-47.22, 42.53, 47.22, 33.16, 188.88, 33.16, 0, 66.32, 94.44,
36.05, 37.85, 239.04, 25.38, 26.37, 19.55, -41.6, 25.38,
25.65, 25.17, 25.38, 25.65, 26.1, 25.65, 50.34, 78, 39.1,
39.1, 25.65, 25.65, 11.36, 23.72, 26.37, 22.27, 22.27, 22.27,
62.73, 51.3, 102.6, 154.6, 50.34, 21.59, 21.3, 83.64, 125.46,
61.23, 21.14, 21.14, 20.63, 51.3, 52.74, 51.3, 40.52, 47.22,
22.86, 0, -19.58, 54.86, 35.1, -47.67, 0, 47.22, -47.18,
54.26, 96.74, 27.35, 19.9, 43.74, 17.01, 5.14, 25.65, 26.37,
76.95, 76.95, 25.65, 37.84, 37.84, 37.24, 25.65, 26.37, 123.72,
128.52, 76.95, 76.95, 19.99, 46.05, 11.86, 12.27, 76.95,
51.3, 27.35, 35.4, 35.4, 52.14, 51.8, 76.95, 11.36, 11.36,
11.86, 27.35, 27.82, 70.05, 45.92, 79.11, 44.58, 66, 22.84,
26.37, 30.7, 27.08, 113.52, 57.24, 57.57, 33.36, 34.06, 20.45,
79.76, 17.01, 19.32, 44, 8.68, 8.68, 17.36, 16.58, 109.4,
18.37, 27.82, 20.91, 12.82, 24.35, 76.95, 78.2, 48.28, 25.65,
25.38, 25.38, 25.65, 23.35, 51.3, 19.55, 26.37, 51.3, 25.65,
51.3, 27.82, 20.82, 23.61, 25.38, 128.25, 22.96, 22.96, 22.68,
26, 76.14, 20.22, 46.7, 23.56, 23.26, 85.54, -46.48, -39.84,
47.22, 85.44, 19.82, 47.22, 47.22)), row.names = c(NA, -201L
), class = c("data.table",
"data.frame"))
result1 = mydata %>%
filter(!is.na(QtySold), !is.na(PRODUCT_SUB_LINE_DESCR), QtySold > 0, Sales > 0, MarginDollars > 0) %>%
group_by(CUSTOMER_NUMBER,PRODUCT_SUB_LINE_DESCR,Cust_Segments) %>%
summarise(Quants = sum(QtySold),
Total_Sales = sum(Sales)) %>%
spread(PRODUCT_SUB_LINE_DESCR,Quants,fill=0) %>%
mutate(Total_Orders = `PRIVATE LABEL` + SUNDRY,
PL_Order_Percentage= round((`PRIVATE LABEL` / Total_Orders) * 100),
category = cut(PL_Order_Percentage,breaks = c(0,1,11,21,31,41,51,61,71,81,91,100,Inf),
labels = c('0%','1%-10%','11%-20%',
'21%-30%','31%-40%','41%-50%',
'51%-60%','61%-70%','71%-80%','81%-90%','91%-99%','100%'),include.lowest = T,right = F))
reult2 = result1 %>% group_by(category, Total_Sales) %>%
summarise(count=n()) %>%
mutate(percent= paste0(round(count/sum(count)*100),'%'))
Created on 2018-07-17 by the reprex
package (v0.2.0).
My issue is that the code does not group the same customer ID/Number together and it stills spread customers based on the product type purchased. Say customer A buys 2 product X at $3 and 1 product Y at $2. The result gives me two rows for customer A:
Customer ID Product X (quantity) Product Y(quantity) Total Sales
A 2 0 $3
A 0 1 $2
but I want this following result:
Customer ID Product X (quantity) Product Y(quantity) Total Sales
A 2 1 $5