I am trying to sort groups into categories based on the number of duplicates.
The following code tells us how many duplicates are in each group, with the data grouped based on the 'Branch Code':
# Count the rows in each `Branch Code` group (one count per group).
# The grouping column is referenced by its bare name: inside dplyr verbs,
# `mergedfirst$...` bypasses the data mask and names the grouping column
# after the whole expression instead of `Branch Code`.
repeatsfirst <- group_size(group_by(mergedfirst, `Branch Code`))
Giving us the following output:
[1] 7 15
I then want to create a column in which each row holds the number of times its group (Branch Code) is repeated.
It should look something like this:
Branch Code Number of repeats
80012 7
80012 7
80012 7
80012 7
80012 7
80012 7
80012 7
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
504246 15
A minimal reproducible example of the 'mergedfirst' dataset is shown below:
# Example `mergedfirst` tibble: 22 rows, the first 7 for branch 80012 and
# the remaining 15 for branch 504246. Columns that differ between the two
# branches repeat their values with times = c(7, 15); the others are
# constant over all 22 rows.
structure(
  list(
    `Branch Code` = rep(c(80012, 504246), times = c(7, 15)),
    `Location Type` = rep(c("Rural", "Urban"), times = c(7, 15)),
    Type = rep("LM", 22),
    Status = rep("Open", 22),
    Segment = rep("Agency", 22),
    `Multiple (partner that owns multiple branches)` = rep("Multiple 13", 22),
    RetailType = rep("Convenience", 22),
    # Stored as character strings, not numbers, exactly as in the source data.
    `Volume of transactions` = rep(c("1130", "964"), times = c(7, 15)),
    `Open hours` = rep(c("108.25", "108.50"), times = c(7, 15)),
    `X Pos` = rep(c(551872, 433091), times = c(7, 15)),
    `Y Pos` = rep(c(170269, 265060), times = c(7, 15)),
    Urbanity = rep(c("Medium Density", "Low Density"), times = c(7, 15))
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -22L)
)
I would be so grateful for a helping hand!