Using dplyr to create a list of suburb names based on unique postcodes

scottyd22 · November 15, 2022, 12:42pm

Below is one approach that collapses all suburb names into one column separated by a comma. The Population column is NA for all rows because the postcodes in postcode_names do not exist in postcode_pop. I'm assuming these are both just subsets of larger datasets.

library(tidyverse)

postcode_names = data.frame(
  stringsAsFactors = FALSE,
  Postcode = c(4161L,4155L,4154L,4153L,
               4159L,4157L,4158L,4160L,4160L,4163L,4160L,4165L,
               4165L,4157L,4164L,4165L,4178L,4179L,4179L,4172L),
  SA2.name = c("Alexandra Hills",
               "Belmont - Gumdale","Belmont - Gumdale","Belmont - Gumdale",
               "Birkdale","Capalaba","Thorneside","Wellington Point",
               "Cleveland","Cleveland","Ormiston","Redland Bay",
               "Sheldon - Mount Cotton","Sheldon - Mount Cotton","Thornlands",
               "Victoria Point","Brisbane Port - Lytton",
               "Manly - Lota","Manly West","Murarrie")
  )

postcode_pop = data.frame(check.names = FALSE,
                          Postcode = c(4000L,4001L,4002L,4003L,
                                       4004L,4005L,4006L,4007L,4008L,4009L,4010L,4011L,
                                       4012L,4013L,4014L,4017L,4018L,4019L,4020L,4021L),
                          Population = c(93931L,43756L,21878L,
                                         10939L,6419L,25019L,82679L,31752L,1288L,1704L,11896L,
                                         26789L,46214L,13738L,50730L,81535L,25238L,81964L,
                                         21014L,17717L)
                          )

out = postcode_names %>%
  arrange(Postcode, SA2.name) %>%
  group_by(Postcode) %>%
  mutate(SA2.name = paste(SA2.name, collapse = ', ')) %>%
  ungroup() %>%
  distinct() %>%
  left_join(postcode_pop) %>%
  select(Postcode, Population, SA2.name)
#> Joining, by = "Postcode"

out
#> # A tibble: 14 × 3
#>    Postcode Population SA2.name                                           
#>       <int>      <int> <chr>                                              
#>  1     4153         NA Belmont - Gumdale                                  
#>  2     4154         NA Belmont - Gumdale                                  
#>  3     4155         NA Belmont - Gumdale                                  
#>  4     4157         NA Capalaba, Sheldon - Mount Cotton                   
#>  5     4158         NA Thorneside                                         
#>  6     4159         NA Birkdale                                           
#>  7     4160         NA Cleveland, Ormiston, Wellington Point              
#>  8     4161         NA Alexandra Hills                                    
#>  9     4163         NA Cleveland                                          
#> 10     4164         NA Thornlands                                         
#> 11     4165         NA Redland Bay, Sheldon - Mount Cotton, Victoria Point
#> 12     4172         NA Murarrie                                           
#> 13     4178         NA Brisbane Port - Lytton                             
#> 14     4179         NA Manly - Lota, Manly West

Created on 2022-11-15 with reprex v2.0.2.9000