Clean table_Subgroups_pivot_longer

Hi everyone,

I am trying to clean another table. This one is with more subgroups.

I need to pivot_longer and pivot_wider. See below the code that @FactOREO gave me (thanks again :slight_smile: )for another example, the dataframe which i would like to clean (start_17) and the outcome that i would like to get (outcome_17)

Thanks very much in advance

start_17 dataframe:

structure(list(Department = c("London", "Male", "2017", "2017,Quartal1",
"2017,Quartal 2", "2017,Quartal 3", "2017,Quartal4", "Female",
"2017", "2017,Quartal1", "2017,Quartal 2", "2017,Quartal 3",
"2017,Quartal4", "Child", "2017", "2017,Quartal4", "Home", "2017",
"2017,Quartal 2", "2017,Quartal 3", "2017,Quartal4", "Garden",
"2017", "2017,Quartal1", "2017,Quartal 2", "2017,Quartal 3",
"2017,Quartal4", "Paris", "Male", "2017", "2017,Quartal1", "2017,Quartal4",
"Female", "2017", "2017,Quartal 2", "2017,Quartal 3", "2017,Quartal4",
"Child", "2017", "2017,Quartal4", "Garden", "2017", "2017,Quartal1",
"2017,Quartal 2", "2017,Quartal 3", "2017,Quartal4", "Rome",
"Male", "2017", "2017,Quartal1", "2017,Quartal 2", "2017,Quartal 3",
"2017,Quartal4", "Female", "2017", "2017,Quartal1", "2017,Quartal 2",
"2017,Quartal 3", "2017,Quartal4", "Child", "2017", "2017,Quartal4",
"Total"), TotalSales = c(NA, NA, NA, 56, 81, 103, 88, NA, NA,
368, 296, 369, 324, NA, NA, 103, NA, NA, 230, 225, 334, NA, NA,
787, 806, 925, 918, NA, NA, NA, 242, 287, NA, NA, 156, 161, 169,
153, NA, NA, 2691, NA, 76, 62, 66, 70, NA, NA, NA, 1176, 1185,
1432, 1408, NA, NA, 1376, 1492, 1830, 1628, NA, NA, 289, 21962
), TotalDamage = c(NA, NA, NA, "2", "2", "1", "2", NA, NA, "9",
"4", "4", "3", NA, NA, "1", NA, NA, "7", "6", "11", NA, NA, "2",
"4", "5", "4", NA, NA, NA, "NA", "NA", NA, NA, "NA", "NA", "NA",
"NA", NA, NA, "2", NA, "NA", "NA", "NA", "NA", NA, NA, NA, "NA",
"NA", "4", "1", NA, NA, "34", "19", "40", "37", NA, NA, "6",
"210"), TotalDamagePercent = c(NA, NA, NA, 3.57, 2.46, 9.7, 2.72,
NA, NA, 2.44, 1.35, 1.08, 9.25, NA, NA, 9.7, NA, NA, 3.04, 2.66,
3.29, NA, NA, 2.54, 4.96, 5.4, 4.35, NA, NA, NA, 0, 0, NA, NA,
0, 0, 0, 0, NA, NA, 7.43, NA, 0, 0, 0, 0, NA, NA, NA, 0, 0, 2.79,
7.1, NA, NA, 2.47, 1.27, 2.18, 2.27, NA, NA, 2.07, 0.96)), class = "data.frame", row.names = c(NA,
63L))

this is the outcome_17 that i would like to get:

structure(list(Year = c(2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017), Quartal = c("2017-Q1",
"2017-Q2", "2017-Q3", "2017-Q4", "2017-Q1", "2017-Q2", "2017-Q3",
"2017-Q4", "2017-Q4", "2017-Q2", "2017-Q3", "2017-Q4", "2017-Q1",
"2017-Q2", "2017-Q3", "2017-Q4", "2017-Q1", "2017-Q4", "2017-Q2",
"2017-Q3", "2017-Q4", "2017-Q4", "2017-Q1", "2017-Q2", "2017-Q3",
"2017-Q4", "2017-Q1", "2017-Q2", "2017-Q3", "2017-Q4", "2017-Q1",
"2017-Q2", "2017-Q3", "2017-Q4", "2017-Q4", NA), City = c("London",
"London", "London", "London", "London", "London", "London", "London",
"London", "London", "London", "London", "London", "London", "London",
"London", "Paris", "Paris", "Paris", "Paris", "Paris", "Paris",
"Paris", "Paris", "Paris", "Paris", "Rome", "Rome", "Rome", "Rome",
"Rome", "Rome", "Rome", "Rome", "Rome", "Rome"), Department = c("Male",
"Male", "Male", "Male", "Female", "Female", "Female", "Female",
"Child", "Home", "Home", "Home", "Garden", "Garden", "Garden",
"Garden", "Male", "Male", "Female", "Female", "Female", "Child",
"Garden", "Garden", "Garden", "Garden", "Male", "Male", "Male",
"Male", "Female", "Female", "Female", "Female", "Child", "Total"
), TotalSales = c(56, 81, 103, 88, 368, 296, 369, 324, 103, 230,
225, 334, 787, 806, 925, 918, 242, 287, 156, 161, 169, 2691,
76, 62, 66, 70, 1176, 1185, 1432, 1408, 1376, 1492, 1830, 1628,
289, 21809), TotalDamage = c("2", "2", "1", "2", "9", "4", "4",
"3", "1", "7", "6", "11", "2", "4", "5", "4", "NA", "NA", "NA",
"NA", "NA", "2", "NA", "NA", "NA", "NA", "NA", "NA", "4", "1",
"34", "19", "40", "37", "6", "210"), TotalDamagePercent = c(3.57,
2.46, 9.7, 2.72, 2.44, 1.35, 1.08, 9.25, 9.7, 3.04, 2.66, 3.29,
2.54, 4.96, 5.4, 4.35, 0, 0, 0, 0, 0, 7.43, 0, 0, 0, 0, 0, 0,
2.79, 7.1, 2.47, 1.27, 2.18, 2.27, 2.07, 0.96)), class = "data.frame", row.names = c(NA,
36L))

and the code is this one:


city <- c("London", "Paris", "Rome")
department <-c("Male", "Female","Child", "Home", "Garden")
quartal <- c("2017-Q1", "2017-Q2","2017-Q3","2017-Q4")


d17<-start_17|>
mutate(
    Group = if_else(Department %in% department, Department, NA_character_)
  ) |>
    mutate(TotalDamagePercent = ifelse(is.na(TotalDamagePercent), 0, TotalDamagePercent)) |> 
  fill(
    Group, .direction = 'down'
  ) |>
  pivot_longer(
    cols = TotalSales:TotalDamagePercent, names_to = 'category', values_to = 'value'
  ) |>
   dplyr::filter(
    Department != Group
  ) |>
  pivot_wider(
    values_from = 'value', names_from = 'category'
  )|>
  rename(
    'Department' = Group,
    'Quartal' = Department
  )|>
   select(Department,Quartal, everything()
  )|>
   add_column(year=2017
  )

Below is one approach that does not require the pivot functions. The outcome matches outcome_17 except for one line, which I believe is an issue with start_17 where a "Garden" row has sales/damage/percent values listed (all others are listed in "Quartal" rows).

image

library(tidyverse)

cities <- c("London", "Paris", "Rome")
departments <-c("Male", "Female","Child", "Home", "Garden")
quartals <- c("2017-Q1", "2017-Q2","2017-Q3","2017-Q4")
years = c('2017')

out = start_17 |>
  # create city 
  mutate(City = ifelse(Department %in% cities, Department, NA)) |>
  fill(City) |>
  filter(!Department %in% cities) |>
  # create department
  mutate(department = ifelse(Department %in% c(departments, 'Total'), Department, NA)) |>
  fill(department) |>
  filter(!Department %in% departments) |>
  # create year
  mutate(Year = ifelse(Department %in% years, Department, NA)) |>
  fill(Year) |>
  filter(!Department %in% years) |>
  mutate(Year = as.numeric(Year)) |>
  # create quartal
  mutate(Quartal = case_when(
    Department == 'Total' ~ NA_character_,
    TRUE ~ str_replace(Department, ',Quartal', '-Q')
  )) |>
  mutate(Quartal = str_replace(Quartal, ' ', '')) |>
  # order columns
  select(-Department) |>
  select(Year, Quartal, City, Department = department, TotalSales, TotalDamage, TotalDamagePercent)

head(out, 16)
#>    Year Quartal   City Department TotalSales TotalDamage TotalDamagePercent
#> 1  2017 2017-Q1 London       Male         56           2               3.57
#> 2  2017 2017-Q2 London       Male         81           2               2.46
#> 3  2017 2017-Q3 London       Male        103           1               9.70
#> 4  2017 2017-Q4 London       Male         88           2               2.72
#> 5  2017 2017-Q1 London     Female        368           9               2.44
#> 6  2017 2017-Q2 London     Female        296           4               1.35
#> 7  2017 2017-Q3 London     Female        369           4               1.08
#> 8  2017 2017-Q4 London     Female        324           3               9.25
#> 9  2017 2017-Q4 London      Child        103           1               9.70
#> 10 2017 2017-Q2 London       Home        230           7               3.04
#> 11 2017 2017-Q3 London       Home        225           6               2.66
#> 12 2017 2017-Q4 London       Home        334          11               3.29
#> 13 2017 2017-Q1 London     Garden        787           2               2.54
#> 14 2017 2017-Q2 London     Garden        806           4               4.96
#> 15 2017 2017-Q3 London     Garden        925           5               5.40
#> 16 2017 2017-Q4 London     Garden        918           4               4.35

Created on 2023-02-12 with reprex v2.0.2.9000

1 Like

thanks very much @ scottyd22

it works perfectly.
Best

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.