Seeking a better way to pivot_wider() multiple names_from/values_from sets

leungi · September 21, 2019, 4:58pm

Reprex below to show desired output.

Similar to post 40033, but operating on multiple names_from.

library(tidyr)
library(dplyr)

mtcars %>%
  select(cyl, vs) %>%
  add_count(cyl, name = "cyl_n") %>%
  pivot_wider(
    names_from = cyl,
    values_from = cyl_n,
    names_prefix = "cyl_",
    values_fn = list(cyl_n = mean)
  ) %>%
  add_count(vs, name = "vs_n") %>%
  pivot_wider(
    names_from = vs,
    values_from = vs_n,
    names_prefix = "vs_",
    values_fn = list(vs_n = mean)
  )
#> # A tibble: 2 x 5
#>   cyl_6 cyl_4 cyl_8  vs_0  vs_1
#>   <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1     7    11    14     1    NA
#> 2     7    11    NA    NA     1

mara · September 22, 2019, 11:16am

Could you expand on your question a bit? I'm a little confused as to what exactly you're trying to do. My confusion might stem from the fact that you have no equivalent of an "id" column (you're using all the variables you're selecting, so you don't really have anything to pivot around if you do them at the same time — at least I don't think you do).

https://tidyr.tidyverse.org/articles/pivot.html

Here's the reprex with intermediate output, since I think it helps to see where the variables show up (e.g. you don't have vs_n until after you add that second count).

library(tidyr)
library(dplyr)

# first `pivot_wider()` only
mtcars %>%
  select(cyl, vs) %>%
  add_count(cyl, name = "cyl_n") %>%
  pivot_wider(
    names_from = cyl,
    values_from = cyl_n,
    names_prefix = "cyl",
    values_fn = list(cyl_n = mean)
  )
#> # A tibble: 2 x 4
#>      vs  cyl6  cyl4  cyl8
#>   <dbl> <dbl> <dbl> <dbl>
#> 1     0     7    11    14
#> 2     1     7    11    NA

# first `pivot_wider()` with count of `vs_n`
mtcars %>%
  select(cyl, vs) %>%
  add_count(cyl, name = "cyl_n") %>%
  pivot_wider(
    names_from = cyl,
    values_from = cyl_n,
    names_prefix = "cyl",
    values_fn = list(cyl_n = mean)
  ) %>%
  add_count(vs, name = "vs_n")
#> # A tibble: 2 x 5
#>      vs  cyl6  cyl4  cyl8  vs_n
#>   <dbl> <dbl> <dbl> <dbl> <int>
#> 1     0     7    11    14     1
#> 2     1     7    11    NA     1

# now with second `pivot_wider()`
mtcars %>%
  select(cyl, vs) %>%
  add_count(cyl, name = "cyl_n") %>%
  pivot_wider(
    names_from = cyl,
    values_from = cyl_n,
    names_prefix = "cyl",
    values_fn = list(cyl_n = mean)
  ) %>%
  add_count(vs, name = "vs_n") %>%
  pivot_wider(
    names_from = vs,
    values_from = vs_n,
    names_prefix = "vs_",
    values_fn = list(vs_n = mean)
  )
#> # A tibble: 2 x 5
#>    cyl6  cyl4  cyl8  vs_0  vs_1
#>   <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1     7    11    14     1    NA
#> 2     7    11    NA    NA     1

^{Created on 2019-09-22 by the reprex package (v0.3.0)}

leungi · September 22, 2019, 7:22pm

@jcblum: thanks for adding tags; I forgot

@mara: thanks for the prompt reply!

Here's a more useful reprex (using iris data) to show the intention.

Goal: Within each species, how many samples are in each bin? Is there a way in pivot_wider() to do it in one step?

library(tidyr)
library(dplyr)

iris %>% 
  select(Species, Sepal.Length, Petal.Width) %>%
  mutate_if(is.numeric, cut, c(0, 1, 3, 5, 7, 9)) %>% 
  group_by(Species) %>% 
  slice(1:5) %>%
  add_count(Sepal.Length, name = "SL_n") %>%
  distinct() %>%
  pivot_wider(names_from = c(Sepal.Length),
              values_from = c(SL_n),
              names_prefix = "SL_",
              values_fill = list(SL_n = 0)) %>%
  add_count(Petal.Width, name = "PW_n") %>%
  distinct() %>%
  pivot_wider(names_from = c(Petal.Width),
              values_from = c(PW_n),
              names_prefix = "PW_",
              values_fill = list(PW_n = 0))
#> # A tibble: 3 x 6
#> # Groups:   Species [3]
#>   Species    `SL_(5,7]` `SL_(3,5]` `SL_(7,9]` `PW_(0,1]` `PW_(1,3]`
#>   <fct>           <int>      <int>      <int>      <int>      <int>
#> 1 setosa              1          4          0          1          0
#> 2 versicolor          5          0          0          0          1
#> 3 virginica           4          0          1          0          1

^{Created on 2019-09-22 by the reprex package (v0.3.0)}

cderv · September 23, 2019, 11:44am

If I understand correctly, I would use tidyr like this

library(tidyr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# get data in tidy format
bin_tab <- iris %>% 
  select(Species, Sepal.Length, Petal.Width) %>%
  mutate_if(is.numeric, cut, c(0, 1, 3, 5, 7, 9)) %>%
  pivot_longer(cols = -Species)
glimpse(bin_tab)
#> Observations: 300
#> Variables: 3
#> $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa...
#> $ name    <chr> "Sepal.Length", "Petal.Width", "Sepal.Length", "Petal....
#> $ value   <fct> "(5,7]", "(0,1]", "(3,5]", "(0,1]", "(3,5]", "(0,1]", ...

# using aggregation in pivot function
res1 <- bin_tab %>%
  pivot_wider(id_cols = Species, 
              names_from = c(name, value), 
              values_fn = list(value = length), 
              values_fill = list(value = 0))
res1
#> # A tibble: 3 x 6
#>   Species `Sepal.Length_(~ `Petal.Width_(0~ `Sepal.Length_(~
#>   <fct>              <int>            <int>            <int>
#> 1 setosa                22               50               28
#> 2 versic~               47                7                3
#> 3 virgin~               37                0                1
#> # ... with 2 more variables: `Petal.Width_(1,3]` <int>,
#> #   `Sepal.Length_(7,9]` <int>

# calculating bin number in tidy format then pivoting
bin_tab %>%
  group_by_all() %>%
  count() %>%
  pivot_wider(id_cols = Species, names_from = c(name, value), values_from = n, values_fill =list(n = 0)) %>%
  select(colnames(res1))
#> # A tibble: 3 x 6
#> # Groups:   Species [3]
#>   Species `Sepal.Length_(~ `Petal.Width_(0~ `Sepal.Length_(~
#>   <fct>              <int>            <int>            <int>
#> 1 setosa                22               50               28
#> 2 versic~               47                7                3
#> 3 virgin~               37                0                1
#> # ... with 2 more variables: `Petal.Width_(1,3]` <int>,
#> #   `Sepal.Length_(7,9]` <int>

^{Created on 2019-09-23 by the reprex package (v0.3.0)}

leungi · September 23, 2019, 12:55pm

@cderv to the rescue again

Thanks to the RStudio team!

system · September 30, 2019, 12:55pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.