Total in dplyr and ends_with

Hi,
I have this simple data file where I would like to add count (to existing sum and mean) and total of Gender at the top or bottom of the table.
I also have issues with ends_with which should contain both "len" and "count". If this worked I would need just one table.

URN <- c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn", "ooo", "ppp")
A_len <- c(20, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0)
C_len <- c(0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1)
A_wcount <- c(3, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1)
C_wcount <- c(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1)
Gender <- c("Male", "Male", "Male", "Male", "Male", "Male", "Female", "Female", "Female", "Female", "Female", "Female", "Male", "Male", "Male", "Male")

response.data <- data.frame(URN, A_len, C_len, A_wcount, C_wcount, Gender)

l.result <- response.data %>% 
  select(Gender, ends_with("len")) %>% 
  group_by(Gender) %>% 
  summarise_all(list(Aver = mean, Sum = sum))

c.result <- response.data %>% 
  select(Gender, ends_with("count")) %>% 
  group_by(Gender) %>% 
  summarise_all(list(Aver = mean, Sum = sum))

I do apologize if the question is too basic but I cannot find a solution in dplyr documentation...

That is not a data file (on disk), is a data frame (on memory)

Again, it's a data frame not a table (graphic representation) so this is not possible in this format, you would have to create a table.

ends_with() only takes one argument, but you can use matches() to specify a regex that match both options at the same time.

library(dplyr)

response.data <- data.frame(
    A_len = c(20, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0),
    C_len = c(0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1),
    A_wcount = c(3, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1),
    C_wcount = c(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1),
    URN = as.factor(c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg",
                      "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn",
                      "ooo", "ppp")),
    Gender = as.factor(c("Male", "Male", "Male", "Male", "Male", "Male",
                         "Female", "Female", "Female", "Female", "Female",
                         "Female", "Male", "Male", "Male", "Male"))
)

response.data %>% 
    select(Gender, matches("(len|count)$")) %>% 
    group_by(Gender) %>% 
    summarise_all(list(Aver = mean, Sum = sum, Count = ~n()))
#> # A tibble: 2 x 13
#>   Gender A_len_Aver C_len_Aver A_wcount_Aver C_wcount_Aver A_len_Sum
#>   <fct>       <dbl>      <dbl>         <dbl>         <dbl>     <dbl>
#> 1 Female      0.167      0.167           0           0.167         1
#> 2 Male        3.2        0.4             0.9         0.4          32
#> # … with 7 more variables: C_len_Sum <dbl>, A_wcount_Sum <dbl>,
#> #   C_wcount_Sum <dbl>, A_len_Count <int>, C_len_Count <int>,
#> #   A_wcount_Count <int>, C_wcount_Count <int>

Created on 2019-11-12 by the reprex package (v0.3.0.9000)

You can also user "summarise_at" without having to select beforehand:

combined <- response.data %>% 
  group_by(Gender) %>% 
  summarise_at(
    vars(ends_with("len"), ends_with("count")),
    list(Aver = mean, Sum = sum)
  )
combined
# A tibble: 2 x 9
#   Gender A_len_Aver C_len_Aver A_wcount_Aver C_wcount_Aver A_len_Sum
#    <fct>       <dbl>      <dbl>         <dbl>         <dbl>     <dbl>
# 1 Female      0.167      0.167           0           0.167         1
# 2 Male        3.2        0.4             0.9         0.4          32
# … with 3 more variables: C_len_Sum <dbl>, A_wcount_Sum <dbl>,
#   C_wcount_Sum <dbl>
2 Likes

Thank you for your solutions. :grinning:
I cannot believe there is no simple way of displaying Female, Male and Total in one go. Maybe we should use completely different solution or package :thinking:

Is this what you want?

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

response_data <- tibble(A_len = c(20, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0),
                            C_len = c(0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1),
                            A_wcount = c(3, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1),
                            C_wcount = c(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1),
                            URN = c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn", "ooo", "ppp"),
                            Gender = c("Male", "Male", "Male", "Male", "Male", "Male", "Female", "Female", "Female", "Female", "Female", "Female", "Male", "Male", "Male", "Male"))

response_data %>%
    bind_rows(mutate(.data = .,
                     Gender = "Total")) %>%
    group_by(Gender) %>%
    summarise_at(.vars = vars(ends_with(match = "len"), ends_with(match = "count")),
                 .funs = list(Average = mean,
                              Total = sum,
                              Count = ~n())) %>%
    print(width = Inf)
#> # A tibble: 3 x 13
#>   Gender A_len_Average C_len_Average A_wcount_Average C_wcount_Average
#>   <chr>          <dbl>         <dbl>            <dbl>            <dbl>
#> 1 Female         0.167         0.167            0                0.167
#> 2 Male           3.2           0.4              0.9              0.4  
#> 3 Total          2.06          0.312            0.562            0.312
#>   A_len_Total C_len_Total A_wcount_Total C_wcount_Total A_len_Count
#>         <dbl>       <dbl>          <dbl>          <dbl>       <int>
#> 1           1           1              0              1           6
#> 2          32           4              9              4          10
#> 3          33           5              9              5          16
#>   C_len_Count A_wcount_Count C_wcount_Count
#>         <int>          <int>          <int>
#> 1           6              6              6
#> 2          10             10             10
#> 3          16             16             16
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.