I'm hoping someone can help me understand why summarise(across())
and summarise_if()
do not yield the same results when provided with the same group_by()
specification. Here's a simplified example of what I'm running into:
library(tidyverse)
library(lubridate)
fake_data <- tribble(
~location, ~sub_location, ~start_date, ~end_date, ~measurement,
"Area 1", "A", as_date("2021-04-01"), as_date("2021-04-25"), 70,
"Area 1", "A", as_date("2021-04-01"), as_date("2021-04-25"), 75,
"Area 1", "B", as_date("2021-04-01"), as_date("2021-04-25"), 85,
"Area 1", "B", as_date("2021-04-01"), as_date("2021-04-25"), 67,
"Area 2", "A", as_date("2021-04-01"), as_date("2021-04-25"), 47,
"Area 2", "B", as_date("2021-04-01"), as_date("2021-04-25"), 90)
fake_data %>%
group_by(location, sub_location) %>%
summarise_if(.predicate = is.numeric,
.funs = ~ mean(.x, na.rm = T))
#> # A tibble: 4 x 3
#> # Groups: location [2]
#> location sub_location measurement
#> <chr> <chr> <dbl>
#> 1 Area 1 A 72.5
#> 2 Area 1 B 76
#> 3 Area 2 A 47
#> 4 Area 2 B 90
fake_data %>%
group_by(location, sub_location) %>%
summarise(across(.cols = where(is.numeric),
.funs = ~ mean(.x, na.rm = T)))
#> `summarise()` has grouped output by 'location', 'sub_location'. You can override using the `.groups` argument.
#> # A tibble: 6 x 3
#> # Groups: location, sub_location [4]
#> location sub_location measurement
#> <chr> <chr> <dbl>
#> 1 Area 1 A 70
#> 2 Area 1 A 75
#> 3 Area 1 B 85
#> 4 Area 1 B 67
#> 5 Area 2 A 47
#> 6 Area 2 B 90
Created on 2021-04-17 by the reprex package (v1.0.0)