Hello and welcome! I have to do this a lot too- I think you can do what you need to do to with the dplyr
scoped variants, but you'll need to do this with group_by_at
and your own mutate
rather than using add_count
(which is a shortcut for that sequence because it is so common).
You are going about the problem in the same way I do to find uniques and duplicates- there are no special functions for this other than the janitor::get_dupes
function (janitor package here: https://github.com/sfirke/janitor) I show below, but you can get the same output by counting rows with dplyr
:
library(tidyverse)
#> Warning: package 'tibble' was built under R version 3.5.2
#> Warning: package 'purrr' was built under R version 3.5.2
df <- tibble::tribble(~t, ~u, ~v, ~w, ~x, ~y, ~z,
1, "a", 'a', 'b', 4, 'c', NA,
1, "a", 'a', 'b', 4, 'c', 5,
2, "a", 'a', 'b', 4, 'c', 5,
3, "b", 'b', 'b', 8, 'c', 9,
3, "a", 'b', 'b', 10, 'c', 25,
4, "c", 'a', 'b', 4, 'c', 5,
4, "c", 'a', 'b', 4, 'c', NA)
## want a tibble with duplicates removed, but don't use last column in
## identifying duplicates
df %>%
add_count(t, u, v, w, x, y) %>%
filter(n == 1)
#> # A tibble: 3 x 8
#> t u v w x y z n
#> <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl> <int>
#> 1 2 a a b 4 c 5 1
#> 2 3 b b b 8 c 9 1
#> 3 3 a b b 10 c 25 1
# same as...
df %>%
group_by_at(vars(-z)) %>%
mutate(num_rows = sum(n())) %>%
filter(num_rows == 1)
#> # A tibble: 3 x 8
#> # Groups: t, u, v, w, x, y [3]
#> t u v w x y z num_rows
#> <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl> <int>
#> 1 2 a a b 4 c 5 1
#> 2 3 b b b 8 c 9 1
#> 3 3 a b b 10 c 25 1
## want to look at all duplicates, again excluding last column from finding
## duplicates
df %>%
add_count(t, u, v, w, x, y) %>%
filter(n > 1)
#> # A tibble: 4 x 8
#> t u v w x y z n
#> <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl> <int>
#> 1 1 a a b 4 c NA 2
#> 2 1 a a b 4 c 5 2
#> 3 4 c a b 4 c 5 2
#> 4 4 c a b 4 c NA 2
# same as...
df %>%
group_by_at(vars(-z)) %>%
mutate(num_rows = sum(n())) %>%
filter(num_rows > 1)
#> # A tibble: 4 x 8
#> # Groups: t, u, v, w, x, y [2]
#> t u v w x y z num_rows
#> <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl> <int>
#> 1 1 a a b 4 c NA 2
#> 2 1 a a b 4 c 5 2
#> 3 4 c a b 4 c 5 2
#> 4 4 c a b 4 c NA 2
## use janitor package to look at duplicates
library(janitor)
df %>%
get_dupes(t, u, v, w, x, y)
#> # A tibble: 4 x 8
#> t u v w x y dupe_count z
#> <dbl> <chr> <chr> <chr> <dbl> <chr> <int> <dbl>
#> 1 1 a a b 4 c 2 NA
#> 2 1 a a b 4 c 2 5
#> 3 4 c a b 4 c 2 5
#> 4 4 c a b 4 c 2 NA
Created on 2019-02-01 by the reprex package (v0.2.1)