Is there an existing function like this one?
It filters a data frame by matching `n` values from the head (or tail) of the sorted unique values of a variable. It is similar to `top_n()`, but instead of using `min_rank()` or `max_rank()` it uses `head()` or `tail()`; and `var` is flexible as in `pull()`.
#' Filter rows by the first (or last) `n` unique values of a variable
#'
#' Extracts `var` from `.data`, sorts its unique values, takes `n` of them
#' from the head (or the tail, for non-positive `n`) and keeps every row of
#' `.data` whose `var` value is among them.
#'
#' @param .data A data frame (or data-frame-like object such as a tibble).
#' @param var The column to match on. It is forwarded to `dplyr::pull()`, so
#'   it may be a bare name, a string, or a position.
#' @param n A single non-missing number. If positive, the first `n` sorted
#'   unique values are matched; otherwise the last `abs(n)` are matched, so
#'   `n = 0` matches no rows.
#' @return The matching rows of `.data`, in their original order. The result
#'   keeps its data-frame shape even when `.data` has a single column.
top <- function(.data, var, n = 1) {
  # Fail early with a readable message instead of an obscure `if` error.
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n))

  var <- rlang::enquo(var)
  pulled <- dplyr::pull(.data, !!var)

  # sort() drops NA, so rows where `var` is missing are never selected.
  sorted <- sort(unique(pulled))
  to_match <- if (n > 0) {
    utils::head(sorted, n)
  } else {
    utils::tail(sorted, abs(n))
  }

  # drop = FALSE stops a one-column data frame from collapsing to a vector.
  .data[pulled %in% to_match, , drop = FALSE]
}
# Example data: nine rows, with `y` cycling through "a", "b", "c".
df <- data.frame(
  x = seq_len(9),
  y = rep(letters[1:3], times = 3),
  stringsAsFactors = FALSE
)

# `var` may be supplied as a string ...
result <- top(df, "y")
result
#>   x y
#> 1 1 a
#> 4 4 a
#> 7 7 a

# ... or as a bare column name, with the same result.
identical(result, top(df, y))
#> [1] TRUE

# A column position works for `var` as well.
identical(top(df, var = 2), top(df, var = y))
#> [1] TRUE

# A larger `n` matches more of the smallest unique values.
top(df, y, n = 2)
#>   x y
#> 1 1 a
#> 2 2 b
#> 4 4 a
#> 5 5 b
#> 7 7 a
#> 8 8 b

# A negative `n` matches from the tail of the sorted unique values instead.
top(df, y, n = -2)
#>   x y
#> 2 2 b
#> 3 3 c
#> 5 5 b
#> 6 6 c
#> 8 8 b
#> 9 9 c