Hi there,
I'm experimenting with the new data.table backend for dplyr - the dtplyr package - together with the `filter`, `map` and `pmap` functions.
The `filter_custom` function below worked with a regular data frame but didn't work with a data.table object.
The same happened with the `purrr::pmap_dfr` function.
What is the right way to use these functions with `dtplyr`?
Thank you!
library(data.table)
library(purrr)
library(dtplyr)
library(dplyr, warn.conflicts = FALSE)
# Wrap mtcars with lazy_dt() and run an ordinary dplyr pipeline on it;
# the trailing as_tibble() turns the result into the tibble printed below.
mtcars2 <- lazy_dt(mtcars)

mtcars2 %>%
  filter(wt < 5, wt > 1) %>%
  mutate(l100k = 235.21 / mpg) %>% # liters / 100 km
  group_by(cyl) %>%
  summarise(l100k = mean(l100k)) %>%
  as_tibble()
#> # A tibble: 3 x 2
#> cyl l100k
#> <dbl> <dbl>
#> 1 4 9.05
#> 2 6 12.0
#> 3 8 14.9
# Custom helper: mean fuel consumption (l/100 km) per cylinder count for the
# rows whose weight lies strictly between two bounds.
#
# Arguments:
#   df            table with `wt`, `mpg` and `cyl` columns; this script passes
#                 both a plain data frame and a lazy_dt() object
#   filter_value1 exclusive upper bound on `wt`
#   filter_value2 exclusive lower bound on `wt`
#
# Returns a tibble with one row per `cyl` and the mean `l100k`.
filter_custom <- function(df, filter_value1, filter_value2) {
  df %>%
    filter(wt < filter_value1, wt > filter_value2) %>%
    mutate(l100k = 235.21 / mpg) %>%
    group_by(cyl) %>%
    summarise(l100k = mean(l100k)) %>%
    as_tibble()
}
# Raises an error, yet something is printed first. The output below shows that
# map() visits the elements of the lazy_dt object itself: the first element
# has an `hp` component (printed), and a later element is an atomic vector, on
# which `$` fails.
# NOTE(review): presumably these elements are dtplyr's internal fields rather
# than the columns of mtcars - confirm against the dtplyr documentation.
map(
  mtcars2,
  function(.x) print(.x$hp)
)
#> [1] 110 110 93 110 175 105 245 62 95 123 123 180 180 180 205 215 230 66 52
#> [20] 65 97 150 150 245 175 66 91 113 264 175 335 109
#> Error: $ operator is invalid for atomic vectors
# Parameter table: each row is one (upper bound, lower bound) pair for
# filter_custom(); a lazy_dt() copy is created as well.
filter_data <- tibble(
  filter_value1 = c(4, 5, 6),
  filter_value2 = c(1, 2, 3)
)
filter_data2 <- lazy_dt(filter_data)

# Iterating over the lazy copy produces an empty result.
# NOTE(review): pmap_dfr() is probably walking the lazy_dt object rather than
# the rows of filter_data; iterating over the plain tibble `filter_data` (and
# still passing `mtcars2` to filter_custom) is the likely fix - confirm.
filter_data2 %>%
  pmap_dfr(
    ~ filter_custom(mtcars2, ..1, ..2),
    .id = "id"
  ) %>%
  as_tibble()
#> # A tibble: 0 x 0
# The same iteration over the plain tibble, with the plain mtcars data frame,
# returns one group of rows per parameter pair, tagged by `id`.
filter_data %>%
  set_names() %>%
  pmap_dfr(
    ~ filter_custom(mtcars, ..1, ..2),
    .id = "id"
  )
#> # A tibble: 9 x 3
#> cyl l100k id
#> <dbl> <dbl> <chr>
#> 1 4 9.05 1
#> 2 6 12.0 1
#> 3 8 14.9 1
#> 4 4 9.79 2
#> 5 6 12.0 2
#> 6 8 14.9 2
#> 7 4 9.98 3
#> 8 6 12.4 3
#> 9 8 16.1 3