Here is another improvement, not regarding logic but speed:
Data <- data.frame(years_in_love = c(10,45,13,45),
years_married = c(8,44,6,8))
# your function
library(magrittr); library(dplyr)
full_function <- function(data) {
df1 <- data %>%
mutate(new_variable = ifelse(years_in_love > 100, 99, years_in_love)) %>%
mutate(type = "love") %>%
count(new_variable, type)
df2 <- data %>%
mutate(new_variable = ifelse(years_married > 100, 99, years_married)) %>%
mutate(type = "married") %>%
count(new_variable, type)
bind_rows(df1, df2)
# not necessary
# return(full_data)
}
# function from @FJCC
library(tidyr)
full_function2 <- function(DAT) {
DAT |>
mutate(married = ifelse(years_married > 100, 99, years_married),
love = ifelse(years_in_love > 100, 99, years_in_love)) |>
select(married,love) |>
pivot_longer(cols = c("married","love"),names_to = "type",values_to = "value") |>
count(value,type)
}
# using collapse and data.table instead of dplyr but same logic
library(collapse);library(data.table)
full_function3 <- function(DAT){
DAT |>
# convert to data.table
qDT() |>
fmutate(
married = fifelse(years_married > 100, 99, years_married),
love = fifelse(years_in_love > 100, 99, years_in_love)
) |>
melt.data.table(
measure.vars = c('love','married'), variable.name = 'type', value.name = 'value'
) |>
(\(x) x[, .N, by = c('type','value')])()
}
# equal?
full_function(Data)
#> new_variable type n
#> 1 10 love 1
#> 2 13 love 1
#> 3 45 love 2
#> 4 6 married 1
#> 5 8 married 2
#> 6 44 married 1
full_function2(Data)
#> # A tibble: 6 × 3
#> value type n
#> <dbl> <chr> <int>
#> 1 6 married 1
#> 2 8 married 2
#> 3 10 love 1
#> 4 13 love 1
#> 5 44 married 1
#> 6 45 love 2
full_function3(Data)
#> type value N
#> 1: love 10 1
#> 2: love 45 2
#> 3: love 13 1
#> 4: married 8 2
#> 5: married 44 1
#> 6: married 6 1
# I think so (except data.frame type)
# speed comparison
microbenchmark::microbenchmark(
original = full_function(Data),
improved = full_function2(Data),
improved2= full_function3(Data),
times = 100
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> original 25.5338 28.5682 32.774681 31.7347 34.64235 56.4048 100
#> improved 19.7737 21.5917 23.830159 23.1554 25.12710 35.2497 100
#> improved2 1.4976 1.7913 2.367522 2.2839 2.63385 9.6346 100
Created on 2022-10-01 by the reprex package (v2.0.1)
Maybe this improves it further
Kind regards