I'm using RStudio 2021.09.1, R 4.1.2 and tidyverse 1.3.1. The system.time call at the bottom reports that the computation took about 0.08 seconds, but it takes a full minute or so for the output tibble to show up in the RStudio Global Environment pane. If the dataset has more rows, the wait becomes impossibly long. Until the result shows up in the Global Environment, nothing else can be executed.
library(tidyverse)
library(gapminder)
# Copy of gapminder in which each lifeExp value is independently replaced by
# NA with probability 0.1.
gapminder_w_NAs <- gapminder %>%
  mutate(lifeExp = if_else(runif(n()) < 0.1, NA_real_, lifeExp))

# Larger data set: every gapminder row repeated 20 times, then lifeExp blanked
# out with probability 0.1 per row. seq_len(n()) is used instead of 1:n() so
# the row index is empty (rather than c(1, 0)) if the input has zero rows.
big_df <- gapminder %>%
  slice(rep(seq_len(n()), each = 20)) %>%
  mutate(lifeExp = if_else(runif(n()) < 0.1, NA_real_, lifeExp))
#' Fit the imputation model for life expectancy
#'
#' Fits a simple linear regression of `lifeExp` on `year`.
#'
#' @param df A data frame containing `lifeExp` and `year` columns.
#' @return The fitted `lm` object.
imp_model <- function(df) {
  lm(formula = lifeExp ~ year, data = df)
}
#' Append model predictions to a data frame
#'
#' @param data A data frame to predict on; passed to `predict()` as `newdata`.
#' @param model A fitted model object that has a `predict()` method.
#' @param var Name of the column that receives the predictions
#'   (default `"pred"`).
#' @return `data` with the prediction column `var` added (or overwritten).
add_predictions <- function(data, model, var = "pred") {
  predicted <- predict(model, newdata = data)
  data[[var]] <- predicted
  data
}
#' Fit a model per continent and attach its predictions to every row
#'
#' Nests `df` by `continent`, fits `imp_mod` to each continent's rows, adds
#' the model's predictions to those rows via `add_predictions()`, and returns
#' the rows as one flat tibble.
#'
#' @param df A data frame with a `continent` column plus whatever columns
#'   `imp_mod` needs.
#' @param imp_mod A function taking a data frame and returning a fitted model
#'   usable by `add_predictions()`.
#' @return An ungrouped tibble with `continent`, the original columns of `df`
#'   and the prediction column added by `add_predictions()`.
impute_lifeExp <- function(df, imp_mod) {
  df %>%
    group_by(continent) %>%
    nest() %>%
    mutate(
      model = map(data, imp_mod),
      preds = map2(data, model, add_predictions)
    ) %>%
    ungroup() %>%
    # Keep only the key and the predictions. If `data` and `model` were left
    # in, unnest() would repeat those list-columns on every output row, so
    # each row would carry a nested tibble and a fitted model object. That is
    # cheap to compute but very expensive to inspect or display, and is
    # likely why the result is slow to appear in an IDE's environment viewer.
    select(continent, preds) %>%
    unnest(preds)
}
# Remove any result left over from a previous run. Guarded with exists()
# because a bare rm() warns when the object is not there (e.g. in a fresh
# session).
if (exists("output", inherits = FALSE)) {
  rm(output)
}

# Time the per-continent model fitting and prediction on the enlarged data.
system.time(
  output <- impute_lifeExp(big_df, imp_model)
)