`dplyr::mutate()` but only if the provided columns are new in the data frame

I want to use dplyr::mutate() to add only those of the provided columns that do not already exist in the data frame, leaving existing columns untouched:

mutate_if_missing(
  head(cars),                # First six rows, matching the output below
  speed = "don't overwrite", # Existing column
  length = "new value"       # New column
)
#>   speed dist    length
#> 1     4    2 new value
#> 2     4   10 new value
#> 3     7    4 new value
#> 4     7   22 new value
#> 5     8   16 new value
#> 6     9   10 new value

I didn't find a way to do that with dplyr verbs, so I created a helper function mutate_if_missing(), which works fine:

library(dplyr)
library(rlang)

# Helper function
mutate_if_missing <- function(.data, ...) {
  # Gather the arguments passed via ... into a list; rlang::list2()
  # evaluates each of them right here, before dplyr::mutate() is called
  dots <- rlang::list2(...)
  # Flag the arguments whose names already exist as columns of .data
  already_present <- names(dots) %in% colnames(.data)
  # Keep only the arguments that would create new columns
  to_add <- dots[!already_present]
  # Splice the remaining arguments into dplyr::mutate()
  dplyr::mutate(.data, !!!to_add)
}

mutate_if_missing(
  head(cars),
  speed = "don't overwrite", # Existing column
  length = "new value"       # New column
)
#>   speed dist    length
#> 1     4    2 new value
#> 2     4   10 new value
#> 3     7    4 new value
#> 4     7   22 new value
#> 5     8   16 new value
#> 6     9   10 new value

Created on 2025-05-24 with reprex v2.1.1

However, the function fails when the provided arguments use the `.data` pronoun:

library(dplyr)
library(rlang)

# Helper function
mutate_if_missing <- function(.data, ...) {
  # Gather the arguments passed via ... into a list; rlang::list2()
  # evaluates each of them right here, before dplyr::mutate() is called
  dots <- rlang::list2(...)
  # Flag the arguments whose names already exist as columns of .data
  already_present <- names(dots) %in% colnames(.data)
  # Keep only the arguments that would create new columns
  to_add <- dots[!already_present]
  # Splice the remaining arguments into dplyr::mutate()
  dplyr::mutate(.data, !!!to_add)
}

mutate_if_missing(
  head(cars),
  # New column whose expression uses the .data pronoun; this call raises
  # the error shown below
  length = dplyr::if_else(.data$speed > 4, "> 4", "<= 4")
)
#> Error:
#> ! Can't subset `.data` outside of a data mask context.

# Works fine with dplyr::mutate(): the same expression is passed directly,
# without going through the helper
mutate(
  head(cars),
  length = dplyr::if_else(.data$speed > 4, "> 4", "<= 4")
)
#>   speed dist length
#> 1     4    2   <= 4
#> 2     4   10   <= 4
#> 3     7    4    > 4
#> 4     7   22    > 4
#> 5     8   16    > 4
#> 6     9   10    > 4

Created on 2025-05-24 with reprex v2.1.1

Any suggestions on how to resolve this? Approaches using just dplyr verbs that don't require the helper function would be even better.

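Found it: this can be solved by using rlang::quos() rather than rlang::list2(). rlang::list2() evaluates its arguments immediately, in the calling context, where the `.data` pronoun has no data mask to refer to; rlang::quos() instead captures them unevaluated as quosures, so dplyr::mutate() evaluates them later inside its data mask: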

library(dplyr)
library(rlang)

# Helper function
mutate_if_missing <- function(.data, ...) {
  # Capture the arguments passed via ... as quosures (unevaluated), so that
  # they are only evaluated later by dplyr::mutate(), inside its data mask
  captured <- rlang::quos(...)
  # Flag the arguments whose names already exist as columns of .data
  already_present <- names(captured) %in% colnames(.data)
  # Keep only the arguments that would create new columns
  to_add <- captured[!already_present]
  # Splice the remaining quosures into dplyr::mutate()
  dplyr::mutate(.data, !!!to_add)
}

mutate_if_missing(
  head(cars),
  # New column derived from the existing speed column via the .data pronoun
  length = dplyr::if_else(.data$speed > 4, "> 4", "<= 4")
)
#>   speed dist length
#> 1     4    2   <= 4
#> 2     4   10   <= 4
#> 3     7    4    > 4
#> 4     7   22    > 4
#> 5     8   16    > 4
#> 6     9   10    > 4

Created on 2025-05-24 with reprex v2.1.1

1 Like