As you seem to have a fixed and relatively small number of different patterns there, pre-processing the strings yourself into a common format (e.g. yyyy-mm-dd for lubridate::ymd()) is probably one of the easiest approaches, and we can achieve this with regular-expression replacements.
Using {stringr} makes this a bit more convenient:
library(dplyr)
library(lubridate)
library(stringr)
# Example data: date strings in several inconsistent formats
# ("yyyy-mm-dd", "yyyy-mm", "yyyy", "yyyy/NUL/NUL") plus missing values.
df <- tibble(
  InconsistentDates = c(
    "2001/NUL/NUL", "2001/NUL/NUL", "2000-06-02", "1991/NUL/NUL",
    "1984/NUL/NUL", "1985/NUL/NUL", "2013-09-12", "1995", "1994/NUL/NUL",
    "2009-09-05", "2011-03-22", "1990/NUL/NUL", "1999-06-17", "2000-09-11",
    "1993/NUL/NUL", "2007/NUL/NUL", "2005/NUL/NUL", "1991/NUL/NUL",
    "1992/NUL/NUL", "2011/NUL/NUL", "1994/NUL/NUL", "2003-09-30",
    "1993", "2010-06", NA, NA, NA
  )
)
# Normalise every string to "yyyy-mm-dd", then parse it with ymd().
# Two anchored replacements are applied one after the other:
#   1. "^(\\d{4})(/NUL/NUL)?$"
#        ^ / $      - string start / end anchors
#        (\\d{4})   - group 1: exactly four digits (the year)
#        (...)?     - the literal "/NUL/NUL", present zero or one time
#      replacement "\\1-07-01": group 1 followed by the literal "-07-01"
#   2. "^(\\d{4}-\\d{2})$"
#        group 1: a "yyyy-mm" value
#      replacement "\\1-15": group 1 followed by the literal "-15"
# Strings that are already "yyyy-mm-dd" match neither pattern and are kept
# as they are; NA stays NA.
df |>
  mutate(
    dstr_ymd = InconsistentDates |>
      str_replace_all("^(\\d{4})(/NUL/NUL)?$", "\\1-07-01") |>
      str_replace_all("^(\\d{4}-\\d{2})$", "\\1-15"),
    d = ymd(dstr_ymd)
  ) |>
  print(n = Inf)
Result:
#> # A tibble: 27 × 3
#> InconsistentDates dstr_ymd d
#> <chr> <chr> <date>
#> 1 2001/NUL/NUL 2001-07-01 2001-07-01
#> 2 2001/NUL/NUL 2001-07-01 2001-07-01
#> 3 2000-06-02 2000-06-02 2000-06-02
#> 4 1991/NUL/NUL 1991-07-01 1991-07-01
#> 5 1984/NUL/NUL 1984-07-01 1984-07-01
#> 6 1985/NUL/NUL 1985-07-01 1985-07-01
#> 7 2013-09-12 2013-09-12 2013-09-12
#> 8 1995 1995-07-01 1995-07-01
#> 9 1994/NUL/NUL 1994-07-01 1994-07-01
#> 10 2009-09-05 2009-09-05 2009-09-05
#> 11 2011-03-22 2011-03-22 2011-03-22
#> 12 1990/NUL/NUL 1990-07-01 1990-07-01
#> 13 1999-06-17 1999-06-17 1999-06-17
#> 14 2000-09-11 2000-09-11 2000-09-11
#> 15 1993/NUL/NUL 1993-07-01 1993-07-01
#> 16 2007/NUL/NUL 2007-07-01 2007-07-01
#> 17 2005/NUL/NUL 2005-07-01 2005-07-01
#> 18 1991/NUL/NUL 1991-07-01 1991-07-01
#> 19 1992/NUL/NUL 1992-07-01 1992-07-01
#> 20 2011/NUL/NUL 2011-07-01 2011-07-01
#> 21 1994/NUL/NUL 1994-07-01 1994-07-01
#> 22 2003-09-30 2003-09-30 2003-09-30
#> 23 1993 1993-07-01 1993-07-01
#> 24 2010-06 2010-06-15 2010-06-15
#> 25 <NA> <NA> NA
#> 26 <NA> <NA> NA
#> 27 <NA> <NA> NA
For testing pattern matches, you could try stringr::str_view():
# Show the elements matched by the year-only pattern
# (a 4-digit year, optionally followed by "/NUL/NUL").
df$InconsistentDates |>
  str_view("^(\\d{4})(/NUL/NUL)?$")
#> [1] │ <2001/NUL/NUL>
#> [2] │ <2001/NUL/NUL>
#> [4] │ <1991/NUL/NUL>
#> [5] │ <1984/NUL/NUL>
#> [6] │ <1985/NUL/NUL>
#> [8] │ <1995>
#> [9] │ <1994/NUL/NUL>
#> [12] │ <1990/NUL/NUL>
#> [15] │ <1993/NUL/NUL>
#> [16] │ <2007/NUL/NUL>
#> [17] │ <2005/NUL/NUL>
#> [18] │ <1991/NUL/NUL>
#> [19] │ <1992/NUL/NUL>
#> [20] │ <2011/NUL/NUL>
#> [21] │ <1994/NUL/NUL>
#> [23] │ <1993>
# Show the elements matched by the year-month ("yyyy-mm") pattern.
df$InconsistentDates |>
  str_view("^(\\d{4}-\\d{2})$")
#> [24] │ <2010-06>
If you are new to regex, the {stringr} "Regular expressions" vignette is a great place to start: