Hi all,
I have a csv file which was generated originally from a Excel spreadsheet. Below I've sliced the data to illustrate the issue I'm having.
Basically the dates are not several different formats, using "/" as separators and sometimes "-". As well as sometimes it is "mmddyyyy" and sometimes it is "ddmmyyyy".
I've created the below reprex to illustrate my issue and my attempt to fix these dates but I keep getting errors.
I thought initially I had to standardize the delimiters and then coerce the fields to be a date and Lubridate would then do it's magic... but I guess I'm wrong.
Can't figure out the best way to approach this date problem, any insight would be much appreciated.
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(datapasta)
df_sample <- tibble::tribble(
~shipment, ~origin_etd, ~dest_eta, ~act_delivery, ~disch_eta, ~load_etd,
"SSHA22010278", "23-02-2022", "5/4/2022", "12/4/2022", "5/4/2022", "23-02-2022",
"SEWR22040234", "27-03-2022", "8/6/2022", "31-05-2022", "5/5/2022", "27-03-2022",
"SSHA21030471", "31-05-2021", "27-07-2021", NA, "27-07-2021", "31-05-2021",
"SSHA21040774", "11/7/2021", "12/8/2021", NA, "12/8/2021", "11/7/2021",
"SSHA22060455", "29-06-2022", "30-07-2022", NA, "30-07-2022", "29-06-2022",
"SSHA21090611", "9/10/2021", "2/12/2021", NA, "2/12/2021", "9/10/2021",
"SEWR22040262", "20-04-2022", "17-06-2022", NA, "2/6/2022", "20-04-2022",
"SSHA21031094", "28-04-2021", "27-05-2021", NA, "29-05-2021", "28-04-2021",
"SSHA21060042", "30-09-2021", "19-11-2021", NA, "19-11-2021", "12/10/2021",
"SSHA21050914", "11/7/2021", "12/8/2021", NA, "12/8/2021", "11/7/2021",
"SSHA21070207", "11/8/2021", "10/9/2021", NA, "10/9/2021", "11/8/2021",
"SEWR22040229", "1/4/2022", "8/6/2022", "3/6/2022", "13-05-2022", "1/4/2022",
"SSHA21020293", "9/3/2021", "15-04-2021", NA, "15-04-2021", "9/3/2021",
"SEWR22030103", "24-02-2022", "30-03-2022", "2/5/2022", "30-03-2022", "24-02-2022",
"SSHA21031089", "14-05-2021", "16-06-2021", NA, "17-06-2021", "14-05-2021",
"SSHA21050075", "16-08-2021", "20-09-2021", NA, "20-09-2021", "16-08-2021",
"SSHA21090228", "28-09-2021", "22-11-2021", NA, "20-11-2021", "28-09-2021",
"SSHA21020243", "28-03-2021", "3/5/2021", NA, "2/5/2021", "28-03-2021",
"SSHA21120474", "24-12-2021", "23-01-2022", "16-02-2022", "23-01-2022", "24-12-2021",
"SSHA21030833", "28-04-2021", "27-05-2021", NA, "29-05-2021", "28-04-2021",
"SSHA21121016", "7/1/2022", "8/3/2022", "16-03-2022", "8/3/2022", "7/1/2022",
"SSHA21060683", "8/7/2021", "10/8/2021", NA, "10/8/2021", "8/7/2021",
"SSHA21100697", "5/11/2021", "16-12-2021", "22-12-2021", "16-12-2021", "5/11/2021",
"SSHA21050497", "12/7/2021", "16-08-2021", NA, "16-08-2021", "12/7/2021",
"SSHA21060794", "10/8/2021", "10/10/2021", NA, "20-09-2021", "10/8/2021",
"SSHA21070510", "10/10/2021", "18-11-2021", NA, "18-11-2021", "9/10/2021",
"SEWR22050245", "7/4/2022", "8/6/2022", "9/6/2022", "20-05-2022", "7/4/2022",
"SSHA21040610", "11/7/2021", "12/8/2021", NA, "12/8/2021", "11/7/2021",
"SEWR22020247", "23-02-2022", "20-04-2022", "29-04-2022", "20-04-2022", "23-02-2022",
"SSHA21030299", "28-04-2021", "27-05-2021", NA, "29-05-2021", "28-04-2021",
"SEWR22020103", "4/1/2022", "16-02-2022", "8/3/2022", "16-02-2022", "4/1/2022",
"SSHA22050718", "17-06-2022", "19-07-2022", NA, "19-07-2022", "17-06-2022",
"SSHA21031092", "5/6/2021", "10/7/2021", NA, "10/7/2021", "5/6/2021",
"SSHA21060174", "23-06-2021", "29-07-2021", NA, "28-07-2021", "25-06-2021",
"SSHA21120789", "22-05-2022", "25-06-2022", NA, "8/7/2022", "22-05-2022",
"SSHA21120330", "23-05-2022", "11/7/2022", NA, "11/7/2022", "23-05-2022",
"SSHA21060038", "22-10-2021", "19-11-2021", "1/12/2021", "19-11-2021", "22-10-2021",
"SSHA21050091", "14-06-2021", "17-07-2021", NA, NA, NA,
"SSHA21030374", "11/5/2021", "6/6/2021", NA, "6/6/2021", "11/5/2021",
"SSHA21040077", "24-06-2021", "26-07-2021", NA, "26-07-2021", "24-06-2021",
"SSHA21080392", "23-09-2021", "8/11/2021", NA, "8/11/2021", "23-09-2021",
"SSHA21060684", "8/7/2021", "10/8/2021", NA, "10/8/2021", "8/7/2021",
"SSHA21030377", "11/5/2021", "9/6/2021", NA, "9/6/2021", "11/5/2021",
"SSHA21040031", "4/5/2021", "7/6/2021", NA, "7/6/2021", "3/5/2021",
"SEWR22030115", "6/3/2022", "26-03-2022", NA, "26-03-2022", "6/3/2022",
"SSHA21060429", "21-08-2021", "17-10-2021", NA, "17-10-2021", "21-08-2021",
"SSHA21061130", "6/8/2021", "10/9/2021", NA, "10/9/2021", "6/8/2021",
"SSHA21030954", "3/5/2021", "1/6/2021", NA, "3/6/2021", "3/5/2021",
"SSHA21060425", "11/8/2021", "7/10/2021", NA, "7/10/2021", "11/8/2021",
"SSHA21030957", "3/5/2021", "1/6/2021", NA, "3/6/2021", "3/5/2021",
"SSHA21060428", "1/9/2021", "12/10/2021", NA, "12/10/2021", "1/9/2021",
"SSHA21090268", "23-09-2021", "25-10-2021", NA, "25-10-2021", "23-09-2021",
"SSHA21060458", "9/7/2021", "10/8/2021", NA, "10/8/2021", "8/7/2021",
"SSHA21120174", "20-12-2021", "10/1/2022", NA, "10/1/2022", "20-12-2021",
"SSHA21070626", "27-08-2021", "9/10/2021", NA, "9/10/2021", "27-08-2021",
"SSHA21050910", "21-06-2021", "24-07-2021", NA, NA, NA,
"SSHA21090498", "27-04-2022", "12/6/2022", NA, "12/6/2022", "27-04-2022",
"SSHA21030462", "25-04-2021", "27-05-2021", NA, NA, NA,
"SSZN21090592", NA, NA, NA, NA, NA,
"SSHA21060692", "16-08-2021", "14-10-2021", NA, "14-10-2021", "16-08-2021",
"SSHA22050242", "22-05-2022", "25-06-2022", NA, "8/7/2022", "22-05-2022",
"SEWR22010035", "28-12-2021", "30-01-2022", NA, "30-01-2022", "28-12-2021",
"SSHA21050499", "6/8/2021", "10/9/2021", NA, "10/9/2021", "6/8/2021",
"SEWR22030075", "6/3/2022", "8/6/2022", NA, "3/4/2022", "6/3/2022",
"SSHA21070635", "28-09-2021", "20-11-2021", NA, "20-11-2021", "28-09-2021",
"SSHA21040612", "28-06-2021", "4/8/2021", NA, "4/8/2021", "28-06-2021",
"SSHA21080229", "11/4/2022", "22-05-2022", NA, "22-05-2022", "11/4/2022",
"SSHA21050498", "9/10/2021", "13-11-2021", NA, "13-11-2021", "9/10/2021",
"SSHA21030528", "30-04-2021", "27-05-2021", NA, "29-05-2021", "30-04-2021",
"SEWR22030106", "24-02-2022", "30-03-2022", NA, "30-03-2022", "24-02-2022",
"SSHA22050134", "22-05-2022", "25-06-2022", NA, "8/7/2022", "22-05-2022",
"SEWR22010215", "26-01-2022", "26-02-2022", NA, "26-02-2022", "26-01-2022",
"SSHA21050895", "27-06-2021", "27-07-2021", NA, "27-07-2021", "27-06-2021",
"SSHA21090278", "25-09-2021", "2/11/2021", NA, "3/11/2021", "25-09-2021",
"SSHA21070531", "23-03-2022", "13-04-2022", "25-05-2022", "13-04-2022", "23-03-2022",
"SSHA22060079", "19-06-2022", "20-07-2022", NA, "20-07-2022", "19-06-2022",
"SSHA21040062", "25-05-2021", "30-06-2021", NA, "30-06-2021", "25-05-2021",
"SEWR22030100", "17-02-2022", "22-03-2022", "30-03-2022", "22-03-2022", "17-02-2022",
"SSHA21090101", "28-09-2021", "22-11-2021", NA, "20-11-2021", "28-09-2021",
"SSHA21070555", "22-08-2021", "25-09-2021", NA, "25-09-2021", "22-08-2021",
"SEWR22030161", "11/3/2022", "15-04-2022", "26-05-2022", "15-04-2022", "11/3/2022",
"SSHA21080791", "20-09-2021", "30-10-2021", NA, "30-10-2021", "20-09-2021",
"SSHA21030952", "3/5/2021", "3/6/2021", NA, "3/6/2021", "3/5/2021",
"SSHA21040080", "6/6/2021", "7/7/2021", NA, "7/7/2021", "6/6/2021",
"SSHA21070505", "10/9/2021", "18-10-2021", NA, "18-10-2021", "11/9/2021",
"SSHA21030292", "28-04-2021", "29-05-2021", NA, "29-05-2021", "28-04-2021",
"SSHA21061132", "17-07-2021", "16-08-2021", NA, "16-08-2021", "17-07-2021",
"SSHA21100837", "29-11-2021", "15-01-2022", "26-01-2022", "15-01-2022", "29-11-2021",
"SSHA21090366", "2/10/2021", "27-10-2021", "16-11-2021", "27-10-2021", "2/10/2021",
"SSHA21060463", "8/7/2021", "10/8/2021", NA, "10/8/2021", "8/7/2021",
"SSHA21090641", "14-10-2021", "27-11-2021", "6/12/2021", "27-11-2021", "14-10-2021",
"SSHA21061128", "21-07-2021", "20-08-2021", NA, "20-08-2021", "21-07-2021",
"SSHA21040773", "28-06-2021", "4/8/2021", NA, "4/8/2021", "28-06-2021",
"SSHA21060093", "8/7/2021", "5/8/2021", NA, "5/8/2021", "8/7/2021",
"SSHA21040071", "20-05-2021", "27-06-2021", NA, NA, NA,
"SSHA21060173", "23-06-2021", "29-07-2021", NA, "28-07-2021", "25-06-2021",
"SSHA21040611", "7/7/2021", "12/8/2021", NA, "12/8/2021", "7/7/2021",
"SEWR21080205", "16-02-2022", "19-03-2022", NA, NA, NA,
"SSHA21080434", "7/1/2022", "8/3/2022", "16-03-2022", "8/3/2022", "7/1/2022",
"SSHA21070209", "11/8/2021", "10/9/2021", NA, "10/9/2021", "11/8/2021"
)
df_sample %>%mutate(
origin_etd = str_replace_all(origin_etd,"/","-"),
dest_eta = str_replace_all(dest_eta,"/","-"),
load_etd = str_replace_all(load_etd,"/","-"),
disch_eta = str_replace_all(disch_eta,"/","-"),
act_delivery = str_replace_all(act_delivery,"/","-")
) %>%
mutate(
origin_etd = ymd(as_date(origin_etd)),
dest_eta = ymd(as_date(dest_eta)),
load_etd = ymd(as_date(load_etd)),
disch_eta = ymd(as_date(disch_eta)),
act_delivery = ymd(as_date(act_delivery))
)
#> Warning: All formats failed to parse. No formats found.
#> Warning: All formats failed to parse. No formats found.
#> Warning: All formats failed to parse. No formats found.
#> Warning: All formats failed to parse. No formats found.
#> Warning: All formats failed to parse. No formats found.
#> # A tibble: 100 x 6
#> shipment origin_etd dest_eta act_delivery disch_eta load_etd
#> <chr> <date> <date> <date> <date> <date>
#> 1 SSHA22010278 NA NA NA NA NA
#> 2 SEWR22040234 NA NA NA NA NA
#> 3 SSHA21030471 NA NA NA NA NA
#> 4 SSHA21040774 NA NA NA NA NA
#> 5 SSHA22060455 NA NA NA NA NA
#> 6 SSHA21090611 NA NA NA NA NA
#> 7 SEWR22040262 NA NA NA NA NA
#> 8 SSHA21031094 NA NA NA NA NA
#> 9 SSHA21060042 NA NA NA NA NA
#> 10 SSHA21050914 NA NA NA NA NA
#> # ... with 90 more rows
Created on 2022-07-19 by the reprex package (v2.0.1)
Thank you for your time and assistance.