I can help with the future dates, as shown in the reprex below. For the current dates, is the assumption that NAs that are bracketed by non-NAs (1, X, etc.) represent the occurrence of a session with unknown "tardiness" (code as 6?) Also, for the first record, trap for session completed on 9/20 when start date was 9/23? (The sheet is a demonstration why so much time in data science is devoted to data cleaning.)
dat <- data.frame(
id =
c("match_1", "match_2", "match_3", "match_4", "match_5", "match_6", "match_7", "match_8", "match_9", "match_10", "match_11", "match_12", "match_13", "match_14", "match_15", "match_16", "match_17", "match_18", "match_19", "match_20", "match_21", "match_22", "match_23", "match_24", "match_25", "match_26", "match_27", "match_28"), start_date = structure(c(18893, 18897, 18898, 18900, 18900, 18900, 18901, 18904, 18907, 18911, 18911, 18912, 18913, 18919, 18919, 18925, 18925, 18933, 18933, 18934, 18935, 18939, 18939, 18939, 18946, 18964, 18968, 18968), class = "Date"),
day_time =
c("TH/7pm", "MON/5:30 PM", "TUE/6:15PM", "TH/9am", "TH/6:30 PM", "TH/7pm", "F/5:15pm", "MON/4:30PM", "TH/6 pm", "MON/ 5:00 PM", "MON/6:00 PM", "TUE/6:30 PM", "WED/6pm", "TUE/11:00 AM", "TUE/2pm", "MON 4:45 PM", "TUE/6:00 PM", "TUE/6:00 PM", "TUE/6:15PM", "WED/5:00 PM", "TH/6PM", "MON/5:30PM", "MON/5:30 PM", "MON/6:00 PM", "MON/6:00 PM", "F/12pm", "Tue/ 4:30pm", "Tue/5:00 pm"),
"2021-09-20" =
c(1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2021-09-27" =
c(NA, 1, 1, 1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2021-10-04" =
c("X", "1", "1", "1", "1", "X", NA, "1", "1", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2021-10-11" =
c("X", "1", "1", "1", "1", "1", "1", "1", "4", "1", "1", "1", "1", NA, "4", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2021-10-18" =
c("X", "1", "1", "1", "X", "1", "1", "1", "4", "3", "X", "1", "2", NA, "1", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2021-10-25" =
c("1", "1", "1", "1", "1", "4/5", "1", "1", "2", "X", "1", "4", "1", "1", "X", "1", "1", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2021-11-01" =
c("1", "1", "1", "1", "1", NA, "1", "4", "1", "X", "1", "1", NA, "1", "1", "4", "1", "1", "1", "1", "1", NA, NA, NA, NA, NA, NA, NA),
"2021-11-08" =
c("1", "1", "3", NA, "1", NA, "1", "X", NA, "5", NA, "4", "2", "4", "1", "1", "1", "1", "1", "1", "1", "1", NA, NA, NA, NA, NA, NA),
"2021-11-15" =
c(1, 1, NA, 1, 1, 1, 1, 3, 2, 3, NA, 2, 4, 2, NA, 2, 1, 1, NA, NA, 1, 2, 1, 1, 1, NA, NA, NA),
"2021-11-22" =
c(NA, "1", "1", NA, NA, NA, "1", "X", NA, NA, NA, NA, NA, "5", "X", "4", "4", "1", "4", NA, "1", "1", "2", "X", NA, NA, NA, NA),
"2021-11-29" =
c("1", "1", NA, "1", "1", "X", "1", NA, "2", NA, NA, "1", NA, "1", "2", "X", "1", "1", NA, "1", "4", "1", "1", "1", "1", "4", NA, NA),
"2021-12-06" =
c("1", NA, "1", NA, NA, NA, "5", NA, "1", NA, NA, "1", "1", NA, NA, "X", "3", "1", NA, "2", "5", NA, "1", "1", "1", "4", "1", "1"),
"2021-12-13" =
c(NA, "1", NA, NA, "1", NA, NA, NA, NA, NA, NA, "5", "5", NA, NA, NA, NA, "1", "X", "2", NA, NA, "5", "1", "4", NA, NA, "1"),
"2021-12-20" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2021-12-27" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-01-03" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-01-10" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-01-17" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-01-24" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-01-31" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-02-07" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-02-14" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-02-21" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-02-28" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-03-07" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-03-14" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-03-21" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-03-28" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-04-04" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-04-11" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-04-18" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-04-25" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-05-02" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-05-09" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-05-16" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-05-23" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-05-30" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
"2022-06-06" =
c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)
)
# inspect
str(dat)
#> 'data.frame': 28 obs. of 41 variables:
#> $ id : chr "match_1" "match_2" "match_3" "match_4" ...
#> $ start_date : Date, format: "2021-09-23" "2021-09-27" ...
#> $ day_time : chr "TH/7pm" "MON/5:30 PM" "TUE/6:15PM" "TH/9am" ...
#> $ X2021.09.20: num 1 NA NA NA NA NA NA NA NA NA ...
#> $ X2021.09.27: num NA 1 1 1 1 1 NA NA NA NA ...
#> $ X2021.10.04: chr "X" "1" "1" "1" ...
#> $ X2021.10.11: chr "X" "1" "1" "1" ...
#> $ X2021.10.18: chr "X" "1" "1" "1" ...
#> $ X2021.10.25: chr "1" "1" "1" "1" ...
#> $ X2021.11.01: chr "1" "1" "1" "1" ...
#> $ X2021.11.08: chr "1" "1" "3" NA ...
#> $ X2021.11.15: num 1 1 NA 1 1 1 1 3 2 3 ...
#> $ X2021.11.22: chr NA "1" "1" NA ...
#> $ X2021.11.29: chr "1" "1" NA "1" ...
#> $ X2021.12.06: chr "1" NA "1" NA ...
#> $ X2021.12.13: chr NA "1" NA NA ...
#> $ X2021.12.20: logi NA NA NA NA NA NA ...
#> $ X2021.12.27: logi NA NA NA NA NA NA ...
#> $ X2022.01.03: logi NA NA NA NA NA NA ...
#> $ X2022.01.10: logi NA NA NA NA NA NA ...
#> $ X2022.01.17: logi NA NA NA NA NA NA ...
#> $ X2022.01.24: logi NA NA NA NA NA NA ...
#> $ X2022.01.31: logi NA NA NA NA NA NA ...
#> $ X2022.02.07: logi NA NA NA NA NA NA ...
#> $ X2022.02.14: logi NA NA NA NA NA NA ...
#> $ X2022.02.21: logi NA NA NA NA NA NA ...
#> $ X2022.02.28: logi NA NA NA NA NA NA ...
#> $ X2022.03.07: logi NA NA NA NA NA NA ...
#> $ X2022.03.14: logi NA NA NA NA NA NA ...
#> $ X2022.03.21: logi NA NA NA NA NA NA ...
#> $ X2022.03.28: logi NA NA NA NA NA NA ...
#> $ X2022.04.04: logi NA NA NA NA NA NA ...
#> $ X2022.04.11: logi NA NA NA NA NA NA ...
#> $ X2022.04.18: logi NA NA NA NA NA NA ...
#> $ X2022.04.25: logi NA NA NA NA NA NA ...
#> $ X2022.05.02: logi NA NA NA NA NA NA ...
#> $ X2022.05.09: logi NA NA NA NA NA NA ...
#> $ X2022.05.16: logi NA NA NA NA NA NA ...
#> $ X2022.05.23: logi NA NA NA NA NA NA ...
#> $ X2022.05.30: logi NA NA NA NA NA NA ...
#> $ X2022.06.06: logi NA NA NA NA NA NA ...
# classify by variable type
the_types <- sapply(dat, typeof)
# identify types that are logical (because they were produced for future dates)
logicals <- names(the_types[which(the_types == "logical")])
# subset dat to remove future variables
current <- dat[, setdiff(colnames(dat), logicals)]
# preview
head(current)
#> id start_date day_time X2021.09.20 X2021.09.27 X2021.10.04
#> 1 match_1 2021-09-23 TH/7pm 1 NA X
#> 2 match_2 2021-09-27 MON/5:30 PM NA 1 1
#> 3 match_3 2021-09-28 TUE/6:15PM NA 1 1
#> 4 match_4 2021-09-30 TH/9am NA 1 1
#> 5 match_5 2021-09-30 TH/6:30 PM NA 1 1
#> 6 match_6 2021-09-30 TH/7pm NA 1 X
#> X2021.10.11 X2021.10.18 X2021.10.25 X2021.11.01 X2021.11.08 X2021.11.15
#> 1 X X 1 1 1 1
#> 2 1 1 1 1 1 1
#> 3 1 1 1 1 3 NA
#> 4 1 1 1 1 <NA> 1
#> 5 1 X 1 1 1 1
#> 6 1 1 4/5 <NA> <NA> 1
#> X2021.11.22 X2021.11.29 X2021.12.06 X2021.12.13
#> 1 <NA> 1 1 <NA>
#> 2 1 1 <NA> 1
#> 3 1 <NA> 1 <NA>
#> 4 <NA> 1 <NA> <NA>
#> 5 <NA> 1 <NA> 1
#> 6 <NA> X <NA> <NA>