I am trying to conditionally join two data.tables:
ex <- structure(list(person_id = c("79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65",
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65",
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65"), prs_nat_key = c("8240588160001",
"8240588160001", "8240588160001", "8240588160001", "8240588160001",
"8240588160001", "8240588160001", "8240588160001", "106705689",
"106705689", "106705689", "106705689"), serv_from_dt = structure(c(18262,
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278,
18278, 18278), class = "Date"), serv_to_dt = structure(c(18262,
18262, 18262, 18265, 18275, 18278, 18278, 18278, 18282, 18282,
18299, 18299), class = "Date"), new_pos = c("IP", "IP", "IP",
"IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP"), days_diff = c(0,
0, 0, 3, 0, 3, 3, 3, 4, 4, 21, 21)), row.names = c(NA, -12L), class = c("data.table",
"data.frame"))
with
date_period <- structure(list(prs_nat_key = c("106705689", "8240588160001",
"8240588160001"), unique_start = structure(c(18278, 18262, 18275
), class = "Date"), unique_end = structure(c(18299, 18265, 18278
), class = "Date")), row.names = c(NA, -3L), class = c("data.table",
"data.frame"))
I have this code to conditionally join data.tables :
setDT(ex)
setDT(date_period)
setkey(date_period, prs_nat_key)
setkey(ex, prs_nat_key)
ex[, c("end_date") := # Assign the below result to new columns in dtgrouped2
date_period[ex, # join
.(unique_end), # get the column you need
on = .(unique_start <= serv_from_dt,
unique_end >= serv_to_dt,
prs_nat_key = prs_nat_key)]]
I want the join to be: if the rows match by prs_nat_key, and ex$serv_to_dt <= date_period$unique_end & ex$serv_from_dt >= date_period$unique_start, then join.
The output I'm getting is:
output <- structure(list(person_id = c("79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65",
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65",
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65"), prs_nat_key = c("8240588160001",
"8240588160001", "8240588160001", "8240588160001", "8240588160001",
"8240588160001", "8240588160001", "8240588160001", "106705689",
"106705689", "106705689", "106705689"), serv_from_dt = structure(c(18262,
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278,
18278, 18278), class = "Date"), serv_to_dt = structure(c(18262,
18262, 18262, 18265, 18275, 18278, 18278, 18278, 18282, 18282,
18299, 18299), class = "Date"), new_pos = c("IP", "IP", "IP",
"IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP"), days_diff = c(0,
0, 0, 3, 0, 3, 3, 3, 4, 4, 21, 21), start_date = structure(c(18262,
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278,
18278, 18278), class = "Date"), end_date = structure(c(18262,
18262, 18262, 18265, 18275, 18278, 18278, 18278, 18282, 18282,
18299, 18299), class = "Date")), row.names = c(NA, -12L), class = c("data.table",
"data.frame"))
but what I'm actually looking for is:
want <- structure(list(person_id = c("79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8",
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65",
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65",
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65"), prs_nat_key = c("8240588160001",
"8240588160001", "8240588160001", "8240588160001", "8240588160001",
"8240588160001", "8240588160001", "8240588160001", "106705689",
"106705689", "106705689", "106705689"), serv_from_dt = structure(c(18262,
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278,
18278, 18278), class = "Date"), serv_to_dt = structure(c(18262,
18262, 18262, 18265, 18275, 18278, 18278, 18278, 18282, 18282,
18299, 18299), class = "Date"), new_pos = c("IP", "IP", "IP",
"IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP"), days_diff = c(0,
0, 0, 3, 0, 3, 3, 3, 4, 4, 21, 21), start_date = structure(c(18262,
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278,
18278, 18278), class = "Date"), end_date = structure(c(18265,
18265, 18265, 18265, 18278, 18278, 18278, 18278, 18299, 18299,
18299, 18299), class = "Date")), row.names = c(NA, -12L), class = c("data.table",
"data.frame"))
is there any way I can adjust my code to get what I'm looking for? Thank you!