First.Name<-c("John", "James", "Jack", "Jill", "Corey", "Callie", "Sofie", "Josie", "Lilly", "Luke", "Jane", "Sara", "Chandler","Flora","Parker","Preston")
Last.Name<-c("Smith","Flora","Chandler","Jack","John","Reeves","Preston","Parker","James","Brooks","Johnson","Smith","Krause","Casey","Corey","Lilly")
library(tidyverse)
library(sqldf)
first_df <- data.frame(
firstname = First.Name,
lastname = Last.Name
)
# a function to perform repeated left joins
do_next <- function(i){
lefttable <- if(i==1){"first_df"} else {paste0("result_",i-1)}
join <- if(i==1){"a.lastname"} else {paste0("nextname",i-1)}
assign(x=paste0("result_",i), sqldf::sqldf(paste0(
"select a.*,b.lastname as nextname",i,"
from ",lefttable," a left join
first_df b
on ",join,"=b.firstname")) %>% as_tibble,
envir = globalenv())
}
i<- 0
repeat {
i <- i +1
do_next(i)
#if all the values in the final column of the latest result are NA then break as nothing to join to anything
if(all(is.na(get(paste0("result_",i)) %>% pull(paste0("nextname",i)))))
break;
if(i>1000) #break at a high number in case there was a never ending cycle
break;
}
i
get(paste0("result_",i))
firstname lastname nextname1 nextname2 nextname3 nextname4 nextname5
<fct> <fct> <chr> <chr> <chr> <chr> <chr>
1 John Smith NA NA NA NA NA
2 James Flora Casey NA NA NA NA
3 Jack Chandler Krause NA NA NA NA
4 Jill Jack Chandler Krause NA NA NA
5 Corey John Smith NA NA NA NA
6 Callie Reeves NA NA NA NA NA
7 Sofie Preston Lilly James Flora Casey NA
8 Josie Parker Corey John Smith NA NA
9 Lilly James Flora Casey NA NA NA
10 Luke Brooks NA NA NA NA NA
11 Jane Johnson NA NA NA NA NA
12 Sara Smith NA NA NA NA NA
13 Chandler Krause NA NA NA NA NA
14 Flora Casey NA NA NA NA NA
15 Parker Corey John Smith NA NA NA
16 Preston Lilly James Flora Casey NA NA