One additional tool to be aware of here is dplyr::explain()
, which provides output similar to dplyr::show_query()
but also includes a more detailed query plan. This query plan can provide further clarity into how the query will be executed in the database:
library(dbplyr)
library(tidyverse)
con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
visits <- data.frame(key = c(1:10),
visit_date = c(51:60))
diagnoses <- data.frame(key2 = c(1:10), diag = c("a", "a", "b", "b", "c", "d", "e", "r", "e", "t"))
patinfo <- data.frame(patid = c(1:10), key = c(1:10), key2 = c(1:10))
copy_to(con, visits, "a",
temporary = FALSE,
overwrite=T
)
copy_to(con, diagnoses, "b",
temporary = FALSE,
overwrite=T
)
copy_to(con, patinfo, "c",
temporary = FALSE,
overwrite=T
)
my_visits <- tbl(con, "a") %>%
filter(visit_date >= 55) %>%
left_join(tbl(con, "c") %>% dplyr::select(patid), by = c("key"= "patid")) %>%
rename(patid = key)
diagnoses <- tbl(con, "b") %>%
left_join(tbl(con, "c") %>% dplyr::select(patid), by = c("key2"= "patid")) %>%
rename(patid = key2)
all <- my_visits %>%
left_join(diagnoses) %>%
explain()
#> Joining, by = "patid"
#> <SQL>
#> SELECT `LHS`.`patid` AS `patid`, `LHS`.`visit_date` AS `visit_date`, `RHS`.`diag` AS `diag`
#> FROM (SELECT `key` AS `patid`, `visit_date`
#> FROM (SELECT `LHS`.`key` AS `key`, `LHS`.`visit_date` AS `visit_date`
#> FROM (SELECT *
#> FROM `a`
#> WHERE (`visit_date` >= 55.0)) AS `LHS`
#> LEFT JOIN (SELECT `patid`
#> FROM `c`) AS `RHS`
#> ON (`LHS`.`key` = `RHS`.`patid`)
#> )) AS `LHS`
#> LEFT JOIN (SELECT `key2` AS `patid`, `diag`
#> FROM (SELECT `LHS`.`key2` AS `key2`, `LHS`.`diag` AS `diag`
#> FROM `b` AS `LHS`
#> LEFT JOIN (SELECT `patid`
#> FROM `c`) AS `RHS`
#> ON (`LHS`.`key2` = `RHS`.`patid`)
#> )) AS `RHS`
#> ON (`LHS`.`patid` = `RHS`.`patid`)
#>
#>
#> <PLAN>
#> id parent notused detail
#> 1 3 0 0 MATERIALIZE 6
#> 2 7 3 0 SCAN TABLE b AS LHS
#> 3 9 3 0 SCAN TABLE c
#> 4 29 0 0 SCAN TABLE a
#> 5 33 0 0 SCAN TABLE c
#> 6 49 0 0 SEARCH SUBQUERY 6 USING AUTOMATIC COVERING INDEX (key2=?)