Is there a method for telling R "hey if you quit this function before it's supposed to be finished, do this"?
Here's just an example of something I wrote that has problems due to the RSelenium driver still running if the user closes the function early. No need to look at it though.
library(tidyverse)
library(rvest)
library(RSelenium)
library(progress)
get_box_score <- function(..., progress = TRUE) {
if (progress) {
pb <- progress::progress_bar$new(format = "get_box_score() [:bar] :percent eta: :eta", clear = FALSE, total = nrow(...), show_after = 0)
pb$tick(0)}
driver <- rsDriver(verbose = FALSE)
.get_box_score <- function(url, league, season, ...) {
seq(2, 5, by = 0.001) %>%
sample(1) %>%
Sys.sleep()
driver$client$navigate(url)
Sys.sleep(3)
page <- driver$client$getPageSource() %>%
purrr::pluck(1) %>%
read_html()
if (league == "OHL") {
teams <- page %>%
html_nodes(".gamecentre-playbyplay-event--goal .gamecentre-playbyplay-event__team-logo") %>%
{tibble(team = as(., "character"))} %>%
mutate(team = str_split(team, 'div class=\"gamecentre-playbyplay-event__team-logo team-logo--ohl-', simplify = TRUE, n = 2)[,2]) %>%
mutate(team = str_split(team, '\" data-reactid', simplify = TRUE, n = 2)[,1]) %>%
mutate(team = toupper(team))
}
if (league == "QMJHL") {
team_table <- data_frame(team_id = c("http://assets.leaguestat.com/lhjmq/logos/70x70/1_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/2_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/3_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/5_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/7_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/8_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/9_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/10_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/11_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/12_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/13_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/14_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/15_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/16_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/17_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/18_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/19_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/60_190.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/7_171.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/19_82.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/4_82.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/19_104.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/4_158.png",
"http://assets.leaguestat.com/lhjmq/logos/70x70/7_158.png"),
team = c("Moncton Wildcats",
"Acadie-Bathurst Titan",
"Cape Breton Screaming Eagles",
"Halifax Mooseheads",
"Charlottetown Islanders",
"Saint John Sea Dogs",
"Quebec Remparts",
"Chicoutimi Sagueneens",
"Rouyn-Noranda Huskies",
"Gatineau Olympiques",
"Shawinigan Cataractes",
"Drummondville Voltigeurs",
"Val-d'Or Foreurs",
"Baie-Comeau Drakkar",
"Victoriaville Tigres",
"Rimouski Oceanic",
"Blainville-Boisbriand Armada",
"Sherbrooke Phoenix",
"PEI Rocket",
"Montreal Juniors",
"Lewiston MAINEiacs",
"St. John's Fog Devils",
"Sherbrooke Castors",
"Montreal Rocket"))
teams <- page %>%
html_nodes(".gamecentre-playbyplay-event--goal img") %>%
{tibble(team_id = as(., "character"))} %>%
mutate(team_id = str_split(team_id, '<img src="', simplify = TRUE, n = 2)[,2]) %>%
mutate(team_id = str_split(team_id, '\"', simplify = TRUE, n = 2)[,1]) %>%
#left_join(team_table, by = "team_id") %>% # uncomment later
select(team = team_id) # change to select(team) later and uncomment above
}
if (league == "WHL") {
teams <- page %>%
html_nodes(".gamecentre-playbyplay-event--goal .gamecentre-playbyplay-event__team-logo") %>%
{tibble(team = as(., "character"))} %>%
mutate(team = str_split(team, 'div class=\"gamecentre-playbyplay-event__team-logo team-logo--whl-', simplify = TRUE, n = 2)[,2]) %>%
mutate(team = str_split(team, '\" data-reactid', simplify = TRUE, n = 2)[,1]) %>%
mutate(team = toupper(team))
}
goal_info <- page %>%
html_nodes(".gamecentre-playbyplay-event--goal") %>%
html_text() %>%
as_tibble() %>%
set_names("messy_data") %>%
mutate(period = str_split(messy_data, " ", simplify = TRUE, n = 2)[,1]) %>%
mutate(period = str_split(period, "Goal", simplify = TRUE, n = 2)[,2]) %>%
mutate(period = str_replace_all(period, c("ST" = "", "ND" = "", "RD" = ""))) %>%
mutate(time = str_split(messy_data, " ", simplify = TRUE, n = 2)[,2]) %>%
mutate(time = str_split(time, "\\#", simplify = TRUE, n = 2)[,1]) %>%
mutate(goal = str_split(messy_data, " ", simplify = TRUE, n = 3)[,3]) %>%
mutate(goal = str_split(goal, "\\(", simplify = TRUE, n = 2)[,1]) %>%
mutate(assists = str_split(messy_data, "Assists\\:", simplify = TRUE, n = 2)[,2]) %>%
mutate(assists = str_split(assists, "\\+/-", simplify = TRUE, n = 2)[,1]) %>%
mutate(game_strength = case_when(str_detect(messy_data, "Short Handed") & str_detect(messy_data, "Empty Net") ~ "SH EN",
str_detect(messy_data, "Power Play") & str_detect(messy_data, "Empty Net") ~ "PP EN",
str_detect(messy_data, "Short Handed") & str_detect(messy_data, "Penalty Shot") ~ "SH PS",
str_detect(messy_data, "Power Play") & str_detect(messy_data, "Penalty Shot") ~ "PP PS",
str_detect(messy_data, "Empty Net") ~ "EN",
str_detect(messy_data, "Short Handed") ~ "SH",
str_detect(messy_data, "Power Play") ~ "PP",
str_detect(messy_data, "Penalty Shot") ~ "PS",
TRUE ~ "EV")) %>%
mutate(assists = str_replace_all(assists, c("Power Play" = "",
"Short Handed" = "",
"Empty Net" = "",
"Penalty Shot" = "",
"Game Winning" = "",
"Insurance Goal" = ""))) %>%
mutate(primary_assist = str_split(assists, ",", simplify = TRUE, n = 2)[,1]) %>%
mutate(primary_assist = str_replace_all(primary_assist, "\\#[0-9]{1,2}", "")) %>%
mutate(secondary_assist = str_split(assists, ",", simplify = TRUE, n =2)[,2]) %>%
mutate(secondary_assist = str_replace_all(secondary_assist, "\\#[0-9]{1,2}", ""))
box_score_data <- teams %>%
bind_cols(goal_info) %>%
mutate(season = season) %>%
mutate(league = league) %>%
mutate(game_url = url) %>%
select(time, period, game_strength, team, goal, primary_assist, secondary_assist, season, league, game_url) %>%
mutate_all(str_squish)
if (progress) {pb$tick()}
return(box_score_data)
}
persistently_get_box_score <- elite::persistently(.get_box_score, max_attempts = 10, wait_seconds = 0.0001)
try_get_box_score <- function(url, league, season, ...) {
tryCatch(persistently_get_box_score(url, league, season, ...),
error = function(e) {
print(e)
print(url)
data_frame()},
warning = function(w) {
print(w)
print(url)
data_frame()})
}
all_box_score_data <- pmap_dfr(..., try_get_box_score)
driver$client$close()
driver$server$stop()
return(all_box_score_data)
}