I'm trying to scrape match data from flashscore.pt and I keep running into a problem. This is the code I'm using:
library(RSelenium)
library(rvest)
# Start a Selenium server
driver <- rsDriver(browser = "firefox")
# Connect to the remote driver
remote_driver <- driver$client
# Specify the URL of the website
url <- "https://www.flashscore.pt/futebol/eua/mls-2022/resultados/"
# Navigate to the website
remote_driver$navigate(url)
# Find and click the "Mostrar mais jogos" button using JavaScript
while (TRUE) {
# Execute JavaScript to check if the button is present on the page
is_button_present <- remote_driver$executeScript(
"return document.querySelector('.event__more') !== null;"
)
# Check the result of the JavaScript execution
if (as.logical(is_button_present)) {
# Click the button using JavaScript
remote_driver$executeScript("document.querySelector('.event__more').click();")
# Wait for some time to allow the page to load new content
Sys.sleep(5)
} else {
# If the button is not present, break the loop
break
}
}
# Find the match links
match_links <- remote_driver$findElements(using = "css selector", value = ".event__match")
# Extract the match URLs
match_urls <- sapply(match_links, function(link) {
match_id <- gsub("^.*_(\\w+)$", "\\1", link$getElementAttribute("id")[[1]])
paste0("https://www.flashscore.pt/jogo/", match_id, "/#/sumario-do-jogo/sumario-do-jogo")
})
# Function to extract match data from a URL
extract_match_data <- function(url) {
# Navigate to the match URL
remote_driver$navigate(url)
# Extract the league name
league_name <- remote_driver$findElement(using = "css selector", value = ".tournamentHeader__country")$getElementText()
# Extract the team names
team_names <- remote_driver$findElements(using = "css selector", value = "div.participant__participantName a")$getElementText()
# Extract the date
date <- remote_driver$findElement(using = "css selector", value = "div.duelParticipant__startTime > div:nth-child(1)")$getElementText()
# Extract the home and away goals
home_goals <- remote_driver$findElement(using = "css selector", value = "div.event__score.event__score--home")$getElementText()
away_goals <- remote_driver$findElement(using = "css selector", value = "div.event__score.event__score--away")$getElementText()
# Extract the odds
odds_rows <- remote_driver$findElements(using = "css selector", value = ".oddsRow")
odds <- lapply(odds_rows, function(row) row$getElementText())
# Create a data frame with the extracted data
match_data <- data.frame(
League = league_name,
HomeTeam = team_names[1],
AwayTeam = team_names[2],
Date = date,
HomeGoals = home_goals,
AwayGoals = away_goals,
Odds = odds,
stringsAsFactors = FALSE
)
return(match_data)
}
# Extract match data for each URL
all_match_data <- lapply(match_urls, extract_match_data)
# Print the extracted match data
for (i in seq_along(all_match_data)) {
cat("Match URL:", match_urls[i], "\n")
print(all_match_data[[i]])
cat("\n")
}
and I get this error:
> # Extract match data for each URL
> all_match_data <- lapply(match_urls, extract_match_data)
Error in FUN(X[[i]], ...) : attempt to apply non-function
Called from: FUN(X[[i]], ...)
Browse[1]>
How can I fix this problem?