I want to scrape these 9 pages; only the number after `page=` in the URL changes. As far as I can tell, the table node is the same on all pages.
I tried with glue, but it does not work well.
library(tidyverse, warn.conflicts = F)
library(rvest, warn.conflicts = F)
library(xml2)
# Base URL of the listing; the page number is appended for each request.
url_dat <- "https://opengovca.com/alberta-child-care?page="
xpath_tabla <- "//html/body/div[1]/div/div[2]/div[3]/div[2]/table"

# Keep every page in its own list slot. Assigning the scraped table to a
# single variable inside the loop overwrites the previous pass, which is why
# only page 9 was left once the loop finished.
paginas <- vector("list", 9)
for (i in 1:9) {
  pagina_i <- paste0(url_dat, i) %>%
    read_html() %>%
    html_nodes(xpath = xpath_tabla) %>%
    html_table() %>%
    data.frame()
  # Record which page each row came from.
  pagina_i$pagina <- i
  paginas[[i]] <- pagina_i
}

# Stack the per-page tables so reddit_ex holds the rows of all nine pages.
reddit_ex <- do.call(rbind, paginas)
# ###### Another example:
startTime <- Sys.time()

# Scrape the table from one results page of the listing.
#
# pages: page number appended to the listing URL.
# Returns a data.frame built from the table matched by the XPath.
get_cg <- function(pages) {
  cat("Scraping page", pages, "\n")
  page <- paste0("https://opengovca.com/alberta-child-care?page=", pages) %>%
    read_html()
  # The parsed document has to be passed on to html_nodes(). Without a pipe
  # after read_html(), html_nodes(xpath = ...) is called with no document and
  # fails with:
  # Error in xml2::xml_find_all(x, make_selector(css, xpath)) :
  #   argument "x" is missing, with no default
  page %>%
    html_nodes(xpath = "//html/body/div[1]/div/div[2]/div[3]/div[2]/table") %>%
    html_table() %>%
    data.frame()
}

# Only page 1 is requested here; pass 1:9 instead of 1 to scrape every page.
df <- map_dfr(1, get_cg)
#############
# Fetch the table from each of the nine result pages, tag its rows with the
# page label ("pag1" ... "pag9"), and stack all pages into one data frame.
resultados <- vector("list", 9)
for (i in seq_len(9)) {
  pagina_url <- paste0("https://opengovca.com/alberta-child-care?page=", i)
  documento <- read_html(pagina_url)
  tablas <- html_nodes(documento, xpath = "body/div/div/div/div/div/table")
  reddit_ex <- data.frame(html_table(tablas))
  reddit_ex$pagina <- paste0("pag", i)
  resultados[[i]] <- reddit_ex
}
todos_los_resultados <- do.call(rbind, resultados)
# Business.Name Office.Address Inspection. Date pagina
# 1 AGAPELAND DAYCARE CENTRE LTD. Bay 13/15/16/17 Corinthia Plaza, Leduc, AB T9E6J9 2018-06-29 pag1
# 2 MONTESSORI PLAY AND LEARN (THE) 7730 106 Street, Edmonton, AB T6E4W3 2018-06-29 pag1
# 3 MONTESSORI SCHOOL HOUSE DAY CARE 4004 114 Street, Edmonton, AB T6J1M6 2018-06-29 pag1
# 4 NORTH ROCKY VIEW COMMUNITY LINKS FAMILY CHILD CARE 125 Main St N, Airdrie, AB T4B0P7 2018-06-29 pag1
# 5 LAUDERDALE AFTERSCHOOL CARE 10816 129 Avenue, Edmonton, AB T5E5W9 2018-06-29 pag1
# 6 AGAPELAND PROGRAM FOR SCHOOL AGE CHILDREN Bay 13/15/16/17 Corinthia Plaza, Leduc, AB T9E6J9 2018-06-29 pag1