Hi @MFPete, for this type of page you need to use RSelenium.
Getting the connection working is a little difficult the first time, but not impossible.
There is more info about the setup in the RSelenium documentation.
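If you don't already have a Selenium server running, one option (just a sketch on my side, there are other ways, e.g. Docker) is to let RSelenium start the server and the browser for you with rsDriver():

# one way to get a local Selenium server plus a Chrome session in a single call
driver <- RSelenium::rsDriver(browser = "chrome", port = 4444L, verbose = FALSE)
remDr  <- driver$client   # already opened; if you use this, skip remoteDriver()/open() below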
For your request, here is how far I got.
library(RSelenium)
library(XML)
library(dplyr)
library(rvest)
# connect to the Selenium server running on localhost:4444
remDr <- remoteDriver(browserName = "chrome", port = 4444,
                      remoteServerAddr = "localhost")
remDr$open()
remDr$navigate("https://members.parliament.uk/members/Commons")
Sys.sleep(1)  # give the page a moment to finish loading before reading the source
html <- remDr$getPageSource()[[1]]
url_data1 <- html %>%
read_html() %>%
html_nodes(xpath='//*[@id="main-content"]/div/article/div/div/div[3]/a[1]') %>%
html_attr("href");url_data1
#"/member/172/contact"
url_data2 <- html %>%
read_html() %>%
html_nodes(xpath='//*[@id="main-content"]/div/article/div/div/div[3]/a[2]') %>%
html_attr("href");url_data2
# "/member/4212/contact"
url_data3 <- html %>%
read_html() %>%
html_nodes(xpath='//*[@id="main-content"]/div/article/div/div/div[3]/a[3]') %>%
html_attr("href");url_data3
# "/member/4639/contact"
# But when I try to make a loop over all the posts on page 1, it shows me this error:
for (i in 1:20) {
  url_data <- html %>%
    html_nodes(xpath = paste('//*[@id="main-content"]/div/article/div/div/div[3]/a[', i, ']')) %>%
    html_attr("href")
  Sys.sleep(2)
  # data frame
  df <- df %>% bind_rows(data.frame(url_data))
}
# Error in UseMethod("xml_find_all") :
# no applicable method for 'xml_find_all' applied to an object of class "character"
# The idea is to repeat this loop for all pages.
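The error happens because html is still a plain character string inside the loop: html_nodes() needs a parsed document, so the string has to go through read_html() first (that is what the three working examples above do). df also has to exist before the first bind_rows(). Here is a minimal sketch of a fixed loop; I'm assuming the listing is paginated with a ?page= query parameter, so please check that against the live site:

df <- data.frame()                       # has to exist before the first bind_rows()

for (page in 1:2) {                      # raise the upper limit once the page count is known
  remDr$navigate(paste0("https://members.parliament.uk/members/Commons?page=", page))
  Sys.sleep(2)                           # give each page time to load

  urls <- remDr$getPageSource()[[1]] %>%
    read_html() %>%                      # parse first; html_nodes() needs an xml_document
    html_nodes(xpath = '//*[@id="main-content"]/div/article/div/div/div[3]/a') %>%
    html_attr("href")

  df <- df %>% bind_rows(data.frame(url = urls))
}
df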