The site says I can only post 2 URLs as a new user, so I have altered characters in the URLs until the post is accepted.
I am getting stuck on this part:
# WRITE GLOBAL ENVIRONMENT VALUES TO MYSQL TABLES
# extracted_op_case_cite is a plain character value. RMySQL's dbWriteTable()
# interprets a character `value` as the name of a file to import, so the value
# is wrapped in a one-column data frame to be written as row data instead.
export_1 <- data.frame(
  extracted_op_case_cite = extracted_op_case_cite,
  stringsAsFactors = FALSE
)
# dbWriteTable() expects the connection, the table name and the data. The
# unquoted `federal_case_law` symbol was an undefined object and is dropped;
# the table name stays the one given through `name`.
# NOTE(review): when appending to an existing table, the data frame column
# name must match the table's column name - confirm against the schema.
dbWriteTable(
  con,
  name = "extracted_op_case_cite",
  value = export_1,
  row.names = FALSE,
  append = TRUE
)
extracted_op_case_cite is one of the main "Values" in the "Global Environment" (I am not 100% sure whether they are data frames yet, or whether they even have to be in order to pass the data over from its current value variable).
Any help or suggestions would be appreciated. I am basically trying to take each variable extracted from the court opinion URL and export it to duplicate MySQL rows in a new table for that URL.
SCRIPT:
# sudo apt-get install libcurl4-openssl-dev
# sudo apt-get install libxml2-dev
# Works on Debian 9 / Stretch
# Install only the packages that are not present yet, so that re-running the
# script does not download and reinstall everything each time.
required_pkgs <- c(
  "rvest", "xml2", "magrittr", "tidyverse", "readr",
  "dbplyr", "RSQLite", "RMySQL", "RMariaDB"
)
# system.file() returns "" for a package that is not installed.
is_installed <- vapply(
  required_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(rvest)
library(xml2)
library(magrittr)
library(tidyverse)
library(readr)
library(RMySQL)
library(RMariaDB)
# CONNECT TO LOCAL MYSQL SERVER
# Connection settings can be overridden through environment variables so that
# real credentials do not have to be stored in the script. When a variable is
# not set, the original hard-coded value is used.
con <- dbConnect(
  MySQL(),
  user = Sys.getenv("SCRAPE_DB_USER", unset = "root"),
  password = Sys.getenv("SCRAPE_DB_PASSWORD", unset = "workingpassword"),
  host = Sys.getenv("SCRAPE_DB_HOST", unset = "localhost"),
  dbname = Sys.getenv("SCRAPE_DB_NAME", unset = "scrape1")
)
# Quick sanity check: connection details and the tables already present.
summary(con)
dbListTables(con)
# WRITE GLOBAL ENVIRONMENT VALUES TO MYSQL TABLES
# extracted_op_case_cite is only assigned by the scraping code in PART 3/3, so
# this step has to be run after that section; fail with a clear message
# otherwise.
if (!exists("extracted_op_case_cite")) {
  stop(
    "extracted_op_case_cite is not defined yet; run the scraping sections ",
    "before writing to MySQL.",
    call. = FALSE
  )
}
# RMySQL's dbWriteTable() interprets a character `value` as the name of a file
# to import, so the scraped string is wrapped in a one-column data frame to be
# written as row data instead.
export_1 <- data.frame(
  extracted_op_case_cite = extracted_op_case_cite,
  stringsAsFactors = FALSE
)
# dbWriteTable() expects the connection, the table name and the data. The
# unquoted `federal_case_law` symbol was an undefined object and is dropped;
# the table name stays the one given through `name`.
# NOTE(review): when appending to an existing table, the data frame column
# name must match the table's column name - confirm against the schema.
dbWriteTable(
  con,
  name = "extracted_op_case_cite",
  value = export_1,
  row.names = FALSE,
  append = TRUE
)
# SCRAPE OPINION (PART 1/3)
# Download and parse the opinion page (the URL scheme needs the colon in
# "https://", otherwise read_html() does not treat it as a web address).
scraping_op1 <- read_html("https://law.justia.com/cases/federal/district-courts/F2/1/935/1507004/")

# Preview the text of the first-child elements under #opinion. Every step of
# the chain must be joined with %>%.
scraping_op1 %>%
  html_nodes("#opinion :nth-child(1)") %>%
  html_text()
scraping_op1

# Collect all <p> nodes and inspect how many there are.
p_nodes <- scraping_op1 %>%
  html_nodes("p")
length(p_nodes)
p_nodes[1:21]

# Text content of every <p> node, in document order.
p_text <- p_nodes %>%
  html_text()

# Print the first 21 paragraphs one by one to see which index holds what.
for (i in seq_len(21)) {
  print(p_text[i])
}

# Assign paragraphs to named values by position.
# NOTE(review): the positions are hard-coded for this particular page; verify
# them against the printed output before reusing on another opinion.
extracted_jurisdiction <- p_text[6]
extracted_pinpoints <- p_text[7]
extracted_defense_attorneys <- p_text[8]
# NOTE(review): the next two values both read p_text[9] - confirm whether one
# of the indices should differ.
extracted_defense2_attorneys <- p_text[9]
extracted_plaintiff_attorneys <- p_text[9]
extracted_judge <- p_text[10]
extracted_opinion_p1 <- p_text[11]
extracted_opinion_p2 <- p_text[12]
extracted_opinion_p3 <- p_text[13]
extracted_opinion_p4 <- p_text[14]
extracted_opinion_p5 <- p_text[15]
extracted_opinion_p6 <- p_text[16]
extracted_opinion_p7 <- p_text[17]
extracted_opinion_p8 <- p_text[18]
extracted_opinion_p9 <- p_text[19]
extracted_opinion_p10 <- p_text[20]
extracted_title <- p_text[21]
# SCRAPE TITLE (PART 2/3)
# Download and parse the page again to read its <title> element.
scraping_op2 <- read_html("https://law.justia.com/cases/federal/district-courts/F2/1/935/1507004/")

# Preview the title text. Every step of the chain must be joined with %>%.
scraping_op2 %>%
  html_nodes("title") %>%
  html_text()
scraping_op2

# Collect the <title> nodes and inspect them. All matches are shown rather
# than indexing 1:21, which was carried over from the paragraph section.
p_nodes <- scraping_op2 %>%
  html_nodes("title")
length(p_nodes)
p_nodes

p_text <- p_nodes %>%
  html_text()
p_text[1]

# The title is the first match; index 21 (copied from the paragraph section)
# would yield NA whenever there are fewer than 21 <title> nodes.
extracted_title <- p_text[1]
# SCRAPE [CITATION + PARTIES + CASE NUMBER + JURISDICTION + DATE] (PART 3/3)
# Download and parse the page again to read its <center> elements.
scraping_op5 <- read_html("https://law.justia.com/cases/federal/district-courts/F2/1/935/1507004/")

# Preview the text of the <center> elements. Every step of the chain must be
# joined with %>%.
scraping_op5 %>%
  html_nodes("center") %>%
  html_text()

# Collect the <center> nodes and inspect the first five.
b_nodes <- scraping_op5 %>%
  html_nodes("center")
length(b_nodes)
b_nodes[1:5]

p_text <- b_nodes %>%
  html_text()
p_text[1]

# Assign the first five <center> texts to named values by position.
# NOTE(review): the positions are hard-coded for this particular page; verify
# them against the printed output before reusing on another opinion.
extracted_op_case_cite <- p_text[1]
extracted_op_case_parties <- p_text[2]
extracted_op_case_number <- p_text[3]
extracted_op_case_juris <- p_text[4]
extracted_op_case_date <- p_text[5]