Court Opinion Scraper - Need Help Exporting Values to MySQL

LonelyStudent · July 7, 2019, 5:50am

The forum says I can only post 2 URLs as a new user, so I have changed some characters in the URLs below until the post was accepted.

Getting stuck on this part:

# WRITE GLOBAL ENVIRONMENT VALUES TO MYSQL TABLES
# Copies the scraped citation into export_1, then tries to append it to a
# MySQL table through the open connection `con`.
export_1 <- extracted_op_case_cite
# NOTE(review): `federal_case_law` is a bare symbol with no assignment anywhere
# in this post, and `name` is also supplied by keyword, so the call carries two
# candidate table names -- confirm which table is intended.
# NOTE(review): in the script below extracted_op_case_cite is one element of an
# html_text() result, i.e. a character value rather than a data frame;
# presumably that is why the reported error shows the citation being opened as
# a file -- TODO confirm against the RMySQL dbWriteTable documentation.
dbWriteTable(con,federal_case_law, value = export_1, row.names = FALSE, name = "extracted_op_case_cite", append = TRUE )

extracted_op_case_cite is one of the main "Global Environment" "Values" (I am not 100% sure whether these are data frames yet, or whether they even have to be in order to pass the data over from its current variable).

Any help or suggestions would be appreciated. I am basically trying to take each variable extracted from the court opinion URL and export them to duplicate MySQL rows in a new table for that URL.

SCRIPT:

# System prerequisites (works on Debian 9 / Stretch):
#   sudo apt-get install libcurl4-openssl-dev
#   sudo apt-get install libxml2-dev
#
# Install only the packages that are not available yet. Calling
# install.packages() unconditionally re-downloads and rebuilds every package
# each time the script is run.
required_packages <- c(
  "rvest", "xml2", "magrittr", "tidyverse", "readr",
  "dbplyr", "RSQLite", "RMySQL", "RMariaDB"
)
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(rvest)
library(xml2)
library(magrittr)
library(tidyverse)
library(readr)
library(RMySQL)
library(RMariaDB)
# CONNECT TO LOCAL MYSQL SERVER
# Connection settings are read from environment variables so that real
# credentials do not have to be stored in (or posted with) the script. The
# fallbacks are the values the script originally hard-coded, so it behaves
# exactly as before when none of the variables are set.
con <- dbConnect(
  MySQL(),
  user = Sys.getenv("MYSQL_USER", unset = "root"),
  password = Sys.getenv("MYSQL_PASSWORD", unset = "workingpassword"),
  host = Sys.getenv("MYSQL_HOST", unset = "localhost"),
  dbname = Sys.getenv("MYSQL_DBNAME", unset = "scrape1")
)
# Show the connection details and the tables already present in the database.
summary(con)
dbListTables(con)
# SCRAPE THE OPINION PAGE, THEN EXPORT TO MYSQL
# The page is downloaded once and queried three times (paragraphs, <title>,
# <center> blocks). The MySQL export runs last because it needs
# extracted_op_case_cite, which only exists after the scrape has finished.

# Return at most the first n elements of x without indexing past its end.
first_n <- function(x, n) {
  x[seq_len(min(n, length(x)))]
}

case_url <-
  "https://law.justia.com/cases/federal/district-courts/F2/1/935/1507004/"

# SCRAPE OPINION (PART 1/3)
scraping_op1 <- read_html(case_url)

# Inspect the text of the first child elements inside the #opinion container.
scraping_op1 %>%
  html_nodes("#opinion :nth-child(1)") %>%
  html_text()
scraping_op1

p_nodes <- scraping_op1 %>%
  html_nodes("p")
length(p_nodes)
first_n(p_nodes, 21)

p_text <- html_text(p_nodes)
first_n(p_text, 21)

# The assignments below pick paragraphs by fixed position (6 to 20). Indexing
# past the end of a character vector silently yields NA, so say so up front.
if (length(p_text) < 20) {
  warning(
    "Expected at least 20 <p> elements but found ", length(p_text),
    "; some extracted_* values will be NA.",
    call. = FALSE
  )
}

extracted_jurisdiction <- p_text[6]
extracted_pinpoints <- p_text[7]
extracted_defense_attorneys <- p_text[8]
# NOTE(review): the next two variables both read paragraph 9, exactly as in
# the original script -- confirm whether one of them should use another index.
extracted_defense2_attorneys <- p_text[9]
extracted_plaintiff_attorneys <- p_text[9]
extracted_judge <- p_text[10]
extracted_opinion_p1 <- p_text[11]
extracted_opinion_p2 <- p_text[12]
extracted_opinion_p3 <- p_text[13]
extracted_opinion_p4 <- p_text[14]
extracted_opinion_p5 <- p_text[15]
extracted_opinion_p6 <- p_text[16]
extracted_opinion_p7 <- p_text[17]
extracted_opinion_p8 <- p_text[18]
extracted_opinion_p9 <- p_text[19]
extracted_opinion_p10 <- p_text[20]

# SCRAPE TITLE (PART 2/3)
title_nodes <- scraping_op1 %>%
  html_nodes("title")
length(title_nodes)
title_nodes

title_text <- html_text(title_nodes)
title_text
# Take the first <title> text. The original read element 21 of this vector,
# which does not exist for a page with a single <title> and so produced NA.
# It also made the earlier paragraph-based extracted_title assignment dead.
extracted_title <- title_text[1]

# SCRAPE [CITATION + PARTIES + CASE NUMBER + JURISDICTION + DATE] (PART 3/3)
b_nodes <- scraping_op1 %>%
  html_nodes("center")
length(b_nodes)
first_n(b_nodes, 5)

center_text <- html_text(b_nodes)
center_text[1]

extracted_op_case_cite <- center_text[1]
extracted_op_case_parties <- center_text[2]
extracted_op_case_number <- center_text[3]
extracted_op_case_juris <- center_text[4]
extracted_op_case_date <- center_text[5]

# WRITE GLOBAL ENVIRONMENT VALUES TO MYSQL TABLES
if (is.na(extracted_op_case_cite)) {
  stop("No citation was scraped; nothing to export.", call. = FALSE)
}

# dbWriteTable() needs a data frame here. When `value` is a plain character
# string RMySQL treats it as the path of a text file to import, which is what
# produced "cannot open file '1 F.2d 935 (1924)'".
export_1 <- data.frame(
  extracted_op_case_cite = extracted_op_case_cite,
  stringsAsFactors = FALSE
)

# The stray positional `federal_case_law` argument was an undefined symbol and
# has been removed; the table written to is still the one given by `name`.
# NOTE(review): if the rows were meant to go into a table called
# "federal_case_law", change `name` accordingly.
dbWriteTable(
  con,
  name = "extracted_op_case_cite",
  value = export_1,
  row.names = FALSE,
  append = TRUE
)

LonelyStudent · July 7, 2019, 6:56am

Throwing Errors:

dbWriteTable(con,federal_case_law, value = export_1, row.names = FALSE, name = "extracted_op_case_cite", append = TRUE )
Error in file(file, "rt") : cannot open the connection
In addition: Warning message:
In file(file, "rt") :
cannot open file '1 F.2d 935 (1924)': No such file or directory

system · July 28, 2019, 6:56am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.