How to Read a Large JSON File in R?

dhanashreedeshpande · September 3, 2018, 9:13pm

library(sparklyr)
library(dplyr)
library(jsonlite)

# Read a JSON file into Spark through sparklyr and inspect its schema.

# Point sparklyr at the Spark installation to use.
Sys.setenv(SPARK_HOME = "/usr/lib/spark")

# Spark configuration ----
# Target machine per the original note: c3.4xlarge, 30G RAM, 16 cores, 320 disk.
conf <- spark_config()

# JVM memory requested for the executor and the driver.
conf$`sparklyr.shell.executor-memory` <- "7g"
conf$`sparklyr.shell.driver-memory` <- "7g"

# NOTE(review): the connection below uses master = "local". Executor and YARN
# settings are presumably ignored in local mode, where driver memory is the
# setting that matters -- confirm before tuning these further.
# NOTE(review): 20 executor cores exceeds the 16 cores noted above -- confirm.
conf$spark.executor.cores <- 20
conf$spark.executor.memory <- "7G"
conf$spark.yarn.am.cores <- 20
conf$spark.yarn.am.memory <- "7G"
conf$spark.executor.instances <- 20
conf$spark.dynamicAllocation.enabled <- "false"
# NOTE(review): this key has no "spark." prefix; it looks like an EMR cluster
# option rather than a Spark property and may have no effect here -- verify.
conf$maximizeResourceAllocation <- "true"
conf$spark.default.parallelism <- 32

# Connect and read ----
sc <- spark_connect(master = "local", config = conf, version = "2.2.0")

# memory = FALSE registers the table without caching it in Spark memory, which
# avoids loading the whole file up front. `header` was dropped: it is a
# spark_read_csv() argument and is not a parameter of spark_read_json().
# NOTE(review): if example.json is one multi-line JSON document rather than
# one object per line, options = list(multiLine = TRUE) may be needed -- verify.
sample_tbl <- spark_read_json(
  sc,
  name = "example",
  path = "example.json",
  memory = FALSE,
  overwrite = TRUE
)

# NOTE(review): sdf_schema_viewer() is presumably provided by sparklyr.nested,
# which is not attached in the visible library() calls -- confirm it is loaded.
sdf_schema_viewer(sample_tbl)