Hi:
I am doing an exercise in R. I have completed it in RStudio as an R Markdown document, but when I try to knit it to HTML the process stops at 74% of execution, at the point where the Random Forest model is fitted. The error message I am getting is shown below:
"lexical error: invalid char in json text.
<meta http-equiv= (right here) ------^ Ademas: Warning message: ggrepel: 8 unlabeled data points (too many overlaps). Consider increasing max.overlaps Ejecucion interrumpida"
Thanks in advance for your help,
Jesus

reprex:
suppressPackageStartupMessages({
library(dplyr)
library(data.table)
library(ggplot2)
library(stringr)
library(stringi)
library(lubridate)
library(inspectdf)
library(forecast)
library(tidyr)
library(purrr)
library(tictoc)
library(rmarkdown)
library(ggthemes)
library(h2o)
library(scales)
library(recipes)
library(missRanger)
library(factoextra)
library(cluster)
library(uwot)
library(partykit)
library(rpart)
library(rpart.plot)
library(fpc)
library(randomForest)
library(NbClust)
library(OneR)
library(caret)
library(correlationfunnel)
library(kableExtra)
library(tidytable)
library(ggwordcloud)
library(wordcloud2)
library(DT)
library(iterators)
library(parallel)
library(doParallel)
library(ranger)
})
2-Datos:
# Load readr and import the semicolon-delimited credit card file.
library(readr)
#>
#> Attaching package: 'readr'
creditcard1 <- read_delim(
  file = "creditcard1.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
# Time the H2O start-up together with the upload of the data.
tic()

# Ask for a 4 GB H2O instance.
# NOTE(review): the output below reports a cluster uptime of 1 hour and
# 2.85 GB of total memory, so this call presumably attached to a cluster
# that was already running rather than starting a new 4 GB one -- confirm.
h2o.init(max_mem_size = "4g")
#> Connection successful!
#>
#> R is connected to the H2O cluster:
#> H2O cluster uptime: 1 hours 25 seconds
#> H2O cluster timezone: Europe/Paris
#> H2O data parsing timezone: UTC
#> H2O cluster version: 3.36.1.2
#> H2O cluster version age: 3 months and 8 days
#> H2O cluster name: H2O_started_from_R_Eloy_lky784
#> H2O cluster total nodes: 1
#> H2O cluster total memory: 2.85 GB
#> H2O cluster total cores: 8
#> H2O cluster allowed cores: 8
#> H2O cluster healthy: TRUE
#> H2O Connection ip: localhost
#> H2O Connection port: 54321
#> H2O Connection proxy: NA
#> H2O Internal Security: FALSE
#> R Version: R version 4.2.1 (2022-06-23 ucrt)

# Set the data.table option before the upload and silence progress bars.
options(h2o.use.data.table = TRUE)
h2o.no_progress()

# Push the imported data to the H2O cluster.
my_csv <- as.h2o(x = creditcard1)

toc()
#> 2.61 sec elapsed
# Split the uploaded frame into three parts. Two ratios (0.7 and 0.2) are
# supplied together with three destination frames, so the third frame
# receives the remaining rows. The seed keeps the split repeatable.
splits <- h2o.splitFrame(
  data = my_csv,
  ratios = c(0.7, 0.2),
  destination_frames = c("train_hex", "valid_hex", "test_hex"),
  seed = 1234
)
train_hex <- splits[[1]]
valid_hex <- splits[[2]]
test_hex <- splits[[3]]

# Response column and predictors (every other column of the frame).
y <- "Class"
x <- setdiff(names(train_hex), y)

# Convert the response to a factor in every split, not only in the training
# frame, so the validation and test frames carry the same response type as
# the frame the model is trained on.
train_hex[, y] <- as.factor(train_hex[, y])
valid_hex[, y] <- as.factor(valid_hex[, y])
test_hex[, y] <- as.factor(test_hex[, y])
# Number of cross-validation folds handed to the random forest below.
nfolds <- 5

# Fit the random forest on the training frame, score it on the validation
# frame, and time the whole call.
tic()
rf_model <- h2o.randomForest(
  x = x,
  y = y,
  training_frame = train_hex,
  validation_frame = valid_hex,
  nfolds = nfolds,
  binomial_double_trees = TRUE,
  # Scoring and stopping settings.
  score_each_iteration = TRUE,
  stopping_rounds = 5,
  stopping_metric = "AUC"
)
toc()
#> 0.02 sec elapsed
Below is the console output showing part of the dataset:
#> # A tibble: 18 × 31
#> mindata<-Time
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
#>
#> 1 0 -13.59… -0.0… 253.… 137.… -0.3… 0.46… 0.23… 0.09… 0.36… 0.09…
#> 2 0 119.18… 0.26… 0.16… 0.44… 0.06… -0.0… -0.0… 0.08… -0.2… -0.1…
#> 3 1 -135.8… -134… 177.… 0.37… -0.5… 180.… 0.79… 0.24… -151… 0.20…
#> 4 1 -0.966… -0.1… 179.… -0.8… -0.0… 124.… 0.23… 0.37… -138… -0.0…
#> 5 2 -115.8… 0.87… 1.54… 0.40… -0.4… 0.09… 0.59… -0.2… 0.81… 0.75…
#> 6 2 -0.425… 0.96… 114.… -0.1… 0.42… -0.0… 0.47… 0.26… -0.5… -0.3…
#> 7 4 122.96… 0.14… 0.04… 120.… 0.19… 0.27… -0.0… 0.08… 0.46… -0.0…
#> 8 7 -0.644… 141.… 10.7… -0.4… 0.94… 0.42… 112.… -380… 0.61… 124.…
#> 9 7 -0.894… 0.28… -0.1… -0.2… 26.6… 372.… 0.37… 0.85… -0.3… -0.4…
#> 10 9 -0.338… 111.… 104.… -0.2… 0.49… -0.2… 0.65… 0.06… -0.7… -0.3…
#> 11 10 144.90… -117… 0.91… -137… -197… -0.6… -14.… 0.04… -172… 162.…
#> 12 10 0.3849… 0.61… -0.8… -0.0… 292.… 331.… 0.47… 0.53… -0.5… 0.30…
#> 13 10 1.249.… -122… 0.38… -123… -148… -0.7… -0.6… -0.2… -209… 132.…
#> 14 11 10.693… 0.28… 0.82… 271.… -0.1… 0.33… -0.0… 0.11… -0.2… 0.46…
#> 15 12 -27.91… -0.3… 164.… 176.… -0.1… 0.80… -0.4… -190… 0.75… 11.5…
#> 16 12 -0.752… 0.34… 205.… -146… -11.… -0.0… -0.6… 0.00… -0.4… 0.74…
#> 17 12 110.32… -0.0… 12.6… 128.… -0.7… 0.28… -0.5… 0.18… 0.78… -0.2…
#> 18 13 -0.436… 0.91… 0.92… -0.7… 0.91… -0.1… 0.70… 0.08… -0.6… -0.7…
#> # … with 20 more variables: V11 , V12 , V13 , V14 ,
#> # V15 , V16 , V17 , V18 , V19 , V20 ,
#> # V21 , V22 , V23 , V24 , V25 , V26 ,
Additional information about the dataset creditcard1:
Showing 1 to 12 of 284,807 entries, 31 total columns (three numeric and 28 character variables)