Hi, I need to compute a confusion matrix for a Random Forest model. This is my code; how can I do it?
library(randomForestExplainer)
library(randomForest)
# Quick look at the data: first row, the class labels and their frequencies.
iris[1, ]
unique(iris$Species)
table(iris$Species)
db_class <- iris
# Pairs plot of all columns, coloured by species.
plot(db_class, col = as.factor(db_class$Species))
# Fix the RNG seed so the 80/20 train/test split (and everything fitted after
# it) is reproducible from run to run.
set.seed(1)
righe_train <- sample(nrow(db_class), size = floor(nrow(db_class) * 0.8))
db_class_dc_train <- db_class[righe_train, ]
db_class_dc_test <- db_class[-righe_train, ]
# Same pairs plot without the last column, still coloured by species.
plot(db_class[, -ncol(db_class)], col = as.factor(db_class$Species))
# Fit a small classification forest (10 trees) on the training rows.
model_rf <- randomForest(
  formula = Species ~ .,
  data = db_class_dc_train,
  ntree = 10
)

# Inspect the fitted forest: variable importance plot, minimal-depth
# distribution, the structure of the 10th tree, the error rates stored on the
# model, and the importance measures table.
varImpPlot(model_rf)
plot_min_depth_distribution(model_rf)
getTree(model_rf, k = 10, labelVar = TRUE)
model_rf$err.rate
measure_importance(model_rf)
# Predict the species of the held-out rows.
pred_rf_test <- predict(model_rf, db_class_dc_test, type = "class")
# Confusion matrix on the test set: rows are the true species, columns the
# predicted species. Base table() is used because CrossTable() lives in the
# gmodels package, which this script never loads.
table(Actual = db_class_dc_test$Species, Predicted = pred_rf_test)
Max
2
Printing the fitted model object (`model_rf`) gives you the out-of-bag confusion matrix. If you want more statistics, I suggest the `confusionMatrix()` function in caret:
library(randomForestExplainer)
#> Registered S3 method overwritten by 'GGally':
#> method from
#> +.gg ggplot2
library(randomForest)
#> randomForest 4.6-14
#> Type rfNews() to see new features/changes/bug fixes.
# Work on a copy of iris and draw a seeded 80/20 train/test split.
db_class <- iris
set.seed(1)
righe_train <- sample(x = nrow(db_class), size = nrow(db_class) * 0.8)
# Training rows are the sampled indices; test rows are everything else.
db_class_dc_train <- db_class[righe_train, ]
db_class_dc_test <- db_class[setdiff(seq_len(nrow(db_class)), righe_train), ]
# Fit a 10-tree classification forest on the training rows.
model_rf <- randomForest(
  formula = Species ~ .,
  data = db_class_dc_train,
  ntree = 10
)
# Auto-printing the model shows the call, the OOB error estimate and the
# OOB confusion matrix.
model_rf
#>
#> Call:
#> randomForest(formula = Species ~ ., data = db_class_dc_train, ntree = 10)
#> Type of random forest: classification
#> Number of trees: 10
#> No. of variables tried at each split: 2
#>
#> OOB estimate of error rate: 4.2%
#> Confusion matrix:
#> setosa versicolor virginica class.error
#> setosa 39 0 0 0.00000000
#> versicolor 0 36 2 0.05263158
#> virginica 0 3 39 0.07142857
# Predict the species of the held-out rows.
pred_rf_test <- predict(model_rf, db_class_dc_test, type = "class")
# caret::confusionMatrix() takes the predictions as `data` and the true
# labels as `reference`. Naming both arguments keeps the "Prediction" rows and
# "Reference" columns, and the per-class statistics, correctly oriented.
# NOTE(review): printed output captured with the two arguments in the opposite
# order shows the transposed table, with Sensitivity/Pos Pred Value and
# Specificity/Neg Pred Value exchanged -- re-run to refresh it.
caret::confusionMatrix(
  data = pred_rf_test,
  reference = db_class_dc_test$Species
)
#> Confusion Matrix and Statistics
#>
#> Reference
#> Prediction setosa versicolor virginica
#> setosa 11 0 0
#> versicolor 0 12 0
#> virginica 0 2 5
#>
#> Overall Statistics
#>
#> Accuracy : 0.9333
#> 95% CI : (0.7793, 0.9918)
#> No Information Rate : 0.4667
#> P-Value [Acc > NIR] : 7.093e-08
#>
#> Kappa : 0.8958
#>
#> Mcnemar's Test P-Value : NA
#>
#> Statistics by Class:
#>
#> Class: setosa Class: versicolor Class: virginica
#> Sensitivity 1.0000 0.8571 1.0000
#> Specificity 1.0000 1.0000 0.9200
#> Pos Pred Value 1.0000 1.0000 0.7143
#> Neg Pred Value 1.0000 0.8889 1.0000
#> Prevalence 0.3667 0.4667 0.1667
#> Detection Rate 0.3667 0.4000 0.1667
#> Detection Prevalence 0.3667 0.4000 0.2333
#> Balanced Accuracy 1.0000 0.9286 0.9600
Created on 2020-12-08 by the reprex package (v0.3.0)
2 Likes
system
Closed
3
This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.
If you have a query related to it or one of the replies, start a new topic and refer back with a link.