I am currently working on a binary classification task using a Support Vector Machine (SVM) classifier. Due to limited data availability, I ran the SVM model ten times to obtain stable results. The results table contains the mean accuracy, sensitivity, and specificity of each run.
I am also attempting to plot the mean ROC curve. However, the result seems unreliable, as even though I have high values of sensitivity and specificity, the ROC curve is below the random level.
Is there anyone who can assist me with this issue?
library(caret)
library(dplyr)
library(ROCR)
library(readr)
library(e1071)
data <- read_csv("C:/Users/OneDrive/Desktop/ analysis/datatable.csv")
# Convert Group to a factor variable
data$Group <- as.factor(data$Group)
#set.seed(123)
results <- data.frame(matrix(ncol = 10, nrow = 3))
colnames(results) <- paste0("run_", 1:10)
tpr_list <- list()
fpr_list <- list()
for (i in 1:10) {
indices <- createDataPartition(data$Group, p = 0.7, list = FALSE)
train_data <- data[indices, ]
test_data <- data[-indices, ]
svm_model <- svm(Group ~ ., data = train_data, cost = 10, gamma = 1, scale = FALSE, probability = TRUE)
svm_pred <- predict(svm_model, test_data[, -1], decision.values = TRUE)
cm <- table(svm_pred, test_data$Group)
tn <- cm[1,1]
tp <- cm[2,2]
fn <- cm[2,1]
fp <- cm[1,2]
accuracy <- (tp + tn) / (tp + tn + fp + fn)
sensitivity <- tp / (tp + fn)
specificity <- tn / (tn + fp)
results[1, i] <- accuracy
results[2, i] <- sensitivity
results[3, i] <- specificity
# Calculate ROC curve
svm_pred_decision_values <- attr(svm_pred, "decision.values")
true_labels <- as.numeric(test_data$Group) - 1
pred <- prediction(svm_pred_decision_values, true_labels)
perf <- performance(pred, "tpr", "fpr")
tpr_list[[i]] <- perf@y.values[[1]]
fpr_list[[i]] <- perf@x.values[[1]]
}
#Plot mean ROC curve - 1
mean_tpr <- Reduce(`+`, tpr_list) / length(tpr_list)
mean_fpr <- Reduce(`+`, fpr_list) / length(fpr_list)
plot(mean_fpr, mean_tpr, type = "l", xlab = "False Positive Rate", ylab = "True Positive Rate", main = "Mean ROC Curve")
abline(0, 1, lty = 2, col = "grey")
rownames(results) <- c("Accuracy", "Sensitivity", "Specificity")
mean_results <- apply(results, 1, mean)
mean_results
Accuracy Sensitivity Specificity
0.7700000 0.7150000 0.8907143