I am currently working on a binary classification task using a Support Vector Machine (SVM) classifier. Due to limited data availability, I ran the SVM model ten times to obtain stable results. The results table contains the mean accuracy, sensitivity, and specificity of each run.
I am also attempting to plot the mean ROC curve. However, the result seems unreliable, as even though I have high values of sensitivity and specificity, the ROC curve is below the random level.
Is there anyone who can assist me with this issue?
data <- read_csv("C:/Users/OneDrive/Desktop/ analysis/datatable.csv")
# Convert Group to a factor variable
data$Group <- as.factor(data$Group)
results <- data.frame(matrix(ncol = 10, nrow = 3))
colnames(results) <- paste0("run_", 1:10)
tpr_list <- list()
fpr_list <- list()
for (i in 1:10) {
indices <- createDataPartition(data$Group, p = 0.7, list = FALSE)
train_data <- data[indices, ]
test_data <- data[-indices, ]
svm_model <- svm(Group ~ ., data = train_data, cost = 10, gamma = 1, scale = FALSE, probability = TRUE)
svm_pred <- predict(svm_model, test_data[, -1], decision.values = TRUE)
cm <- table(svm_pred, test_data$Group)
tn <- cm[1,1]
tp <- cm[2,2]
fn <- cm[2,1]
fp <- cm[1,2]
accuracy <- (tp + tn) / (tp + tn + fp + fn)
sensitivity <- tp / (tp + fn)
specificity <- tn / (tn + fp)
results[1, i] <- accuracy
results[2, i] <- sensitivity
results[3, i] <- specificity
# Calculate ROC curve
svm_pred_decision_values <- attr(svm_pred, "decision.values")
true_labels <- as.numeric(test_data$Group) - 1
pred <- prediction(svm_pred_decision_values, true_labels)
perf <- performance(pred, "tpr", "fpr")
tpr_list[[i]] <- perf@y.values[[1]]
fpr_list[[i]] <- perf@x.values[[1]]
#Plot mean ROC curve - 1
mean_tpr <- Reduce(`+`, tpr_list) / length(tpr_list)
mean_fpr <- Reduce(`+`, fpr_list) / length(fpr_list)
plot(mean_fpr, mean_tpr, type = "l", xlab = "False Positive Rate", ylab = "True Positive Rate", main = "Mean ROC Curve")
abline(0, 1, lty = 2, col = "grey")
rownames(results) <- c("Accuracy", "Sensitivity", "Specificity")
mean_results <- apply(results, 1, mean)
Accuracy Sensitivity Specificity
0.7700000 0.7150000 0.8907143