> probs <- seq(0, 1, length.out = 10)
> guessing <- map_df(probs, function(p){
+ y_hat <-
+ sample(c("Male", "Female"), n, replace = TRUE, prob=c(p, 1-p)) %>%
+ factor(levels = c("Female", "Male"))
+ list(method = "Guessing",
+ FPR = 1 - specificity(y_hat, test_set$sex),
+ TPR = sensitivity(y_hat, test_set$sex))
+ })
Error in sample.int(length(x), size, replace, prob) :
invalid 'size' argument
I am getting an error with this code. I was trying to draw an ROC curve with TPR on the y-axis and FPR on the x-axis. Help!
Thanks in advance.
It's unclear what value n represents. You are using it as the size of your sample.
I can't make much sense of your code.
There are plenty of quality libraries in R that provide ROC calculations; you don't need to roll your own.
1 Like
I am trying to create an algorithm for the heights dataset in dslabs. Actually, I am a bit confused about what value I should give for n. My main code is this:
library(dslabs)
library(tidyverse)
library(caret)
library(e1071)
library(plotROC)
library(pROC)
# Sex-from-height classifier on the dslabs `heights` data ----
data(heights)
head(heights)
# Fix the RNG state before partitioning so the split is reproducible.
set.seed(2007)
y <- heights[["sex"]]
x <- heights[["height"]]
# Split the rows into a test half and a training half (p = 0.5), with the
# partition drawn on the outcome `y`.
test_index <- createDataPartition(y, times = 1, p = 0.5, list = FALSE)
test_set <- heights[test_index, ]
train_set <- heights[-test_index, ]
# Accuracy-oriented tuning ----
# For every candidate cutoff from 61 to 70, predict "Male" when the training
# height exceeds the cutoff and record the share of correct training labels.
cutoff <- seq(from = 61, to = 70)
accuracy <- map_dbl(cutoff, function(threshold) {
  predicted <- factor(
    ifelse(train_set$height > threshold, "Male", "Female"),
    levels = levels(test_set$sex)
  )
  mean(predicted == train_set$sex)
})
# F1-oriented tuning ----
# Score the same candidate cutoffs by F1 on the training set, then keep the
# best cutoff under each criterion.
cutoff <- seq(from = 61, to = 70)
F_1 <- map_dbl(cutoff, function(threshold) {
  predicted <- factor(
    ifelse(train_set$height > threshold, "Male", "Female"),
    levels = levels(test_set$sex)
  )
  F_meas(data = predicted, reference = factor(train_set$sex))
})
# which.max() picks the first cutoff that attains the maximum score.
best_cutoff_accuracy <- cutoff[which.max(accuracy)]
best_cutoff_F1score <- cutoff[which.max(F_1)]
# Test-set predictions and their evaluation ----
# The predictions are built first: the summaries below previously referred to
# a `y_hat` object that the script never creates.

# Predictions from the cutoff that maximised training accuracy
y_hat_accuracy <- ifelse(
  test_set$height > best_cutoff_accuracy, "Male", "Female"
) %>%
  factor(levels = levels(test_set$sex))
# Predictions from the cutoff that maximised training F1 score
y_hat_F1score <- ifelse(
  test_set$height > best_cutoff_F1score, "Male", "Female"
) %>%
  factor(levels = levels(test_set$sex))

# Table of confusion-matrix counts for the accuracy-tuned rule
table(predicted = y_hat_accuracy, actual = test_set$sex)
# Confusion matrix for the accuracy-tuned rule
cm <- confusionMatrix(data = y_hat_accuracy, reference = test_set$sex)
cm
cm$overall["Accuracy"]
cm$byClass[c("Sensitivity", "Specificity", "Prevalence")]

# Sensitivity and specificity of each rule on the test set.
# NOTE(review): caret is documented to treat the first factor level as the
# positive class unless `positive` is supplied -- confirm that is intended.
sensitivity(data = y_hat_accuracy, reference = test_set$sex)
specificity(data = y_hat_accuracy, reference = test_set$sex)
sensitivity(data = y_hat_F1score, reference = test_set$sex)
specificity(data = y_hat_F1score, reference = test_set$sex)
# ROC points for a "guessing" classifier ----
# For each probability p, label every test-set row "Male" with probability p
# (ignoring height) and record the resulting FPR / TPR pair.
probs <- seq(0, 1, length.out = 10)
guessing <- map_df(probs, function(p) {
  # Draw one guess per test-set row. nrow() is required here: length() of a
  # data frame is its number of columns, not its number of rows.
  y_hat <- sample(
    c("Male", "Female"),
    nrow(test_set),
    replace = TRUE,
    prob = c(p, 1 - p)
  ) %>%
    # Fix both levels so a draw containing a single class still has two levels.
    factor(levels = c("Female", "Male"))
  # Score the guesses just drawn (the sampled vector was previously stored
  # under a different name than the one being scored).
  list(
    method = "Guessing",
    FPR = 1 - specificity(y_hat, test_set$sex),
    TPR = sensitivity(y_hat, test_set$sex)
  )
})
# ROC curve: FPR on the x-axis, TPR on the y-axis.
plot(
  guessing$FPR, guessing$TPR,
  type = "b", xlab = "FPR", ylab = "TPR"
)
THANKS !
system
Closed
December 24, 2020, 6:11pm
4
This topic was automatically closed 21 days after the last reply. New replies are no longer allowed. If you have a query related to it or one of the replies, start a new topic and refer back with a link.