I was following a tutorial that shows how a decision tree works:
# Load and clean the Titanic data set.
library(dplyr)

path <- "https://raw.githubusercontent.com/guru99-edu/R-Programming/master/titanic_data.csv"

# The file marks missing values with "?". Declaring that at read time turns
# them into NA immediately and lets read.csv infer the real column types.
# Without it, a numeric column containing even one "?" is read as text, and
# rpart then treats it as a categorical variable with one level per distinct
# value, which produces very long, unreadable split labels in the tree.
titanic <- read.csv(path, na.strings = c("?", "NA"))
head(titanic)

# Shuffle the row order.
shuffle_index <- sample(seq_len(nrow(titanic)))
head(shuffle_index)
titanic <- titanic[shuffle_index, ]
head(titanic)

# Drop the columns that are not used as predictors, turn the class and the
# outcome into labelled factors, and discard rows with missing values.
clean_titanic <- titanic %>%
  select(-home.dest, -cabin, -name, -x, -ticket) %>%
  mutate(
    pclass = factor(pclass, levels = c(1, 2, 3),
                    labels = c("Upper", "Middle", "Lower")),
    survived = factor(survived, levels = c(0, 1), labels = c("No", "Yes"))
  ) %>%
  na.omit()
glimpse(clean_titanic)
#' Split a data set into a leading "train" part and a trailing "test" part
#'
#' The first `floor(size * nrow(clean_titanic))` rows form the training set and
#' the remaining rows form the test set. Rows are taken in their current order,
#' so shuffle the data beforehand if a random split is wanted.
#'
#' @param clean_titanic A data frame (or matrix) to split by rows.
#' @param size Fraction of rows assigned to the training set, between 0 and 1.
#' @param train If `TRUE`, return the training rows; otherwise the test rows.
#' @return The selected rows of `clean_titanic`, with its columns preserved.
create_train_test <- function(clean_titanic, size = 0.8, train = TRUE) {
  stopifnot(
    is.numeric(size), length(size) == 1L, !is.na(size),
    size >= 0, size <= 1
  )

  n_row <- nrow(clean_titanic)
  n_train <- floor(size * n_row)

  # A logical mask is used instead of `1:n_train` / negative indices because
  # `1:0` counts backwards (selecting row 1) and `x[-integer(0), ]` selects
  # nothing, so both branches would be wrong when the training part is empty.
  is_train <- seq_len(n_row) <= n_train

  if (train) {
    clean_titanic[is_train, , drop = FALSE]
  } else {
    clean_titanic[!is_train, , drop = FALSE]
  }
}
# Training part: the leading 80% of the cleaned rows.
data_train <- create_train_test(clean_titanic, size = 0.8, train = TRUE)
dim(data_train)

# Test part: the rows left over after the training part.
data_test <- create_train_test(clean_titanic, size = 0.8, train = FALSE)
dim(data_test)

# Share of each outcome class in the training data.
prop.table(table(data_train[["survived"]]))

library(rpart)
library(rpart.plot)

# Classification tree predicting `survived` from every other column.
fit <- rpart(formula = survived ~ ., data = data_train, method = "class")
So far I have managed to create the decision tree, but I can't see the result; I mean I literally can't see it.
How can I shorten the names (I'm not sure what those long labels are), or is there a problem in my code?