library(MASS)
# Fit a linear regression of clump thickness on the remaining biopsy
# variables, using an 80/20 train/validation split of the complete cases.

# Load the biopsy data from MASS, drop the ID column, and give the remaining
# ten columns short names.
data(biopsy)
biopsy$ID <- NULL
names(biopsy) <- c(
  "thick", "u.size", "u.shape", "adhsn", "s.size",
  "nucl", "chrom", "n.nuc", "mit", "class"
)

# Keep only rows without missing values.
biopsy.v2 <- na.omit(biopsy)

# Training-set size: 80% of the complete rows, rounded down.
smp_size <- floor(0.80 * nrow(biopsy.v2))

## set the seed to make your partition reproducible
set.seed(123)

# Row indices drawn (without replacement) for the training set.
select_sample <- sample(seq_len(nrow(biopsy.v2)), smp_size)

# Training set: the sampled rows of the biopsy data.
training_set <- biopsy.v2[select_sample, ]
# Validation set: every row that was not sampled for training.
validation_set <- biopsy.v2[-select_sample, ]

nrow(training_set)
nrow(validation_set)

# First regression model.
# The response must be the bare column name so that it is looked up in
# `data = training_set`. Writing `biopsy.v2$thick` pulls the response from the
# full data frame instead, so its length no longer matches the predictors and
# lm() stops with "variable lengths differ".
model_1 <- lm(thick ~ ., data = training_set)
summary(model_1)
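# Note: writing the response as `biopsy.v2$thick` while passing
# `data = training_set` makes lm() fail, because the response is then taken
# from all rows of biopsy.v2 while the predictors come from the training rows
# only. The console reports:
#   Error in model.frame.default(formula = biopsy.v2$thick ~ ., data = training_set, :
#   variable lengths differ (found for 'u.size')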