I'm computing a cross validated average of prediction errors for each of my models. In the first for loop though, I get this error message
Error in model.frame.default(formula = calories ~ time + food_type, data = data %>% :
variable lengths differ (found for 'food_type')
How do I resolve this? Thank you!
setwd("~/R")
data <- read.csv(file="CaloriesProject2.csv")
time <- data$time
calories <- data$calories
food_type <- data$type
head(rep(1:5,26),126)
set.seed(33013301)
sample(head(rep(1:5,26),126))
data <- data %>% mutate(fold=sample(head(rep(1:5,26),126)))
models.M1 <- models.M2 <- models.M3 <- vector("list",5)
for(k in 1:5){
models.M1[[k]] <- lm(calories ~ time, data=data %>% filter(fold !=k))
models.M2[[k]] <- lm(calories ~ time+food_type, data=data %>% filter(fold !=k))
models.M3[[k]] <- lm(calories ~ time+food_type, data=data %>% filter(fold !=k))
}
preds.M1 <- preds.M2 <- preds.M3 <- rep(NA,126)
for(i in 1:126){
foldi <- data$fold[i]
preds.M1[i] <- predict(models.M1[[foldi]], newdata=data[i,])
preds.M2[i] <- predict(models.M2[[foldi]], newdata=data[i,])
preds.M3[i] <- predict(models.M3[[foldi]], newdata=data[i,])
}
avg_preds.M1 <- mean((data$calories ~ preds.M1)^2)
print(avg_preds.M1)
avg_preds.M2 <- mean((data$calories ~ preds.M2)^2)
print(avg_preds.M2)
avg_preds.M3 <- mean((data$calories ~ preds.M3)^2)
print(avg_preds.M3)