I am getting an error when running this. I upgraded and reinstalled R and RStudio 3 times and nothing changed.
I tried on Windows and Mac and got the same error.
---CODE---
# Chunk options for the knitted report: echo code, suppress messages and
# warnings.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)

library(tidyverse)
library(dplyr)
library(tidymodels)

# Read the abalone measurements and derive the response: age = rings + 1.5.
abalone <- read.csv("C:/Users/NOROS/Downloads/homework-2-131/data/abalone.csv")
abalone$age <- abalone$rings + 1.5

# Quick look at the distribution of the response.
summary(abalone$age)
hist(abalone$age, xlab = "Age", main = "Distribution of Abalone Age")
# Set the random seed so the split below is reproducible
set.seed(123)

# Bin age into quartiles; this factor is used only to stratify the split.
abalone$age_quartile <- cut(
  abalone$age,
  breaks = quantile(abalone$age, probs = c(0, 0.25, 0.5, 0.75, 1)),
  labels = c("Q1", "Q2", "Q3", "Q4"),
  include.lowest = TRUE
)

# Split into training (70%) and testing (30%) sets within each quartile.
stratified_samples <- split(abalone, abalone$age_quartile)

# Row positions (within each stratum) drawn for training.
train_indices <- lapply(
  stratified_samples,
  function(x) sample(nrow(x), 0.7 * nrow(x))
)

# Test rows are the rows of the SAME stratum that were not drawn for training.
# Each stratum must be paired with its own training indices; comparing every
# stratum against the first stratum's indices makes train and test overlap.
test_indices <- Map(
  function(x, drawn) setdiff(seq_len(nrow(x)), drawn),
  stratified_samples,
  train_indices
)

# Stack the per-stratum pieces back into single data frames.
train_data <- do.call(
  rbind,
  lapply(
    seq_along(stratified_samples),
    function(i) stratified_samples[[i]][train_indices[[i]], ]
  )
)
test_data <- do.call(
  rbind,
  lapply(
    seq_along(stratified_samples),
    function(i) stratified_samples[[i]][test_indices[[i]], ]
  )
)

# Optional sanity checks on stratum sizes:
# print(nrow(train_data[train_data$age_quartile == "Q1", ]))
# print(nrow(train_data[train_data$age_quartile == "Q2", ]))
# print(nrow(test_data[test_data$age_quartile == "Q1", ]))
# print(nrow(test_data[test_data$age_quartile == "Q2", ]))

# Remove the temporary age_quartile column so it is not used as a predictor
train_data$age_quartile <- NULL
test_data$age_quartile <- NULL
# Preprocessing recipe for predicting age from the remaining columns.
# NOTE(review): the "`x` must be a formula" failure reported when this recipe
# is prepped inside fit() is raised from within step_interact(). The formula
# below follows the documented selector-first form, so the failure is
# presumably tied to the installed recipes/rlang versions -- confirm with
# packageVersion("recipes") and update the package if it is out of date.
abalone_recipe <- recipe(age ~ ., data = train_data) %>%
  # age is computed from rings, so rings must not be used as a predictor
  step_rm(rings) %>%
  # Step 1: dummy-code the categorical predictors
  step_dummy(all_nominal_predictors()) %>%
  # Step 2: interactions; starts_with("type") is meant to pick up the dummy
  # columns created from type in Step 1
  step_interact(
    terms = ~ starts_with("type"):shucked_weight +
      longest_shell:diameter +
      shucked_weight:shell_weight
  ) %>%
  # Step 3: center and scale every predictor
  step_normalize(all_predictors())
# Linear regression specification fitted through the "lm" engine.
lm_model <- linear_reg(mode = "regression", engine = "lm")

library(parsnip)
library(kknn)

# K-nearest-neighbours regression specification: 7 neighbours with the
# "rectangular" weight function, fitted through the "kknn" engine.
knn_model <- nearest_neighbor(
  mode = "regression",
  engine = "kknn",
  neighbors = 7,
  weight_func = "rectangular"
)

# Show the KNN specification
knn_model
library(workflows)
library(kknn)
# Workflow bundling the linear regression spec with the shared recipe
linear_reg_workflow <- workflow() %>%
  add_model(lm_model) %>%
  add_recipe(abalone_recipe)

# Workflow bundling the KNN spec with the same recipe
knn_workflow <- workflow() %>%
  add_model(knn_model) %>%
  add_recipe(abalone_recipe)

# Train both workflows on the training set
linear_reg_fit <- fit(linear_reg_workflow, data = train_data)
knn_fit <- fit(knn_workflow, data = train_data)
# A single hypothetical female abalone to score with the fitted linear model.
# rings is a placeholder value; presumably it is supplied only because the
# recipe expects the column to be present before removing it -- verify.
female <- data.frame(
  type = "F",
  longest_shell = 0.50,
  diameter = 0.10,
  height = 0.30,
  whole_weight = 4,
  shucked_weight = 1,
  viscera_weight = 2,
  shell_weight = 1,
  rings = -999
)

# Predict age for that observation and show the result
predicted_age <- predict(linear_reg_fit, new_data = female)
print(predicted_age)
library(yardstick)
# Step 1: bundle RMSE, R-squared and MAE into one metric set
multi_metric <- yardstick::metric_set(
  yardstick::rmse,
  yardstick::rsq,
  yardstick::mae
)

# Step 2: test-set predictions from each model, paired with the observed age
lm_xx <- predict(linear_reg_fit, test_data)$.pred
knn_xx <- predict(knn_fit, test_data)$.pred

lm_predicted_actual <- tibble(est = lm_xx, tru = test_data$age)
knn_predicted_actual <- tibble(est = knn_xx, tru = test_data$age)

# Step 3: evaluate the metric set on each tibble
lm_evaluation_results <- multi_metric(
  lm_predicted_actual,
  truth = tru,
  estimate = est
)
knn_evaluation_results <- multi_metric(
  knn_predicted_actual,
  truth = tru,
  estimate = est
)

# Report the results
print(lm_evaluation_results)
print(knn_evaluation_results)
# ---- Linear model: test-set metrics computed by hand ----

# Actual age, predicted age and residual for every test row
preds_lm <- predict(linear_reg_fit, test_data)
results_lm <- data.frame(actual = test_data$age, pred = preds_lm$.pred)
results_lm$error <- test_data$age - preds_lm$.pred

# Root mean squared error and mean absolute error
rmse_lm <- sqrt(mean((results_lm$actual - results_lm$pred)^2))
mae_lm <- mean(abs(results_lm$actual - results_lm$pred))

# R-squared as 1 - SSE / SST
SSE <- sum(results_lm$error^2)
SST <- sum((results_lm$actual - mean(results_lm$actual))^2)
rsq_lm <- 1 - SSE / SST
# ---- KNN: test-set metrics computed by hand ----

# Actual age, predicted age and residual for every test row
preds_knn <- predict(knn_fit, test_data)
results_knn <- data.frame(actual = test_data$age, pred = preds_knn$.pred)
results_knn$error <- results_knn$actual - results_knn$pred

# R-squared as 1 - SSE / SST (SSE and SST are overwritten with the KNN values)
SSE <- sum(results_knn$error^2)
SST <- sum((results_knn$actual - mean(results_knn$actual))^2)
rsq_knn <- 1 - SSE / SST

# Root mean squared error and mean absolute error
rmse_knn <- sqrt(mean((results_knn$actual - results_knn$pred)^2))
mae_knn <- mean(abs(results_knn$actual - results_knn$pred))
Getting an error at this line:
linear_reg_fit <- linear_reg_workflow %>%
fit(data=train_data)
------error
Error in `step_interact()`:
Caused by error in `rlang::f_rhs()`:
! `x` must be a formula
Backtrace:
  1. linear_reg_workflow %>% fit(data = train_data)
 24. rlang::abort(message = message)
[image] Show Traceback
Error in `step_interact()`:
Caused by error in `rlang::f_rhs()`:
! `x` must be a formula
---- data is csv here----
type | longest_shell | diameter | height | whole_weight | shucked_weight | viscera_weight | shell_weight | rings |
---|---|---|---|---|---|---|---|---|
M | 0.455 | 0.365 | 0.095 | 0.514 | 0.2245 | 0.101 | 0.15 | 15 |
M | 0.35 | 0.265 | 0.09 | 0.2255 | 0.0995 | 0.0485 | 0.07 | 7 |
F | 0.53 | 0.42 | 0.135 | 0.677 | 0.2565 | 0.1415 | 0.21 | 9 |