I am trying to implement a Lasso regression in the College data set from ISLR, with 10 fold cross-validation. The variable that I am trying to predict is a variable named cost, created by adding up the out of state tuition and the living costs. However, I do not understand the error R is throwing. Could you help me understand what is wrong?
library(ggplot2)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyverse)
library(tidymodels)
#> -- Attaching packages -------------------------------------------------------------- tidymodels 0.1.0 --
#> v broom 0.5.5 v rsample 0.0.5
#> v dials 0.0.4 v tune 0.0.1
#> v infer 0.5.1 v workflows 0.1.0
#> v parsnip 0.0.5 v yardstick 0.0.5
#> v recipes 0.1.9
#> -- Conflicts ----------------------------------------------------------------- tidymodels_conflicts() --
#> x scales::discard() masks purrr::discard()
#> x dplyr::filter() masks stats::filter()
#> x recipes::fixed() masks stringr::fixed()
#> x dplyr::lag() masks stats::lag()
#> x dials::margin() masks ggplot2::margin()
#> x yardstick::spec() masks readr::spec()
#> x recipes::step() masks stats::step()
#> x recipes::yj_trans() masks scales::yj_trans()
library(glmnet)
#> Loading required package: Matrix
#>
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#>
#> expand, pack, unpack
#> Loaded glmnet 3.0-2
library(reprex)
library(ISLR)
data("College")
College <- as_tibble(College) %>%
mutate(cost = Outstate + Room.Board) %>%
select(-c(Outstate,Room.Board))
College <- na.omit(College)
College<-College %>%
mutate(Private=if_else(Private == "yes", 1L, 0L))
set.seed(123)
cost_split <- College %>% initial_split(prop = 0.5)
cost_train <- training(cost_split)
cost_test <- testing(cost_split)
x = as.matrix(cost_train[,1:14]) #predictors
y = as.matrix(cost_train[,17]) #goal
lambdas_to_try <- 10^seq(from=1, to=1000)
# Setting alpha = 1 implements lasso regression
lasso_cv <- cv.glmnet(x, y, alpha = 1, lambda = lambdas_to_try,
standardize = TRUE, nfolds = 10)
#> Error in elnet(x, is.sparse, ix, jx, y, weights, offset, type.gaussian, : NA/NaN/Inf in foreign function call (arg 15)