I am using tidymodels
to fit an outcome with a likert scale and therefore want to make use of models for ordered factor outcomes. In order to do this i'm trying to implement the guide for adding new models to parsnip
but i'm having a bit of difficulty getting this to work.
The package i'm using is glmnetcr
which is built on top of glmnet
. The key hyperparameters are therefore the same. One issue that i'm having is figuring out how to specify the input format for the data in the set_fit
function. The function takes x
and y
variables but I can't seem to see any option for translating this across?
The code i've used is shown below:
library(tidymodels)
set_new_model("ordered_logit")
set_model_mode(model = "ordered_logit", mode = "classification")
set_model_engine("ordered_logit",
mode = "classification",
eng = "glmnetcr")
set_dependency("ordered_logit",
eng = "glmnetcr",
pkg = "glmnetcr")
set_model_arg(model = "ordered_logit",
eng = "glmnetcr",
parsnip = "penalty",
original = "lambda",
func = list(pkg = "glmnetcr", fun = "glmnetcr"),
has_submodel = FALSE)
set_model_arg(model = "ordered_logit",
eng = "glmnetcr",
parsnip = "mixture",
original = "alpha",
func = list(pkg = "glmnetcr", fun = "glmnetcr"),
has_submodel = FALSE)
ordered_logit <- function(mode = "classification", penalty = NULL, mixture = NULL){
if (mode != "classification"){
rlang::abort("`mode` should be classification")
}
args <- list(penalty = rlang::enquo(penalty),
mixture = rlang::enquo(mixture))
new_model_spec("ordered_logit",
args = args,
eng_args = NULL,
mode = mode,
method = NULL,
engine = NULL)
}
set_fit(model = "ordered_logit",
eng = "glmnetcr",
mode = "classification",
value = list(interface = "data.frame",
protect = "data",
# data = c
func = c(pkg = "glmnetcr", fun = "glmnetcr"),
defaults = list()
)
)
set_encoding(model = "ordered_logit",
eng = "glmnetcr",
mode = "classification",
options = list(
predictor_indicators = "one_hot",
compute_intercept = TRUE,
remove_intercept = TRUE,
allow_sparse_x = FALSE
))
class_info <-
list(
pre = NULL,
post = NULL,
func = c(fun = "predict"),
args = list(
object = quote(objects$fit),
newdata = quote(new_data),
type = "class"
)
)
set_pred(
model = "ordered_logit",
eng = "glmnetcr",
mode = "classification",
type = "class",
value = class_info
)
prob_info <-
pred_value_template(
post = function(x, object) {
tibble::as_tibble(x)
},
func = c(fun = "predict"),
object = quote(object$fit),
newdata = quote(new_data),
type = "posterior"
)
set_pred(
model = "ordered_logit",
eng = "glmnetcr",
mode = "classification",
type = "prob",
value = prob_info
)
ordered_logit(penalty = 0.5, mixture = 0.2) %>%
translate(engine = "glmnetcr")
#> Model Specification (classification)
#>
#> Main Arguments:
#> penalty = 0.5
#> mixture = 0.2
#>
#> Computational engine: glmnetcr
#>
#> Model fit template:
#> glmnetcr::glmnetcr(data = missing_arg(), lambda = 0.5, alpha = 0.2)
mod <- ordered_logit(penalty = 0.5, mixture = 0.2) %>%
set_engine("glmnetcr")
data("diabetes")
#> Warning in data("diabetes"): data set 'diabetes' not found
x <- diabetes[, 2:dim(diabetes)[2]]
#> Error in eval(expr, envir, enclos): object 'diabetes' not found
y <- diabetes$y
#> Error in eval(expr, envir, enclos): object 'diabetes' not found
model_fit <- mod %>%
fit(x, y)
#> Error in fit.model_spec(., x, y): object 'y' not found
Any help is much appreciated, thanks!