I am getting an error with prep, I tried to follow documentation and step_pca
as an example.
#' Factor Analysis of Mixed Data (FAMD) Step
#' @export
step_famd <- function(recipe,
...,
role = "predictor",
trained = FALSE,
num_comp = 2,
threshold = NA,
options = list(),
res = NULL,
columns = NULL,
prefix = "PC",
keep_original_cols = FALSE,
skip = FALSE,
id = rand_id("famd")) {
#terms <- ellipse_check(...)
add_step(
recipe,
step_famd_new(
terms = enquos(...),
trained = trained,
role = role,
num_comp = num_comp,
threshold = threshold,
options = options,
res = res,
columns = columns,
prefix = prefix,
keep_original_cols = keep_original_cols,
skip = skip,
id = id,
case_weights = NULL
)
)
}
step_famd_new <- function(terms, role, trained, num_comp, threshold, options, res, columns,
prefix, keep_original_cols, skip, id, case_weights) {
step(
subclass = "famd",
terms = terms,
role = role,
trained = trained,
num_comp = num_comp,
threshold = threshold,
options = options,
res = res,
columns = columns,
prefix = prefix,
keep_original_cols = keep_original_cols,
skip = skip,
id = id,
case_weights = case_weights
)
}
#' @export
prep.step_famd <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)
check_number_decimal(x$threshold, arg = "threshold", min = 0, max = 1,
allow_na = TRUE)
check_string(x$prefix, arg = "prefix")
check_number_whole(x$num_comp, arg = "num_comp", min = 0)
wts <- get_case_weights(info, training)
were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
if (isFALSE(were_weights_used)) {
wts <- NULL
}
if (x$num_comp > 0 && length(col_names) > 0) {
if (is.null(wts)) {
famd_call <-
expr(FactoMineR::FAMD(
graph = FALSE
))
if (length(x$options) > 0) {
famd_call <- rlang::call_modify(famd_call, !!!x$options)
}
famd_call$x <- expr(training[, col_names, drop = FALSE])
famd_obj <- eval(famd_call)
## decide on removing prc elements that aren't used in new projections
## e.g. `sdev` etc.
} else {
famd_call <-
expr(FactoMineR::FAMD(
graph = FALSE,
row.w = wts
))
if (length(x$options) > 0) {
famd_call <- rlang::call_modify(famd_call, !!!x$options)
}
famd_call$x <- expr(training[, col_names, drop = FALSE])
famd_obj <- eval(famd_call)
}
x$num_comp <- min(x$num_comp, length(col_names))
if (!is.na(x$threshold)) {
num_comp <-
which.max(famd_obj$eig[,"cumulative percentage of variance"]/100 >= x$threshold)
if (length(num_comp) == 0) {
num_comp <- length(famd_obj$eig[,'eigenvalue'])
}
x$num_comp <- num_comp
}
} else {
famd_obj <- NULL
}
step_famd_new(
terms = x$terms,
role = x$role,
trained = TRUE,
num_comp = x$num_comp,
threshold = x$threshold,
options = x$options,
res = famd_obj,
columns = col_names,
prefix = x$prefix,
keep_original_cols = get_keep_original_cols(x),
skip = x$skip,
id = x$id,
case_weights = were_weights_used
)
}
#' @export
bake.step_famd <- function(object, new_data, ...) {
check_new_data(object$columns, object, new_data)
if (is.null(object$columns)) {
object$columns <- stats::setNames(nm = rownames(object$res$var$coord))
}
if (length(object$columns) == 0 || all(is.na(object$res$var$coord))) {
return(new_data)
}
comps <- as.data.frame(FactoMineR::predict.FAMD(object$res, newdata = new_data)$coord)
comps <- check_name(comps, new_data, object)
new_data <- vec_cbind(new_data, as_tibble(comps))
new_data <- remove_original_cols(new_data, object, pca_vars)
new_data
}
#' @export
print.step_famd <-
function(x, width = max(20, options()$width - 29), ...) {
if (x$trained) {
if (is.null(x$columns)) {
x$columns <- stats::setNames(nm = rownames(x$res$var$coord))
}
if (length(x$columns) == 0 || all(is.na(x$res$var$coord))) {
title <- "No FAMD components were extracted from "
columns <- names(x$columns)
} else {
title <- glue("FMAD extraction with ")
columns <- rownames(x$res$var$coord)
}
} else {
title <- "FAMD extraction with "
}
print_step(columns, x$terms, x$trained, title, width,
case_weights = x$case_weights)
invisible(x)
}
famd_coefs <- function(x) {
if (x$num_comp > 0 && length(x$columns) > 0) {
rot <- as.data.frame(x$res$var$contrib)
npc <- ncol(rot)
res <- utils::stack(rot)
colnames(res) <- c("value", "component")
res$component <- as.character(res$component)
res$terms <- rep(unname(x$columns), npc)
res <- as_tibble(res)[, c("terms", "value", "component")]
} else {
res <- tibble::tibble(
terms = unname(x$columns), value = rlang::na_dbl,
component = rlang::na_chr
)
}
res
}
famd_cos2 <- function(x) {
if (x$num_comp > 0 && length(x$columns) > 0) {
rot <- as.data.frame(x$res$var$cos2)
npc <- ncol(rot)
res <- utils::stack(rot)
colnames(res) <- c("value", "component")
res$component <- as.character(res$component)
res$terms <- rep(unname(x$columns), npc)
res <- as_tibble(res)[, c("terms", "value", "component")]
} else {
res <- tibble::tibble(
terms = unname(x$columns), value = rlang::na_dbl,
component = rlang::na_chr
)
}
res
}
famd_variances <- function(x) {
if (x$num_comp > 0 && length(x$columns) > 0) {
variances <- x$res$eig[,'percentage of variance']
y <- c(
variances,
cumsum(variances)
)
x <-
rep(
c(
"variance",
"cumulative variance"
),
each = p
)
res <- tibble::tibble(
terms = x,
value = y,
component = rep(seq_len(p), 4)
)
} else {
res <- tibble::tibble(
terms = unname(x$columns),
value = rep(rlang::na_dbl, length(x$columns)),
component = rep(rlang::na_chr, length(x$columns))
)
}
res
}
#' @export
tidy.step_famd <- function(x, type = "coef", ...) {
if (!is_trained(x)) {
term_names <- sel2char(x$terms)
res <- tibble(
terms = term_names,
value = na_dbl,
component = na_chr
)
} else {
type <- rlang::arg_match(
type,
c("coef", "variance", 'cos2'),
error_call = rlang::caller_env()
)
if (type == "coef") {
res <- famd_coefs(x)
} else if (type == "variance") {
res <- famd_variances(x)
} else {
res <- famd_cos2(x)
}
}
res$id <- x$id
res
}
#' @export
tunable.step_famd <- function(x, ...) {
tibble::tibble(
name = c("num_comp", "threshold"),
call_info = list(
list(pkg = "dials", fun = "num_comp", range = c(1L, 4L)),
list(pkg = "dials", fun = "threshold")
),
source = "recipe",
component = "step_famd",
component_id = x$id
)
}