library(tidyverse, quietly = TRUE)
library(dplyr, quietly = TRUE)
library(sf, quietly = TRUE)
#> Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(tidymodels, quietly = TRUE)
library(EnvStats, quietly = TRUE)
#>
#> Attaching package: 'EnvStats'
#> The following objects are masked from 'package:stats':
#>
#> predict, predict.lm
#> The following object is masked from 'package:base':
#>
#> print.default
#### PARAMETERS
#.. set dependent variable, treatment variable & ID variables
dep_var <- "y"
treat_var <- "xt"
ID_vars <- c("sampleID", "replicateID")
#.. define sets of potential independent vars
quant_indep <- c("X1", "X2", "X3")
qual_indep <- c("N1", "N2")
all_indep_vars <- c(ID_vars, quant_indep, qual_indep)
#### end parameters ####
#### create artificial dataset
txt <- c("A", "B", "C")
samp_ids <- c("sample1", "sample2")
repl_ids <- c("rep1", "rep2", "rep3", "rep4", "rep5")
modl_df <- data.frame(y = EnvStats::rnormTrunc(250, mean = 0, sd = 0.7, min = -1.5, max = 1.5),
sampleID = array(sample(samp_ids), 250),
replicateID = array(sample(repl_ids),250),
xt = rnorm(250, mean = 0, sd = 1),
x = matrix(sample(runif(5000, min = -0.5, max = 1.5), 750), 250, 3),
c = matrix(sample(txt, 500, replace = TRUE), 250, 2))
modl_df <- modl_df %>% dplyr::rename(X1 = x.1, X2 = x.2, X3 = x.3, N1 = c.1, N2 = c.2)
#### Workflow for GAM modeling
gam_formula <- as.formula(str_c(dep_var, "~", treat_var, "+", paste(unlist(all_indep_vars), collapse = " + ") ))
gam_recipe <- recipe(modl_df, formula = gam_formula) %>%
update_role(all_of(!!ID_vars), new_role = "ID") %>%
step_intercept() %>%
step_naomit(!!dep_var) %>%
step_zv(all_predictors()) %>%
step_nzv(all_predictors()) %>%
step_dummy(all_nominal_predictors()) %>%
step_ns(all_of(!!treat_var), deg_free = 2)
##** View outcome of applying 'gam_recipe' to data 'modl_df' via prep() and bake()
##* prep_modl_df <- prep(gam_recipe, retain = TRUE, verbose = TRUE)
##* bakd_modl_df <- bake(prep_modl_df, new_data = NULL) # -OR- bakd_modl_df <- bake(prep_modl_df, modl_df)
##* View(bakd_modl_df)
##**
gam_model <- gen_additive_mod(select_features = TRUE, engine = "mgcv") %>%
set_engine("mgcv") %>%
set_mode("regression")
gam_wkflo <- workflow() %>%
add_model(gam_model) %>%
add_recipe(gam_recipe)
response_gam <- gam_wkflo %>% parsnip::fit(data = modl_df)
#> Error in `fit_xy()`:
#> ! `fit()` must be used with GAM models (due to its use of formulas).
Created on 2022-07-26 by the reprex package (v2.0.1)