Issue when trying to fine-tune a random forest regression model using spatialsample as the resampling strategy

I am very new to tidymodels, so please bear with me. Following this example, I want to fit a random forest regression. The difference is that, because I have spatial (point) data, I am using the spatialsample package for resampling. Everything works well with the default parameters of the random forest (RF) model. So, when I do:

# Build the preprocessing recipe from the model formula; the sf geometry
# column is dropped first so only the attribute columns are passed as data.
recipe <- recipes::recipe(eq1, data = sf::st_drop_geometry(ames_sf))

# modeling
# Random forest regression spec on the ranger engine. No arguments are passed
# to rand_forest(), so nothing is marked for tuning.
ranger_spec <-
  rand_forest() %>% 
  set_mode("regression") %>%
  set_engine("ranger")

# Bundle recipe and model into a workflow and fit it across the resamples in
# `folds`.
# NOTE: despite its name, `ranger_workflow` ends up holding the result of
# fit_resamples(), not the workflow object itself.
ranger_workflow <- 
  workflow(recipe, ranger_spec) |> 
  tune::fit_resamples(folds)

everything's okay.

But when I try to tune the model hyperparameters, like so:

# Recipe built from the model formula on the attribute table (geometry dropped).
recipe <- recipes::recipe(eq1, data = sf::st_drop_geometry(ames_sf))

# modeling
# `mtry` and `min_n` are marked with tune() placeholders; `trees` is fixed
# at 501.
ranger_spec <-
  rand_forest(mtry = tune(), min_n = tune(), trees = 501) %>% # the only line that differs from the default-parameter version
  set_mode("regression") %>%
  set_engine("ranger")

# fit_resamples() is called on a workflow whose model spec still contains
# tune() placeholders; this is the call that raises the
# "2 arguments have been tagged for tuning" error quoted below.
ranger_workflow <- 
  workflow(recipe, ranger_spec) |> 
  tune::fit_resamples(folds)

I'm getting this error:

Error:
! 2 arguments have been tagged for tuning in these components: model_spec. 
Please use one of the tuning functions (e.g. `tune_grid()`) to optimize them.

Moreover, I tried something like this:

# Recipe built from the model formula on the attribute table (geometry dropped).
recipe <- recipes::recipe(eq1, data = sf::st_drop_geometry(ames_sf))

# modeling
# `mtry` and `min_n` are marked with tune() placeholders; `trees` is fixed
# at 501.
ranger_spec <-
  rand_forest(mtry = tune(), min_n = tune(), trees = 501) %>% 
  set_mode("regression") %>%
  set_engine("ranger")

# Here the workflow object itself is stored (it is not fitted yet).
ranger_workflow <-
  workflow() %>%
  add_recipe(recipe) %>%
  add_model(ranger_spec)

# Fix the RNG seed and register a doParallel backend (called with no
# arguments) before tuning.
set.seed(678)
doParallel::registerDoParallel()

# The second argument is a *call* to fit_resamples(folds) rather than `folds`
# itself. fit_resamples() therefore receives the resamples as its first
# argument, which is what produces the "should be either a model or workflow"
# error quoted below.
ranger_tune <-
  tune_grid(ranger_workflow,
            tune::fit_resamples(folds), # or resamples =  tune::fit_resamples(folds)
            grid = 5 
)

I am getting this error:

Error in `tune::fit_resamples()`:
! The first argument to [fit_resamples()] should be either a model or workflow.

But, if I type:

# Tune the hyperparameters marked with tune() in `ranger_workflow` across the
# resamples. The resampling object `folds` is passed directly as `resamples`
# (not wrapped in fit_resamples()); `grid = 5` sets the grid argument to 5.
# A comma is required between the `resamples` and `grid` arguments.
ranger_tune <-
  tune_grid(
    ranger_workflow,
    resamples = folds,
    grid = 5
  )

It works, but I am not sure whether that is the right way to do it.

Could you please help me understand how to tune the hyperparameters of the RF model using a spatial resampling strategy?

The complete code I am using, up to the point where I create the recipe (for the remaining steps, please see the attempts above):

library(ggplot2)
library(spatialsample)
library(tidymodels)
library(textrecipes)

# Folder holding the input CSV (placeholder). The trailing slash matters
# because the file name is appended with paste0().
wd <- "path/"

# Projected reference system
provoliko <- "EPSG:24313"

# Read the input table from the working folder.
df <- paste0(wd, "block.data.csv") |>
  read.csv()

# Model formula: `ntl` is the response, the remaining terms are predictors.
eq1 <- ntl ~ pop + agbh + nir + ebbi + ndbi + road + pan + tirs

# Convert the table to an sf object, taking coordinates from the `x` and `y`
# columns and assigning the CRS defined above.
ames_sf <- df |>
  sf::st_as_sf(coords = c("x", "y"), crs = provoliko)

# Seed the RNG immediately before creating the spatial block
# cross-validation folds (v = 10).
set.seed(1234)
folds <- ames_sf |>
  spatial_block_cv(v = 10)

# Recipe defined on the attribute table only (geometry column dropped).
recipe <- recipes::recipe(
  eq1,
  data = sf::st_drop_geometry(ames_sf)
)

Here is a small subset:

structure(list(ntl = c(3.06382083892822, 5.03140115737915, 12.4984884262085, 
8.94214534759521, 43.884162902832, 45.9491729736328, 3.55169343948364, 
4.35600280761719, 71.9532699584961, 5.3161735534668), pop = c(14.9533805847168, 
28.8371906280518, 77.6342926025391, 45.8121490478516, 86.9894256591797, 
135.774887084961, 19.270393371582, 18.0224170684814, 43.5355529785156, 
30.1428966522217), agbh = c(0.0350548662245274, 0.0189799591898918, 
0.455335229635239, 0.564996838569641, 5.92627477645874, 4.00131750106812, 
0.00264512258581817, 0.0909716635942459, 0.156893357634544, 1.06346011161804
), nir = c(0.363298416137695, 0.286615610122681, 0.243379071354866, 
0.230649575591087, 0.142243817448616, 0.218742504715919, 0.280687063932419, 
0.2623251080513, 0.248221337795258, 0.269129604101181), ebbi = c(-0.31734561920166, 
-0.252076148986816, -0.0437943786382675, 0.00303727621212602, 
0.0230168681591749, -0.014207380823791, -0.212703660130501, -0.172991916537285, 
-0.00198577716946602, -0.097306601703167), ndbi = c(-0.339490443468094, 
-0.338587254285812, -0.0528221093118191, -0.00101917621213943, 
0.0445568449795246, -0.0179230254143476, -0.279076039791107, 
-0.235535085201263, -0.00123130006249994, -0.114315219223499), 
    road = c(0, 0.821298122406006, 183.735855102539, 61.8151817321777, 
    284.634094238281, 419.639801025391, 0, 0, 235.987365722656, 
    10.3933219909668), pan = c(0.0992320701479912, 0.0924557894468307, 
    0.131993010640144, 0.143980875611305, 0.127495512366295, 
    0.141018703579903, 0.094675324857235, 0.0997878834605217, 
    0.150557637214661, 0.124181099236012), nbai = c(-0.266169995069504, 
    -0.255757331848145, -0.110782898962498, -0.0809768587350845, 
    -0.0337011702358723, -0.0905801132321358, -0.229216039180756, 
    -0.204480320215225, -0.0798300430178642, -0.134468331933022
    ), tirs = c(27.6605205535889, 30.7815914154053, 36.7475509643555, 
    35.4362831115723, 35.8885459899902, 37.0875473022461, 30.3521213531494, 
    32.7219085693359, 38.9075927734375, 34.5907135009766), geometry = structure(list(
        structure(c(455050.3092, 3479376.9101), class = c("XY", 
        "POINT", "sfg")), structure(c(426490.3092, 3468036.9101
        ), class = c("XY", "POINT", "sfg")), structure(c(421450.3092, 
        3475176.9101), class = c("XY", "POINT", "sfg")), structure(c(450430.3092, 
        3493236.9101), class = c("XY", "POINT", "sfg")), structure(c(434890.3092, 
        3491136.9101), class = c("XY", "POINT", "sfg")), structure(c(442450.3092, 
        3481056.9101), class = c("XY", "POINT", "sfg")), structure(c(452530.3092, 
        3490716.9101), class = c("XY", "POINT", "sfg")), structure(c(447910.3092, 
        3475176.9101), class = c("XY", "POINT", "sfg")), structure(c(421870.3092, 
        3468876.9101), class = c("XY", "POINT", "sfg")), structure(c(429850.3092, 
        3464256.9101), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
    "sfc"), precision = 0, bbox = structure(c(xmin = 421450.3092, 
    ymin = 3464256.9101, xmax = 455050.3092, ymax = 3493236.9101
    ), class = "bbox"), crs = structure(list(input = "EPSG:24313", 
        wkt = "PROJCRS[\"Kalianpur 1962 / UTM zone 43N\",\n    BASEGEOGCRS[\"Kalianpur 1962\",\n        DATUM[\"Kalianpur 1962\",\n            ELLIPSOID[\"Everest 1830 (1962 Definition)\",6377301.243,300.8017255,\n                LENGTHUNIT[\"metre\",1]]],\n        PRIMEM[\"Greenwich\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        ID[\"EPSG\",4145]],\n    CONVERSION[\"UTM zone 43N\",\n        METHOD[\"Transverse Mercator\",\n            ID[\"EPSG\",9807]],\n        PARAMETER[\"Latitude of natural origin\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8801]],\n        PARAMETER[\"Longitude of natural origin\",75,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8802]],\n        PARAMETER[\"Scale factor at natural origin\",0.9996,\n            SCALEUNIT[\"unity\",1],\n            ID[\"EPSG\",8805]],\n        PARAMETER[\"False easting\",500000,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8806]],\n        PARAMETER[\"False northing\",0,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8807]]],\n    CS[Cartesian,2],\n        AXIS[\"(E)\",east,\n            ORDER[1],\n            LENGTHUNIT[\"metre\",1]],\n        AXIS[\"(N)\",north,\n            ORDER[2],\n            LENGTHUNIT[\"metre\",1]],\n    USAGE[\n        SCOPE[\"Engineering survey, topographic mapping.\"],\n        AREA[\"Pakistan - east of 72°E.\"],\n        BBOX[28.21,72,37.07,77.83]],\n    ID[\"EPSG\",24313]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
10L), sf_column = "geometry", agr = structure(c(ntl = NA_integer_, 
pop = NA_integer_, agbh = NA_integer_, nir = NA_integer_, ebbi = NA_integer_, 
ndbi = NA_integer_, road = NA_integer_, pan = NA_integer_, nbai = NA_integer_, 
tirs = NA_integer_), class = "factor", levels = c("constant", 
"aggregate", "identity")), class = c("sf", "data.frame"))

R 4.3.1, RStudio 2023.09.0 Build 463, Windows 10. I also posted the same question on GitHub.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.