I am very new to tidymodels, so please bear with me. Following this example, I want to perform a random forest regression. The difference is that, because I have spatial data (points), I am using the spatialsample
package for resampling. Everything works well with the default parameters of the random forest (RF) model. So, when I do:
# Preprocessing: the model formula applied to the attribute table only
# (the sf geometry column is dropped before the recipe is built).
recipe <- recipes::recipe(eq1, data = sf::st_drop_geometry(ames_sf))

# Model specification: random forest regression through the ranger engine,
# with no hyperparameters set explicitly.
ranger_spec <- rand_forest() |>
  set_mode("regression") |>
  set_engine("ranger")

# Bundle recipe and model into a workflow and fit it on the resampling folds.
ranger_workflow <- tune::fit_resamples(
  workflow(recipe, ranger_spec),
  resamples = folds
)
Everything is okay.
But when I try to tune the model's hyperparameters, like so:
# Preprocessing: same recipe as in the default-parameter run.
recipe <- recipes::recipe(eq1, data = sf::st_drop_geometry(ames_sf))

# Model specification: the only difference from the code above is this call,
# where mtry and min_n are marked with tune() and trees is fixed at 501.
ranger_spec <- rand_forest(mtry = tune(), min_n = tune(), trees = 501) |>
  set_mode("regression") |>
  set_engine("ranger")

# Resample the workflow directly; this is the call that produces the error
# quoted right after this snippet.
ranger_workflow <- tune::fit_resamples(
  workflow(recipe, ranger_spec),
  resamples = folds
)
I'm getting this error:
Error:
! 2 arguments have been tagged for tuning in these components: model_spec.
Please use one of the tuning functions (e.g. `tune_grid()`) to optimize them.
Moreover, I tried something like this:
# Preprocessing: formula applied to the attribute table (geometry dropped).
recipe <- recipes::recipe(eq1, data = sf::st_drop_geometry(ames_sf))

# Model specification: mtry and min_n are left to be tuned, trees is fixed.
ranger_spec <- rand_forest(mtry = tune(), min_n = tune(), trees = 501) |>
  set_mode("regression") |>
  set_engine("ranger")

# Workflow holding the recipe and the model; nothing is fitted at this point.
ranger_workflow <- workflow(recipe, ranger_spec)

set.seed(678)
doParallel::registerDoParallel()

# Attempted tuning call: the second argument is itself a fit_resamples() call
# on the folds, which is what triggers the error quoted after this snippet.
ranger_tune <- tune_grid(
  ranger_workflow,
  tune::fit_resamples(folds), # or resamples = tune::fit_resamples(folds)
  grid = 5
)
I am getting this error:
Error in `tune::fit_resamples()`:
! The first argument to [fit_resamples()] should be either a model or workflow.
But, if I type:
# Tune the parameters marked with tune() in the workflow. The resampling
# object is passed to `resamples` as-is, and `grid = 5` asks tune_grid() to
# generate five candidate parameter combinations.
ranger_tune <- tune_grid(
  ranger_workflow,
  resamples = folds,
  grid = 5
)
It works, but I am not sure if that's the right way.
Could you please help me understand how I can tune the hyperparameters of the RF model when using a spatial resampling strategy?
The complete code I am using, up to the point where I create the recipe
(for the rest, please see the attempts above):
library(ggplot2)
library(spatialsample)
library(tidymodels)
library(textrecipes)
# Inputs ----
# Folder that holds the CSV file (with trailing slash, used by paste0 below).
wd <- "path/"
# Projected reference system
provoliko <- "EPSG:24313"
# Model formula: ntl is the response, the remaining columns are predictors.
eq1 <- ntl ~ pop + agbh + nir + ebbi + ndbi + road + pan + tirs

# Data ----
# Read the table and turn it into an sf point layer using the x/y columns.
df <- paste0(wd, "block.data.csv") |>
  read.csv()
ames_sf <- df |>
  sf::st_as_sf(coords = c("x", "y"), crs = provoliko)

# Resampling ----
# Seed is set immediately before the folds are created: 10-fold spatial
# block cross-validation.
set.seed(1234)
folds <- spatial_block_cv(ames_sf, v = 10)

# Recipe ----
# The geometry column is dropped so the recipe sees a plain data frame.
recipe <- ames_sf |>
  sf::st_drop_geometry() |>
  recipes::recipe(eq1, data = _)
Here is a small subset:
structure(list(ntl = c(3.06382083892822, 5.03140115737915, 12.4984884262085,
8.94214534759521, 43.884162902832, 45.9491729736328, 3.55169343948364,
4.35600280761719, 71.9532699584961, 5.3161735534668), pop = c(14.9533805847168,
28.8371906280518, 77.6342926025391, 45.8121490478516, 86.9894256591797,
135.774887084961, 19.270393371582, 18.0224170684814, 43.5355529785156,
30.1428966522217), agbh = c(0.0350548662245274, 0.0189799591898918,
0.455335229635239, 0.564996838569641, 5.92627477645874, 4.00131750106812,
0.00264512258581817, 0.0909716635942459, 0.156893357634544, 1.06346011161804
), nir = c(0.363298416137695, 0.286615610122681, 0.243379071354866,
0.230649575591087, 0.142243817448616, 0.218742504715919, 0.280687063932419,
0.2623251080513, 0.248221337795258, 0.269129604101181), ebbi = c(-0.31734561920166,
-0.252076148986816, -0.0437943786382675, 0.00303727621212602,
0.0230168681591749, -0.014207380823791, -0.212703660130501, -0.172991916537285,
-0.00198577716946602, -0.097306601703167), ndbi = c(-0.339490443468094,
-0.338587254285812, -0.0528221093118191, -0.00101917621213943,
0.0445568449795246, -0.0179230254143476, -0.279076039791107,
-0.235535085201263, -0.00123130006249994, -0.114315219223499),
road = c(0, 0.821298122406006, 183.735855102539, 61.8151817321777,
284.634094238281, 419.639801025391, 0, 0, 235.987365722656,
10.3933219909668), pan = c(0.0992320701479912, 0.0924557894468307,
0.131993010640144, 0.143980875611305, 0.127495512366295,
0.141018703579903, 0.094675324857235, 0.0997878834605217,
0.150557637214661, 0.124181099236012), nbai = c(-0.266169995069504,
-0.255757331848145, -0.110782898962498, -0.0809768587350845,
-0.0337011702358723, -0.0905801132321358, -0.229216039180756,
-0.204480320215225, -0.0798300430178642, -0.134468331933022
), tirs = c(27.6605205535889, 30.7815914154053, 36.7475509643555,
35.4362831115723, 35.8885459899902, 37.0875473022461, 30.3521213531494,
32.7219085693359, 38.9075927734375, 34.5907135009766), geometry = structure(list(
structure(c(455050.3092, 3479376.9101), class = c("XY",
"POINT", "sfg")), structure(c(426490.3092, 3468036.9101
), class = c("XY", "POINT", "sfg")), structure(c(421450.3092,
3475176.9101), class = c("XY", "POINT", "sfg")), structure(c(450430.3092,
3493236.9101), class = c("XY", "POINT", "sfg")), structure(c(434890.3092,
3491136.9101), class = c("XY", "POINT", "sfg")), structure(c(442450.3092,
3481056.9101), class = c("XY", "POINT", "sfg")), structure(c(452530.3092,
3490716.9101), class = c("XY", "POINT", "sfg")), structure(c(447910.3092,
3475176.9101), class = c("XY", "POINT", "sfg")), structure(c(421870.3092,
3468876.9101), class = c("XY", "POINT", "sfg")), structure(c(429850.3092,
3464256.9101), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 421450.3092,
ymin = 3464256.9101, xmax = 455050.3092, ymax = 3493236.9101
), class = "bbox"), crs = structure(list(input = "EPSG:24313",
wkt = "PROJCRS[\"Kalianpur 1962 / UTM zone 43N\",\n BASEGEOGCRS[\"Kalianpur 1962\",\n DATUM[\"Kalianpur 1962\",\n ELLIPSOID[\"Everest 1830 (1962 Definition)\",6377301.243,300.8017255,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n ID[\"EPSG\",4145]],\n CONVERSION[\"UTM zone 43N\",\n METHOD[\"Transverse Mercator\",\n ID[\"EPSG\",9807]],\n PARAMETER[\"Latitude of natural origin\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8801]],\n PARAMETER[\"Longitude of natural origin\",75,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8802]],\n PARAMETER[\"Scale factor at natural origin\",0.9996,\n SCALEUNIT[\"unity\",1],\n ID[\"EPSG\",8805]],\n PARAMETER[\"False easting\",500000,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8806]],\n PARAMETER[\"False northing\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8807]]],\n CS[Cartesian,2],\n AXIS[\"(E)\",east,\n ORDER[1],\n LENGTHUNIT[\"metre\",1]],\n AXIS[\"(N)\",north,\n ORDER[2],\n LENGTHUNIT[\"metre\",1]],\n USAGE[\n SCOPE[\"Engineering survey, topographic mapping.\"],\n AREA[\"Pakistan - east of 72°E.\"],\n BBOX[28.21,72,37.07,77.83]],\n ID[\"EPSG\",24313]]"), class = "crs"), n_empty = 0L)), row.names = c(NA,
10L), sf_column = "geometry", agr = structure(c(ntl = NA_integer_,
pop = NA_integer_, agbh = NA_integer_, nir = NA_integer_, ebbi = NA_integer_,
ndbi = NA_integer_, road = NA_integer_, pan = NA_integer_, nbai = NA_integer_,
tirs = NA_integer_), class = "factor", levels = c("constant",
"aggregate", "identity")), class = c("sf", "data.frame"))
R 4.3.1, RStudio 2023.09.0 Build 463, Windows 10. I have also posted the same question on GitHub.