unnest() an H2O data frame (H2OFrame).

Does anyone know how to unnest an H2O data frame (H2OFrame) that is stored in a list-column? See the example below:

library(palmerpenguins) # for dataset in this example
library(tidyverse)
library(h2o)

# Reproduction of the problem: fit one gaussian GLM per island with h2o,
# score each island's own rows, then try to unnest the results.
# NOTE(review): no h2o.init() appears in this snippet -- presumably an H2O
# cluster is already running; confirm before executing.
penguins %>% 
  drop_na() %>% 
  group_by(island) %>% 
  nest() %>% 
  # turn each island's nested tibble into an H2OFrame
  mutate(data = map(data, ~as.h2o(.))) %>% 
  # one model per island, trained on that island's H2OFrame
  mutate(mod_obj = map(data, ~h2o.glm(y = "bill_length_mm",
                                      x = c("bill_depth_mm", "flipper_length_mm"),
                                      training_frame = ., 
                                      family = "gaussian",
                                      nfolds = 0,
                                      alpha = 0.5,
                                      lambda_search = FALSE))) %>%
  # predict on the same frame the model was trained on
  mutate(pred = map2(data, mod_obj, ~h2o.predict(.y, newdata = .x)))  %>% # looks like the output below at this stage
  # `data` and `pred` are list-columns of H2OFrame objects at this point
  unnest(c(data, pred)) # can't unnest

# looks like this before the unnest()
# A tibble: 3 × 4
# Groups:   island [3]
# island    data           mod_obj    pred          
# <fct>     <list>         <list>     <list>        
#   1 Torgersen <H2OFrame[,7]> <H2ORgrsM> <H2OFrame[,1]>
#   2 Biscoe    <H2OFrame[,7]> <H2ORgrsM> <H2OFrame[,1]>
#   3 Dream     <H2OFrame[,7]> <H2ORgrsM> <H2OFrame[,1]>

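I guess I can work around it by converting the H2OFrames back to regular data frames with as.data.frame() before calling unnest():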

# Workaround: convert every H2OFrame back into an ordinary R data frame
# before calling unnest(), so both list-columns hold data frames.
penguins %>%
  drop_na() %>%
  group_by(island) %>%
  nest() %>%
  mutate(
    # each island's nested tibble becomes an H2OFrame
    data = map(data, ~as.h2o(.)),
    # one gaussian GLM per island, trained on that island's frame
    mod_obj = map(
      data,
      ~h2o.glm(
        y = "bill_length_mm",
        x = c("bill_depth_mm", "flipper_length_mm"),
        training_frame = .,
        family = "gaussian",
        nfolds = 0,
        alpha = 0.5,
        lambda_search = FALSE
      )
    ),
    # score while `data` is still an H2OFrame, converting the predictions
    # to a data frame straight away
    pred = map2(
      data,
      mod_obj,
      ~as.data.frame(h2o.predict(.y, newdata = .x))
    ),
    # only now replace the H2OFrames in `data` with data frames
    data = map(data, ~as.data.frame(.x))
  ) %>%
  unnest(c(data, pred))


# A tibble: 333 × 10
# Groups:   island [3]
   island    species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex     year mod_obj    predict
   <fct>     <fct>            <dbl>         <dbl>             <int>       <int> <fct>  <int> <list>       <dbl>
 1 Torgersen Adelie            39.1          18.7               181        3750 male    2007 <H2ORgrsM>    37.5
 2 Torgersen Adelie            39.5          17.4               186        3800 female  2007 <H2ORgrsM>    38.1
 3 Torgersen Adelie            40.3          18                 195        3250 female  2007 <H2ORgrsM>    39.5
 4 Torgersen Adelie            36.7          19.3               193        3450 female  2007 <H2ORgrsM>    39.4
 5 Torgersen Adelie            39.3          20.6               190        3650 male    2007 <H2ORgrsM>    39.0
 6 Torgersen Adelie            38.9          17.8               181        3625 female  2007 <H2ORgrsM>    37.4
 7 Torgersen Adelie            39.2          19.6               195        4675 male    2007 <H2ORgrsM>    39.7
 8 Torgersen Adelie            41.1          17.6               182        3200 female  2007 <H2ORgrsM>    37.5
 9 Torgersen Adelie            38.6          21.2               191        3800 male    2007 <H2ORgrsM>    39.3
10 Torgersen Adelie            34.6          21.1               198        4400 male    2007 <H2ORgrsM>    40.3
# … with 323 more rows
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.