Having an unnesting issue

Dear R studio community,

I currently have a dataframe with a column populated with "<dbl [1]>". I was under the impression that I would be able to extract the value by simply piping into unnest(), but this has not been effective. Any ideas on how I can pull the value from the list column if unnesting is not working for some reason?

Thank you in advance for your help.

Hi!

To help us help you, could you please prepare a reproducible example (reprex) illustrating your issue? Please have a look at the forum's reprex guide ("FAQ: What's a reproducible example (`reprex`) and how do I create one?") to see how to create one.

Hi there,

Thanks for the help. Here is the reprex:

visit
#> Error in eval(expr, envir, enclos): object 'visit' not found
library(stringr)

# Convert a single duration string of the form "<minutes>m<seconds>s"
# (e.g. "9m2.75s") into seconds.
#
# x: one duration string.
# Returns minutes * 60 + seconds as a number. The pattern needs a minutes
# number followed by a seconds number, so a value such as "0s" yields NA, and
# the seconds capture keeps at most two digits after the decimal point.
get_seconds <- function(x) {
    captured <- str_match(x, "(\\d+).(\\d{1,2}\\.?\\d{0,2}).")
    # Columns 2 and 3 of the match hold the minutes and seconds captures.
    parts <- as.numeric(captured[2:3])
    minutes <- parts[1]
    seconds <- parts[2]
    minutes * 60 + seconds
}

# Apply get_seconds() to every avg_duration value. map() returns a list, so
# format_duration ends up as a list column (one value per row).
# NOTE(review): `visit` is never created in this reprex, which is why every
# step below errors; the data is posted separately further down.
# NOTE(review): only stringr is loaded above; mutate(), map() and unnest()
# presumably need dplyr, purrr and tidyr attached as well -- confirm.
modifiedtimes <- visit %>%
  mutate(format_duration=map(avg_duration,get_seconds))
#> Error in eval(lhs, parent, parent): object 'visit' not found

# Attempt to flatten the list column; unnest() is called here without naming
# the column to unnest.
modifiedtimes %>%
  unnest()
#> Error in eval(lhs, parent, parent): object 'modifiedtimes' not found

Created on 2019-12-01 by the reprex package (v0.3.0)

Here is a datapasta of the dataframe:

# Sample of the data: six Desktop sessions from 1/1/18, one row per region.
# format_duration holds avg_duration expressed in seconds (NA for "0s").
data.frame(
  total_sessions = c(4L, 5L, 14L, 1L, 1L, 1L),
  total_bounce_rate = c(0.25, 0.4, 0.35714, 1, 0, 0),
  total_carts = c(0L, 1L, 4L, 0L, 0L, 0L),
  total_checkouts = c(0L, 1L, 3L, 0L, 0L, 0L),
  total_conversion = rep(0, 6L),
  total_orders_placed = rep(0L, 6L),
  total_pageviews = c(19L, 24L, 87L, 1L, 3L, 14L),
  format_duration = c(542.75, 528, 294.92, NA, 67, 244),
  day = factor(rep("1/1/18", 6L)),
  ua_form_factor = factor(rep("Desktop", 6L)),
  location_region = factor(c(
    "Maryland", "Victoria", "Illinois",
    "Ar Riyad", "Canterbury", "Stockholms Lan"
  )),
  avg_duration = factor(c(
    "9m2.75s", "8m48s", "4m54.928571428s",
    "0s", "1m7s", "4m4s"
  ))
)

While the values show up here for the format_duration column, when I print the data frame they appear as <dbl [1]> and can't be passed into my linear model.

Thanks for your help!

You can specify the format_duration column in your unnest call and that works for me:

library(tidyverse)
library(stringr)

# Sample data from the question, rebuilt as a tibble.
# format_duration starts out with the values posted in the question; the
# pipeline further down recomputes it from avg_duration.
df <- tibble(
  total_sessions = c(4L, 5L, 14L, 1L, 1L, 1L),
  total_bounce_rate = c(0.25, 0.4, 0.35714, 1, 0, 0),
  total_carts = c(0L, 1L, 4L, 0L, 0L, 0L),
  total_checkouts = c(0L, 1L, 3L, 0L, 0L, 0L),
  total_conversion = rep(0, 6L),
  total_orders_placed = rep(0L, 6L),
  total_pageviews = c(19L, 24L, 87L, 1L, 3L, 14L),
  format_duration = c(542.75, 528, 294.92, NA, 67, 244),
  day = factor(rep("1/1/18", 6L)),
  ua_form_factor = factor(rep("Desktop", 6L)),
  location_region = factor(c(
    "Maryland", "Victoria", "Illinois",
    "Ar Riyad", "Canterbury", "Stockholms Lan"
  )),
  avg_duration = factor(c(
    "9m2.75s", "8m48s", "4m54.928571428s",
    "0s", "1m7s", "4m4s"
  ))
)


# Convert a single duration string of the form "<minutes>m<seconds>s"
# (e.g. "9m2.75s") into seconds.
#
# x: one duration string.
# Returns minutes * 60 + seconds as a number. The pattern needs a minutes
# number followed by a seconds number, so a value such as "0s" yields NA, and
# the seconds capture keeps at most two digits after the decimal point.
get_seconds <- function(x) {
  captured <- str_match(x, "(\\d+).(\\d{1,2}\\.?\\d{0,2}).")
  # Columns 2 and 3 of the match hold the minutes and seconds captures.
  parts <- as.numeric(captured[2:3])
  minutes <- parts[1]
  seconds <- parts[2]
  minutes * 60 + seconds
}

# Recompute format_duration from avg_duration. map() returns a list, so the
# column is first a list column with one get_seconds() result per row;
# naming that column in unnest() flattens it to the <dbl> column shown in the
# printed tibble below. select() then moves it to the front for display.
df %>%
  mutate(format_duration=map(avg_duration,get_seconds)) %>% 
  unnest(format_duration) %>% 
  select(format_duration, everything())

#> # A tibble: 6 x 12
#>   format_duration total_sessions total_bounce_ra… total_carts
#>             <dbl>          <int>            <dbl>       <int>
#> 1            543.              4            0.25            0
#> 2            528               5            0.4             1
#> 3            295.             14            0.357           4
#> 4             NA               1            1               0
#> 5             67               1            0               0
#> 6            244               1            0               0
#> # … with 8 more variables: total_checkouts <int>, total_conversion <dbl>,
#> #   total_orders_placed <int>, total_pageviews <int>, day <fct>,
#> #   ua_form_factor <fct>, location_region <fct>, avg_duration <fct>

Created on 2019-12-01 by the reprex package (v0.2.1)

2 Likes

Ah that works, thank you!

1 Like

You could also use rowwise() to avoid the use of map()

library(tidyverse)

# Sample data from the question, rebuilt as a tibble.
# format_duration starts out with the values posted in the question; the
# rowwise pipeline further down recomputes it from avg_duration.
df <- tibble(
    total_sessions = c(4L, 5L, 14L, 1L, 1L, 1L),
    total_bounce_rate = c(0.25, 0.4, 0.35714, 1, 0, 0),
    total_carts = c(0L, 1L, 4L, 0L, 0L, 0L),
    total_checkouts = c(0L, 1L, 3L, 0L, 0L, 0L),
    total_conversion = rep(0, 6L),
    total_orders_placed = rep(0L, 6L),
    total_pageviews = c(19L, 24L, 87L, 1L, 3L, 14L),
    format_duration = c(542.75, 528, 294.92, NA, 67, 244),
    day = factor(rep("1/1/18", 6L)),
    ua_form_factor = factor(rep("Desktop", 6L)),
    location_region = factor(c(
        "Maryland", "Victoria", "Illinois",
        "Ar Riyad", "Canterbury", "Stockholms Lan"
    )),
    avg_duration = factor(c(
        "9m2.75s", "8m48s", "4m54.928571428s",
        "0s", "1m7s", "4m4s"
    ))
)


# Convert duration strings such as "9m2.75s", "48s" or "0s" into seconds.
#
# x: character vector or factor of durations written as "<minutes>m<seconds>s".
#    The "<minutes>m" part is optional and the seconds may have a decimal part.
# Returns a numeric vector the same length as x. Values that do not follow the
# format (including NA) give NA instead of being silently counted as 0.
get_seconds <- function(x) {
    # Parse the labels of a factor, never its integer codes.
    x <- as.character(x)
    # The minutes digits only count when they are followed by "m". Without
    # that, a value like "45s" backtracks into minutes = 4, seconds = 5.
    parts <- str_match(x, "^\\s*(?:(\\d+)m)?(\\d+\\.?\\d*)s\\s*$")
    minutes <- as.numeric(parts[, 2])
    seconds <- as.numeric(parts[, 3])
    # A seconds-only value ("48s") is a valid match with zero minutes.
    minutes[is.na(minutes) & !is.na(seconds)] <- 0
    minutes * 60 + seconds
}

# Row-wise alternative to map() + unnest(): with rowwise(), mutate() calls
# get_seconds() once per row, so format_duration is a plain <dbl> column.
# The row-wise grouping is dropped again before the columns are reordered.
df %>%
    rowwise() %>%
    mutate(format_duration = get_seconds(avg_duration)) %>%
    ungroup() %>%
    select(format_duration, everything())
#> # A tibble: 6 x 12
#>   format_duration total_sessions total_bounce_ra… total_carts total_checkouts
#>             <dbl>          <int>            <dbl>       <int>           <int>
#> 1            543.              4            0.25            0               0
#> 2            528               5            0.4             1               1
#> 3            295.             14            0.357           4               3
#> 4              0               1            1               0               0
#> 5             67               1            0               0               0
#> 6            244               1            0               0               0
#> # … with 7 more variables: total_conversion <dbl>, total_orders_placed <int>,
#> #   total_pageviews <int>, day <fct>, ua_form_factor <fct>,
#> #   location_region <fct>, avg_duration <fct>

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.