Here's how I would approach this problem. The key step is to use tidyr::gather()
right away to bring everything into tidy format. This makes it easier to work with the single column containing the strings with mean, min, and max values. Once you have the data in tidy format, you can reshape it into whatever format works best for your output. This reprex gets you from the example data frame to tidy format.
library(dplyr)
library(tidyr)
library(stringr)
df <- tibble(
X2024 = c("1.391\n(1.077 to 1.754)", "1.351\n(1.012 to 1.745)",
"1.177\n(1.145 to 1.208)", "1.059\n(1.031 to 1.083)",
"1.289\n(1.109 to 1.491)"),
X2529 = c("1.391\n(1.077 to 1.754)", "1.351\n(1.012 to 1.745)",
"1.177\n(1.145 to 1.208)", "1.059\n(1.031 to 1.083)",
"1.289\n(1.109 to 1.491)"),
X3034 = c("1.391\n(1.077 to 1.754)", "1.351\n(1.012 to 1.745)",
"1.177\n(1.145 to 1.208)", "1.059\n(1.031 to 1.083)",
"1.289\n(1.109 to 1.491)")
)
Here’s the full pipeline to generate a tidy-formatted version of the original dataset.
df %>%
gather(var, value) %>%
separate(value, c("mean", "range"), sep = "\n") %>%
mutate(range = str_remove_all(range, "[()]")) %>%
separate(range, c("min", "max"), " to ") %>%
mutate_at(vars(mean, min, max), as.numeric)
#> # A tibble: 15 x 4
#> var mean min max
#> <chr> <dbl> <dbl> <dbl>
#> 1 X2024 1.39 1.08 1.75
#> 2 X2024 1.35 1.01 1.74
#> 3 X2024 1.18 1.14 1.21
#> 4 X2024 1.06 1.03 1.08
#> 5 X2024 1.29 1.11 1.49
#> 6 X2529 1.39 1.08 1.75
#> 7 X2529 1.35 1.01 1.74
#> 8 X2529 1.18 1.14 1.21
#> 9 X2529 1.06 1.03 1.08
#> 10 X2529 1.29 1.11 1.49
#> 11 X3034 1.39 1.08 1.75
#> 12 X3034 1.35 1.01 1.74
#> 13 X3034 1.18 1.14 1.21
#> 14 X3034 1.06 1.03 1.08
#> 15 X3034 1.29 1.11 1.49
Expand the section below to the pipeline step-by-step so that you can see how each transformation changes the data.
Step by step
# gather into tidy format
step_1 <- df %>%
gather(var, value)
#> # A tibble: 15 x 2
#> var value
#> <chr> <chr>
#> 1 X2024 "1.391\n(1.077 to 1.754)"
#> 2 X2024 "1.351\n(1.012 to 1.745)"
#> 3 X2024 "1.177\n(1.145 to 1.208)"
#> 4 X2024 "1.059\n(1.031 to 1.083)"
#> 5 X2024 "1.289\n(1.109 to 1.491)"
#> # … with 10 more rows
# separate mean from range
step_2 <- step_1 %>%
separate(value, c("mean", "range"), sep = "\n")
#> # A tibble: 15 x 3
#> var mean range
#> <chr> <chr> <chr>
#> 1 X2024 1.391 (1.077 to 1.754)
#> 2 X2024 1.351 (1.012 to 1.745)
#> 3 X2024 1.177 (1.145 to 1.208)
#> 4 X2024 1.059 (1.031 to 1.083)
#> 5 X2024 1.289 (1.109 to 1.491)
#> # … with 10 more rows
# remove parenthesis from range
step_3 <- step_2 %>%
mutate(range = str_remove_all(range, "[()]"))
#> # A tibble: 15 x 3
#> var mean range
#> <chr> <chr> <chr>
#> 1 X2024 1.391 1.077 to 1.754
#> 2 X2024 1.351 1.012 to 1.745
#> 3 X2024 1.177 1.145 to 1.208
#> 4 X2024 1.059 1.031 to 1.083
#> 5 X2024 1.289 1.109 to 1.491
#> # … with 10 more rows
# separate min and max from range
step_4 <- step_3 %>%
separate(range, c("min", "max"), " to ")
#> # A tibble: 15 x 4
#> var mean min max
#> <chr> <chr> <chr> <chr>
#> 1 X2024 1.391 1.077 1.754
#> 2 X2024 1.351 1.012 1.745
#> 3 X2024 1.177 1.145 1.208
#> 4 X2024 1.059 1.031 1.083
#> 5 X2024 1.289 1.109 1.491
#> # … with 10 more rows
# finally, convert strings to numeric
step_4 %>%
mutate_at(vars(mean, min, max), as.numeric)
#> # A tibble: 15 x 4
#> var mean min max
#> <chr> <dbl> <dbl> <dbl>
#> 1 X2024 1.39 1.08 1.75
#> 2 X2024 1.35 1.01 1.74
#> 3 X2024 1.18 1.14 1.21
#> 4 X2024 1.06 1.03 1.08
#> 5 X2024 1.29 1.11 1.49
#> # … with 10 more rows
Created on 2019-03-05 by the reprex package (v0.2.1)