Error while converting from long to wider dataframe in R

mtoufiq · February 6, 2022, 2:27pm

Hi,

I am trying to convert a dataframe from long to wide format in R using tidyverse, but I am getting the error message shown below. Could this be because of the white space in the Sample column?

Error: Column 1 must be named.
Use .name_repair to specify repair.

dput(Test)
structure(list(Gene_Symbols = c("AKAP17A", "AKAP17A", "AKAP17A", 
                                "AKAP17A", "AKAP17A"), Sample = c("E012 IFNa-IFNb", "E012 IFNg", 
                                                                  "E012 IL-1b", "E012 LPS", "E012 Poly IC"), value = c(-0.03761234027548, 
                                                                                                                       -0.756540477529612, -0.162207597329452, -0.467888573617149, 0.749052524290082
                                                                  ), gm = c(17.3177955297245, 16.8199979924704, 17.2581592626705, 
                                                                            17.4751473963829, 18.5338190142901)), row.names = c(NA, -5L), class = c("tbl_df", 
                                                                                                                                                    "tbl", "data.frame"))
#> # A tibble: 5 × 4
#>   Gene_Symbols Sample           value    gm
#>   <chr>        <chr>            <dbl> <dbl>
#> 1 AKAP17A      E012 IFNa-IFNb -0.0376  17.3
#> 2 AKAP17A      E012 IFNg      -0.757   16.8
#> 3 AKAP17A      E012 IL-1b     -0.162   17.3
#> 4 AKAP17A      E012 LPS       -0.468   17.5
#> 5 AKAP17A      E012 Poly IC    0.749   18.5

# NOTE: this is the call that raises the error shown below. pivot_wider() is
# given no names_from / values_from here; the working call later in the thread
# differs by passing names_from = Sample and values_from = value.
Merge_Tier_1_2_v1.1_final_v1 <- Test %>%
  select(-gm) %>%
  pivot_wider(values_fn = list) |> 
  unnest(cols = -Gene_Symbols)
> Error: Column 1 must be named.
> Use .name_repair to specify repair.


**Other workaround:**

Merge_Tier_1_2_v1.1_final_v1 <- Test %>%
  select(-gm)

dput(Merge_Tier_1_2_v1.1_final_v1)
structure(list(Gene_Symbols = c("AKAP17A", "AKAP17A", "AKAP17A", 
                                "AKAP17A", "AKAP17A"), Sample = c("E012 IFNa-IFNb", "E012 IFNg", 
                                                                  "E012 IL-1b", "E012 LPS", "E012 Poly IC"), value = c(-0.03761234027548, 
                                                                                                                       -0.756540477529612, -0.162207597329452, -0.467888573617149, 0.749052524290082
                                                                  )), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
                                                                  ))
#> # A tibble: 5 × 3
#>   Gene_Symbols Sample           value
#>   <chr>        <chr>            <dbl>
#> 1 AKAP17A      E012 IFNa-IFNb -0.0376
#> 2 AKAP17A      E012 IFNg      -0.757 
#> 3 AKAP17A      E012 IL-1b     -0.162 
#> 4 AKAP17A      E012 LPS       -0.468 
#> 5 AKAP17A      E012 Poly IC    0.749



# Widen the gm-free tibble: one column per Sample, filled from value.
Merge_Tier_1_2_v1.1_final_v2 <- Merge_Tier_1_2_v1.1_final_v1 %>%
  pivot_wider(names_from = "Sample", values_from = "value")

dput(Merge_Tier_1_2_v1.1_final_v2)
structure(list(Gene_Symbols = "AKAP17A", `E012 IFNa-IFNb` = -0.03761234027548, 
               `E012 IFNg` = -0.756540477529612, `E012 IL-1b` = -0.162207597329452, 
               `E012 LPS` = -0.467888573617149, `E012 Poly IC` = 0.749052524290082), class = c("tbl_df", 
                                                                                               "tbl", "data.frame"), row.names = c(NA, -1L))
#> # A tibble: 1 × 6
#>   Gene_Symbols `E012 IFNa-IFNb` `E012 IFNg` `E012 IL-1b` `E012 LPS`
#>   <chr>                   <dbl>       <dbl>        <dbl>      <dbl>
#> 1 AKAP17A               -0.0376      -0.757       -0.162     -0.468
#> # … with 1 more variable: E012 Poly IC <dbl>

^{Created on 2022-02-06 by the reprex package (v2.0.1)}

Thank you,

Toufiq

pieterjanvc · February 6, 2022, 3:33pm

Hi,

Here is the way to do this:

library(tidyverse)

# Example data from the question: one gene measured in five samples.
test <- structure(list(
  Gene_Symbols = c("AKAP17A", "AKAP17A", "AKAP17A", 
                   "AKAP17A", "AKAP17A"), 
  Sample = c("E012 IFNa-IFNb", "E012 IFNg", "E012 IL-1b", 
             "E012 LPS", "E012 Poly IC"), 
  value = c(-0.03761234027548, -0.756540477529612, -0.162207597329452, 
            -0.467888573617149, 0.749052524290082), 
  gm = c(17.3177955297245, 16.8199979924704, 17.2581592626705, 
         17.4751473963829, 18.5338190142901)), row.names = c(NA, -5L), 
  class = c("tbl_df", "tbl", "data.frame"))

# Spread `Sample` into one column per sample, filled from `value`.
# `id_cols` is passed by name: supplying it by position is deprecated in
# tidyr >= 1.3.0. Only Gene_Symbols identifies a row, so `gm` is dropped.
test %>%
  pivot_wider(id_cols = Gene_Symbols, names_from = Sample, values_from = value)
#> # A tibble: 1 x 6
#>   Gene_Symbols `E012 IFNa-IFNb` `E012 IFNg` `E012 IL-1b` `E012 LPS`
#>   <chr>                   <dbl>       <dbl>        <dbl>      <dbl>
#> 1 AKAP17A               -0.0376      -0.757       -0.162     -0.468
#> # ... with 1 more variable: E012 Poly IC <dbl>

^{Created on 2022-02-06 by the reprex package (v2.0.1)}

Note that the column "gm" is dropped in this process, because only Gene_Symbols is kept as the identifying column.

Hope this helps,
PJ

mtoufiq · February 6, 2022, 4:03pm

Hi @pieterjanvc ,

thank you very much for the prompt response. This is helpful. Is there a way to make something like the code below work?

Merge_Tier_1_2_v1.1_final_v1 <- Test %>%
  select(-gm) %>%
  pivot_wider(values_fn = list) |> 
  unnest(cols = -Gene_Symbols)

OR;

Merge_Tier_1_2_v1.1_final_v1 <- Test %>%
  select(-gm) %>%
  pivot_wider(values_fn = list) |> 
  unnest()

FJCC · February 6, 2022, 4:40pm

You can do it like this but I don't understand the benefit of the extra step.

library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.1.2

# Rebuild the example tibble from the question: one gene, five samples.
test <- structure(
  list(
    Gene_Symbols = rep("AKAP17A", 5),
    Sample = c(
      "E012 IFNa-IFNb", "E012 IFNg", "E012 IL-1b", "E012 LPS", "E012 Poly IC"
    ),
    value = c(
      -0.03761234027548, -0.756540477529612, -0.162207597329452,
      -0.467888573617149, 0.749052524290082
    ),
    gm = c(
      17.3177955297245, 16.8199979924704, 17.2581592626705,
      17.4751473963829, 18.5338190142901
    )
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Drop `gm`, spread `Sample` into columns while collecting each cell into a
# list (values_fn = list), then unnest every column except the id column.
# The last step uses the native pipe `|>`, which needs R >= 4.1.
Merge_Tier_1_2_v1.1_final_v1 <- test %>%
  select(-gm) %>%
  pivot_wider(names_from = Sample, values_from = value, values_fn = list) |>
  unnest(cols = -Gene_Symbols)

Merge_Tier_1_2_v1.1_final_v1
#> # A tibble: 1 x 6
#>   Gene_Symbols `E012 IFNa-IFNb` `E012 IFNg` `E012 IL-1b` `E012 LPS`
#>   <chr>                   <dbl>       <dbl>        <dbl>      <dbl>
#> 1 AKAP17A               -0.0376      -0.757       -0.162     -0.468
#> # ... with 1 more variable: E012 Poly IC <dbl>

^{Created on 2022-02-06 by the reprex package (v2.0.1)}

mtoufiq · February 6, 2022, 7:44pm

Hi @FJCC ,

thank you very much. This was helpful. One question: does |> cause an issue? The RStudio editor keeps showing Unexpected token ">" for it, although the code runs smoothly.

FJCC · February 6, 2022, 8:10pm

The |> pipe is R's native pipe, built into R since version 4.1.0; in this code it does the same job as %>%. If you have an older version of R, use %>%.

mtoufiq · February 6, 2022, 8:24pm

@FJCC, Thank you. This is noted.

system · February 13, 2022, 8:24pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.