Wide to long form

Rcloud · June 2, 2020, 5:58am

Hi there,

I am trying to compare two sets of data, V1 and V2. Each has two parameters, t and p, and each is measured at different times (1, 2, 3). My data frame is in wide form, and I think it would be better to work with it in long form, but I am unsure how to split each variable name into three variables, or how to use the gather/spread functions. Or is there another way to think about it, especially when it comes to statistical analysis?

Here is a sample; I want the result to have the variables "set, parameter, time, value":

# Sample wide-format data: one column per set_parameter_time combination,
# each filled with 10 uniform random draws (no seed is set, so values differ
# on every run).
data <- data.frame(V1_t_1 = runif(10),
                   V1_t_2 = runif(10),
                   V1_t_3 = runif(10),
                   V2_t_1 = runif(10),
                   V2_t_2 = runif(10),
                   # NOTE(review): the name below duplicates V2_t_2; with the
                   # default check.names = TRUE, data.frame() renames it to
                   # V2_t_2.1. Presumably V2_t_3 was intended -- confirm.
                   V2_t_2 = runif(10),
                   V1_p_1 = runif(10),
                   V2_p_1 = runif(10)
                   )

Any suggestions would be helpful!

DavoWW · June 2, 2020, 6:34am

Hi @Rcloud,
I'm not exactly sure what your desired output data frame should look like.
Also note that gather/spread have been superseded by pivot_longer/pivot_wider.
This may get you some way towards your destination:

# Fix the RNG state so the values printed below are reproducible.
set.seed(42)

# Wide-format example data: one column per set_parameter_time combination,
# 10 uniform random draws each. Columns are filled in the order listed.
my_data <- data.frame(
  V1_t_1 = runif(10),
  V1_t_2 = runif(10),
  V1_t_3 = runif(10),
  V2_t_1 = runif(10),
  V2_t_2 = runif(10),
  V2_t_3 = runif(10),  # Changed from 2 (the question repeated V2_t_2 here)
  V1_p_1 = runif(10),
  V2_p_1 = runif(10)
)
print(my_data)
#>       V1_t_1    V1_t_2     V1_t_3      V2_t_1     V2_t_2     V2_t_3    V1_p_1
#> 1  0.9148060 0.4577418 0.90403139 0.737595618 0.37955924 0.33342721 0.6756073
#> 2  0.9370754 0.7191123 0.13871017 0.811055141 0.43577158 0.34674825 0.9828172
#> 3  0.2861395 0.9346722 0.98889173 0.388108283 0.03743103 0.39848541 0.7595443
#> 4  0.8304476 0.2554288 0.94666823 0.685169729 0.97353991 0.78469278 0.5664884
#> 5  0.6417455 0.4622928 0.08243756 0.003948339 0.43175125 0.03893649 0.8496897
#> 6  0.5190959 0.9400145 0.51421178 0.832916080 0.95757660 0.74879539 0.1894739
#> 7  0.7365883 0.9782264 0.39020347 0.007334147 0.88775491 0.67727683 0.2712866
#> 8  0.1346666 0.1174874 0.90573813 0.207658973 0.63997877 0.17126433 0.8281585
#> 9  0.6569923 0.4749971 0.44696963 0.906601408 0.97096661 0.26108796 0.6932048
#> 10 0.7050648 0.5603327 0.83600426 0.611778643 0.61883821 0.51441293 0.2405447
#>         V2_p_1
#> 1  0.042988796
#> 2  0.140479094
#> 3  0.216385415
#> 4  0.479398564
#> 5  0.197410342
#> 6  0.719355838
#> 7  0.007884739
#> 8  0.375489965
#> 9  0.514407708
#> 10 0.001570554

library(tidyverse)

# Stack every column whose name contains "V" (i.e. all of them) into
# name/value pairs, giving one row per original cell.
long.df <- my_data %>%
  pivot_longer(cols = contains("V"))
head(long.df)
#> # A tibble: 6 x 2
#>   name   value
#>   <chr>  <dbl>
#> 1 V1_t_1 0.915
#> 2 V1_t_2 0.458
#> 3 V1_t_3 0.904
#> 4 V2_t_1 0.738
#> 5 V2_t_2 0.380
#> 6 V2_t_3 0.333

# Split each former column name (e.g. "V1_t_1") into its three parts using
# separate()'s default non-alphanumeric separator, which here is "_".
long2.df <- separate(
  long.df,
  col = name,
  into = c("set", "parameter", "time")
)

head(long2.df)
#> # A tibble: 6 x 4
#>   set   parameter time  value
#>   <chr> <chr>     <chr> <dbl>
#> 1 V1    t         1     0.915
#> 2 V1    t         2     0.458
#> 3 V1    t         3     0.904
#> 4 V2    t         1     0.738
#> 5 V2    t         2     0.380
#> 6 V2    t         3     0.333
# Count the observations in every set x time x parameter combination.
table(long2.df[c("set", "time", "parameter")])
#> , , parameter = p
#> 
#>     time
#> set   1  2  3
#>   V1 10  0  0
#>   V2 10  0  0
#> 
#> , , parameter = t
#> 
#>     time
#> set   1  2  3
#>   V1 10 10 10
#>   V2 10 10 10

Created on 2020-06-02 by the reprex package (v0.3.0)

The dummy data you provided is unbalanced across set/parameter/time (parameter p only has values at time 1), so I'm not sure where to go next.
HTH

Rcloud · June 2, 2020, 3:54pm

Thank you so much! This is actually how I wanted it to be!

system · June 9, 2020, 3:54pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.