How to sort a dataframe by column values ?

I have got this df:

m_short <- structure(list(row = c("X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", 
"X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", 
"X3", "X1", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", 
"X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", 
"X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", 
"X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", 
"X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", 
"X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", 
"X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X3", "X1", "X2", "X3"), column = c("X10", "X10", "X10", 
"X11", "X11", "X11", "X12", "X12", "X12", "X13", "X13", "X13", 
"X14", "X14", "X14", "X15", "X15", "X15", "X16", "X16", "X16", 
"X17", "X17", "X17", "X18", "X18", "X18", "X19", "X19", "X19", 
"X2", "X20", "X20", "X20", "X21", "X21", "X21", "X22", "X22", 
"X22", "X23", "X23", "X23", "X24", "X24", "X24", "X25", "X25", 
"X25", "X26", "X26", "X26", "X27", "X27", "X27", "X28", "X28", 
"X28", "X29", "X29", "X29", "X3", "X3", "X30", "X30", "X30", 
"X4", "X4", "X4", "X5", "X5", "X5", "X6", "X6", "X6", "X7", "X7", 
"X7", "X8", "X8", "X8", "X9", "X9", "X9", "Y1", "Y1", "Y1", "Y10", 
"Y10", "Y10", "Y11", "Y11", "Y11", "Y12", "Y12", "Y12", "Y13", 
"Y13", "Y13", "Y14", "Y14", "Y14", "Y15", "Y15", "Y15", "Y16", 
"Y16", "Y16", "Y17", "Y17", "Y17", "Y18", "Y18", "Y18", "Y19", 
"Y19", "Y19", "Y2", "Y2", "Y2", "Y20", "Y20", "Y20", "Y3", "Y3", 
"Y3", "Y4", "Y4", "Y4", "Y5", "Y5", "Y5", "Y6", "Y6", "Y6", "Y7", 
"Y7", "Y7", "Y8", "Y8", "Y8", "Y9", "Y9", "Y9"), cor = c(0.068, 
0.088, -0.082, 0.139, 0.094, 0.061, -0.07, -0.052, -0.076, -0.097, 
0.067, -0.044, 0.166, 0.044, 0.023, 0.164, 0.026, 0.085, 0.083, 
0.054, -0.031, 0.009, -0.023, -0.24, 0.017, -0.056, 0.195, 0.061, 
-0.024, 0.053, 0.151, -0.062, -0.042, 0.006, 0.012, -0.006, -0.03, 
-0.041, -0.086, -0.077, 0.002, -0.015, -0.033, 0.094, 0.046, 
-0.116, 0.108, 0.391, 0.277, 0.052, 0.027, 0.114, -0.027, 0.18, 
-0.117, 0.201, -0.025, 0.059, -0.108, 0, -0.036, 0.183, 0.179, 
-0.201, 0.054, -0.05, -0.061, -0.009, -0.328, 0.051, -0.097, 
-0.387, -0.195, -0.119, -0.648, -0.058, -0.065, -0.259, -0.21, 
-0.102, 0.011, 0.028, 0.096, -0.169, 0.138, 0.144, -0.088, -0.074, 
0.124, -0.114, -0.143, -0.026, 0.104, 0.053, 0.075, -0.246, 0.125, 
-0.046, 0.125, 0.311, 0.139, 0.859, -0.091, 0.135, -0.091, 0.025, 
-0.199, -0.067, 0.2, -0.2, 0.447, 0.224, 0.043, 0.073, -0.179, 
0, 0, -0.099, -0.029, -0.115, -0.079, -0.091, -0.156, -0.117, 
-0.131, 0.091, -0.024, -0.041, 0.079, 0.007, -0.167, 0.064, -0.053, 
0.098, -0.035, 0.09, 0.172, -0.071, 0.107, -0.026, 0.096, -0.065, 
-0.078, -0.017), p = c(0.345, 0.219, 0.251, 0.051, 0.188, 0.393, 
0.329, 0.469, 0.286, 0.177, 0.353, 0.538, 0.02, 0.54, 0.75, 0.021, 
0.714, 0.234, 0.249, 0.451, 0.662, 0.895, 0.748, 0.001, 0.809, 
0.436, 0.006, 0.395, 0.738, 0.459, 0.035, 0.388, 0.56, 0.936, 
0.87, 0.93, 0.68, 0.571, 0.232, 0.282, 0.973, 0.831, 0.649, 0.535, 
0.759, 0.441, 0.671, 0.109, 0.265, 0.475, 0.711, 0.118, 0.873, 
0.286, 0.489, 0.028, 0.785, 0.521, 0.239, 1, 0.696, 0.01, 0.012, 
0.028, 0.561, 0.585, 0.393, 0.905, 0, 0.475, 0.177, 0, 0.006, 
0.097, 0, 0.415, 0.368, 0, 0.003, 0.152, 0.882, 0.698, 0.18, 
0.018, 0.084, 0.072, 0.273, 0.421, 0.178, 0.214, 0.305, 0.855, 
0.457, 0.708, 0.593, 0.075, 0.373, 0.745, 0.373, 0.225, 0.596, 
0, 0.779, 0.676, 0.779, 0.9, 0.32, 0.738, 0.704, 0.704, 0.374, 
0.155, 0.787, 0.644, 0.258, 1, 1, 0.217, 0.72, 0.15, 0.619, 0.565, 
0.325, 0.143, 0.101, 0.26, 0.761, 0.611, 0.327, 0.947, 0.121, 
0.555, 0.625, 0.367, 0.751, 0.407, 0.113, 0.515, 0.246, 0.782, 
0.298, 0.479, 0.399, 0.856)), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -144L), groups = structure(list(
    column = c("X10", "X11", "X12", "X13", "X14", "X15", "X16", 
    "X17", "X18", "X19", "X2", "X20", "X21", "X22", "X23", "X24", 
    "X25", "X26", "X27", "X28", "X29", "X3", "X30", "X4", "X5", 
    "X6", "X7", "X8", "X9", "Y1", "Y10", "Y11", "Y12", "Y13", 
    "Y14", "Y15", "Y16", "Y17", "Y18", "Y19", "Y2", "Y20", "Y3", 
    "Y4", "Y5", "Y6", "Y7", "Y8", "Y9"), .rows = structure(list(
        1:3, 4:6, 7:9, 10:12, 13:15, 16:18, 19:21, 22:24, 25:27, 
        28:30, 31L, 32:34, 35:37, 38:40, 41:43, 44:46, 47:49, 
        50:52, 53:55, 56:58, 59:61, 62:63, 64:66, 67:69, 70:72, 
        73:75, 76:78, 79:81, 82:84, 85:87, 88:90, 91:93, 94:96, 
        97:99, 100:102, 103:105, 106:108, 109:111, 112:114, 115:117, 
        118:120, 121:123, 124:126, 127:129, 130:132, 133:135, 
        136:138, 139:141, 142:144), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -49L), .drop = TRUE, class = c("tbl_df", 
"tbl", "data.frame")))

I want to sort it by variable "column" from Y1 to Y20 and then from X smallest to X largest value.
How do I do it ?

First: Don't name a column "column".
One solution:

  • Create a new group column populated with the first character of "column"
  • sort by descending group column, then by "column"
  • remove group column
m_short <- m_short %>%
  mutate(group = stringr::str_sub(column, 1, 1)) %>%
  arrange(desc(group), column) %>%
  select(!group)

No, this is not what I want:

obraz

I want to have it sorted in variable column in asc order from Y1, Y2, Y3 up to Y20.

So you want alphabetic sort order (Y10 before Y2) rather than numerical sort order (Y1, Y2, ..., Y10, Y20)?

No,
I want Y1, Y2, Y3, Y4, Y5 etc. up to Y20.

You can do it as follows using the dplyr library:

library(dplyr)
m_short <- m_short |>
             mutate(first = substring(column, 1, 1), rest = as.numeric(substring(column, 2))) |>
               arrange(desc(first), rest) |>
                 select(-c(first, rest))

Does

library(tidyverse)

m_short <- m_short %>% 
  arrange(parse_number(column))

do what you want?

Thank you @prubin and @startz very much.
This is much closer to what I want to get. Would it be possible to have it like Y1, Y2, Y3 ...Y20 and then again Y1, Y2, Y3...Y20 (no Y1, Y1, Y1, Y2, Y2, Y2 etc) and then X2...X30 in variable column from top to bottom?

Getting Y before X isn't too hard. Re-ordering according to the Y number isn't so obvious. So this is only a partial solution.

library(tidyverse)

m_short <- m_short |>  arrange(parse_number(column)) |>
  arrange(desc(substring(column, 1, 1)))

What about this solution?

library(tidyverse)

m_sort <- m_short %>%
  arrange(desc(str_sub(column, 1, 1)), parse_number(column))

head(m_sort)
#> # A tibble: 6 × 4
#> # Groups:   column [2]
#>   row   column    cor     p
#>   <chr> <chr>   <dbl> <dbl>
#> 1 X1    Y1      0.138 0.084
#> 2 X2    Y1      0.144 0.072
#> 3 X3    Y1     -0.088 0.273
#> 4 X1    Y2     -0.099 0.217
#> 5 X2    Y2     -0.029 0.72 
#> 6 X3    Y2     -0.115 0.15

tail(m_sort)
#> # A tibble: 6 × 4
#> # Groups:   column [2]
#>   row   column    cor     p
#>   <chr> <chr>   <dbl> <dbl>
#> 1 X1    X29    -0.108 0.239
#> 2 X2    X29     0     1    
#> 3 X3    X29    -0.036 0.696
#> 4 X1    X30    -0.201 0.028
#> 5 X2    X30     0.054 0.561
#> 6 X3    X30    -0.05  0.585

Created on 2024-08-11 with reprex v2.1.0

The OP doesn't want three Y1's in a row. He wants the Y sequence to start over.

Ah ok! In that case, here's a solution:

library(tidyverse)

m_sort <- m_short %>%
  mutate(
    Group=row_number()
  ) %>%
  arrange(desc(str_sub(column, 1, 1)), Group, parse_number(column))

print(m_sort, n=80)
#> # A tibble: 144 × 5
#> # Groups:   column [49]
#>    row   column    cor     p Group
#>    <chr> <chr>   <dbl> <dbl> <int>
#>  1 X1    Y1      0.138 0.084     1
#>  2 X1    Y2     -0.099 0.217     1
#>  3 X1    Y3     -0.117 0.143     1
#>  4 X1    Y4     -0.024 0.761     1
#>  5 X1    Y5      0.007 0.947     1
#>  6 X1    Y6     -0.053 0.625     1
#>  7 X1    Y7      0.09  0.407     1
#>  8 X1    Y8      0.107 0.246     1
#>  9 X1    Y9     -0.065 0.479     1
#> 10 X1    Y10    -0.074 0.421     1
#> 11 X1    Y11    -0.143 0.305     1
#> 12 X1    Y12     0.053 0.708     1
#> 13 X1    Y13     0.125 0.373     1
#> 14 X1    Y14     0.311 0.225     1
#> 15 X1    Y15    -0.091 0.779     1
#> 16 X1    Y16     0.025 0.9       1
#> 17 X1    Y17     0.2   0.704     1
#> 18 X1    Y18     0.224 0.155     1
#> 19 X1    Y19    -0.179 0.258     1
#> 20 X1    Y20    -0.079 0.619     1
#> 21 X2    Y1      0.144 0.072     2
#> 22 X2    Y2     -0.029 0.72      2
#> 23 X2    Y3     -0.131 0.101     2
#> 24 X2    Y4     -0.041 0.611     2
#> 25 X2    Y5     -0.167 0.121     2
#> 26 X2    Y6      0.098 0.367     2
#> 27 X2    Y7      0.172 0.113     2
#> 28 X2    Y8     -0.026 0.782     2
#> 29 X2    Y9     -0.078 0.399     2
#> 30 X2    Y10     0.124 0.178     2
#> 31 X2    Y11    -0.026 0.855     2
#> 32 X2    Y12     0.075 0.593     2
#> 33 X2    Y13    -0.046 0.745     2
#> 34 X2    Y14     0.139 0.596     2
#> 35 X2    Y15     0.135 0.676     2
#> 36 X2    Y16    -0.199 0.32      2
#> 37 X2    Y17    -0.2   0.704     2
#> 38 X2    Y18     0.043 0.787     2
#> 39 X2    Y19     0     1         2
#> 40 X2    Y20    -0.091 0.565     2
#> 41 X3    Y1     -0.088 0.273     3
#> 42 X3    Y2     -0.115 0.15      3
#> 43 X3    Y3      0.091 0.26      3
#> 44 X3    Y4      0.079 0.327     3
#> 45 X3    Y5      0.064 0.555     3
#> 46 X3    Y6     -0.035 0.751     3
#> 47 X3    Y7     -0.071 0.515     3
#> 48 X3    Y8      0.096 0.298     3
#> 49 X3    Y9     -0.017 0.856     3
#> 50 X3    Y10    -0.114 0.214     3
#> 51 X3    Y11     0.104 0.457     3
#> 52 X3    Y12    -0.246 0.075     3
#> 53 X3    Y13     0.125 0.373     3
#> 54 X3    Y14     0.859 0         3
#> 55 X3    Y15    -0.091 0.779     3
#> 56 X3    Y16    -0.067 0.738     3
#> 57 X3    Y17     0.447 0.374     3
#> 58 X3    Y18     0.073 0.644     3
#> 59 X3    Y19     0     1         3
#> 60 X3    Y20    -0.156 0.325     3
#> 61 X1    X2      0.151 0.035     1
#> 62 X1    X3      0.183 0.01      1
#> 63 X1    X4     -0.061 0.393     1
#> 64 X1    X5      0.051 0.475     1
#> 65 X1    X6     -0.195 0.006     1
#> 66 X1    X7     -0.058 0.415     1
#> 67 X1    X8     -0.21  0.003     1
#> 68 X1    X9      0.028 0.698     1
#> 69 X1    X10     0.068 0.345     1
#> 70 X1    X11     0.139 0.051     1
#> 71 X1    X12    -0.07  0.329     1
#> 72 X1    X13    -0.097 0.177     1
#> 73 X1    X14     0.166 0.02      1
#> 74 X1    X15     0.164 0.021     1
#> 75 X1    X16     0.083 0.249     1
#> 76 X1    X17     0.009 0.895     1
#> 77 X1    X18     0.017 0.809     1
#> 78 X1    X19     0.061 0.395     1
#> 79 X1    X20    -0.062 0.388     1
#> 80 X1    X21     0.012 0.87      1
#> # ℹ 64 more rows

Created on 2024-08-11 with reprex v2.1.0

1 Like

Thank you very much Steph, this is it.

I have one small additional question, why slice is not working here:

m_short_sliced <- m_short %>% dplyr::slice(1:60)

It still shows 144 rows instead of 60.

Base R works:

m_short_sliced <- m_short[1:60,]

I am curious why slice from dplyr is not working.

It's because you have a grouped data frame and you are getting the first 60 rows from each group. You need to ungroup it to do what you want.

It works now, fantastic. Thank you again.

I would like to say thank you as well to everybody involved in finding solution to this topic.
I have learnt a lot.
Best regards,
Andrzej

1 Like

That is very clever!

Here's another variation that uses a regular expression instead of a substring, doesn't depends on the table order, and doesn't use mutate():

original sample data, saved as `m_short`
m_short <- structure(list(row = c("X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", 
"X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", 
"X3", "X1", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", 
"X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", 
"X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", 
"X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", 
"X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", 
"X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", 
"X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", "X2", "X3", "X1", 
"X2", "X3", "X1", "X2", "X3"), column = c("X10", "X10", "X10", 
"X11", "X11", "X11", "X12", "X12", "X12", "X13", "X13", "X13", 
"X14", "X14", "X14", "X15", "X15", "X15", "X16", "X16", "X16", 
"X17", "X17", "X17", "X18", "X18", "X18", "X19", "X19", "X19", 
"X2", "X20", "X20", "X20", "X21", "X21", "X21", "X22", "X22", 
"X22", "X23", "X23", "X23", "X24", "X24", "X24", "X25", "X25", 
"X25", "X26", "X26", "X26", "X27", "X27", "X27", "X28", "X28", 
"X28", "X29", "X29", "X29", "X3", "X3", "X30", "X30", "X30", 
"X4", "X4", "X4", "X5", "X5", "X5", "X6", "X6", "X6", "X7", "X7", 
"X7", "X8", "X8", "X8", "X9", "X9", "X9", "Y1", "Y1", "Y1", "Y10", 
"Y10", "Y10", "Y11", "Y11", "Y11", "Y12", "Y12", "Y12", "Y13", 
"Y13", "Y13", "Y14", "Y14", "Y14", "Y15", "Y15", "Y15", "Y16", 
"Y16", "Y16", "Y17", "Y17", "Y17", "Y18", "Y18", "Y18", "Y19", 
"Y19", "Y19", "Y2", "Y2", "Y2", "Y20", "Y20", "Y20", "Y3", "Y3", 
"Y3", "Y4", "Y4", "Y4", "Y5", "Y5", "Y5", "Y6", "Y6", "Y6", "Y7", 
"Y7", "Y7", "Y8", "Y8", "Y8", "Y9", "Y9", "Y9"), cor = c(0.068, 
0.088, -0.082, 0.139, 0.094, 0.061, -0.07, -0.052, -0.076, -0.097, 
0.067, -0.044, 0.166, 0.044, 0.023, 0.164, 0.026, 0.085, 0.083, 
0.054, -0.031, 0.009, -0.023, -0.24, 0.017, -0.056, 0.195, 0.061, 
-0.024, 0.053, 0.151, -0.062, -0.042, 0.006, 0.012, -0.006, -0.03, 
-0.041, -0.086, -0.077, 0.002, -0.015, -0.033, 0.094, 0.046, 
-0.116, 0.108, 0.391, 0.277, 0.052, 0.027, 0.114, -0.027, 0.18, 
-0.117, 0.201, -0.025, 0.059, -0.108, 0, -0.036, 0.183, 0.179, 
-0.201, 0.054, -0.05, -0.061, -0.009, -0.328, 0.051, -0.097, 
-0.387, -0.195, -0.119, -0.648, -0.058, -0.065, -0.259, -0.21, 
-0.102, 0.011, 0.028, 0.096, -0.169, 0.138, 0.144, -0.088, -0.074, 
0.124, -0.114, -0.143, -0.026, 0.104, 0.053, 0.075, -0.246, 0.125, 
-0.046, 0.125, 0.311, 0.139, 0.859, -0.091, 0.135, -0.091, 0.025, 
-0.199, -0.067, 0.2, -0.2, 0.447, 0.224, 0.043, 0.073, -0.179, 
0, 0, -0.099, -0.029, -0.115, -0.079, -0.091, -0.156, -0.117, 
-0.131, 0.091, -0.024, -0.041, 0.079, 0.007, -0.167, 0.064, -0.053, 
0.098, -0.035, 0.09, 0.172, -0.071, 0.107, -0.026, 0.096, -0.065, 
-0.078, -0.017), p = c(0.345, 0.219, 0.251, 0.051, 0.188, 0.393, 
0.329, 0.469, 0.286, 0.177, 0.353, 0.538, 0.02, 0.54, 0.75, 0.021, 
0.714, 0.234, 0.249, 0.451, 0.662, 0.895, 0.748, 0.001, 0.809, 
0.436, 0.006, 0.395, 0.738, 0.459, 0.035, 0.388, 0.56, 0.936, 
0.87, 0.93, 0.68, 0.571, 0.232, 0.282, 0.973, 0.831, 0.649, 0.535, 
0.759, 0.441, 0.671, 0.109, 0.265, 0.475, 0.711, 0.118, 0.873, 
0.286, 0.489, 0.028, 0.785, 0.521, 0.239, 1, 0.696, 0.01, 0.012, 
0.028, 0.561, 0.585, 0.393, 0.905, 0, 0.475, 0.177, 0, 0.006, 
0.097, 0, 0.415, 0.368, 0, 0.003, 0.152, 0.882, 0.698, 0.18, 
0.018, 0.084, 0.072, 0.273, 0.421, 0.178, 0.214, 0.305, 0.855, 
0.457, 0.708, 0.593, 0.075, 0.373, 0.745, 0.373, 0.225, 0.596, 
0, 0.779, 0.676, 0.779, 0.9, 0.32, 0.738, 0.704, 0.704, 0.374, 
0.155, 0.787, 0.644, 0.258, 1, 1, 0.217, 0.72, 0.15, 0.619, 0.565, 
0.325, 0.143, 0.101, 0.26, 0.761, 0.611, 0.327, 0.947, 0.121, 
0.555, 0.625, 0.367, 0.751, 0.407, 0.113, 0.515, 0.246, 0.782, 
0.298, 0.479, 0.399, 0.856)), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -144L), groups = structure(list(
    column = c("X10", "X11", "X12", "X13", "X14", "X15", "X16", 
    "X17", "X18", "X19", "X2", "X20", "X21", "X22", "X23", "X24", 
    "X25", "X26", "X27", "X28", "X29", "X3", "X30", "X4", "X5", 
    "X6", "X7", "X8", "X9", "Y1", "Y10", "Y11", "Y12", "Y13", 
    "Y14", "Y15", "Y16", "Y17", "Y18", "Y19", "Y2", "Y20", "Y3", 
    "Y4", "Y5", "Y6", "Y7", "Y8", "Y9"), .rows = structure(list(
        1:3, 4:6, 7:9, 10:12, 13:15, 16:18, 19:21, 22:24, 25:27, 
        28:30, 31L, 32:34, 35:37, 38:40, 41:43, 44:46, 47:49, 
        50:52, 53:55, 56:58, 59:61, 62:63, 64:66, 67:69, 70:72, 
        73:75, 76:78, 79:81, 82:84, 85:87, 88:90, 91:93, 94:96, 
        97:99, 100:102, 103:105, 106:108, 109:111, 112:114, 115:117, 
        118:120, 121:123, 124:126, 127:129, 130:132, 133:135, 
        136:138, 139:141, 142:144), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -49L), .drop = TRUE, class = c("tbl_df", 
"tbl", "data.frame")))
library(tidyverse)
m_short |> 
  arrange(desc(str_extract(column, 'X|Y')), row,  parse_number(column))
#> # A tibble: 144 × 4
#> # Groups:   column [49]
#>    row   column    cor     p
#>    <chr> <chr>   <dbl> <dbl>
#>  1 X1    Y1      0.138 0.084
#>  2 X1    Y2     -0.099 0.217
#>  3 X1    Y3     -0.117 0.143
#>  4 X1    Y4     -0.024 0.761
#>  5 X1    Y5      0.007 0.947
#>  6 X1    Y6     -0.053 0.625
#>  7 X1    Y7      0.09  0.407
#>  8 X1    Y8      0.107 0.246
#>  9 X1    Y9     -0.065 0.479
#> 10 X1    Y10    -0.074 0.421
#> # ℹ 134 more rows

Created on 2024-08-14 with reprex v2.0.2

Hi, thank you,
I just have played with your code and one question, why this is not working:

m_short %>%  str_extract(column, 'X|Y')

The error says: object "column" not found, but this variable is present in m_short df.

arrange is a tidyverse function that works over data.frames, therefore it provides you the convenience of interpretting columns you pass in the context of the table you have it operated over.

str_extract, is a function that works over vectors; it has no context for data.frames.
When you used arrange (or if you used mutate) you would get that context; but in the way you wrote your command you dont.

you wrote :
m_short %>% str_extract(column, 'X|Y')
which is effectively :
str_extract(m_short,column, 'X|Y')
this is different from
str_extract(m_short$column, 'X|Y')
or
str_extract(pull(m_short,column), 'X|Y')
or
pull(m_short,column) |> str_extract('X|Y')
which I would expect to work.

1 Like

Thank you Nir for detailed explanation, very helpful.