I do not know why it is wrong.

setwd('D:/Users/GBD数据')
library(dplyr)
library(ggplot2)
colnames(mtcars)
mtcars <- read.csv('DALYs_Table1.csv',header = T) |> 
  select(mtcars,-X) |> 
arrange(Prevalence_Number_1000,ASPR,EAPC_CI,location)

mtcars <- read.csv('DALYs_Table1.csv',header = T) |>

  • select(mtcars,-X)
    Error in select():
    ! Can't select columns with mtcars.
    ✖ mtcars must be numeric or character, not a <data.frame> object.
    Run rlang::last_trace() to see where the error occurred.
> dput(mtcars)
structure(list(X = c(10L, 6L, 23L, 22L, 20L, 19L, 16L, 9L, 5L, 
3L, 2L, 27L, 18L, 15L, 4L, 14L, 21L, 17L, 8L, 12L, 26L, 7L, 11L, 
25L, 24L, 1L, 13L), location = c("Global", "Low SDI", "Low-middle SDI", 
"Middle SDI", "High-middle SDI", "High SDI", "Andean Latin America", 
"Australasia", "Caribbean", "Central Asia", "Central Europe", 
"Central Latin America", "Central Sub-Saharan Africa", "East Asia", 
"Eastern Europe", "Eastern Sub-Saharan Africa", "High-income Asia Pacific", 
"High-income North America", "North Africa and Middle East", 
"Oceania", "South Asia", "Southeast Asia", "Southern Latin America", 
"Southern Sub-Saharan Africa", "Tropical Latin America", "Western Europe", 
"Western Sub-Saharan Africa"), Prevalence_Number_1000 = c("1677.4 (1171 to 2343.9)", 
"48.1 (27.5 to 74.1)", "127.3 (77 to 189.7)", "588.1 (389.6 to 847.3)", 
"354.8 (236.1 to 519.9)", "558 (436.1 to 711.9)", "7.9 (4.9 to 11.8)", 
"23.8 (15.8 to 34.1)", "4.2 (2.7 to 6.2)", "20.1 (12.9 to 29)", 
"25.9 (16.3 to 39.2)", "23.6 (15.4 to 33.8)", "7.3 (4 to 11.5)", 
"574.7 (386.3 to 831.8)", "70.6 (52.5 to 94.3)", "22.3 (12.7 to 34.4)", 
"22.7 (13.6 to 35)", "361.3 (295.7 to 445.8)", "44.9 (29.6 to 62.3)", 
"2.7 (1.5 to 4.5)", "47.2 (32 to 67)", "198.3 (116 to 306.9)", 
"7.1 (4.2 to 11.2)", "15 (9.6 to 21.6)", "55.4 (31.7 to 86)", 
"120.8 (83 to 170.6)", "21.6 (11.7 to 34.5)"), ASPR = c("21 (14.6 to 29.3)", 
"4.4 (2.6 to 6.6)", "6.1 (3.8 to 9.1)", "23.6 (15.5 to 34.4)", 
"29.5 (19 to 43.2)", "53 (40.4 to 69.4)", "11 (6.9 to 16.4)", 
"81.7 (53.6 to 117.6)", "8.5 (5.4 to 12.6)", "20.2 (13 to 29.1)", 
"26.8 (16.4 to 40.6)", "8.8 (5.7 to 12.5)", "5.4 (3 to 8.4)", 
"44.7 (29.6 to 65.5)", "39.5 (28.6 to 53.3)", "5.2 (3.1 to 8)", 
"15.2 (8.7 to 23.9)", "98.5 (79.6 to 122.9)", "6.8 (4.5 to 9.4)", 
"18.4 (10.1 to 30.1)", "2.4 (1.6 to 3.4)", "26.8 (15.7 to 41.7)", 
"10.1 (5.9 to 15.9)", "17.5 (11.3 to 24.9)", "23.3 (13.3 to 36.4)", 
"32.2 (21.7 to 45.9)", "4.6 (2.5 to 7.3)"), EAPC_CI = c("-1.68\n(-1.87 to -1.48)", 
"0.13\n(0.11 to 0.14)", "-0.24\n(-0.28 to -0.21)", "-2.72\n(-3.02 to -2.42)", 
"-1.71\n(-1.9 to -1.52)", "2.01\n(1.71 to 2.31)", "0.7\n(0.63 to 0.76)", 
"0.34\n(0.02 to 0.65)", "0.3\n(-0.04 to 0.63)", "0.85\n(0.74 to 0.97)", 
"0.84\n(0.79 to 0.88)", "0.83\n(0.59 to 1.06)", "0.2\n(0.18 to 0.23)", 
"-2.65\n(-3.04 to -2.26)", "0.87\n(0.35 to 1.39)", "0.16\n(0.15 to 0.17)", 
"0.03\n(-0.03 to 0.1)", "3.84\n(3.18 to 4.5)", "0.77\n(0.59 to 0.94)", 
"0.03\n(0.01 to 0.06)", "0.34\n(0.24 to 0.44)", "-0.01\n(-0.07 to 0.05)", 
"0.09\n(-0.02 to 0.2)", "-0.18\n(-0.52 to 0.17)", "-0.14\n(-0.22 to -0.06)", 
"1.14\n(1.01 to 1.27)", "-0.02\n(-0.05 to 0.01)")), class = "data.frame", row.names = c(NA, 
-27L))
>

There are potentially a few things incorrect here.

  1. mtcars is one of the sample datasets that comes with R via the datasets package. While you can name a variable this in your R script and it will work, you probably shouldn't.
  2. For the line of code highlighted above, it's important to realize that <- is the assignment operator, and the assignment happens LAST. Your R code essentially reads: "Read in my csv, pass those results to select, do the select operation and pass that result to arrange. After that, assign the final result to mtcars."

At the step read.csv('DALYs_Table1.csv',header = T) |> select(mtcars,-X), the data frame returned by read.csv is already piped into select as its first argument, but you also pass the GLOBAL mtcars data frame as the next argument. This cannot work: because the pipe has already supplied the data, select treats mtcars as a column selection, and a data frame is not a valid column selection (it must be numeric or character, as the error message says).

Given that you have the code colnames(mtcars) above this line of code, I would guess that if you made your code this way, it would be closer to what you intend:

mtcars <- read.csv('DALYs_Table1.csv',header = T) |> 
  select(colnames(mtcars),-X) |> 
arrange(Prevalence_Number_1000,ASPR,EAPC_CI,location)

But again, you're mixing up the mtcars original dataset with the data you are importing via read.csv. You should instead do something like:

setwd('D:/Users/GBD数据')
library(dplyr)
library(ggplot2)

# Read the raw table once and keep it under its own name, so it cannot be
# confused with the built-in `mtcars` dataset.
mydata <- read.csv('DALYs_Table1.csv', header = TRUE)

# select() needs the data frame as its first argument; piping `mydata` in
# supplies it. Drop column X, then sort the rows by the listed columns
# (arrange() reorders rows, not columns).
mydata_filtered <- mydata |>
  select(-X) |>
  arrange(Prevalence_Number_1000, ASPR, EAPC_CI, location)

Best,
Randy

1 Like

I ran it as follows, but it did not arrange the data as I expected. Is my arrange function wrong?

mydata <- read.csv('DALYs_Table1.csv',header = T) |> 
  select(colnames(mydata),-X) |> 
arrange(Prevalence_Number_1000,ASPR,EAPC_CI,location)
colnames(mydata)

colnames(mydata)
[1] "location" "Prevalence_Number_1000"
[3] "ASPR" "EAPC_CI"

What is wrong with the result of your code?

I wanted its columns to be arranged in this order (arrange(Prevalence_Number_1000,ASPR,EAPC_CI,location)), but they were not.

> dput(mydata)
structure(list(location = c("Western Europe", "Low-middle SDI", 
"Southern Sub-Saharan Africa", "Global", "Southeast Asia", "Oceania", 
"Central Asia", "Western Sub-Saharan Africa", "Eastern Sub-Saharan Africa", 
"High-income Asia Pacific", "Central Latin America", "Australasia", 
"Central Europe", "High-middle SDI", "High-income North America", 
"Caribbean", "North Africa and Middle East", "South Asia", "Low SDI", 
"Tropical Latin America", "High SDI", "East Asia", "Middle SDI", 
"Southern Latin America", "Central Sub-Saharan Africa", "Andean Latin America", 
"Eastern Europe"), Prevalence_Number_1000 = c("120.8 (83 to 170.6)", 
"127.3 (77 to 189.7)", "15 (9.6 to 21.6)", "1677.4 (1171 to 2343.9)", 
"198.3 (116 to 306.9)", "2.7 (1.5 to 4.5)", "20.1 (12.9 to 29)", 
"21.6 (11.7 to 34.5)", "22.3 (12.7 to 34.4)", "22.7 (13.6 to 35)", 
"23.6 (15.4 to 33.8)", "23.8 (15.8 to 34.1)", "25.9 (16.3 to 39.2)", 
"354.8 (236.1 to 519.9)", "361.3 (295.7 to 445.8)", "4.2 (2.7 to 6.2)", 
"44.9 (29.6 to 62.3)", "47.2 (32 to 67)", "48.1 (27.5 to 74.1)", 
"55.4 (31.7 to 86)", "558 (436.1 to 711.9)", "574.7 (386.3 to 831.8)", 
"588.1 (389.6 to 847.3)", "7.1 (4.2 to 11.2)", "7.3 (4 to 11.5)", 
"7.9 (4.9 to 11.8)", "70.6 (52.5 to 94.3)"), ASPR = c("32.2 (21.7 to 45.9)", 
"6.1 (3.8 to 9.1)", "17.5 (11.3 to 24.9)", "21 (14.6 to 29.3)", 
"26.8 (15.7 to 41.7)", "18.4 (10.1 to 30.1)", "20.2 (13 to 29.1)", 
"4.6 (2.5 to 7.3)", "5.2 (3.1 to 8)", "15.2 (8.7 to 23.9)", "8.8 (5.7 to 12.5)", 
"81.7 (53.6 to 117.6)", "26.8 (16.4 to 40.6)", "29.5 (19 to 43.2)", 
"98.5 (79.6 to 122.9)", "8.5 (5.4 to 12.6)", "6.8 (4.5 to 9.4)", 
"2.4 (1.6 to 3.4)", "4.4 (2.6 to 6.6)", "23.3 (13.3 to 36.4)", 
"53 (40.4 to 69.4)", "44.7 (29.6 to 65.5)", "23.6 (15.5 to 34.4)", 
"10.1 (5.9 to 15.9)", "5.4 (3 to 8.4)", "11 (6.9 to 16.4)", "39.5 (28.6 to 53.3)"
), EAPC_CI = c("1.14\n(1.01 to 1.27)", "-0.24\n(-0.28 to -0.21)", 
"-0.18\n(-0.52 to 0.17)", "-1.68\n(-1.87 to -1.48)", "-0.01\n(-0.07 to 0.05)", 
"0.03\n(0.01 to 0.06)", "0.85\n(0.74 to 0.97)", "-0.02\n(-0.05 to 0.01)", 
"0.16\n(0.15 to 0.17)", "0.03\n(-0.03 to 0.1)", "0.83\n(0.59 to 1.06)", 
"0.34\n(0.02 to 0.65)", "0.84\n(0.79 to 0.88)", "-1.71\n(-1.9 to -1.52)", 
"3.84\n(3.18 to 4.5)", "0.3\n(-0.04 to 0.63)", "0.77\n(0.59 to 0.94)", 
"0.34\n(0.24 to 0.44)", "0.13\n(0.11 to 0.14)", "-0.14\n(-0.22 to -0.06)", 
"2.01\n(1.71 to 2.31)", "-2.65\n(-3.04 to -2.26)", "-2.72\n(-3.02 to -2.42)", 
"0.09\n(-0.02 to 0.2)", "0.2\n(0.18 to 0.23)", "0.7\n(0.63 to 0.76)", 
"0.87\n(0.35 to 1.39)")), class = "data.frame", row.names = c(NA, 
-27L))

The arrange() function does not change the order of the columns. According to the documentation it changes the order of the rows: arrange() orders the rows of a data frame by the values of selected columns.

1 Like

Your column Prevalence_Number_1000 is being sorted alphabetically because it contains character strings. If you want it sorted by the numeric value of the first number in each row, you will have to split the column into two and then sort on the numeric part. Here is an example.

library(tidyverse)
mydata <- structure(list(location = c("Western Europe", "Low-middle SDI", 
                              "Southern Sub-Saharan Africa", "Global", "Southeast Asia", "Oceania", 
                              "Central Asia", "Western Sub-Saharan Africa", "Eastern Sub-Saharan Africa", 
                              "High-income Asia Pacific", "Central Latin America", "Australasia", 
                              "Central Europe", "High-middle SDI", "High-income North America", 
                              "Caribbean", "North Africa and Middle East", "South Asia", "Low SDI", 
                              "Tropical Latin America", "High SDI", "East Asia", "Middle SDI", 
                              "Southern Latin America", "Central Sub-Saharan Africa", "Andean Latin America", 
                              "Eastern Europe"), 
                 Prevalence_Number_1000 = c("120.8 (83 to 170.6)", 
                                            "127.3 (77 to 189.7)", "15 (9.6 to 21.6)", "1677.4 (1171 to 2343.9)", 
                                            "198.3 (116 to 306.9)", "2.7 (1.5 to 4.5)", "20.1 (12.9 to 29)", 
                                            "21.6 (11.7 to 34.5)", "22.3 (12.7 to 34.4)", "22.7 (13.6 to 35)", 
                                            "23.6 (15.4 to 33.8)", "23.8 (15.8 to 34.1)", "25.9 (16.3 to 39.2)", 
                                            "354.8 (236.1 to 519.9)", "361.3 (295.7 to 445.8)", "4.2 (2.7 to 6.2)", 
                                            "44.9 (29.6 to 62.3)", "47.2 (32 to 67)", "48.1 (27.5 to 74.1)", 
                                            "55.4 (31.7 to 86)", "558 (436.1 to 711.9)", "574.7 (386.3 to 831.8)", 
                                            "588.1 (389.6 to 847.3)", "7.1 (4.2 to 11.2)", "7.3 (4 to 11.5)", 
                                            "7.9 (4.9 to 11.8)", "70.6 (52.5 to 94.3)"), 
                 ASPR = c("32.2 (21.7 to 45.9)", 
                          "6.1 (3.8 to 9.1)", "17.5 (11.3 to 24.9)", "21 (14.6 to 29.3)", 
                          "26.8 (15.7 to 41.7)", "18.4 (10.1 to 30.1)", "20.2 (13 to 29.1)", 
                          "4.6 (2.5 to 7.3)", "5.2 (3.1 to 8)", "15.2 (8.7 to 23.9)", "8.8 (5.7 to 12.5)", 
                          "81.7 (53.6 to 117.6)", "26.8 (16.4 to 40.6)", "29.5 (19 to 43.2)", 
                          "98.5 (79.6 to 122.9)", "8.5 (5.4 to 12.6)", "6.8 (4.5 to 9.4)", 
                          "2.4 (1.6 to 3.4)", "4.4 (2.6 to 6.6)", "23.3 (13.3 to 36.4)", 
                          "53 (40.4 to 69.4)", "44.7 (29.6 to 65.5)", "23.6 (15.5 to 34.4)", 
                          "10.1 (5.9 to 15.9)", "5.4 (3 to 8.4)", "11 (6.9 to 16.4)", "39.5 (28.6 to 53.3)"
                 ), 
                 EAPC_CI = c("1.14\n(1.01 to 1.27)", "-0.24\n(-0.28 to -0.21)", 
                             "-0.18\n(-0.52 to 0.17)", "-1.68\n(-1.87 to -1.48)", "-0.01\n(-0.07 to 0.05)", 
                             "0.03\n(0.01 to 0.06)", "0.85\n(0.74 to 0.97)", "-0.02\n(-0.05 to 0.01)", 
                             "0.16\n(0.15 to 0.17)", "0.03\n(-0.03 to 0.1)", "0.83\n(0.59 to 1.06)", 
                             "0.34\n(0.02 to 0.65)", "0.84\n(0.79 to 0.88)", "-1.71\n(-1.9 to -1.52)", 
                             "3.84\n(3.18 to 4.5)", "0.3\n(-0.04 to 0.63)", "0.77\n(0.59 to 0.94)", 
                             "0.34\n(0.24 to 0.44)", "0.13\n(0.11 to 0.14)", "-0.14\n(-0.22 to -0.06)", 
                             "2.01\n(1.71 to 2.31)", "-2.65\n(-3.04 to -2.26)", "-2.72\n(-3.02 to -2.42)", 
                             "0.09\n(-0.02 to 0.2)", "0.2\n(0.18 to 0.23)", "0.7\n(0.63 to 0.76)", 
                             "0.87\n(0.35 to 1.39)")), class = "data.frame", row.names = c(NA, 
                                                                                           -27L))
# Split each "value (lower to upper)" string at its first space: the point
# estimate goes to Prev_Value and everything after it is merged into
# Prev_Range. Converting Prev_Value to numeric makes arrange() sort by
# magnitude instead of alphabetically.
mydata <- mydata |>
  separate_wider_delim(
    cols = Prevalence_Number_1000,
    delim = " ",
    names = c("Prev_Value", "Prev_Range"),
    too_many = "merge"
  ) |>
  mutate(Prev_Value = as.numeric(Prev_Value)) |>
  arrange(Prev_Value)
head(mydata)
#> # A tibble: 6 × 5
#>   location                    Prev_Value Prev_Range    ASPR              EAPC_CI
#>   <chr>                            <dbl> <chr>         <chr>             <chr>  
#> 1 Oceania                            2.7 (1.5 to 4.5)  18.4 (10.1 to 30… "0.03\…
#> 2 Caribbean                          4.2 (2.7 to 6.2)  8.5 (5.4 to 12.6) "0.3\n…
#> 3 Southern Latin America             7.1 (4.2 to 11.2) 10.1 (5.9 to 15.… "0.09\…
#> 4 Central Sub-Saharan Africa         7.3 (4 to 11.5)   5.4 (3 to 8.4)    "0.2\n…
#> 5 Andean Latin America               7.9 (4.9 to 11.8) 11 (6.9 to 16.4)  "0.7\n…
#> 6 Southern Sub-Saharan Africa       15   (9.6 to 21.6) 17.5 (11.3 to 24… "-0.18…

Created on 2024-12-05 with reprex v2.1.1

1 Like

To change the order of the columns, I use the select function. In this case, you might do the following:

mtcars <- read.csv('DALYs_Table1.csv', header = TRUE) |>
  select(-X) |>  # removes column X
  # puts these columns in this order and then any other remaining columns
  select(Prevalence_Number_1000, ASPR, EAPC_CI, location, everything())
1 Like