Hi,
I am trying to aggregate a data frame by sum. However, the result looks as if the sum was performed over the whole data frame (both rows and columns) instead of only over the rows that share the same value in the "Symbol" column. I want to sum the rows that have the same gene name. How can I perform this operation?
# Example input. `Symbol` holds gene names (several are repeated across rows);
# the remaining seven columns hold integer values per sample. Note that the
# sample column names are duplicated ("Sample_1" x3, "Sample_2" x2,
# "Sample_3" x2). The structure is assigned so the example is self-contained.
Test_data <- structure(list(Symbol = c("Gene_A", "Gene_A", "Gene_B", "Gene_D",
"Gene_D", "Gene_D", "Gene_E", "Gene_F", "Gene_F"), Sample_1 = c(0L,
0L, 146L, 38L, 60L, 676L, 18L, 14L, 59L), Sample_1 = c(0L, 0L,
178L, 65L, 85L, 732L, 23L, 19L, 84L), Sample_1 = c(0L, 0L, 325L,
53L, 88L, 765L, 9L, 30L, 99L), Sample_2 = c(0L, 0L, 378L, 41L,
155L, 25L, 31L, 32L, 173L), Sample_2 = c(0L, 0L, 322L, 52L, 166L,
43L, 27L, 29L, 136L), Sample_3 = c(0L, 0L, 402L, 56L, 181L, 22L,
41L, 34L, 195L), Sample_3 = c(0L, 0L, 315L, 53L, 83L, 25L, 30L,
70L, 108L)), class = "data.frame", row.names = c(NA, -9L))
Test_data
#> Symbol Sample_1 Sample_1 Sample_1 Sample_2 Sample_2 Sample_3 Sample_3
#> 1 Gene_A 0 0 0 0 0 0 0
#> 2 Gene_A 0 0 0 0 0 0 0
#> 3 Gene_B 146 178 325 378 322 402 315
#> 4 Gene_D 38 65 53 41 52 56 53
#> 5 Gene_D 60 85 88 155 166 181 83
#> 6 Gene_D 676 732 765 25 43 22 25
#> 7 Gene_E 18 23 9 31 27 41 30
#> 8 Gene_F 14 19 30 32 29 34 70
#> 9 Gene_F 59 84 99 173 136 195 108

# `aggregate(. ~ Symbol, Test_data, sum)` expands `.` by column *name*. With
# duplicated names only the first "Sample_1", "Sample_2" and "Sample_3" columns
# are aggregated and the other four are silently dropped, which is why the
# result had 3 sample columns instead of 7 (nothing is summed across columns).
# Giving every column a unique name first lets all seven sample columns be
# summed within each Symbol.
# Note: the formula interface drops rows that contain NA (na.action = na.omit).
Test_data_unique <- setNames(Test_data, make.unique(names(Test_data)))
Test_data_sum <- aggregate(. ~ Symbol, data = Test_data_unique, FUN = sum)
dput(Test_data_sum)
#> structure(list(Symbol = c("Gene_A", "Gene_B", "Gene_D", "Gene_E",
#> "Gene_F"), Sample_1 = c(0L, 146L, 774L, 18L, 73L), Sample_1.1 = c(0L,
#> 178L, 882L, 23L, 103L), Sample_1.2 = c(0L, 325L, 906L, 9L, 129L
#> ), Sample_2 = c(0L, 378L, 221L, 31L, 205L), Sample_2.1 = c(0L,
#> 322L, 261L, 27L, 165L), Sample_3 = c(0L, 402L, 259L, 41L, 229L
#> ), Sample_3.1 = c(0L, 315L, 161L, 30L, 178L)), row.names = c(NA,
#> -5L), class = "data.frame")
Test_data_sum
#> Symbol Sample_1 Sample_1.1 Sample_1.2 Sample_2 Sample_2.1 Sample_3 Sample_3.1
#> 1 Gene_A 0 0 0 0 0 0 0
#> 2 Gene_B 146 178 325 378 322 402 315
#> 3 Gene_D 774 882 906 221 261 259 161
#> 4 Gene_E 18 23 9 31 27 41 30
#> 5 Gene_F 73 103 129 205 165 229 178
Created on 2021-10-04 by the reprex package (v2.0.1)
Thank you,
Toufiq