Hi,
I am trying to perform basic calculations and transformation on the dataframe in R for analysis and plotting. For smaller datasets, I used to work with Excel sheet, and perform basic operations. However, the for larger datasets, usage of Excel is tedious and cumbersome. I am now trying to perform these operations in R Studio with an example dataset as provided below. Is there a way to perform the data calculations and transformations in better ways using packages like Tidyverse
, Dplyr
, Tidyr
etc.
1. **Import the primary data**
dput(Ct_val)
structure(list(Gene_Symbols = c("P53", "AQP1", "ACSL1", "CD68",
"GAPDH", "GAPDH", "B2M", "B2M", "HPRT1", "HPRT1"), AB_0 = c(22.79533592,
20.82377817, 999, 16.99486578, 14.83797255, 15.28974757, 17.94264512,
18.14911598, 10.88820582, 11.12897198), AC_0 = c(21.73593846,
23.07210088, 999, 17.8064999, 15.25164332, 15.50515474, 17.9512724,
17.72263561, 10.76670429, 10.90362257), BA_1 = c(22.20145134,
21.8933239, 999, 17.5469647, 15.97192712, 17.33504028, 18.6174601,
19.80180672, 11.39678511, 12.75075899), BB_1 = c(999, 999, 999,
20.29270832, 18.83026109, 19.19860066, 23.84040584, 19.900987,
12.97374373, 13.03291435)), class = "data.frame", row.names = c(NA,
-10L))
#> Gene_Symbols AB_0 AC_0 BA_1 BB_1
#> 1 P53 22.79534 21.73594 22.20145 999.00000
#> 2 AQP1 20.82378 23.07210 21.89332 999.00000
#> 3 ACSL1 999.00000 999.00000 999.00000 999.00000
#> 4 CD68 16.99487 17.80650 17.54696 20.29271
#> 5 GAPDH 14.83797 15.25164 15.97193 18.83026
#> 6 GAPDH 15.28975 15.50515 17.33504 19.19860
#> 7 B2M 17.94265 17.95127 18.61746 23.84041
#> 8 B2M 18.14912 17.72264 19.80181 19.90099
#> 9 HPRT1 10.88821 10.76670 11.39679 12.97374
#> 10 HPRT1 11.12897 10.90362 12.75076 13.03291
2. **Replace value "999" with empty**
Ct_val[Ct_val == 999] <- ""
dput(Exlude_NAs)
structure(list(Gene_Symbols = c("P53", "AQP1", "ACSL1", "CD68",
"GAPDH", "GAPDH", "B2M", "B2M", "HPRT1", "HPRT1"), AB_0 = c(22.79533592,
20.82377817, NA, 16.99486578, 14.83797255, 15.28974757, 17.94264512,
18.14911598, 10.88820582, 11.12897198), AC_0 = c(21.73593846,
23.07210088, NA, 17.8064999, 15.25164332, 15.50515474, 17.9512724,
17.72263561, 10.76670429, 10.90362257), BA_1 = c(22.20145134,
21.8933239, NA, 17.5469647, 15.97192712, 17.33504028, 18.6174601,
19.80180672, 11.39678511, 12.75075899), BB_1 = c(NA, NA, NA,
20.29270832, 18.83026109, 19.19860066, 23.84040584, 19.900987,
12.97374373, 13.03291435)), class = "data.frame", row.names = c(NA,
-10L))
#> Gene_Symbols AB_0 AC_0 BA_1 BB_1
#> 1 P53 22.79534 21.73594 22.20145 NA
#> 2 AQP1 20.82378 23.07210 21.89332 NA
#> 3 ACSL1 NA NA NA NA
#> 4 CD68 16.99487 17.80650 17.54696 20.29271
#> 5 GAPDH 14.83797 15.25164 15.97193 18.83026
#> 6 GAPDH 15.28975 15.50515 17.33504 19.19860
#> 7 B2M 17.94265 17.95127 18.61746 23.84041
#> 8 B2M 18.14912 17.72264 19.80181 19.90099
#> 9 HPRT1 10.88821 10.76670 11.39679 12.97374
#> 10 HPRT1 11.12897 10.90362 12.75076 13.03291
3. **Calculate geometric mean of "GAPDH", "B2M", and "HPRT1"** and append to same table
# GM_RG <- c("GAPDH", "B2M", "HPRT1")
# exp(mean(log(x)))
dput(Ct_val_GM)
structure(list(Gene_Symbols = c("P53", "AQP1", "ACSL1", "CD68",
"GM_RG"), AB_0 = c(22.79533592, 20.82377817, NA, 16.99486578,
14.40969202), AC_0 = c(21.73593846, 23.07210088, NA, 17.8064999,
14.37733242), BA_1 = c(22.20145134, 21.8933239, NA, 17.5469647,
15.67488284), BB_1 = c(NA, NA, NA, 20.29270832, 17.52818058)), class = "data.frame", row.names = c(NA,
-5L))
#> Gene_Symbols AB_0 AC_0 BA_1 BB_1
#> 1 P53 22.79534 21.73594 22.20145 NA
#> 2 AQP1 20.82378 23.07210 21.89332 NA
#> 3 ACSL1 NA NA NA NA
#> 4 CD68 16.99487 17.80650 17.54696 20.29271
#> 5 GM_RG 14.40969 14.37733 15.67488 17.52818
4. **Subtract each row in the Gene_Symbols column from GM_RG row**
# For instance; P53 - GM_RG = 22.79534 - 14.40969 = 8.385644
dput(Delta_Ct)
structure(list(Gene_Symbols = c("P53", "AQP1", "ACSL1", "CD68",
"GM_RG"), AB_0 = c(8.385643896, 6.414086146, NA, 2.585173756,
0), AC_0 = c(7.35860604, 8.69476846, NA, 3.42916748, 0), BA_1 = c(6.526568501,
6.218441061, NA, 1.872081861, 0), BB_1 = c(-17.52818058, -17.52818058,
NA, 2.764527736, 0)), class = "data.frame", row.names = c(NA,
-5L))
#> Gene_Symbols AB_0 AC_0 BA_1 BB_1
#> 1 P53 8.385644 7.358606 6.526569 -17.528181
#> 2 AQP1 6.414086 8.694768 6.218441 -17.528181
#> 3 ACSL1 NA NA NA NA
#> 4 CD68 2.585174 3.429167 1.872082 2.764528
#> 5 GM_RG 0.000000 0.000000 0.000000 0.000000
5. **Multiply Delta_Ct by -1**
# Negative_Delta_Ct <- Delta_Ct * (-1)
dput(Negative_Delta_Ct)
structure(list(Gene_Symbols = c("P53", "AQP1", "ACSL1", "CD68",
"GM_RG"), AB_0 = c(-8.385643896, -6.414086146, NA, -2.585173756,
0), AC_0 = c(-7.35860604, -8.69476846, NA, -3.42916748, 0), BA_1 = c(-6.526568501,
-6.218441061, NA, -1.872081861, 0), BB_1 = c(17.52818058, 17.52818058,
NA, -2.764527736, 0)), class = "data.frame", row.names = c(NA,
-5L))
#> Gene_Symbols AB_0 AC_0 BA_1 BB_1
#> 1 P53 -8.385644 -7.358606 -6.526569 17.528181
#> 2 AQP1 -6.414086 -8.694768 -6.218441 17.528181
#> 3 ACSL1 NA NA NA NA
#> 4 CD68 -2.585174 -3.429167 -1.872082 -2.764528
#> 5 GM_RG 0.000000 0.000000 0.000000 0.000000
Created on 2022-01-25 by the reprex package (v2.0.1)
Thank you,
Toufiq