Hi,
I have a question about creating a loop to perform repeated calculations in R.
I have an (8 x 11) data matrix of observations and would like to calculate the p-value for each column (Gene_A, Gene_B, Gene_C, etc.) compared to a reference column (Reference).
Is there a way to loop over the columns and write the output (the p-values) to a separate column, or save it separately? My real data is a large matrix with (10000 x 100) dimensions, so performing the calculations manually would be tedious. Please assist me with this.
# Example data (as produced by dput(Dat_mat)): 8 samples in rows, 10 gene
# columns plus the `Reference` column every gene is compared against.
# The structure is assigned to `Dat_mat` so the t-tests further down can
# find it; the bare name on the last line prints the data frame.
Dat_mat <- structure(
  list(
    Gene_A = c(16.26875, 21.374, 35.7917, 21.01615,
               22.5471, 53.78655, 8.2572, 11.24755),
    Gene_B = c(16.26875, 21.374, 35.7917, 21.01615,
               22.5471, 53.78655, 8.2572, 11.24755),
    Gene_C = c(30.8274, 112.1106, 126.3887, 104.7907,
               122.3795, 90.0362, 50.6777, 100.9305),
    Gene_D = c(111.2367, 252.9354, 215.32245, 112.6046,
               203.8146, 283.54685, 153.041325, 176.373225),
    Gene_E = c(111.2367, 252.9354, 215.32245, 112.6046,
               203.8146, 283.54685, 153.041325, 176.373225),
    Gene_F = c(111.2367, 252.9354, 215.32245, 112.6046,
               203.8146, 283.54685, 153.041325, 176.373225),
    Gene_G = c(35.3882, 53.4914, 88.0871, 56.443,
               63.7323, 49.4972, 37.0928, 53.0731),
    Gene_H = c(41.27, 24.0781, 86.3814, 41.8651,
               65.2544, 145.7944, 77.7312, 36.7819),
    Gene_I = c(31.7941, 51.24006667, 40.5937, 27.23903333,
               33.96723333, 63.5719, 31.3466, 49.88686667),
    Gene_J = c(31.7941, 51.24006667, 40.5937, 27.23903333,
               33.96723333, 63.5719, 31.3466, 49.88686667),
    Reference = c(33.59115, 52.365733335, 87.23425, 49.15405,
                  64.49335, 76.80405, 43.88525, 51.479983335)
  ),
  class = "data.frame",
  row.names = c("Sample_1", "Sample_2", "Sample_3", "Sample_4",
                "Sample_5", "Sample_6", "Sample_7", "Sample_8")
)
Dat_mat
#> Gene_A Gene_B Gene_C Gene_D Gene_E Gene_F Gene_G Gene_H
#> Sample_1 16.26875 16.26875 30.8274 111.2367 111.2367 111.2367 35.3882 41.2700
#> Sample_2 21.37400 21.37400 112.1106 252.9354 252.9354 252.9354 53.4914 24.0781
#> Sample_3 35.79170 35.79170 126.3887 215.3225 215.3225 215.3225 88.0871 86.3814
#> Sample_4 21.01615 21.01615 104.7907 112.6046 112.6046 112.6046 56.4430 41.8651
#> Sample_5 22.54710 22.54710 122.3795 203.8146 203.8146 203.8146 63.7323 65.2544
#> Sample_6 53.78655 53.78655 90.0362 283.5469 283.5469 283.5469 49.4972 145.7944
#> Sample_7 8.25720 8.25720 50.6777 153.0413 153.0413 153.0413 37.0928 77.7312
#> Sample_8 11.24755 11.24755 100.9305 176.3732 176.3732 176.3732 53.0731 36.7819
#> Gene_I Gene_J Reference
#> Sample_1 31.79410 31.79410 33.59115
#> Sample_2 51.24007 51.24007 52.36573
#> Sample_3 40.59370 40.59370 87.23425
#> Sample_4 27.23903 27.23903 49.15405
#> Sample_5 33.96723 33.96723 64.49335
#> Sample_6 63.57190 63.57190 76.80405
#> Sample_7 31.34660 31.34660 43.88525
#> Sample_8 49.88687 49.88687 51.47998
### Obtain p-values
# Compare every gene column of `Dat_mat` against the reference column with
# t.test() (default settings, as in the original one-by-one calls) and
# collect the p-values in a single table instead of repeating the call
# per gene.
reference_col <- "Reference"

# Every column except the reference is tested; comparing Reference with
# itself is skipped because it trivially yields a p-value of 1.
gene_cols <- setdiff(names(Dat_mat), reference_col)

# vapply() enforces exactly one numeric p-value per gene column and keeps
# the gene names on the result.
p_values <- vapply(
  gene_cols,
  function(gene) {
    t.test(Dat_mat[[gene]], Dat_mat[[reference_col]])$p.value
  },
  numeric(1)
)

# One row per gene. To save the table separately, e.g.:
#   write.csv(p_value_table, "p_values.csv", row.names = FALSE)
p_value_table <- data.frame(gene = gene_cols, p_value = unname(p_values))
p_value_table

# p-values previously obtained from the manual one-by-one calls:
#   Gene_A 0.001095952
#   Gene_B 0.001095952
#   Gene_C 0.02739666
#   Gene_D 0.0004201088
#   Gene_E 0.0004201088
#   Gene_F 0.0004201088
#   Gene_G 0.7507277
#   Gene_H 0.6314491
#   Gene_I 0.05589539
# Gene_J was not tested manually; its data are identical to Gene_I.
Created on 2021-11-16 by the reprex package (v2.0.1)