Hi,
I am working with a dataset and trying to calculate difference between one column (average of Untreated Cells 6hr
columns from sample metafile) against all other columns in the dataframe in R. After running, I see that if there are more than one NA entries in that row in the dataframe then the output is assigned NA across all. Is there are way to solve this?
Example code is give below:
dput(Orig_df)
structure(list(`7B_0h-1` = c(7.01903, 5.84523, 7.15273, 9.02213,
3.89193), `64A_0h-1` = c(7.58907, 5.61506, 6.34912, NA, NA),
`45A_0h-1` = c(6.76282, 5.80398, 11.1555, 7.13648, 9.6094
), `45B_0h-1` = c(6.71095, 4.73251, 10.5722, 10.4395, 9.21919
), `34A_0h-1` = c(6.80696, 5.13826, NA, 9.8541, 10.5822),
`34B_0h-1` = c(5.37741, 7.41889, 7.54412, 6.40762, 5.65587
), `7B_6h-15` = c(8.11014, 5.89484, 7.28234, NA, 6.04984),
`64A_6h-15` = c(NA, 5.54925, 4.02645, NA, NA), `45A_6h-15` = c(8.17386,
4.98107, 9.60765, NA, 7.313), `45B_6h-15` = c(6.44788, 5.32679,
8.05785, NA, 6.75502), `34A_6h-15` = c(7.32186, 5.30839,
12.7447, 10.0773, 7.69417), `34B_6h-15` = c(NA, 5.62144,
9.05329, NA, 7.05311), `64B_0h-1` = c(8.38255, 5.70344, 10.8594,
11.3011, NA), `64B_6h-15` = c(8.12568, 5.09649, 10.4627,
9.95098, 6.62127), `7A_0h-1` = c(6.30902, 4.52432, 6.90822,
6.90252, 15.0152), `7A_6h-15` = c(7.22863, 4.38373, 4.26793,
7.47973, 5.22263)), class = "data.frame", row.names = c("ACTR5",
"ACTR8", "ADAM33", "AQP1", "AREG"))
#> 7B_0h-1 64A_0h-1 45A_0h-1 45B_0h-1 34A_0h-1 34B_0h-1 7B_6h-15 64A_6h-15
#> ACTR5 7.01903 7.58907 6.76282 6.71095 6.80696 5.37741 8.11014 NA
#> ACTR8 5.84523 5.61506 5.80398 4.73251 5.13826 7.41889 5.89484 5.54925
#> ADAM33 7.15273 6.34912 11.15550 10.57220 NA 7.54412 7.28234 4.02645
#> AQP1 9.02213 NA 7.13648 10.43950 9.85410 6.40762 NA NA
#> AREG 3.89193 NA 9.60940 9.21919 10.58220 5.65587 6.04984 NA
#> 45A_6h-15 45B_6h-15 34A_6h-15 34B_6h-15 64B_0h-1 64B_6h-15 7A_0h-1
#> ACTR5 8.17386 6.44788 7.32186 NA 8.38255 8.12568 6.30902
#> ACTR8 4.98107 5.32679 5.30839 5.62144 5.70344 5.09649 4.52432
#> ADAM33 9.60765 8.05785 12.74470 9.05329 10.85940 10.46270 6.90822
#> AQP1 NA NA 10.07730 NA 11.30110 9.95098 6.90252
#> AREG 7.31300 6.75502 7.69417 7.05311 NA 6.62127 15.01520
#> 7A_6h-15
#> ACTR5 7.22863
#> ACTR8 4.38373
#> ADAM33 4.26793
#> AQP1 7.47973
#> AREG 5.22263
dput(Sample_Meta_file)
structure(list(Ligand = c("0h", "0h", "0h", "0h", "0h", "0h",
"0h", "0h", "6h", "6h", "6h", "6h", "6h", "6h", "6h", "6h"),
Ligand_Timepoints = c("Untreated Cells 0hr", "Untreated Cells 0hr",
"Untreated Cells 0hr", "Untreated Cells 0hr", "Untreated Cells 0hr",
"Untreated Cells 0hr", "Untreated Cells 0hr", "Untreated Cells 0hr",
"Untreated Cells 6hr", "Untreated Cells 6hr", "Untreated Cells 6hr",
"Untreated Cells 6hr", "Untreated Cells 6hr", "Untreated Cells 6hr",
"Untreated Cells 6hr", "Untreated Cells 6hr")), row.names = c("7B_0h-1",
"64A_0h-1", "45A_0h-1", "45B_0h-1", "34A_0h-1", "34B_0h-1", "64B_0h-1",
"7A_0h-1", "7B_6h-15", "64A_6h-15", "45A_6h-15", "45B_6h-15",
"34A_6h-15", "34B_6h-15", "64B_6h-15", "7A_6h-15"), class = "data.frame")
#> Ligand Ligand_Timepoints
#> 7B_0h-1 0h Untreated Cells 0hr
#> 64A_0h-1 0h Untreated Cells 0hr
#> 45A_0h-1 0h Untreated Cells 0hr
#> 45B_0h-1 0h Untreated Cells 0hr
#> 34A_0h-1 0h Untreated Cells 0hr
#> 34B_0h-1 0h Untreated Cells 0hr
#> 64B_0h-1 0h Untreated Cells 0hr
#> 7A_0h-1 0h Untreated Cells 0hr
#> 7B_6h-15 6h Untreated Cells 6hr
#> 64A_6h-15 6h Untreated Cells 6hr
#> 45A_6h-15 6h Untreated Cells 6hr
#> 45B_6h-15 6h Untreated Cells 6hr
#> 34A_6h-15 6h Untreated Cells 6hr
#> 34B_6h-15 6h Untreated Cells 6hr
#> 64B_6h-15 6h Untreated Cells 6hr
#> 7A_6h-15 6h Untreated Cells 6hr
#prepare data for Difference calculation analysis
###########
df_raw = as.matrix(Orig_df)
sample_info = Sample_Meta_file
#### make sure that expression matrix and sample information are the same order
df_raw = df_raw[,rownames(sample_info)]
colnames(df_raw) == rownames(sample_info)
# Difference
Diff.mod.ind.sin <- df_raw[,]
Diff.mod.ind.sin [,] <- NA
k=1
for (k in 1:nrow(df_raw)) {
signature = rownames(df_raw)[k]
test.table <- sample_info
test.table$scores <- df_raw[k,]
T4 <- test.table
T3 <- test.table[test.table$Ligand_Timepoints %in% c("Untreated Cells 6hr"),]
Diff.mod.ind.sin[k,] <- (T4$scores-(mean(T3$scores)))
}
Diff.mod.ind.sin <- as.data.frame(Diff.mod.ind.sin)
dput(Diff.mod.ind.sin)
structure(list(`7B_0h-1` = c(NA, 0.57498, -1.03513375, NA, NA
), `64A_0h-1` = c(NA, 0.34481, -1.83874375, NA, NA), `45A_0h-1` = c(NA,
0.53373, 2.96763625, NA, NA), `45B_0h-1` = c(NA, -0.537739999999999,
2.38433625, NA, NA), `34A_0h-1` = c(NA, -0.13199, NA, NA, NA),
`34B_0h-1` = c(NA, 2.14864, -0.64374375, NA, NA), `64B_0h-1` = c(NA,
0.43319, 2.67153625, NA, NA), `7A_0h-1` = c(NA, -0.74593,
-1.27964375, NA, NA), `7B_6h-15` = c(NA, 0.62459, -0.90552375,
NA, NA), `64A_6h-15` = c(NA, 0.279, -4.16141375, NA, NA),
`45A_6h-15` = c(NA, -0.28918, 1.41978625, NA, NA), `45B_6h-15` = c(NA,
0.05654, -0.13001375, NA, NA), `34A_6h-15` = c(NA, 0.0381400000000003,
4.55683625, NA, NA), `34B_6h-15` = c(NA, 0.35119, 0.865426250000001,
NA, NA), `64B_6h-15` = c(NA, -0.17376, 2.27483625, NA, NA
), `7A_6h-15` = c(NA, -0.88652, -3.91993375, NA, NA)), class = "data.frame", row.names = c("ACTR5",
"ACTR8", "ADAM33", "AQP1", "AREG"))
#> 7B_0h-1 64A_0h-1 45A_0h-1 45B_0h-1 34A_0h-1 34B_0h-1 64B_0h-1
#> ACTR5 NA NA NA NA NA NA NA
#> ACTR8 0.574980 0.344810 0.533730 -0.537740 -0.13199 2.1486400 0.433190
#> ADAM33 -1.035134 -1.838744 2.967636 2.384336 NA -0.6437437 2.671536
#> AQP1 NA NA NA NA NA NA NA
#> AREG NA NA NA NA NA NA NA
#> 7A_0h-1 7B_6h-15 64A_6h-15 45A_6h-15 45B_6h-15 34A_6h-15 34B_6h-15
#> ACTR5 NA NA NA NA NA NA NA
#> ACTR8 -0.745930 0.6245900 0.279000 -0.289180 0.0565400 0.038140 0.3511900
#> ADAM33 -1.279644 -0.9055237 -4.161414 1.419786 -0.1300138 4.556836 0.8654263
#> AQP1 NA NA NA NA NA NA NA
#> AREG NA NA NA NA NA NA NA
#> 64B_6h-15 7A_6h-15
#> ACTR5 NA NA
#> ACTR8 -0.173760 -0.886520
#> ADAM33 2.274836 -3.919934
#> AQP1 NA NA
#> AREG NA NA
Created on 2022-02-01 by the reprex package (v2.0.1)
Thank you,
Toufiq