Error in calculating subtraction between columns in R

mtoufiq · February 1, 2022, 11:59am

Hi,

I am working with a dataset and trying to calculate difference between one column (average of Untreated Cells 6hr columns from sample metafile) against all other columns in the dataframe in R. After running, I see that if there are more than one NA entries in that row in the dataframe then the output is assigned NA across all. Is there are way to solve this?

Example code is give below:

dput(Orig_df)
structure(list(`7B_0h-1` = c(7.01903, 5.84523, 7.15273, 9.02213, 
                             3.89193), `64A_0h-1` = c(7.58907, 5.61506, 6.34912, NA, NA), 
               `45A_0h-1` = c(6.76282, 5.80398, 11.1555, 7.13648, 9.6094
               ), `45B_0h-1` = c(6.71095, 4.73251, 10.5722, 10.4395, 9.21919
               ), `34A_0h-1` = c(6.80696, 5.13826, NA, 9.8541, 10.5822), 
               `34B_0h-1` = c(5.37741, 7.41889, 7.54412, 6.40762, 5.65587
               ), `7B_6h-15` = c(8.11014, 5.89484, 7.28234, NA, 6.04984), 
               `64A_6h-15` = c(NA, 5.54925, 4.02645, NA, NA), `45A_6h-15` = c(8.17386, 
                                                                              4.98107, 9.60765, NA, 7.313), `45B_6h-15` = c(6.44788, 5.32679, 
                                                                                                                            8.05785, NA, 6.75502), `34A_6h-15` = c(7.32186, 5.30839, 
                                                                                                                                                                   12.7447, 10.0773, 7.69417), `34B_6h-15` = c(NA, 5.62144, 
                                                                                                                                                                                                               9.05329, NA, 7.05311), `64B_0h-1` = c(8.38255, 5.70344, 10.8594, 
                                                                                                                                                                                                                                                     11.3011, NA), `64B_6h-15` = c(8.12568, 5.09649, 10.4627, 
                                                                                                                                                                                                                                                                                   9.95098, 6.62127), `7A_0h-1` = c(6.30902, 4.52432, 6.90822, 
                                                                                                                                                                                                                                                                                                                    6.90252, 15.0152), `7A_6h-15` = c(7.22863, 4.38373, 4.26793, 
                                                                                                                                                                                                                                                                                                                                                      7.47973, 5.22263)), class = "data.frame", row.names = c("ACTR5", 
                                                                                                                                                                                                                                                                                                                                                                                                              "ACTR8", "ADAM33", "AQP1", "AREG"))
#>        7B_0h-1 64A_0h-1 45A_0h-1 45B_0h-1 34A_0h-1 34B_0h-1 7B_6h-15 64A_6h-15
#> ACTR5  7.01903  7.58907  6.76282  6.71095  6.80696  5.37741  8.11014        NA
#> ACTR8  5.84523  5.61506  5.80398  4.73251  5.13826  7.41889  5.89484   5.54925
#> ADAM33 7.15273  6.34912 11.15550 10.57220       NA  7.54412  7.28234   4.02645
#> AQP1   9.02213       NA  7.13648 10.43950  9.85410  6.40762       NA        NA
#> AREG   3.89193       NA  9.60940  9.21919 10.58220  5.65587  6.04984        NA
#>        45A_6h-15 45B_6h-15 34A_6h-15 34B_6h-15 64B_0h-1 64B_6h-15  7A_0h-1
#> ACTR5    8.17386   6.44788   7.32186        NA  8.38255   8.12568  6.30902
#> ACTR8    4.98107   5.32679   5.30839   5.62144  5.70344   5.09649  4.52432
#> ADAM33   9.60765   8.05785  12.74470   9.05329 10.85940  10.46270  6.90822
#> AQP1          NA        NA  10.07730        NA 11.30110   9.95098  6.90252
#> AREG     7.31300   6.75502   7.69417   7.05311       NA   6.62127 15.01520
#>        7A_6h-15
#> ACTR5   7.22863
#> ACTR8   4.38373
#> ADAM33  4.26793
#> AQP1    7.47973
#> AREG    5.22263

dput(Sample_Meta_file)
structure(list(Ligand = c("0h", "0h", "0h", "0h", "0h", "0h", 
                          "0h", "0h", "6h", "6h", "6h", "6h", "6h", "6h", "6h", "6h"), 
               Ligand_Timepoints = c("Untreated Cells 0hr", "Untreated Cells 0hr", 
                                     "Untreated Cells 0hr", "Untreated Cells 0hr", "Untreated Cells 0hr", 
                                     "Untreated Cells 0hr", "Untreated Cells 0hr", "Untreated Cells 0hr", 
                                     "Untreated Cells 6hr", "Untreated Cells 6hr", "Untreated Cells 6hr", 
                                     "Untreated Cells 6hr", "Untreated Cells 6hr", "Untreated Cells 6hr", 
                                     "Untreated Cells 6hr", "Untreated Cells 6hr")), row.names = c("7B_0h-1", 
                                                                                                   "64A_0h-1", "45A_0h-1", "45B_0h-1", "34A_0h-1", "34B_0h-1", "64B_0h-1", 
                                                                                                   "7A_0h-1", "7B_6h-15", "64A_6h-15", "45A_6h-15", "45B_6h-15", 
                                                                                                   "34A_6h-15", "34B_6h-15", "64B_6h-15", "7A_6h-15"), class = "data.frame")
#>           Ligand   Ligand_Timepoints
#> 7B_0h-1       0h Untreated Cells 0hr
#> 64A_0h-1      0h Untreated Cells 0hr
#> 45A_0h-1      0h Untreated Cells 0hr
#> 45B_0h-1      0h Untreated Cells 0hr
#> 34A_0h-1      0h Untreated Cells 0hr
#> 34B_0h-1      0h Untreated Cells 0hr
#> 64B_0h-1      0h Untreated Cells 0hr
#> 7A_0h-1       0h Untreated Cells 0hr
#> 7B_6h-15      6h Untreated Cells 6hr
#> 64A_6h-15     6h Untreated Cells 6hr
#> 45A_6h-15     6h Untreated Cells 6hr
#> 45B_6h-15     6h Untreated Cells 6hr
#> 34A_6h-15     6h Untreated Cells 6hr
#> 34B_6h-15     6h Untreated Cells 6hr
#> 64B_6h-15     6h Untreated Cells 6hr
#> 7A_6h-15      6h Untreated Cells 6hr

#prepare data for Difference calculation analysis
###########
df_raw = as.matrix(Orig_df)          
sample_info = Sample_Meta_file  

#### make sure that expression matrix and sample information are the same order
df_raw = df_raw[,rownames(sample_info)]
colnames(df_raw) == rownames(sample_info)


# Difference
Diff.mod.ind.sin <- df_raw[,]
Diff.mod.ind.sin [,] <- NA


k=1
for (k in 1:nrow(df_raw)) {
  signature = rownames(df_raw)[k]
  test.table <- sample_info 
  test.table$scores <- df_raw[k,]
  T4 <- test.table
  T3 <- test.table[test.table$Ligand_Timepoints %in% c("Untreated Cells 6hr"),]
  Diff.mod.ind.sin[k,] <- (T4$scores-(mean(T3$scores)))
}

Diff.mod.ind.sin <- as.data.frame(Diff.mod.ind.sin)

dput(Diff.mod.ind.sin)
structure(list(`7B_0h-1` = c(NA, 0.57498, -1.03513375, NA, NA
), `64A_0h-1` = c(NA, 0.34481, -1.83874375, NA, NA), `45A_0h-1` = c(NA, 
                                                                    0.53373, 2.96763625, NA, NA), `45B_0h-1` = c(NA, -0.537739999999999, 
                                                                                                                 2.38433625, NA, NA), `34A_0h-1` = c(NA, -0.13199, NA, NA, NA), 
`34B_0h-1` = c(NA, 2.14864, -0.64374375, NA, NA), `64B_0h-1` = c(NA, 
                                                                 0.43319, 2.67153625, NA, NA), `7A_0h-1` = c(NA, -0.74593, 
                                                                                                             -1.27964375, NA, NA), `7B_6h-15` = c(NA, 0.62459, -0.90552375, 
                                                                                                                                                  NA, NA), `64A_6h-15` = c(NA, 0.279, -4.16141375, NA, NA), 
`45A_6h-15` = c(NA, -0.28918, 1.41978625, NA, NA), `45B_6h-15` = c(NA, 
                                                                   0.05654, -0.13001375, NA, NA), `34A_6h-15` = c(NA, 0.0381400000000003, 
                                                                                                                  4.55683625, NA, NA), `34B_6h-15` = c(NA, 0.35119, 0.865426250000001, 
                                                                                                                                                       NA, NA), `64B_6h-15` = c(NA, -0.17376, 2.27483625, NA, NA
                                                                                                                                                       ), `7A_6h-15` = c(NA, -0.88652, -3.91993375, NA, NA)), class = "data.frame", row.names = c("ACTR5", 
                                                                                                                                                                                                                                                  "ACTR8", "ADAM33", "AQP1", "AREG"))
#>          7B_0h-1  64A_0h-1 45A_0h-1  45B_0h-1 34A_0h-1   34B_0h-1 64B_0h-1
#> ACTR5         NA        NA       NA        NA       NA         NA       NA
#> ACTR8   0.574980  0.344810 0.533730 -0.537740 -0.13199  2.1486400 0.433190
#> ADAM33 -1.035134 -1.838744 2.967636  2.384336       NA -0.6437437 2.671536
#> AQP1          NA        NA       NA        NA       NA         NA       NA
#> AREG          NA        NA       NA        NA       NA         NA       NA
#>          7A_0h-1   7B_6h-15 64A_6h-15 45A_6h-15  45B_6h-15 34A_6h-15 34B_6h-15
#> ACTR5         NA         NA        NA        NA         NA        NA        NA
#> ACTR8  -0.745930  0.6245900  0.279000 -0.289180  0.0565400  0.038140 0.3511900
#> ADAM33 -1.279644 -0.9055237 -4.161414  1.419786 -0.1300138  4.556836 0.8654263
#> AQP1          NA         NA        NA        NA         NA        NA        NA
#> AREG          NA         NA        NA        NA         NA        NA        NA
#>        64B_6h-15  7A_6h-15
#> ACTR5         NA        NA
#> ACTR8  -0.173760 -0.886520
#> ADAM33  2.274836 -3.919934
#> AQP1          NA        NA
#> AREG          NA        NA

^{Created on 2022-02-01 by the reprex package (v2.0.1)}

Thank you,

Toufiq

FJCC · February 1, 2022, 6:06pm

Does adding na.rm = TRUE to the following line fix the problem?

Diff.mod.ind.sin[k,] <- (T4$scores-(mean(T3$scores,na.rm = TRUE)))

mtoufiq · February 1, 2022, 8:42pm

Hi @FJCC

Thank you very much for the suggestions. This solved my query.

system · February 8, 2022, 8:42pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.