I'm trying to take the following dataframe and take the averages of some of the columns (like reference_1 and reference_2) but I can't seem to get the columns to register as numeric.
Data:
> dput(head(all.data))
structure(list(`Identified Proteins (3404)` = c("Casein kinase II subunit beta",
"No_match_UP_pBlast", "DNA helicase", "Methanethiol oxidase_protein transport and brown fat cell differentation_pBlast",
"No_match_UP_pBlast", "Thylakoid lumenal protein, chlorplastic_pBlast"
), `Accession Number` = c("A0A383V1G7", "A0A383V1H7", "A0A383V1J2",
"A0A383V1M7", "A0A383V1R6", "A0A383V1S1"), reference_1 = c("8.0699999999999994E-2",
"1.52E-2", "-0.1225", "4.4299999999999999E-2", "-2.41E-2", "-5.5999999999999999E-3"
), reference_2 = c("-0.12620000000000001", "-3.3999999999999998E-3",
"0.113", "-5.1999999999999998E-2", "4.9799999999999997E-2", "1.5100000000000001E-2"
), An_24h_1 = c("0.4803", "-0.247", "0.2999", "0.2437", "-0.97709999999999997",
"-1.0880000000000001"), An_24h_2 = c("0.2009", "-0.40379999999999999",
"0.30609999999999998", "0.24909999999999999", "-1.0182", "-1.0642"
), An_28d_1 = c("-0.39629999999999999", "-1.0501", "0.94269999999999998",
"-0.17030000000000001", "0.46289999999999998", "-1.3814"), An_28d_2 = c("9.9299999999999999E-2",
"-0.53979999999999995", "0.83540000000000003", "-0.1983", "0.35149999999999998",
"-1.4052"), C_24h_1 = c("0.17380000000000001", "0.22459999999999999",
"0.13550000000000001", "-0.15820000000000001", "-0.96909999999999996",
"0.29039999999999999"), C_24h_2 = c("0.32090000000000002", "0.20250000000000001",
"-0.1242", "-3.0700000000000002E-2", "-0.74780000000000002",
"9.8599999999999993E-2"), C_28d_1 = c("0.61170000000000002",
"-0.35260000000000002", "0.96399999999999997", "9.4200000000000006E-2",
"-0.40229999999999999", "-0.99809999999999999"), C_28d_2 = c("0.49249999999999999",
"-0.30109999999999998", "0.55520000000000003", "3.44E-2", "-0.3841",
"-0.79159999999999997")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
My current code:
all.data<-readxl::read_xlsx("Output of all algae proteins for blast sequences.xlsx") %>%
select("Identified Proteins (3404)","Accession Number","Reference...3",
"Reference...4","Anaerobic_24h...5","Anaerobic_24h...6",
"Anaerobic_28d...7","Anaerobic_28d...8","Citric_24h...9",
"Citric_24h...10","Citric_28d...11","Citric_28d...12")
colnames(all.data)<-gsub("Reference...3","reference_1",colnames(all.data))
colnames(all.data)<-gsub("Reference...4","reference_2",colnames(all.data))
colnames(all.data)<-gsub("Anaerobic_24h...5","An_24h_1",colnames(all.data))
colnames(all.data)<-gsub("Anaerobic_24h...6","An_24h_2",colnames(all.data))
colnames(all.data)<-gsub("Anaerobic_28d...7","An_28d_1",colnames(all.data))
colnames(all.data)<-gsub("Anaerobic_28d...8","An_28d_2",colnames(all.data))
colnames(all.data)<-gsub("Citric_24h...9","C_24h_1",colnames(all.data))
colnames(all.data)<-gsub("Citric_24h...10","C_24h_2",colnames(all.data))
colnames(all.data)<-gsub("Citric_28d...11","C_28d_1",colnames(all.data))
colnames(all.data)<-gsub("Citric_28d...12","C_28d_2",colnames(all.data))
colnames(all.data)<-gsub("Identified Proteins (3404)","Protein",colnames(all.data))
I've tried using the as.numeric() command, but get the following:
> all.data %>%
+ as.numeric(all.data$reference_1)
Error in all.data %>% as.numeric(all.data$reference_1) :
'list' object cannot be coerced to type 'double'
And I've tried doing the following, which worked in my prior work but now it just puts NA into the new column:
> all.data<-all.data %>%
+ rowwise() %>%
+ mutate(reference_avg=mean(c_across(c(`reference_1`,`reference_2`))))
There were 50 or more warnings (use warnings() to see the first 50)
> warnings()
Warning messages:
1: Problem while computing `reference_avg = mean(c_across(c(reference_1, reference_2)))`.
ℹ argument is not numeric or logical: returning NA
ℹ The warning occurred in row 1.
2: Problem while computing `reference_avg = mean(c_across(c(reference_1, reference_2)))`.
ℹ argument is not numeric or logical: returning NA
ℹ The warning occurred in row 2.
3: Problem while computing `reference_avg = mean(c_across(c(reference_1, reference_2)))`.
ℹ argument is not numeric or logical: returning NA
ℹ The warning occurred in row 3.
I'm still very much a beginner in any sort of coding, so any advice is very helpful.