I hope someone can help with this problem!
I'm trying to melt this table, the issue is that my headers have similar names and when imported into Rstudio the header are modified by Rstudio and it goes from NDUFS4 to NDUFS4.1, NDUFS4.2 etc
The data frame looks like that before melt
df <- read.csv("barplot-error-bars.csv")
df
ID NDUFS4 NDUFS4.1 NDUFS4.2 NDUFS4.3
1 AGlcSiE 0.003180784 0.0024272530 0.002375452 0.0023496160
2 Cer 0.013791754 0.0126248460 0.013400419 0.0119379920
3 CL 0.000486482 0.0003401490 0.000404080 0.0004320290
4 DG 0.026223988 0.0238132270 0.024463572 0.0257862250
5 DGDG 0.027734765 0.0284323830 0.027978220 0.0305652530
6 LPC 0.000052614 0.0000436625 0.000033694 0.0000339243
7 MGDG 0.153828915 0.1532680840 0.142490287 0.1636577930
8 PC 0.512484448 0.5241883360 0.543522684 0.5128264150
9 PE 0.014686845 0.0133621010 0.013992584 0.0159446760
And then I use melt :
dflong <- melt(df, id.vars=c("ID"))
ID variable value
1 AGlcSiE NDUFS4 0.0031807840
2 Cer NDUFS4 0.0137917540
3 CL NDUFS4 0.0004864820
4 DG NDUFS4 0.0262239880
...
19 AGlcSiE NDUFS4.1 0.0024272530
20 Cer NDUFS4.1 0.0126248460
21 CL NDUFS4.1 0.0003401490
22 DG NDUFS4.1 0.0238132270
...
38 Cer NDUFS4.2 0.0134004190
39 CL NDUFS4.2 0.0004040800
40 DG NDUFS4.2 0.0244635720
41 DGDG NDUFS4.2 0.0279782200
42 LPC NDUFS4.2 0.0000336940
etc etc
I'd like to get rid of the .1 , .2 , .3 after NDUFS4 that Rtsudio added when reading the csv. I've tried renaming the colum headers :
names(df)[names(df) == "NDUFS.4.1"] <- "NDUFS.4"
It works but then the melt function doesn't really work, I end up with a 54row data frame which means something went wrong, I'm guessing because the columns have the same name, that would make sense.
So I've decided to create a new column to get rid of the .1, .2, .3 and to do so I use the mutate function :
dflong %>% mutate(genotype = case_when(
startsWith(variable, "NDFUS4") ~ "NDUFS4",
startsWith(variable, "NDUFS8") ~ "NDUFS8",
startsWith(variable,"col0") ~"Col0" ))
but I get this error message :
> dflong %>% mutate(genotype = case_when(
+ startsWith(variable, "NDFUS.4") ~ "NDUFS-4",
+ startsWith(variable, "NDUFS.8") ~ "NDUFS-8",
+ startsWith(variable,"col0") ~"Col0" ))
Error: Problem with `mutate()` column `genotype`.
i `genotype = case_when(...)`.
x non-character object(s)
Run `rlang::last_error()` to see where the error occurred.
I've tried using as.character :
dfchar <- dflong
dfchar <- as.character(dflong$variable)
sapply(dflong, class)
dfchar %>% mutate(genotype = case_when(
startsWith(variable, "NDFUS.4") ~ "NDUFS-4",
startsWith(variable, "NDUFS.8") ~ "NDUFS-8",
startsWith(variable,"col0") ~"Col0" ))
But doesn't work either :
> dfchar <- dflong
> dfchar <- as.character(dflong$variable)
> dfchar %>% mutate(genotype = case_when(
+ startsWith(variable, "NDFUS.4") ~ "NDUFS-4",
+ startsWith(variable, "NDUFS.8") ~ "NDUFS-8",
+ startsWith(variable,"col0") ~"Col0" ))
Error in UseMethod("mutate") :
no applicable method for 'mutate' applied to an object of class "character"
It doesn't seems like a difficult thing to do but I'm out of idea and I haven't found a good solution online .. If anyone has an idea to get the melt function to work with header with the same name or a workaround I'm up for it !
Thank you for reading ! Cheers, JB
Here is the reprex :
library(ggplot2)
library(tidyr)
library(reshape2)
#>
#> Attaching package: 'reshape2'
#> The following object is masked from 'package:tidyr':
#>
#> smiths
library(ggpubr)
library(rstatix)
#>
#> Attaching package: 'rstatix'
#> The following object is masked from 'package:stats':
#>
#> filter
library(ggsignif)
#> Warning: package 'ggsignif' was built under R version 4.1.2
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(readxl)
library(data)
#> Error in library(data): there is no package called 'data'
library(reprex)
#> Warning: package 'reprex' was built under R version 4.1.2
library(datapasta)
#> Warning: package 'datapasta' was built under R version 4.1.2
lipid <- tibble::tribble(
~ID, ~NDUFS4, ~NDUFS4, ~NDUFS4, ~NDUFS4, ~NDUFS4, ~NDUFS8, ~NDUFS8, ~NDUFS8, ~NDUFS8, ~NDUFS8,
"AGlcSiE", 0.003180784, 0.002427253, 0.002375452, 0.002349616, 0.00266144, 0.003506735, 0.003375117, 0.002578025, 0.003436613, 0.003023535,
"Cer", 0.013791754, 0.012624846, 0.013400419, 0.011937992, 0.012569685, 0.011350475, 0.014363136, 0.010953191, 0.01349187, 0.011513584,
"CL", 0.000486482, 0.000340149, 0.00040408, 0.000432029, 0.000360108, 0.000328307, 0.000298621, 0.000283055, 0.000287209, 0.000250417,
"DG", 0.026223988, 0.023813227, 0.024463572, 0.025786225, 0.024239642, 0.022124243, 0.024045039, 0.023323389, 0.021944435, 0.020865578,
"DGDG", 0.027734765, 0.028432383, 0.02797822, 0.030565253, 0.030171369, 0.029847796, 0.027652731, 0.030043518, 0.026074548, 0.030090703,
"LPC", 5.2614e-05, 4.36625e-05, 3.3694e-05, 3.39243e-05, 4.72809e-05, 5.20176e-05, 3.35985e-05, 4.05347e-05, 6.80396e-05, 3.84779e-05,
"MGDG", 0.153828915, 0.153268084, 0.142490287, 0.163657793, 0.159502502, 0.151857514, 0.137140745, 0.160664474, 0.14435253, 0.157443321,
"PC", 0.512484448, 0.524188336, 0.543522684, 0.512826415, 0.513016279, 0.547773992, 0.546104352, 0.537629742, 0.564226192, 0.544492407
)
head(lipid)
#> # A tibble: 6 x 11
#> ID NDUFS4 NDUFS4 NDUFS4 NDUFS4 NDUFS4 NDUFS8 NDUFS8 NDUFS8 NDUFS8
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 AGlcS~ 3.18e-3 2.43e-3 2.38e-3 2.35e-3 2.66e-3 3.51e-3 3.38e-3 2.58e-3 3.44e-3
#> 2 Cer 1.38e-2 1.26e-2 1.34e-2 1.19e-2 1.26e-2 1.14e-2 1.44e-2 1.10e-2 1.35e-2
#> 3 CL 4.86e-4 3.40e-4 4.04e-4 4.32e-4 3.60e-4 3.28e-4 2.99e-4 2.83e-4 2.87e-4
#> 4 DG 2.62e-2 2.38e-2 2.45e-2 2.58e-2 2.42e-2 2.21e-2 2.40e-2 2.33e-2 2.19e-2
#> 5 DGDG 2.77e-2 2.84e-2 2.80e-2 3.06e-2 3.02e-2 2.98e-2 2.77e-2 3.00e-2 2.61e-2
#> 6 LPC 5.26e-5 4.37e-5 3.37e-5 3.39e-5 4.73e-5 5.20e-5 3.36e-5 4.05e-5 6.80e-5
#> # ... with 1 more variable: NDUFS8 <dbl>