Hi all,
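I have a large tibble and would like to collapse its rows so that there is one row per tissue, with all of that tissue's sample IDs combined into a single comma-separated string.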
library(tidyverse)
# Tibble I have:
# Example input: one row per tissue / rfi / trial combination, with the
# sample IDs of that combination stored as a single comma-separated string.
tib_1 <- tibble(
  tissue = c(
    "Duodenum", "Duodenum", "Duodenum", "Duodenum",
    "Ileum", "Ileum", "Ileum", "Ileum",
    "Jejunum", "Jejunum", "Jejunum", "Jejunum"
  ),
  rfi = c(
    "high", "high", "low", "low",
    "high", "high", "low", "low",
    "high", "high", "low", "low"
  ),
  trial = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2),
  sample_ids = c(
    "1,2,3", "4,5,6", "7,8", "9,10",
    "11,12,13", "14,15", "16,17,18,19", "20,21,22",
    "23,24,25,26", "27,28,29,30", "31,32,33", "34,35,36,37"
  )
) |>
  # across() replaces the superseded mutate_at(); the three grouping columns
  # become factors, sample_ids stays character.
  mutate(across(c(tissue, rfi, trial), as.factor))
tib_1
# Tibble I want:
# Desired output: one row per tissue, with every sample ID of that tissue
# joined into a single comma-separated string.
tib_2 <- tibble(
  tissue = c("Duodenum", "Ileum", "Jejunum"),
  sample_ids = c(
    "1,2,3,4,5,6,7,8,9,10",
    "11,12,13,14,15,16,17,18,19,20,21,22",
    "23,24,25,26,27,28,29,30,31,32,33,34,35,36,37"
  )
) |>
  # Plain mutate() replaces the superseded mutate_at() for a single column.
  mutate(tissue = as.factor(tissue))
tib_2
# My best attempt so far (still not working) is a workaround that loops over the data of another tibble, which is not shown here.
# Build one row per tissue, holding that tissue's sorted, unique sample IDs
# in a list-column.
#
# The earlier loop called rbind() without assigning its result, so
# tissue_sample_lists was never populated. Rows are now produced with map()
# and combined once with bind_rows() instead of growing a tibble in a loop.
#
# Note: for tib_1 itself (comma-separated strings) no loop is needed:
#   tib_1 |>
#     group_by(tissue) |>
#     summarise(sample_ids = str_c(sample_ids, collapse = ","))
tissues <- c("Duodenum", "Ileum", "Jejunum")
tissue_sample_lists <- tissues |>
  map(\(current_tissue) {
    ids <- fastq_annotations_joined_cleaned |>
      filter(tissue == current_tissue) |>
      pull(sample_id)
    # list() wraps the ID vector so it occupies a single list-column cell.
    tibble(tissue = current_tissue, sample_ids = list(sort(unique(ids))))
  }) |>
  bind_rows()
tissue_sample_lists
I have tried group_by, filter, select, and extracting the sample_ids column and creating a new tibble with a loop, but it all seems so complex that I'm sure there must be a simpler way to do this.
Thanks all in advance,
Kenneth