Hello. I am using the code below to order the columns Sample_ID_1 and Sample_ID_2 in a specific order for each K. I believe this does work. However, in the final step -- when I merge the data frames into one single data frame -- it seems that the factor levels are reset and I lose all the structure that had been created. Would there be a way to generate fulldf while preserving the specific factor levels? I have been trying a lot of options, but nothing seems to work. I want to use fulldfUp to create a plot with facet_grid where the order of individuals on the y-axis should vary in each K (assuming this is even feasible).
Thanks a lot in advance!
Here is a reproducible example:
fulldf <- structure(list(Sample_ID_1 = c("Crotone500", "Crotone500", "Crotone498",
"Crotone498", "Crotone498", "Crotone498", "Crotone500", "Crotone500"
), Sample_ID_2 = c("Crotone500", "Crotone498", "Crotone500",
"Crotone498", "Crotone500", "Crotone498", "Crotone500", "Crotone498"
), Population_1 = c("Crotone", "Crotone", "Crotone", "Crotone",
"Crotone", "Crotone", "Crotone", "Crotone"), Population_2 = c("Crotone",
"Crotone", "Crotone", "Crotone", "Crotone", "Crotone", "Crotone",
"Crotone"), CHRType = c("Allosome", "Allosome", "Allosome", "Allosome",
"Allosome", "Allosome", "Allosome", "Allosome"), K = c("2", "2",
"2", "2", "3", "3", "3", "3"), Value = c(NA, "0.072479", "0.072479",
NA, "0.02641", NA, NA, "0.02641")), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"))
corres <- list(structure(list(Crotone500 = c(NA, 0.072479), Crotone498 = c(0.072479,
NA), Sample_ID_1 = c("Crotone500", "Crotone498"), Population_1 = c("Crotone",
"Crotone"), CHRType = c("Allosome", "Allosome"), K = c("2", "2"
)), row.names = c("Crotone500", "Crotone498"), class = "data.frame"),
structure(list(Crotone498 = c(NA, 0.02641), Crotone500 = c(0.02641,
NA), Sample_ID_1 = c("Crotone498", "Crotone500"), Population_1 = c("Crotone",
"Crotone"), CHRType = c("Allosome", "Allosome"), K = c("3",
"3")), row.names = c("Crotone498", "Crotone500"), class = "data.frame"))
# Defines color palette and breaks ~
color_palette <- c("#023858", "#ffffff", "#a50f15")
nHalf <- 10
Min <- -.3
Max <- .3
Thresh <- 0
rc1 <- colorRampPalette(colors = color_palette[1:2], space = "Lab")(nHalf)
rc2 <- colorRampPalette(colors = color_palette[2:3], space = "Lab")(nHalf)
rampcols <- c(rc1, rc2)
rampcols[c(nHalf, nHalf+1)] <- rgb(t(col2rgb(color_palette[2])), maxColorValue = 256)
rb1 <- seq(Min, Thresh, length.out = nHalf + 1)
rb2 <- seq(Thresh, Max, length.out = nHalf + 1)[-1]
rampbreaks <- c(rb1, rb2)
# Create a list to store ordered labels for each 'K' value
ordered_labels_list <- list()
# Create a list to store unique 'K' values
unique_K_values <- unique(unlist(lapply(corres, function(df) df$K)))
# Create a list to store ordered labels for each 'K' value
ordered_labels_list <- lapply(seq_along(corres), function(k) {
ordered_K <- unique(corres[[k]]$K)
rownames(corres[[k]])
})
names(ordered_labels_list) <- as.character(unique_K_values)
# Split fulldf into a list of data frames by K
split_dfs <- split(fulldf, fulldf$K)
# Function to set factor levels based on 'K' value
set_factor_levels <- function(df, ordered_labels) {
df$Sample_ID_1 <- factor(df$Sample_ID_1, levels = ordered_labels)
df$Sample_ID_2 <- factor(df$Sample_ID_2, levels = ordered_labels)
return(df)
}
# Apply the function to each data frame in split_dfs
processed_dfs <- mapply(function(df, k) {
set_factor_levels(df, ordered_labels_list[[as.character(k)]])
}, split_dfs, names(split_dfs), SIMPLIFY = FALSE)
# Combine data frames while preserving factor levels
fulldfUp <- rbindlist(processed_dfs)
# Safely convert to numeric factor
fulldfUp <- fulldfUp %>% mutate(Sample_ID_Factor = as.numeric(Sample_ID_1))
# Calculate population positions
population_positions <- fulldfUp %>%
filter(!is.na(Sample_ID_Factor)) %>%
group_by(Population_1) %>%
summarise(center = (min(Sample_ID_Factor) + max(Sample_ID_Factor)) / 2)
# Reorders CHRType ~
fulldfUp$CHRType <- factor(fulldfUp$CHRType, ordered = TRUE,
levels = c("Autosomes",
"Allosome"))
# Reorders K ~
fulldfUp$K <- factor(fulldfUp$K, ordered = TRUE,
levels = c("3",
"2"))
# Creates heatmap ~
ggplot(fulldfUp, aes(x = Sample_ID_1, y = Sample_ID_2, fill = as.numeric(Value))) +
geom_tile(linewidth = .15, colour = "#000000") +
scale_fill_gradientn(colors = rampcols, na.value = "#d6d6d6", breaks = rampbreaks, limits = c(-.3, .3)) +
scale_x_discrete(labels = function(x) {
labels <- rep("", length(x))
for (i in seq_along(population_positions$center)) {
labels[population_positions$center[i]] <- population_positions$Population_1[i]}
return(labels)}, expand = c(0, 0), drop = FALSE) +
scale_y_discrete(labels = fulldf$Sample_ID_2, expand = c(0, 0), drop = FALSE) +
facet_grid(K ~ ., scales = "free", space = "free") +
theme(panel.background = element_rect(fill = "#ffffff"),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.spacing = unit(1, "lines"),
legend.position = "right",
legend.key = element_blank(),
legend.background = element_blank(),
legend.margin = margin(t = 0, b = 0, r = 15, l = 15),
legend.box = "vertical",
legend.box.margin = margin(t = 20, b = 30, r = 0, l = 0),
axis.title = element_blank(),
axis.text.x = element_text(color = "#000000", family = "Optima", size = 12, face = "bold", angle = 45, vjust = 1, hjust = 1),
axis.text.y = element_text(color = "#000000", family = "Optima", size = 9, face = "bold"),
axis.ticks.x = element_blank(),
axis.ticks.y = element_line(color = "#000000", linewidth = .15),
strip.text = element_text(colour = "#000000", size = 22, face = "bold", family = "Optima"),
strip.background = element_rect(colour = "#000000", fill = "#d6d6d6", linewidth = .15),
axis.line = element_line(colour = "#000000", linewidth = .15)) +
guides(fill = guide_legend(title = "", title.theme = element_text(size = 16, face = "bold"),
label.theme = element_text(size = 15), ncol = 1, reverse = TRUE))