facet_grid with different order of individuals on the y-axis for each facet

george.pacheco · January 8, 2025, 2:54am

Hello. I am using the code below to order the columns Sample_ID_1 and Sample_ID_2 in a specific order for each K. I believe this does work. However, in the final step -- when I merge the data frames into one single data frame -- it seems that the factor levels are reset and I lose all the structure that had been created. Would there be a way to generate fulldf while preserving the specific factor levels? I have been trying a lot of options, but nothing seems to work. I want to use fulldfUp to create a plot with facet_grid where the order of individuals on the y-axis should vary in each K (assuming this is even feasible).

Thanks a lot in advance!

Here is a reproducible example:

fulldf <- structure(list(Sample_ID_1 = c("Crotone500", "Crotone500", "Crotone498", 
    "Crotone498", "Crotone498", "Crotone498", "Crotone500", "Crotone500"
    ), Sample_ID_2 = c("Crotone500", "Crotone498", "Crotone500", 
    "Crotone498", "Crotone500", "Crotone498", "Crotone500", "Crotone498"
    ), Population_1 = c("Crotone", "Crotone", "Crotone", "Crotone", 
    "Crotone", "Crotone", "Crotone", "Crotone"), Population_2 = c("Crotone", 
    "Crotone", "Crotone", "Crotone", "Crotone", "Crotone", "Crotone", 
    "Crotone"), CHRType = c("Allosome", "Allosome", "Allosome", "Allosome", 
    "Allosome", "Allosome", "Allosome", "Allosome"), K = c("2", "2", 
    "2", "2", "3", "3", "3", "3"), Value = c(NA, "0.072479", "0.072479", 
    NA, "0.02641", NA, NA, "0.02641")), row.names = c(NA, -8L), class = c("tbl_df", 
    "tbl", "data.frame"))

    corres <- list(structure(list(Crotone500 = c(NA, 0.072479), Crotone498 = c(0.072479, 
    NA), Sample_ID_1 = c("Crotone500", "Crotone498"), Population_1 = c("Crotone", 
    "Crotone"), CHRType = c("Allosome", "Allosome"), K = c("2", "2"
    )), row.names = c("Crotone500", "Crotone498"), class = "data.frame"), 
        structure(list(Crotone498 = c(NA, 0.02641), Crotone500 = c(0.02641, 
        NA), Sample_ID_1 = c("Crotone498", "Crotone500"), Population_1 = c("Crotone", 
        "Crotone"), CHRType = c("Allosome", "Allosome"), K = c("3", 
        "3")), row.names = c("Crotone498", "Crotone500"), class = "data.frame"))

    # Defines color palette and breaks ~
    color_palette <- c("#023858", "#ffffff", "#a50f15")
    nHalf <- 10
    Min <- -.3
    Max <- .3
    Thresh <- 0
    
    rc1 <- colorRampPalette(colors = color_palette[1:2], space = "Lab")(nHalf)
    rc2 <- colorRampPalette(colors = color_palette[2:3], space = "Lab")(nHalf)
    rampcols <- c(rc1, rc2)
    rampcols[c(nHalf, nHalf+1)] <- rgb(t(col2rgb(color_palette[2])), maxColorValue = 256) 
    
    rb1 <- seq(Min, Thresh, length.out = nHalf + 1)
    rb2 <- seq(Thresh, Max, length.out = nHalf + 1)[-1]
    rampbreaks <- c(rb1, rb2)


    # Create a list to store ordered labels for each 'K' value
    ordered_labels_list <- list()
    
    
    # Create a list to store unique 'K' values
    unique_K_values <- unique(unlist(lapply(corres, function(df) df$K)))
    
    
    # Create a list to store ordered labels for each 'K' value
    ordered_labels_list <- lapply(seq_along(corres), function(k) {
      ordered_K <- unique(corres[[k]]$K)
      rownames(corres[[k]])
    })
    names(ordered_labels_list) <- as.character(unique_K_values)
    
    # Split fulldf into a list of data frames by K
    split_dfs <- split(fulldf, fulldf$K)

    # Function to set factor levels based on 'K' value
    set_factor_levels <- function(df, ordered_labels) {
      df$Sample_ID_1 <- factor(df$Sample_ID_1, levels = ordered_labels)
      df$Sample_ID_2 <- factor(df$Sample_ID_2, levels = ordered_labels)
      return(df)
    }
    
    # Apply the function to each data frame in split_dfs
    processed_dfs <- mapply(function(df, k) {
      set_factor_levels(df, ordered_labels_list[[as.character(k)]])
    }, split_dfs, names(split_dfs), SIMPLIFY = FALSE)

# Combine data frames while preserving factor levels
fulldfUp <- rbindlist(processed_dfs)


# Safely convert to numeric factor
fulldfUp <- fulldfUp %>% mutate(Sample_ID_Factor = as.numeric(Sample_ID_1))


# Calculate population positions
population_positions <- fulldfUp %>%
  filter(!is.na(Sample_ID_Factor)) %>%
  group_by(Population_1) %>%
  summarise(center = (min(Sample_ID_Factor) + max(Sample_ID_Factor)) / 2)


# Reorders CHRType ~
fulldfUp$CHRType <- factor(fulldfUp$CHRType, ordered = TRUE,
                           levels = c("Autosomes",
                                      "Allosome"))


# Reorders K ~
fulldfUp$K <- factor(fulldfUp$K, ordered = TRUE,
                     levels = c("3",
                                "2"))


# Creates heatmap ~
  ggplot(fulldfUp, aes(x = Sample_ID_1, y = Sample_ID_2, fill = as.numeric(Value))) +
  geom_tile(linewidth = .15, colour = "#000000") +
  scale_fill_gradientn(colors = rampcols, na.value = "#d6d6d6", breaks = rampbreaks, limits = c(-.3, .3)) +
  scale_x_discrete(labels = function(x) {
    labels <- rep("", length(x))
    for (i in seq_along(population_positions$center)) {
      labels[population_positions$center[i]] <- population_positions$Population_1[i]}
    return(labels)}, expand = c(0, 0), drop = FALSE) +
  scale_y_discrete(labels = fulldf$Sample_ID_2, expand = c(0, 0), drop = FALSE) +
  facet_grid(K ~ ., scales = "free", space = "free") +
  theme(panel.background = element_rect(fill = "#ffffff"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.spacing = unit(1, "lines"),
        legend.position = "right",
        legend.key = element_blank(),
        legend.background = element_blank(),
        legend.margin = margin(t = 0, b = 0, r = 15, l = 15),
        legend.box = "vertical",
        legend.box.margin = margin(t = 20, b = 30, r = 0, l = 0),
        axis.title = element_blank(),
        axis.text.x = element_text(color = "#000000", family = "Optima", size = 12, face = "bold", angle = 45, vjust = 1, hjust = 1),
        axis.text.y = element_text(color = "#000000", family = "Optima", size = 9, face = "bold"),
        axis.ticks.x = element_blank(),
        axis.ticks.y = element_line(color = "#000000", linewidth = .15),
        strip.text = element_text(colour = "#000000", size = 22, face = "bold", family = "Optima"),
        strip.background = element_rect(colour = "#000000", fill = "#d6d6d6", linewidth = .15),
        axis.line = element_line(colour = "#000000", linewidth = .15)) +
  guides(fill = guide_legend(title = "", title.theme = element_text(size = 16, face = "bold"),
                             label.theme = element_text(size = 15), ncol = 1, reverse = TRUE))

DavoWW · January 8, 2025, 4:34am

Hi @george.pacheco
It might be easier to draw each graph individually and save each one to an object e.g. P1 to Pn. Then use to the {patchwork} package to combine the plots into the required grid.

mduvekot · January 8, 2025, 3:45pm

My little brain can't follow your example, but: No, you cannot have one factor for y that is different depending on some other factor, afaik. Greatly simplified example:

library(ggplot2)

df1 <- data.frame(
  x = c(1:3),
  y = factor(letters[1:3], levels = c("a", "c", "b")),
  k = rep(1, 3)
) 

df2 <- data.frame(
  x = c(1:3),
  y = factor(letters[1:3], levels = c("b", "a", "c")),
  k = rep(2, 3)
  ) 

df <- rbind(df1, df2)  

p <- function(df){
ggplot(df)+
  aes(x = x, y = y)+
  geom_point()+
  facet_wrap(~k, axes = "all_y")
}

# this doesn't maintain the ordering of the y -axes
p(df)

library(patchwork)
# this does maintain the ordering of the y -axes
p(df1) + p(df2)

system · April 8, 2025, 3:45pm

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.