How can I dynamically set axis limits for a plot?

AlexanderTowle · October 10, 2025, 4:04pm

I have some column charts split into pages and facets that all have the y-axis starting at 13. I'd like to make it so that, for each facet, if the min_temp value is greater than cold_max, the y-axis starts at 16 instead. I have included my code and the link to my data.

library(tidyverse)
library(ggforce)

# Read the long-term summer averages from CSV.
summer_avg_longterm = read_csv("summer_avg_longterm.csv")

# Temperature thresholds: drawn as horizontal lines in the plots and compared
# against each site's min/max median temperature below.
cold_max = 18.29
warm_min = 21.70

# Keep only stations (staSeq) that have at least 10 distinct years.
# NOTE(review): summer_avg_all is never defined in this script; the CSV above is
# read into summer_avg_longterm, which this assignment then overwrites.
summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

# Bin each year into a 5-year group labelled "start-end" (e.g. 2010-2014).
summer_avg_longterm = summer_avg_longterm %>%
  mutate(
    year_group_start = floor(year / 5) * 5,
    year_group_end = year_group_start + 4,
    year_group = paste0(year_group_start, "-", year_group_end)
  )

# Summarise avg_temp per station and 5-year group; n_years is the number of
# rows that fell into each group.
summer_5yr_summary = summer_avg_longterm %>%
  group_by(staSeq, WaterbodyName, year_group) %>%
  summarise(
    mean_temp = mean(avg_temp, na.rm = TRUE),
    median_temp = median(avg_temp, na.rm = TRUE),
    n_years = n(),
    .groups = "drop"
  )

# Label used as the facet title: "<staSeq> - <WaterbodyName>".
summer_5yr_summary = summer_5yr_summary %>%
  mutate(facet_label = paste(staSeq, "-", WaterbodyName))

# Pagination: number of facets per page and the resulting page count.
n_per_page = 9

n_sites = length(unique(summer_5yr_summary$staSeq))
n_pages = ceiling(n_sites / n_per_page)

# Per-site range of the 5-year median temperatures.
site_ranges = summer_5yr_summary %>%
  group_by(staSeq, WaterbodyName, facet_label) %>%
  summarise(
    min_temp = min(median_temp, na.rm = TRUE),
    max_temp = max(median_temp, na.rm = TRUE),
    .groups = "drop"
  )

# Every site gets a cold-threshold line.
cold_lines = site_ranges %>%
  mutate(
    yintercept = cold_max,
    color = "mediumturquoise",
    line_type = "Cold threshold"
  )

# Only sites whose maximum median exceeds warm_min get a warm-threshold line.
warm_lines = site_ranges %>%
  filter(max_temp > warm_min) %>%
  mutate(
    yintercept = warm_min,
    color = "indianred1",
    line_type = "Warm threshold"
  )

# One row per threshold line to draw, keyed by facet_label.
site_lines = bind_rows(cold_lines, warm_lines)

# Attach the per-site range to the summary and derive the desired lower y-axis
# limit: 16 when the site never drops to cold_max, otherwise 13.
# NOTE(review): y_min is computed here but not referenced by the plotting loop
# below, which uses a fixed lower limit.
summer_5yr_summary = summer_5yr_summary %>%
  left_join(site_ranges, by = c("staSeq", "WaterbodyName", "facet_label")) %>%
  mutate(
    y_min = ifelse(min_temp > cold_max, 16, 13)
  )

# Print one page of facets (3 x 3 panels) per iteration.
# Relies on summer_5yr_summary, site_lines and n_pages built above, and on
# rescale_none() from {scales}.
# seq_len() is used instead of 1:n_pages so the loop body is skipped entirely
# when n_pages is 0 (1:0 would iterate over c(1, 0)).
for (i in seq_len(n_pages)) {
  p = ggplot(summer_5yr_summary, aes(x = year_group, y = median_temp)) +
    # Threshold lines; the colour vector has one entry per row of site_lines.
    geom_hline(
      data = site_lines,
      aes(yintercept = yintercept),
      color = site_lines$color,
      linetype = "solid",
      linewidth = 1, # `size` is deprecated for line widths since ggplot2 3.4.0
      show.legend = FALSE
    ) +
    geom_col(fill = "gray70") +
    # Sample-size label placed slightly above each bar.
    geom_text(
      aes(label = paste0("n = ", n_years), y = median_temp + 0.5),
      vjust = 0.3,
      size = 3.5
    ) +
    facet_wrap_paginate(
      ~ facet_label,
      scales = "free_y", ncol = 3, nrow = 3, page = i
    ) +
    labs(
      # paste0() avoids the stray space before ")" that paste() inserted.
      title = paste0(
        "5-Year Median Summer Water Temperatures by Site (Page ",
        i, " of ", n_pages, ")"
      ),
      x = "5-Year Period",
      y = "Median Summer Temperature (°C)"
    ) +
    # The lower limit is the same constant for every facet on every page; the
    # per-site y_min column is not used here.
    scale_y_continuous(limits = c(13.5, NA), oob = rescale_none) +
    theme_minimal(base_size = 13) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      strip.text = element_text(face = "bold"),
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank()
    )

  print(p)
}

jrkrideau · October 10, 2025, 5:55pm

Hi, welcome to the forum.

Data downloaded and looks good.

Your code reads

summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

But I do not see any

summer_avg_all

Should

summer_avg_longterm = read_csv("summer_avg_longterm.csv")

read as

summer_avg_all = read_csv("summer_avg_longterm.csv")

AlexanderTowle · October 10, 2025, 6:07pm

Yes, good catch. Typo on my part.

jrkrideau · October 10, 2025, 6:17pm

Thanks. I made the change and am walking through your code at the moment.

jrkrideau · October 10, 2025, 8:54pm

Can you tell me what this is doing or should be doing?

summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

If I do this

library(tidyverse)
library(arsenal)

# Read the CSV under the name that the pipeline below expects.
summer_avg_all = read_csv("summer_avg_longterm.csv")

# Keep only stations (staSeq) that have at least 10 distinct years.
summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

# Compare the unfiltered and filtered data frames with arsenal::comparedf().
comparedf(summer_avg_all, summer_avg_longterm )

there seems to be no difference in the two data.frames.

I seldom, well almost *never*, use dplyr, so I may just not be understanding what you are trying to do, but whatever this code is supposed to do, it does not seem to be doing it.

In {data.table} syntax does this do what you wanted? You will need to install {pacman}

# Load packages -----------------------------------------------------------
pacman::p_load(data.table)

# Load data--------------------------------------------------------------

DT <- fread("summer_avg_longterm.csv")
# Drop the useless row-names column (read in as V1), but only if it is there.
if ("V1" %in% names(DT)) {
  DT[, V1 := NULL]
}


# Data wrangling ----------------------------------------------------------

# Count the distinct years available for each station (staSeq), mirroring the
# dplyr step group_by(staSeq) %>% filter(n_distinct(year) >= 10): the filter is
# on stations, not on years.
TT <- DT[, .(N = uniqueN(year)), by = staSeq]
TTU <- TT[N >= 10L]

# Do join ----------------------------------------------------------------

# Keep only the rows belonging to stations that passed the filter, then drop
# the helper count column.
DT10 <- DT[TTU, on = "staSeq"][, N := NULL]

jrkrideau · October 13, 2025, 12:51am

This may work, but I have a nagging thought that I am either making a logic error here or missing something in your data structure. I split your code into a data wrangling file and saved summer_5yr_summary as summer_5yr_summary.csv. It reduces clutter.

library(tidyverse)
library(scales)
library(ggforce)

# Load plot data ----------------------------------------------------------
summer_5yr_summary <- read.csv("summer_5yr_summary.csv")


# Get upper limit for y-axis ----------------------------------------------

# NOTE(review): tb has one entry per row of summer_5yr_summary, so tb[i] in the
# loop below picks the value from row i, not a maximum for page i.
tb <- summer_5yr_summary$max_temp + 5  # Upper y-axis limit.

# Start ggplot ------------------------------------------------------------

# NOTE(review): n_pages and site_lines are not created in this snippet;
# presumably they come from the separate data-wrangling file.
# seq_len() skips the loop when n_pages is 0 (1:0 would iterate over c(1, 0)).
for (i in seq_len(n_pages)) {
  p = ggplot(summer_5yr_summary, aes(x = year_group, y = median_temp)) +
    geom_hline(
      data = site_lines,
      aes(yintercept = yintercept),
      color = site_lines$color,
      linetype = "solid",
      linewidth = 1,
      show.legend = FALSE
    ) +
    geom_col(fill = "gray70") +
    # Sample-size label placed slightly above each bar.
    geom_text(
      aes(label = paste0("n = ", n_years), y = median_temp + 0.5),
      vjust = 0.3,
      size = 3.5
    ) +
    facet_wrap_paginate(
      ~ facet_label,
      scales = "free_y", ncol = 3, nrow = 3, page = i
    ) +
    labs(
      # paste0() avoids the stray space before ")" that paste() inserted.
      title = paste0(
        "5-Year Median Summer Water Temperatures by Site (Page ",
        i, " of ", n_pages, ")"
      ),
      x = "5-Year Period",
      y = "Median Summer Temperature (°C)"
    ) +
    # One pair of limits is applied to every facet on the page.
    scale_y_continuous(limits = c(13.5, tb[i]), oob = rescale_none) +
    theme_minimal(base_size = 13) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      strip.text = element_text(face = "bold"),
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank()
    )
  print(p)
}

AlexanderTowle · October 14, 2025, 12:58pm

That's my bad, after checking again summer_avg_longterm is already in the necessary form for the rest of the processing. The following code isn't actually needed and summer_avg_longterm can safely be read from the csv:

summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

AlexanderTowle · October 14, 2025, 1:03pm

This works well for an upper bound but I'm looking to create a lower one. Also I'm noticing that the bound is set per page, and if possible I'd rather set it per facet on the page. The things I have tried so far either result in the lower bound always starting at one number, either 0 or 13, and I have been unsuccessful in setting it to 13 or 16 depending on how low the minimum value on the facet is.

jrkrideau · October 14, 2025, 1:44pm

I'm noticing that the bound is set per page, and if possible I'd rather set it per facet on the page.

That's what I was afraid of. It was rather late when I suddenly realized how easy it was to do. I decided to send what I had and go to bed, but I did think that you probably wanted the bounds by facet. That loop, as it is, is a problem.

I may try to rethink it. Is there any pressing reason to use the {ggforce} paginate() function? It might be better to use {cowplot} or {patchwork}.

Quick reply re lower bound.

Just create a lower bound vector similar to the upper bound vector (tb) below; let's call it "lb".

tb <- summer_5yr_summary$max_temp + 5

and change

 scale_y_continuous(limits = c(13.5, tb[i]), oob = rescale_none)

to

scale_y_continuous(limits = c(lb[i], tb[i]), oob = rescale_none)

This still has the pagination problem.

BTW, your example omitted the command

library(scales)

AlexanderTowle · October 14, 2025, 3:44pm

There isn't any reason to stick to paginate. I took your suggestion and took a crack at it with patchwork and was successful. Thanks for your help!

library(patchwork)
# Build one stand-alone plot per site so that every panel can have its own
# y-axis limits, then lay the plots out nine to a page with {patchwork}.
# Relies on summer_5yr_summary, site_lines, cold_max and warm_min from the
# earlier data-wrangling code.
site_list = split(summer_5yr_summary, summer_5yr_summary$facet_label)
site_lines_list = split(site_lines, site_lines$facet_label)

# Legend labels, keyed by the short line_type values stored in site_lines
# ("Cold threshold" / "Warm threshold"). The manual colour and linetype scales
# below are keyed by the long labels, so the short values must be translated
# or the real threshold lines fall outside the scales.
threshold_labels = c(
  "Cold threshold" = "Cold threshold (18.29°C)",
  "Warm threshold" = "Warm threshold (21.70°C)"
)

# Invisible lines carrying both labels, so every plot produces the same legend
# and guides = "collect" can merge them into one.
legend_lines = tibble(
  yintercept = c(cold_max, warm_min),
  line_type = unname(threshold_labels)
)

plots = lapply(names(site_list), function(facet) {
  df = site_list[[facet]]
  lines_df = site_lines_list[[facet]]

  # Translate short line_type values to the legend labels; values that are
  # already long labels (or anything else) are left untouched.
  relabelled = unname(threshold_labels[as.character(lines_df$line_type)])
  lines_df$line_type = ifelse(
    is.na(relabelled),
    as.character(lines_df$line_type),
    relabelled
  )

  # Start the axis higher for sites that never drop to the cold threshold.
  # Plain if/else because the condition is a scalar; a missing min_temp falls
  # back to the lower start of 13.
  y_min = if (isTRUE(df$min_temp[1] > cold_max)) 17 else 13
  y_max = max(df$max_temp, na.rm = TRUE) + 2

  ggplot(df, aes(x = year_group, y = median_temp)) +
    geom_hline(
      data = lines_df,
      aes(yintercept = yintercept, color = line_type, linetype = line_type),
      linewidth = 1, # `size` is deprecated for line widths since ggplot2 3.4.0
      show.legend = TRUE
    ) +
    geom_hline(
      data = legend_lines,
      aes(yintercept = yintercept, color = line_type, linetype = line_type),
      linewidth = 1,
      alpha = 0,
      show.legend = TRUE
    ) +
    geom_col(fill = "gray70") +
    # Sample-size label placed slightly above each bar.
    geom_text(
      aes(label = paste0("n = ", n_years), y = median_temp + 0.5),
      vjust = 0.3,
      size = 3.5
    ) +
    labs(
      title = facet,
      x = "5-Year Period",
      y = "Median Summer Temperature (°C)",
      color = "Temperature thresholds",
      linetype = "Temperature thresholds"
    ) +
    scale_color_manual(
      values = c(
        "Cold threshold (18.29°C)" = "mediumturquoise",
        "Warm threshold (21.70°C)" = "indianred1"
      )
    ) +
    scale_linetype_manual(
      values = c(
        "Cold threshold (18.29°C)" = "solid",
        "Warm threshold (21.70°C)" = "solid"
      )
    ) +
    # Namespaced so the snippet does not depend on library(scales) being loaded.
    scale_y_continuous(
      limits = c(y_min, y_max),
      oob = scales::rescale_none
    ) +
    theme_minimal(base_size = 10) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.text.y = element_text(size = 7),
      strip.text = element_text(face = "bold"),
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank()
    )
})
names(plots) = names(site_list)

plots_per_page = 9
n_pages = ceiling(length(plots) / plots_per_page)

# Print the plots in consecutive chunks of plots_per_page; the last page may
# hold fewer plots.
for (i in seq_len(n_pages)) {
  idx = ((i - 1) * plots_per_page + 1):(min(i * plots_per_page, length(plots)))
  page_plot = wrap_plots(plots[idx], ncol = 3, nrow = 3, guides = "collect") +
    plot_annotation(
      # paste0() avoids the stray space before ")" that paste() inserted.
      title = paste0(
        "5-Year Median Summer Water Temperatures by Site (Page ",
        i, " of ", n_pages, ")"
      )
    ) &
    theme(legend.position = "bottom")
  print(page_plot)
}

jrkrideau · October 14, 2025, 4:26pm

Ah, great. I was just about to tackle it and was thinking it was going to be nasty. A very nice approach.
Can you check the solution box? I think your code could help others.

system · October 21, 2025, 4:26pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.