How can I dynamically set axis limits for a plot?

I have some column charts, split into pages and facets, that all have the y-axis starting at 13. For each facet, I'd like the y-axis to start at 16 instead if that facet's min_temp value is greater than cold_max. I have included my code and the link to my data.

library(tidyverse)
library(ggforce)

# Read the posted data set into `summer_avg_longterm`.
summer_avg_longterm = read_csv("summer_avg_longterm.csv")

# Temperature thresholds; drawn further down as horizontal reference lines
# ("Cold threshold" / "Warm threshold") on the temperature axis.
cold_max = 18.29
warm_min = 21.70

# Keep only the rows of stations (staSeq) that have at least 10 distinct
# values of `year`.
# NOTE(review): `summer_avg_all` is not created anywhere in this script; the
# only object read above is `summer_avg_longterm`.
summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

# Assign every year to a 5-year bin: the bin starts at the year rounded down
# to a multiple of 5 and ends 4 years later; the label reads "start-end".
summer_avg_longterm = summer_avg_longterm %>%
  mutate(
    year_group_start = floor(year / 5) * 5,
    year_group_end = year_group_start + 4,
    year_group = paste0(year_group_start, "-", year_group_end)
  )

# Per station and 5-year bin: mean and median of avg_temp plus the number of
# rows that went into the bin (stored as n_years).
summer_5yr_summary = summer_avg_longterm %>%
  group_by(staSeq, WaterbodyName, year_group) %>%
  summarise(
    mean_temp = mean(avg_temp, na.rm = TRUE),
    median_temp = median(avg_temp, na.rm = TRUE),
    n_years = n(),
    .groups = "drop"
  )

# Facet title of the form "<staSeq> - <WaterbodyName>".
summer_5yr_summary = summer_5yr_summary %>%
  mutate(facet_label = paste(staSeq, "-", WaterbodyName))

# Number of facets shown on one page (matches the 3 x 3 layout used below).
n_per_page = 9

# Number of pages needed to show every station.
n_sites = length(unique(summer_5yr_summary$staSeq))
n_pages = ceiling(n_sites / n_per_page)

# Lowest and highest 5-year median per station.
site_ranges = summer_5yr_summary %>%
  group_by(staSeq, WaterbodyName, facet_label) %>%
  summarise(
    min_temp = min(median_temp, na.rm = TRUE),
    max_temp = max(median_temp, na.rm = TRUE),
    .groups = "drop"
  )

# One cold-threshold line for every station.
cold_lines = site_ranges %>%
  mutate(
    yintercept = cold_max,
    color = "mediumturquoise",
    line_type = "Cold threshold"
  )

# A warm-threshold line only for stations whose highest median exceeds
# warm_min.
warm_lines = site_ranges %>%
  filter(max_temp > warm_min) %>%
  mutate(
    yintercept = warm_min,
    color = "indianred1",
    line_type = "Warm threshold"
  )

# All reference lines in one table, keyed by facet_label.
site_lines = bind_rows(cold_lines, warm_lines)

# Attach the per-station range to every summary row and derive the desired
# lower axis limit: 16 when the station's minimum is above cold_max, else 13.
# NOTE(review): y_min is computed here but never referenced by the plotting
# code below, which uses a fixed lower limit of 13.5.
summer_5yr_summary = summer_5yr_summary %>%
  left_join(site_ranges, by = c("staSeq", "WaterbodyName", "facet_label")) %>%
  mutate(
    y_min = ifelse(min_temp > cold_max, 16, 13)
  )

# Draw one page of facets per iteration and print it.
for (i in 1:n_pages) {
  p = ggplot(summer_5yr_summary, aes(x = year_group, y = median_temp)) +
    # Threshold lines; the colour is passed outside aes() as a vector with
    # one entry per row of site_lines.
    geom_hline(
      data = site_lines,
      aes(yintercept = yintercept),
      color = site_lines$color,
      linetype = "solid",
      size = 1,
      show.legend = FALSE
    ) +
    geom_col(fill = "gray70") +
    # "n = <n_years>" label placed 0.5 above the top of each column.
    geom_text(
      aes(label = paste0("n = ", n_years), y = median_temp + 0.5),
      vjust = 0.3,
      size = 3.5
    ) +
    # 3 x 3 facets per page; `page = i` selects the page to draw.
    facet_wrap_paginate(~ facet_label, scales = "free_y", ncol = 3, nrow = 3, page = i) +
    labs(
      title = paste(
        "5-Year Median Summer Water Temperatures by Site (Page",
        i, "of", n_pages, ")"
      ),
      x = "5-Year Period",
      y = "Median Summer Temperature (°C)"
    ) +
    # Fixed lower limit of 13.5 for the y scale; the upper limit is left open.
    # NOTE(review): rescale_none comes from {scales}, and this script has no
    # library(scales) call -- confirm it is attached before running.
    scale_y_continuous(limits = c(13.5, NA), oob = rescale_none) +
    theme_minimal(base_size = 13) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      strip.text = element_text(face = "bold"),
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank()
    )
  
  print(p)
}

Hi, welcome to the forum.

Data downloaded and looks good.

Your code reads

summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

But I do not see any

summer_avg_all

Should

summer_avg_longterm = read_csv("summer_avg_longterm.csv")

read as

summer_avg_all = read_csv("summer_avg_longterm.csv")

Yes, good catch. Typo on my part.

Thanks. I made the change and am walking through your code at the moment.

Can you tell me what this is doing or should be doing?

summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

If I do this

library(tidyverse)
library(arsenal)

# Read the posted data set.
summer_avg_all <- read_csv("summer_avg_longterm.csv")

# Keep only the stations (staSeq) that have at least 10 distinct years,
# then drop the grouping again.
summer_avg_longterm <- ungroup(
  filter(
    group_by(summer_avg_all, staSeq),
    n_distinct(year) >= 10
  )
)

# Report any differences between the unfiltered and the filtered data.
comparedf(summer_avg_all, summer_avg_longterm)

there seems to be no difference in the two data.frames.

I seldom, well, almost *never*, use dplyr, so I may just not be understanding what you are trying to do, but whatever this code is meant to do, it does not seem to be doing it.

In {data.table} syntax, does this do what you wanted? You will need to install {pacman}.

# Load packages -----------------------------------------------------------
pacman::p_load(data.table)

# Load data--------------------------------------------------------------

DT <- fread("summer_avg_longterm.csv")
DT[, V1 := NULL][] # drop useless row names


# Data wrangling ----------------------------------------------------------

# Mirror the dplyr filter `group_by(staSeq) %>% filter(n_distinct(year) >= 10)`:
# count the distinct years recorded for each station, then keep the stations
# that have at least 10 of them. (Counting rows per `year` instead would
# filter years, not stations.)
TT <- DT[, .(N = uniqueN(year)), by = staSeq]
TTU <- TT[N >= 10L, ]

# Do filter -----------------------------------------------------------------

# Subset with %in% rather than a join so the original row order and column
# set of DT are preserved and no helper column has to be removed afterwards.
DT10 <- DT[staSeq %in% TTU$staSeq]

This may work, but I have a nagging thought that I am either making a logic error here or else missing something in your data structure. I split your code into a data wrangling file and saved summer_5yr_summary as summer_5yr_summary.csv. It reduces clutter.

library(tidyverse)
library(scales)
library(ggforce)

# Load plot data ----------------------------------------------------------
# The columns used below are facet_label, year_group, median_temp, n_years
# and max_temp.
summer_5yr_summary <- read.csv("summer_5yr_summary.csv")


# Get upper limit for y-axis ----------------------------------------------

# The limit is looked up per page (tb[i]), so tb must hold one value per
# page, not one value per data row. Panels are laid out in the order of the
# sorted facet labels, 3 x 3 = 9 panels to a page, so the page of a row is
# derived from the rank of its facet label.
panels_per_page <- 3 * 3
facet_levels <- sort(unique(summer_5yr_summary$facet_label))
row_page <- ceiling(
  match(summer_5yr_summary$facet_label, facet_levels) / panels_per_page
)

# Highest max_temp among the facets of each page, plus 5 units of headroom.
tb <- as.numeric(
  tapply(summer_5yr_summary$max_temp, row_page, max, na.rm = TRUE)
) + 5  # Upper y-axis limit, one entry per page.

# Start ggplot ------------------------------------------------------------

# n_pages and site_lines are not created in this script; they must already
# exist in the session (they are produced by the data-wrangling code).
for (i in seq_len(n_pages)) {
  p <- ggplot(summer_5yr_summary, aes(x = year_group, y = median_temp)) +
    # Threshold lines; the colour is passed outside aes() as a vector with
    # one entry per row of site_lines.
    geom_hline(
      data = site_lines,
      aes(yintercept = yintercept),
      color = site_lines$color,
      linetype = "solid",
      linewidth = 1,
      show.legend = FALSE
    ) +
    geom_col(fill = "gray70") +
    # "n = <n_years>" label placed 0.5 above the top of each column.
    geom_text(
      aes(label = paste0("n = ", n_years), y = median_temp + 0.5),
      vjust = 0.3,
      size = 3.5
    ) +
    facet_wrap_paginate(
      ~ facet_label,
      scales = "free_y",
      ncol = 3,
      nrow = 3,
      page = i
    ) +
    labs(
      title = paste(
        "5-Year Median Summer Water Temperatures by Site (Page",
        i, "of", n_pages, ")"
      ),
      x = "5-Year Period",
      y = "Median Summer Temperature (°C)"
    ) +
    # Explicit limits apply to every facet on the page, even with
    # scales = "free_y"; rescale_none keeps values outside the limits as is.
    scale_y_continuous(limits = c(13.5, tb[i]), oob = rescale_none) +
    theme_minimal(base_size = 13) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      strip.text = element_text(face = "bold"),
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank()
    )
  print(p)
}

That's my bad, after checking again summer_avg_longterm is already in the necessary form for the rest of the processing. The following code isn't actually needed and summer_avg_longterm can safely be read from the csv:

summer_avg_longterm = summer_avg_all %>%
  group_by(staSeq) %>%
  filter(n_distinct(year) >= 10) %>%
  ungroup()

This works well for an upper bound, but I'm looking to create a lower one. Also, I'm noticing that the bound is set per page, and if possible I'd rather set it per facet on the page. Everything I have tried so far results in the lower bound always starting at a single number, either 0 or 13; I have been unsuccessful in setting it to 13 or 16 depending on how low the minimum value on the facet is.

I'm noticing that the bound is set per page, and if possible I'd rather set it per facet on the page.

That's what I was afraid of. It was rather late when I suddenly realized how easy it was to do. I decided to send what I had and go to bed, but I did think that you probably wanted the bounds by facet. That loop, as it is, is a problem.

I may try to rethink it. Is there any pressing reason to use the {ggforce} facet_wrap_paginate() function? It might be better to use {cowplot} or {patchwork}.

Quick reply re lower bound.

Just create a lower bound vector similar to the upper bound vector (tb) below; let's call it "lb".

tb <- summer_5yr_summary$max_temp + 5

and change

 scale_y_continuous(limits = c(13.5, tb[i]), oob = rescale_none)

to

# limits = c(lower, upper): the two values must be separated by a comma.
scale_y_continuous(limits = c(lb[i], tb[i]), oob = rescale_none)

This still has the pagination problem.

BTW, your example omitted the command

library(scales)

There isn't any reason to stick to paginate. I took your suggestion and took a crack at it with patchwork and was successful. Thanks for your help!

library(patchwork)
# Split the summary data and the threshold lines into one piece per site,
# keyed by facet_label.
site_list <- split(summer_5yr_summary, summer_5yr_summary$facet_label)
site_lines_list <- split(site_lines, site_lines$facet_label)

# Both thresholds, used only to force both entries into every plot's legend
# (drawn fully transparent below) so the collected legend is complete.
legend_lines <- tibble(
  yintercept = c(cold_max, warm_min),
  line_type = c("Cold threshold (18.29°C)", "Warm threshold (21.70°C)")
)

# Build one stand-alone plot per site so every plot can carry its own
# y-axis limits.
plots <- list()
for (facet in names(site_list)) {
  df <- site_list[[facet]]
  lines_df <- site_lines_list[[facet]]

  # Lower limit: 17 when the site's lowest median is above the cold
  # threshold, otherwise 13 (also used when min_temp is missing).
  y_min <- if (isTRUE(df$min_temp[1] > cold_max)) 17 else 13
  # Upper limit: the site's highest median plus 2 units of headroom.
  y_max <- max(df$max_temp, na.rm = TRUE) + 2

  p <- ggplot(df, aes(x = year_group, y = median_temp)) +
    # Threshold lines that apply to this site.
    geom_hline(
      data = lines_df,
      aes(yintercept = yintercept, color = line_type, linetype = line_type),
      linewidth = 1,
      show.legend = TRUE
    ) +
    # Invisible copies of both thresholds: legend entries only.
    geom_hline(
      data = legend_lines,
      aes(yintercept = yintercept, color = line_type, linetype = line_type),
      linewidth = 1,
      alpha = 0,
      show.legend = TRUE
    ) +
    geom_col(fill = "gray70") +
    # "n = <n_years>" label placed 0.5 above the top of each column.
    geom_text(
      aes(label = paste0("n = ", n_years), y = median_temp + 0.5),
      vjust = 0.3,
      size = 3.5
    ) +
    labs(
      title = facet,
      x = "5-Year Period",
      y = "Median Summer Temperature (°C)",
      color = "Temperature thresholds",
      linetype = "Temperature thresholds"
    ) +
    scale_color_manual(
      values = c(
        "Cold threshold (18.29°C)" = "mediumturquoise",
        "Warm threshold (21.70°C)" = "indianred1"
      )
    ) +
    scale_linetype_manual(
      values = c(
        "Cold threshold (18.29°C)" = "solid",
        "Warm threshold (21.70°C)" = "solid"
      )
    ) +
    # Namespaced so the script does not depend on library(scales) being
    # attached; rescale_none keeps values outside the limits as they are.
    scale_y_continuous(
      limits = c(y_min, y_max),
      oob = scales::rescale_none
    ) +
    theme_minimal(base_size = 10) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.text.y = element_text(size = 7),
      strip.text = element_text(face = "bold"),
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank()
    )
  plots[[facet]] <- p
}

# Arrange the site plots 3 x 3 per page and print each page.
plots_per_page <- 9
n_pages <- ceiling(length(plots) / plots_per_page)

for (i in seq_len(n_pages)) {
  # Indices of the plots that belong on page i (the last page may be short).
  idx <- ((i - 1) * plots_per_page + 1):(min(i * plots_per_page, length(plots)))
  page_plot <- wrap_plots(plots[idx], ncol = 3, nrow = 3, guides = "collect") +
    plot_annotation(
      title = paste(
        "5-Year Median Summer Water Temperatures by Site (Page",
        i, "of", n_pages, ")"
      )
    ) &
    theme(legend.position = "bottom")
  print(page_plot)
}

Ah, great. I was just about to tackle it and was thinking it was going to be nasty. A very nice approach.
Can you check the solution box? I think your code could help others.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.