How to get missing axis labels to appear

dromano · April 8, 2024, 5:36pm

Since some of the information from @brant's post Horizonal axis labels not showing on ggplot 2 may be of general interest, I thought I would review the main points in a separate topic in order to make them more accessible. (A full reprex can be found at the bottom of this post.)

(Context: An analysis of precipitation and stream discharge over time.)

The data, saved in the table 'aggregate_water' (click to access)

# Recreate the `aggregate_water` tibble (this looks like `dput()` output).
# It has 79 rows and four columns: `Date` and `Time` (character), and
# `Precipitation` and `Discharge` (numeric). The final row has NA for `Date`,
# `Time`, and `Precipitation`.
structure(list(Date = c("2010-05-01", "2010-06-01", "2010-07-01", 
"2010-08-01", "2010-09-01", "2010-10-01", "2011-05-01", "2011-06-01", 
"2011-07-01", "2011-08-01", "2011-09-01", "2011-10-01", "2012-05-01", 
"2012-06-01", "2012-07-01", "2012-08-01", "2012-09-01", "2012-10-01", 
"2013-05-01", "2013-06-01", "2013-07-01", "2013-08-01", "2013-09-01", 
"2013-10-01", "2014-05-01", "2014-06-01", "2014-07-01", "2014-08-01", 
"2014-09-01", "2014-10-01", "2015-05-01", "2015-06-01", "2015-07-01", 
"2015-08-01", "2015-09-01", "2015-10-01", "2016-05-01", "2016-06-01", 
"2016-07-01", "2016-08-01", "2016-09-01", "2016-10-01", "2017-05-01", 
"2017-06-01", "2017-07-01", "2017-08-01", "2017-09-01", "2017-10-01", 
"2018-05-01", "2018-06-01", "2018-07-01", "2018-08-01", "2018-09-01", 
"2018-10-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
"2019-09-01", "2019-10-01", "2020-05-01", "2020-06-01", "2020-07-01", 
"2020-08-01", "2020-09-01", "2020-10-01", "2021-05-01", "2021-06-01", 
"2021-07-01", "2021-08-01", "2021-09-01", "2021-10-01", "2022-05-01", 
"2022-06-01", "2022-07-01", "2022-08-01", "2022-09-01", "2022-10-01", 
NA), Time = c("00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", NA), Precipitation = c(41.6, 63.4, 30.4, 
58, 109.2, 38, 4, 62.6, 67.8, 30.8, 39, 34.2, 140.2, 94.6, 51, 
104.2, 29.8, 40.2, 142.2, 55.2, 126.6, 50.8, 9.8, 37, 57.2, 109, 
22.6, 39.8, 6.6, 14.4, 95.6, 66.8, 34.4, 77.2, 44, 48, 47.8, 
102.2, 79.4, 43, 24.2, 0, 72.2, 70, 60.6, 70, 81.4, 36.8, 53.8, 
71.2, 77.8, 42.8, 79, 102.4, 43.4, 73, 64.2, 61.4, 100.6, 89.6, 
102, 12.2, 5, 81, 49.4, 64.8, 47, 72.6, 12.4, 33.4, 60.4, 50.6, 
100.4, 67.4, 52.4, 33, 99.2, 25.2, NA), Discharge = c(1098.165, 
913.832, 143.449, 122.327, 628.503, 744.365, 3576.262758593, 
1169.660438716, 636.443304331, 89.993293314, 87.987612867, 156.547529614, 
3135.631004576, 2642.972454083, 247.869505193, 184.387420593, 
72.984364263, 144.809074076, 7140.468275454, 1947.160139933, 
3472.148888357, 827.989476767, 1169.288358022, 830.330769324, 
7099.102561831, 3781.54435038, 1403.538744855, 477.311791977, 
415.568367489, 347.669041685, 2712.445233732, 1453.426870664, 
598.906750422, 336.028010752, 684.973451554, 590.387748128, 1969.451711015, 
3125.837815424, 1722.36845952, 271.31683909, 503.568190585, 740.808522966, 
4398.121089032, 1678.901533343, 1080.74882027, 302.582297029, 
1017.53118285, 1286.086914624, 1958.065068929, 939.237816557, 
1289.604132466, 163.11710896, 445.258625216, 1813.391561568, 
4093.504092264, 2022.328847569, 582.173185702, 286.591893656, 
1466.749048724, 3481.635663176, 2094.152961024, 642.646409983, 
521.921978323, 363.166217808, 395.686164598, 831.226330386, 2097.401490171, 
1233.149951321, 154.346937294, 110.370471482, 196.085973386, 
415.034224673, 12515.462389965, 2910.924209312, 770.455482982, 
450.402574557, 821.166584692, 503.228680048, 0.275960889)), row.names = c(NA, 
-79L), class = c("tbl_df", "tbl", "data.frame")) -> aggregate_water

Conversion of dates from strings to datetime values

library(tidyverse)

# Peek at the raw table: Date and Time are still character columns
head(aggregate_water)
#> # A tibble: 6 × 4
#>   Date       Time     Precipitation Discharge
#>   <chr>      <chr>            <dbl>     <dbl>
#> 1 2010-05-01 00:00:00          41.6     1098.
#> 2 2010-06-01 00:00:00          63.4      914.
#> 3 2010-07-01 00:00:00          30.4      143.
#> 4 2010-08-01 00:00:00          58        122.
#> 5 2010-09-01 00:00:00         109.       629.
#> 6 2010-10-01 00:00:00          38        744.

# Build a DateTime column: join Date and Time with a space, then convert the
# resulting strings to the datetime data type with parse_datetime() (from the
# readr package)
aggregate_water <- mutate(
  aggregate_water,
  DateTime = parse_datetime(str_c(Date, Time, sep = " "))
)

head(aggregate_water)
#> # A tibble: 6 × 5
#>   Date       Time     Precipitation Discharge DateTime           
#>   <chr>      <chr>            <dbl>     <dbl> <dttm>             
#> 1 2010-05-01 00:00:00          41.6     1098. 2010-05-01 00:00:00
#> 2 2010-06-01 00:00:00          63.4      914. 2010-06-01 00:00:00
#> 3 2010-07-01 00:00:00          30.4      143. 2010-07-01 00:00:00
#> 4 2010-08-01 00:00:00          58        122. 2010-08-01 00:00:00
#> 5 2010-09-01 00:00:00         109.       629. 2010-09-01 00:00:00
#> 6 2010-10-01 00:00:00          38        744. 2010-10-01 00:00:00

Here is an out-of-box version, so to speak, of the original plot code:

(Note: The code below preserves the use of geom_segment() from the original post in order to illustrate that it can automatically take care of the loops that appear there. The same plot can be obtained more simply by using geom_line() instead, as @brant does later in the same post.)

# Monthly version of the original plot: draw discharge over time as segments
# joining each observation to the next one; lead() supplies the following
# row's values as each segment's end point
p <- ggplot(aggregate_water) +
  geom_segment(
    mapping = aes(
      x = DateTime,
      xend = lead(DateTime),
      y = Discharge,
      yend = lead(Discharge)
    ),
    color = "blue"
  )

p

Suppose, however, we would prefer to have every year appear as a label on the x-axis. To achieve that, we might try adding the function scale_x_continuous(), which allows us to customize the appearance of the x-axis, and use the breaks argument:

# Try to get yearly x-axis breaks by passing the years as plain numbers
p + scale_x_continuous(breaks = seq(2010, 2022, by = 1))

But now the labels are missing!

The reason is that the argument:

 breaks = seq(2010, 2022, by = 1)

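uses a numeric vector to supply the breaks, whereas the data type of the column supplying the x-axis values is datetime. Datetime values are stored internally as seconds since 1970-01-01, so the numbers 2010 through 2022 fall far outside the range of the data and no breaks are drawn. Supplying the breaks as datetime values instead brings the labels back: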

# Supply breaks of the same type as the x-axis data (datetime): 13 yearly
# values starting at May 1, 2010, the earliest date in the aggregate_water table
p +
  scale_x_continuous(
    breaks = seq(parse_datetime("2010-05-01"), by = "1 year", length.out = 13)
  )

So now we get x-axis labels again, but as full dates instead of just the years.

To fix this, we can add the labels argument and specify the labels explicitly with a character vector:

# Keep the yearly datetime breaks, but label them with the years only,
# supplied explicitly as a character vector
p +
  scale_x_continuous(
    breaks = seq(parse_datetime("2010-05-01"), by = "1 year", length.out = 13),
    labels = as.character(seq(2010, 2022, by = 1))
  )

Or we could streamline the code by using the more specialized scale_x_datetime() function, which allows us to specify the period of time that occurs between breaks. And we could further streamline the code by using an anonymous function as the labels argument, so that it can be applied to the break values. (The lubridate package's year() function extracts the year from a date or datetime value.)

# Or use datetime scale with date_breaks argument (also works for date scale)
p +
  scale_x_datetime(
    date_breaks = "1 year",
    # Use an anonymous function instead of a character vector (works for all
    # scales); it is applied to the break values, and year() extracts the year
    # from each one. The argument name `labels` is spelled out in full: the
    # shortened form `label` only works through R's partial argument matching,
    # which is fragile.
    labels = \(datetime) year(datetime)
  )

^{Created on 2024-04-08 with reprex v2.0.2}

full reprex

# Recreate the `aggregate_water` tibble (this looks like `dput()` output).
# It has 79 rows and four columns: `Date` and `Time` (character), and
# `Precipitation` and `Discharge` (numeric). The final row has NA for `Date`,
# `Time`, and `Precipitation`.
structure(list(Date = c("2010-05-01", "2010-06-01", "2010-07-01", 
"2010-08-01", "2010-09-01", "2010-10-01", "2011-05-01", "2011-06-01", 
"2011-07-01", "2011-08-01", "2011-09-01", "2011-10-01", "2012-05-01", 
"2012-06-01", "2012-07-01", "2012-08-01", "2012-09-01", "2012-10-01", 
"2013-05-01", "2013-06-01", "2013-07-01", "2013-08-01", "2013-09-01", 
"2013-10-01", "2014-05-01", "2014-06-01", "2014-07-01", "2014-08-01", 
"2014-09-01", "2014-10-01", "2015-05-01", "2015-06-01", "2015-07-01", 
"2015-08-01", "2015-09-01", "2015-10-01", "2016-05-01", "2016-06-01", 
"2016-07-01", "2016-08-01", "2016-09-01", "2016-10-01", "2017-05-01", 
"2017-06-01", "2017-07-01", "2017-08-01", "2017-09-01", "2017-10-01", 
"2018-05-01", "2018-06-01", "2018-07-01", "2018-08-01", "2018-09-01", 
"2018-10-01", "2019-05-01", "2019-06-01", "2019-07-01", "2019-08-01", 
"2019-09-01", "2019-10-01", "2020-05-01", "2020-06-01", "2020-07-01", 
"2020-08-01", "2020-09-01", "2020-10-01", "2021-05-01", "2021-06-01", 
"2021-07-01", "2021-08-01", "2021-09-01", "2021-10-01", "2022-05-01", 
"2022-06-01", "2022-07-01", "2022-08-01", "2022-09-01", "2022-10-01", 
NA), Time = c("00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", "00:00:00", 
"00:00:00", "00:00:00", NA), Precipitation = c(41.6, 63.4, 30.4, 
58, 109.2, 38, 4, 62.6, 67.8, 30.8, 39, 34.2, 140.2, 94.6, 51, 
104.2, 29.8, 40.2, 142.2, 55.2, 126.6, 50.8, 9.8, 37, 57.2, 109, 
22.6, 39.8, 6.6, 14.4, 95.6, 66.8, 34.4, 77.2, 44, 48, 47.8, 
102.2, 79.4, 43, 24.2, 0, 72.2, 70, 60.6, 70, 81.4, 36.8, 53.8, 
71.2, 77.8, 42.8, 79, 102.4, 43.4, 73, 64.2, 61.4, 100.6, 89.6, 
102, 12.2, 5, 81, 49.4, 64.8, 47, 72.6, 12.4, 33.4, 60.4, 50.6, 
100.4, 67.4, 52.4, 33, 99.2, 25.2, NA), Discharge = c(1098.165, 
913.832, 143.449, 122.327, 628.503, 744.365, 3576.262758593, 
1169.660438716, 636.443304331, 89.993293314, 87.987612867, 156.547529614, 
3135.631004576, 2642.972454083, 247.869505193, 184.387420593, 
72.984364263, 144.809074076, 7140.468275454, 1947.160139933, 
3472.148888357, 827.989476767, 1169.288358022, 830.330769324, 
7099.102561831, 3781.54435038, 1403.538744855, 477.311791977, 
415.568367489, 347.669041685, 2712.445233732, 1453.426870664, 
598.906750422, 336.028010752, 684.973451554, 590.387748128, 1969.451711015, 
3125.837815424, 1722.36845952, 271.31683909, 503.568190585, 740.808522966, 
4398.121089032, 1678.901533343, 1080.74882027, 302.582297029, 
1017.53118285, 1286.086914624, 1958.065068929, 939.237816557, 
1289.604132466, 163.11710896, 445.258625216, 1813.391561568, 
4093.504092264, 2022.328847569, 582.173185702, 286.591893656, 
1466.749048724, 3481.635663176, 2094.152961024, 642.646409983, 
521.921978323, 363.166217808, 395.686164598, 831.226330386, 2097.401490171, 
1233.149951321, 154.346937294, 110.370471482, 196.085973386, 
415.034224673, 12515.462389965, 2910.924209312, 770.455482982, 
450.402574557, 821.166584692, 503.228680048, 0.275960889)), row.names = c(NA, 
-79L), class = c("tbl_df", "tbl", "data.frame")) -> aggregate_water

library(tidyverse)

# Peek at the raw table: Date and Time are still character columns
head(aggregate_water)
#> # A tibble: 6 × 4
#>   Date       Time     Precipitation Discharge
#>   <chr>      <chr>            <dbl>     <dbl>
#> 1 2010-05-01 00:00:00          41.6     1098.
#> 2 2010-06-01 00:00:00          63.4      914.
#> 3 2010-07-01 00:00:00          30.4      143.
#> 4 2010-08-01 00:00:00          58        122.
#> 5 2010-09-01 00:00:00         109.       629.
#> 6 2010-10-01 00:00:00          38        744.

# Build a DateTime column: join Date and Time with a space, then convert the
# resulting strings to the datetime data type with parse_datetime() (from the
# readr package)
aggregate_water <- mutate(
  aggregate_water,
  DateTime = parse_datetime(str_c(Date, Time, sep = " "))
)

head(aggregate_water)
#> # A tibble: 6 × 5
#>   Date       Time     Precipitation Discharge DateTime           
#>   <chr>      <chr>            <dbl>     <dbl> <dttm>             
#> 1 2010-05-01 00:00:00          41.6     1098. 2010-05-01 00:00:00
#> 2 2010-06-01 00:00:00          63.4      914. 2010-06-01 00:00:00
#> 3 2010-07-01 00:00:00          30.4      143. 2010-07-01 00:00:00
#> 4 2010-08-01 00:00:00          58        122. 2010-08-01 00:00:00
#> 5 2010-09-01 00:00:00         109.       629. 2010-09-01 00:00:00
#> 6 2010-10-01 00:00:00          38        744. 2010-10-01 00:00:00

# Monthly version of the original plot: draw discharge over time as segments
# joining each observation to the next one; lead() supplies the following
# row's values as each segment's end point
p <- ggplot(aggregate_water) +
  geom_segment(
    mapping = aes(
      x = DateTime,
      xend = lead(DateTime),
      y = Discharge,
      yend = lead(Discharge)
    ),
    color = "blue"
  )

p


# Try to get yearly x-axis breaks by passing the years as plain numbers
p + scale_x_continuous(breaks = seq(2010, 2022, by = 1))


# Supply breaks of the same type as the x-axis data (datetime): 13 yearly
# values starting at May 1, 2010, the earliest date in the aggregate_water table
p +
  scale_x_continuous(
    breaks = seq(parse_datetime("2010-05-01"), by = "1 year", length.out = 13)
  )


# Keep the yearly datetime breaks, but label them with the years only,
# supplied explicitly as a character vector
p +
  scale_x_continuous(
    breaks = seq(parse_datetime("2010-05-01"), by = "1 year", length.out = 13),
    labels = as.character(seq(2010, 2022, by = 1))
  )


# Or use datetime scale with date_breaks argument (also works for date scale)
p +
  scale_x_datetime(
    date_breaks = "1 year",
    # Use an anonymous function instead of a character vector (works for all
    # scales); it is applied to the break values, and year() extracts the year
    # from each one. The argument name `labels` is spelled out in full: the
    # shortened form `label` only works through R's partial argument matching,
    # which is fragile.
    labels = \(datetime) year(datetime)
  )

^{Created on 2024-04-08 with reprex v2.0.2}

system · April 29, 2024, 10:00pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.