Time series plot with several values per day (taken at different hours) across different years.

Hi community,
This data is daily (several values per day, taken at different hours) and covers 4 different years, with the same number of values for each year.

data:

# dat1: example ozone readings for 2017 (the literal looks like dput() output).
# Columns: Fecha (Date stored as days since 1970-01-01; 17226..17228 correspond
# to 2017-03-01..2017-03-03), Año (year, numeric) and OZ (numeric ozone value,
# plotted further down as a concentration in µg/m³).
# The table has 36 rows with several readings per date; the first OZ value is NA.
dat1<- structure(list(Fecha = structure(c(17226, 17226, 17226, 17226, 
17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 
17226, 17227, 17227, 17227, 17227, 17227, 17227, 17227, 17227, 
17227, 17227, 17227, 17228, 17228, 17228, 17228, 17228, 17228, 
17228, 17228, 17228, 17228, 17228), class = "Date"), Año = c(2017, 
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 
2017, 2017), OZ = c(NA, 4.92916666666667, 5.8515625, 8.77510416666667, 
14.221875, 23.8325, 37.8861458333333, 52.9963541666667, 65.6509375, 
71.619375, 74.888125, 74.6621875, 71.69375, 65.644375, 13.5175, 
12.71375, 12.4088541666667, 13.5157291666667, 16.4938541666667, 
21.8866666666667, 29.5363541666667, 39.0266666666667, 48.3944791666667, 
56.5463541666667, 63.554375, 24.486875, 33.6927083333333, 43.1330208333333, 
50.5014583333333, 58.0030208333333, 63.2805208333333, 65.0348958333333, 
62.0083333333333, 55.3983333333333, 47.8646875, 40.100625)), row.names = c(NA, 
-36L), class = c("tbl_df", "tbl", "data.frame"))

# dat2: example ozone readings for 2018 (the literal looks like dput() output).
# Columns: Fecha (Date stored as days since 1970-01-01; 17591..17593 correspond
# to 2018-03-01..2018-03-03), Año (year, numeric) and OZ (numeric ozone value,
# plotted further down as a concentration in µg/m³).
# The table has 36 rows with several readings per date; the first OZ value is NA.
dat2<-structure(list(Fecha = structure(c(17591, 17591, 17591, 17591, 
17591, 17591, 17591, 17591, 17591, 17591, 17591, 17591, 17591, 
17591, 17592, 17592, 17592, 17592, 17592, 17592, 17592, 17592, 
17592, 17592, 17592, 17593, 17593, 17593, 17593, 17593, 17593, 
17593, 17593, 17593, 17593, 17593), class = "Date"), Año = c(2018, 
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 
2018, 2018), OZ = c(NA, 4.95458333333333, 5.87958333333333, 10.1564583333333, 
16.8947916666667, 24.7535416666667, 33.945625, 44.3760416666667, 
55.51125, 62.6879166666667, 64.831875, 62.605625, 58.2191666666667, 
51.4239583333333, 8.9, 7.9875, 7.11291666666667, 6.531875, 7.00666666666667, 
9.22708333333333, 14.743125, 22.6714583333333, 30.1208333333333, 
35.0372916666667, 37.87375, 24.068125, 34.1785416666667, 45.7020833333333, 
58.0220833333333, 66.6291666666667, 71.830625, 72.4845833333333, 
69.4608333333333, 63.8810416666667, 55.1627083333333, 44.8725
)), row.names = c(NA, -36L), class = c("tbl_df", "tbl", "data.frame"
))

# dat3: example ozone readings for 2019 (the literal looks like dput() output).
# Columns: Fecha (Date stored as days since 1970-01-01; 17956..17958 correspond
# to 2019-03-01..2019-03-03), Año (year, numeric) and OZ (numeric ozone value,
# plotted further down as a concentration in µg/m³).
# The table has 36 rows with several readings per date; the first OZ value is NA.
dat3<-structure(list(Fecha = structure(c(17956, 17956, 17956, 17956, 
17956, 17956, 17956, 17956, 17956, 17956, 17956, 17956, 17956, 
17956, 17957, 17957, 17957, 17957, 17957, 17957, 17957, 17957, 
17957, 17957, 17957, 17958, 17958, 17958, 17958, 17958, 17958, 
17958, 17958, 17958, 17958, 17958), class = "Date"), Año = c(2019, 
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 
2019, 2019), OZ = c(NA, 9.0205, 8.6915, 9.02575, 12.572, 19.4598125, 
28.164875, 37.9554375, 49.10625, 54.8221875, 57.3145625, 57.4600625, 
54.10125, 46.940625, 4.1903125, 3.7421875, 3.645, 3.980625, 6.91625, 
10.849375, 18.6278125, 29.340625, 39.941875, 50.4596875, 62.0096875, 
22.5977083333333, 32.95875, 43.7105208333333, 50.8345833333333, 
55.6383333333333, 58.2522916666667, 57.6136458333333, 56.10375, 
49.2979166666667, 40.3895833333333, 31.2608333333333)), row.names = c(NA, 
-36L), class = c("tbl_df", "tbl", "data.frame"))

# dat4: example ozone readings for 2020 (the literal looks like dput() output).
# Columns: Fecha (Date stored as days since 1970-01-01; 18322..18324 correspond
# to 2020-03-01..2020-03-03), Año (year, numeric) and OZ (numeric ozone value,
# plotted further down as a concentration in µg/m³).
# The table has 36 rows with several readings per date; the first OZ value is NA.
dat4<- structure(list(Fecha = structure(c(18322, 18322, 18322, 18322, 
18322, 18322, 18322, 18322, 18322, 18322, 18322, 18322, 18322, 
18322, 18323, 18323, 18323, 18323, 18323, 18323, 18323, 18323, 
18323, 18323, 18323, 18324, 18324, 18324, 18324, 18324, 18324, 
18324, 18324, 18324, 18324, 18324), class = "Date"), Año = c(2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 
2020, 2020), OZ = c(NA, 5.66344969325153, 6.1039129601227, 8.60314325153374, 
13.4670519171779, 19.9194093558282, 28.578990797546, 38.0642875, 
46.2887223159509, 51.2773675613497, 54.3548848159509, 54.7834878834356, 
52.5918048312883, 48.0607996932515, 7.69726940184049, 6.86214777607362, 
6.01721081288344, 6.1210898006135, 8.16472484662577, 13.2073544478528, 
19.9774016411043, 30.9288643711656, 41.9240871472393, 52.2923552453988, 
58.7169025613497, 18.6814499233129, 28.0330126533742, 38.4740778374233, 
48.3814340490798, 54.3896474693252, 57.9493431748466, 58.5688545552147, 
55.3495056748466, 50.0745797546012, 42.1871381901841, 32.7296516104294
)), row.names = c(NA, -36L), class = c("tbl_df", "tbl", "data.frame"
))
# The snippet calls ggplot2 functions but never attaches the package; load it
# so the example runs on its own.
library(ggplot2)

# Draw one line layer per yearly table (dat1..dat4), all sharing the Fecha axis.
# Mapping `colour` to a constant string inside aes() creates one legend entry
# per year, which scale_color_manual() then colours.
# Line thickness is set with `linewidth`: using `size` for lines is deprecated
# since ggplot2 3.4.0 and triggers a warning.
ggplot() +
  geom_line(data = dat1, aes(x = Fecha, y = OZ, colour = "2017"),
            linewidth = 0.5, alpha = 0.9) +
  geom_line(data = dat2, aes(x = Fecha, y = OZ, colour = "2018"),
            linewidth = 0.5, alpha = 0.9) +
  geom_line(data = dat3, aes(x = Fecha, y = OZ, colour = "2019"),
            linewidth = 0.5, alpha = 0.9) +
  geom_line(data = dat4, aes(x = Fecha, y = OZ, colour = "2020"),
            linewidth = 0.5, alpha = 0.9) +
  # Dashed horizontal reference line at y = 100
  geom_hline(yintercept = 100, color = "#FF0007", linetype = "dashed",
             linewidth = 1) +
  scale_color_manual(name = "AÑO",
                     values = c("2017" = "#344CE5",
                                "2018" = "#F41B6B",
                                "2019" = "#36FA2C",
                                "2020" = "#FFBF1D")) +
  scale_x_date(date_breaks = "1 months") +
  theme_classic() +
  # A plain string is enough for the axis label: the \u escapes already produce
  # the micro sign and the superscript 3, so wrapping it in bquote() added
  # nothing.
  labs(title = "",
       x = "Tiempo",
       y = "Concentración (\u03BCg/m\u00B3)",
       caption = "") +
  # Rotate the date labels so monthly breaks do not overlap
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        plot.title = element_text(color = "Black", size = 12,
                                  face = "bold", hjust = 0.5),
        plot.caption = element_text(color = "black",
                                    face = "italic"))

It shows this:
image

But I need something like this:

Thanks!

Why is the date the same for all your data frames? and how can you tell on what hour each measure was taken? with that criteria, you could build a proper POSIXct variable (if not already available in your actual data set).

1 Like

I checked the data and there was a mistake in the Fecha column. The question has been corrected and updated.

The data are ozone (O3) measurements from climate stations, with several measurements per day taken at different hours. The idea is to compare the O3 concentration across several years on the same dates.

The hours are not important in this case; the O3 values matter more. But I have several different values for each day.

Ok, but that doesn't answer my second question, how can you tell on what hour each measure was taken? or what logic could we use to infer it? I thought of evenly distributing them according to the number of observations but there are not even the same number of observations per day. Can you please elaborate on that? My idea behind this is to manually construct a POSIXct variable so plotting the time series becomes trivial.

If you don't need that level of definition then it would be OK to average the Ozone measures by day? The problem with several values per day is that there is no logical way to plot a time series with them so you have to deal with this in one way or another. One option would be using the actual hours to get more resolution and the one I'm proposing now is using aggregation to get single values per date.

I totally agree with you. I already made the other plot with the daily averages of ozone, and it looks fine.
But my supervisor needs the graph to show the data as it was collected, with several values for each day. The hour of each measurement is not very important; what matters is how the ozone values change from day to day.
And the plot should allow comparison with the other years.

Like this?

library(tidyverse)
library(lubridate)

# Fake data: one simulated ozone value per calendar day for each year.
# Fix the RNG seed so the example (and the resulting plot) is reproducible;
# without it every run draws different values.
set.seed(2021)

# Build a daily tibble for one calendar year with columns Fecha (Date),
# Año (year extracted from Fecha) and OZ (normal draws, mean 80, sd 15).
# n() gives the row count of the table being mutated, replacing nrow(.).
simulate_year <- function(yr) {
  tibble(
    Fecha = seq(make_date(yr, 1, 1), make_date(yr, 12, 31), by = "day")
  ) %>%
    mutate(`Año` = year(Fecha),
           OZ = rnorm(n(), mean = 80, sd = 15))
}

# Same four objects as before, generated in the same order (2017 -> 2020)
dat1 <- simulate_year(2017)
dat2 <- simulate_year(2018)
dat3 <- simulate_year(2019)
dat4 <- simulate_year(2020)

# Stack the four yearly tables into one long table, then turn the year into a
# factor so it is treated as a discrete variable (one colour per year).
df <- bind_rows(list(dat1, dat2, dat3, dat4))
df <- mutate(df, `Año` = factor(`Año`))

# Graph: one line per year from the combined table, coloured by the Año factor.
# Line thickness uses `linewidth`: using `size` for lines is deprecated since
# ggplot2 3.4.0 and triggers a warning.
ggplot(df, aes(Fecha, OZ, colour = `Año`)) +
  geom_line() +
  scale_color_manual(values = c("2017" = "#344CE5",
                                "2018" = "#F41B6B",
                                "2019" = "#36FA2C",
                                "2020" = "#FFBF1D")) +
  # Dashed horizontal reference line at y = 100
  geom_hline(yintercept = 100, color = "#FF0007", linetype = "dashed",
             linewidth = 1) +
  scale_x_date(date_breaks = "1 months") +
  theme_classic() +
  # A plain string is enough for the axis label: the \u escapes already produce
  # the micro sign and the superscript 3, so bquote() was redundant.
  labs(title = NULL,
       x = "Tiempo",
       y = "Concentración (\u03BCg/m\u00B3)",
       caption = NULL) +
  # Rotate the date labels so monthly breaks do not overlap
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        plot.title = element_text(color = "Black", size = 12,
                                  face = "bold", hjust = 0.5),
        plot.caption = element_text(color = "black",
                                    face = "italic"))

image

1 Like

Excellent, this is the plot that I need.

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.