Here is an approach that applies the code from the post How to enumerate intervals in a sequence, like periods when cryptids come to visit - #8 by dromano to enumerate both dry periods and rain events, using the data shared by you here. (Full reprex can be found at the end of this post.)
dput() output, with table saved as 'water_data' (click to access)
structure(list(Date = c("2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-02", "2010-05-02",
"2010-05-02", "2010-05-02"), Time = c("0:00:00", "0:15:00", "0:30:00",
"0:45:00", "1:00:00", "1:15:00", "1:30:00", "1:45:00", "2:00:00",
"2:15:00", "2:30:00", "2:45:00", "3:00:00", "3:15:00", "3:30:00",
"3:45:00", "4:00:00", "4:15:00", "4:30:00", "4:45:00", "5:00:00",
"5:15:00", "5:30:00", "5:45:00", "6:00:00", "6:15:00", "6:30:00",
"6:45:00", "7:00:00", "7:15:00", "7:30:00", "7:45:00", "8:00:00",
"8:15:00", "8:30:00", "8:45:00", "9:00:00", "9:15:00", "9:30:00",
"9:45:00", "10:00:00", "10:15:00", "10:30:00", "10:45:00", "11:00:00",
"11:15:00", "11:30:00", "11:45:00", "12:00:00", "12:15:00", "12:30:00",
"12:45:00", "13:00:00", "13:15:00", "13:30:00", "13:45:00", "14:00:00",
"14:15:00", "14:30:00", "14:45:00", "15:00:00", "15:15:00", "15:30:00",
"15:45:00", "16:00:00", "16:15:00", "16:30:00", "16:45:00", "17:00:00",
"17:15:00", "17:30:00", "17:45:00", "18:00:00", "18:15:00", "18:30:00",
"18:45:00", "19:00:00", "19:15:00", "19:30:00", "19:45:00", "20:00:00",
"20:15:00", "20:30:00", "20:45:00", "21:00:00", "21:15:00", "21:30:00",
"21:45:00", "22:00:00", "22:15:00", "22:30:00", "22:45:00", "23:00:00",
"23:15:00", "23:30:00", "23:45:00", "0:00:00", "0:15:00", "0:30:00",
"0:45:00"), Precipitation = c(0, 0, 0, 0.2, 0.2, 0, 0, 0.2, 0.4,
0, 0, 0.2, 0, 0, 0, 0, 0.2, 0, 0, 0.6, 0.4, 0.2, 0, 0, 0, 0,
0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0), Discharge = c(0.299, 0.302,
0.305, 0.308, 0.312, 0.317, 0.321, 0.326, 0.33, 0.339, 0.352,
0.38, 0.41, 0.428, 0.424, 0.419, 0.415, 0.41, 0.411, 0.412, 0.414,
0.415, 0.416, 0.44, 0.459, 0.465, 0.495, 0.495, 0.495, 0.495,
0.495, 0.495, 0.471, 0.453, 0.44, 0.431, 0.422, 0.423, 0.423,
0.424, 0.424, 0.425, 0.425, 0.426, 0.427, 0.427, 0.428, 0.428,
0.429, 0.429, 0.43, 0.43, 0.431, 0.431, 0.432, 0.432, 0.433,
0.433, 0.434, 0.434, 0.435, 0.441, 0.471, 0.527, 0.565, 0.562,
0.559, 0.556, 0.552, 0.665, 0.892, 0.941, 0.937, 0.933, 0.928,
0.924, 0.843, 0.812, 0.765, 0.729, 0.693, 0.672, 0.644, 0.617,
0.611, 0.591, 0.578, 0.569, 0.559, 0.55, 0.54, 0.538, 0.536,
0.534, 0.532, 0.53, 0.528, 0.526, 0.524, 0.521)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -100L)) -> water_data
First, enumerate all the contiguous periods without precipitation:
library(tidyverse)
water_data |>
# flag when it rained
mutate(rained = Precipitation > 0) |>
# apply 'cryptid visit' approach to numbering dry periods
mutate(dry_period = if_else(!rained, cumsum(rained), NA)) |>
mutate(dry_period = factor(dry_period) |> as.numeric()) -> dry_periods
dry_periods |> head()
#> # A tibble: 6 × 6
#> Date Time Precipitation Discharge rained dry_period
#> <chr> <chr> <dbl> <dbl> <lgl> <dbl>
#> 1 2010-05-01 0:00:00 0 0.299 FALSE 1
#> 2 2010-05-01 0:15:00 0 0.302 FALSE 1
#> 3 2010-05-01 0:30:00 0 0.305 FALSE 1
#> 4 2010-05-01 0:45:00 0.2 0.308 TRUE NA
#> 5 2010-05-01 1:00:00 0.2 0.312 TRUE NA
#> 6 2010-05-01 1:15:00 0 0.317 FALSE 2
Next, identify all the dry periods that lasted at least four hours:
dry_periods |>
# add column that enumerates the observations, place it before 'Date' column
mutate(observation = row_number(), .before = Date) |>
# find when each dry period begins and ends
group_by(dry_period) |>
mutate(
# set start value to earliest index for dry periods, NA for rainy ones
period_start = if_else(!rained, min(observation), NA),
# similarly for end value
period_end = if_else(!rained, max(observation), NA),
) |> # view()
# we no longer need to operate period-wise, so can ungroup table
ungroup() |>
# calculate length of periods (in 15-minute intervals)
mutate(period_length = period_end - period_start + 1) |>
# flag when period is at least four hours
mutate(in_long_period = period_length >= 16) |>
# flag 'dry spells' of four hours or more
mutate(in_dry_spell = in_long_period & !rained) |>
# remove any columns that are no longer needed
select(!contains(c('period'))) -> dry_spells
dry_spells |> head()
#> # A tibble: 6 × 7
#> observation Date Time Precipitation Discharge rained in_dry_spell
#> <int> <chr> <chr> <dbl> <dbl> <lgl> <lgl>
#> 1 1 2010-05-01 0:00:00 0 0.299 FALSE FALSE
#> 2 2 2010-05-01 0:15:00 0 0.302 FALSE FALSE
#> 3 3 2010-05-01 0:30:00 0 0.305 FALSE FALSE
#> 4 4 2010-05-01 0:45:00 0.2 0.308 TRUE FALSE
#> 5 5 2010-05-01 1:00:00 0.2 0.312 TRUE FALSE
#> 6 6 2010-05-01 1:15:00 0 0.317 FALSE FALSE
Now, notice that rain events are exactly the periods that fill the gaps before or after the dry spells, with one exception: if the data starts with a dry period that isn't long enough to be a dry spell, it should also be considered a dry spell so that it's not included as part of the following rain event.
dry_spells |>
# any observations that occur before any precipitation has been recorded
# should be flagged as part of a dry spell
mutate(in_dry_spell = in_dry_spell | cumsum(rained) == 0) |>
# remove any columns that are no longer needed
select(!contains(c('rain'))) -> dry_spells
dry_spells |> head()
#> # A tibble: 6 × 6
#> observation Date Time Precipitation Discharge in_dry_spell
#> <int> <chr> <chr> <dbl> <dbl> <lgl>
#> 1 1 2010-05-01 0:00:00 0 0.299 TRUE
#> 2 2 2010-05-01 0:15:00 0 0.302 TRUE
#> 3 3 2010-05-01 0:30:00 0 0.305 TRUE
#> 4 4 2010-05-01 0:45:00 0.2 0.308 FALSE
#> 5 5 2010-05-01 1:00:00 0.2 0.312 FALSE
#> 6 6 2010-05-01 1:15:00 0 0.317 FALSE
Finally, since an observation is part of a rain event exactly when it isn't part dry spell, we can apply the 'cryptid visit' approach again, this time to numbering rain events:
dry_spells |>
mutate(rain_event = if_else(!in_dry_spell, cumsum(in_dry_spell), NA)) |>
mutate(rain_event = factor(rain_event) |> as.numeric()) -> rain_events
rain_events |> head()
#> # A tibble: 6 × 7
#> observation Date Time Precipitation Discharge in_dry_spell rain_event
#> <int> <chr> <chr> <dbl> <dbl> <lgl> <dbl>
#> 1 1 2010-05-01 0:00:00 0 0.299 TRUE NA
#> 2 2 2010-05-01 0:15:00 0 0.302 TRUE NA
#> 3 3 2010-05-01 0:30:00 0 0.305 TRUE NA
#> 4 4 2010-05-01 0:45:00 0.2 0.308 FALSE 1
#> 5 5 2010-05-01 1:00:00 0.2 0.312 FALSE 1
#> 6 6 2010-05-01 1:15:00 0 0.317 FALSE 1
Created on 2024-04-15 with reprex v2.0.2
full reprex
structure(list(Date = c("2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01",
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-02", "2010-05-02",
"2010-05-02", "2010-05-02"), Time = c("0:00:00", "0:15:00", "0:30:00",
"0:45:00", "1:00:00", "1:15:00", "1:30:00", "1:45:00", "2:00:00",
"2:15:00", "2:30:00", "2:45:00", "3:00:00", "3:15:00", "3:30:00",
"3:45:00", "4:00:00", "4:15:00", "4:30:00", "4:45:00", "5:00:00",
"5:15:00", "5:30:00", "5:45:00", "6:00:00", "6:15:00", "6:30:00",
"6:45:00", "7:00:00", "7:15:00", "7:30:00", "7:45:00", "8:00:00",
"8:15:00", "8:30:00", "8:45:00", "9:00:00", "9:15:00", "9:30:00",
"9:45:00", "10:00:00", "10:15:00", "10:30:00", "10:45:00", "11:00:00",
"11:15:00", "11:30:00", "11:45:00", "12:00:00", "12:15:00", "12:30:00",
"12:45:00", "13:00:00", "13:15:00", "13:30:00", "13:45:00", "14:00:00",
"14:15:00", "14:30:00", "14:45:00", "15:00:00", "15:15:00", "15:30:00",
"15:45:00", "16:00:00", "16:15:00", "16:30:00", "16:45:00", "17:00:00",
"17:15:00", "17:30:00", "17:45:00", "18:00:00", "18:15:00", "18:30:00",
"18:45:00", "19:00:00", "19:15:00", "19:30:00", "19:45:00", "20:00:00",
"20:15:00", "20:30:00", "20:45:00", "21:00:00", "21:15:00", "21:30:00",
"21:45:00", "22:00:00", "22:15:00", "22:30:00", "22:45:00", "23:00:00",
"23:15:00", "23:30:00", "23:45:00", "0:00:00", "0:15:00", "0:30:00",
"0:45:00"), Precipitation = c(0, 0, 0, 0.2, 0.2, 0, 0, 0.2, 0.4,
0, 0, 0.2, 0, 0, 0, 0, 0.2, 0, 0, 0.6, 0.4, 0.2, 0, 0, 0, 0,
0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0), Discharge = c(0.299, 0.302,
0.305, 0.308, 0.312, 0.317, 0.321, 0.326, 0.33, 0.339, 0.352,
0.38, 0.41, 0.428, 0.424, 0.419, 0.415, 0.41, 0.411, 0.412, 0.414,
0.415, 0.416, 0.44, 0.459, 0.465, 0.495, 0.495, 0.495, 0.495,
0.495, 0.495, 0.471, 0.453, 0.44, 0.431, 0.422, 0.423, 0.423,
0.424, 0.424, 0.425, 0.425, 0.426, 0.427, 0.427, 0.428, 0.428,
0.429, 0.429, 0.43, 0.43, 0.431, 0.431, 0.432, 0.432, 0.433,
0.433, 0.434, 0.434, 0.435, 0.441, 0.471, 0.527, 0.565, 0.562,
0.559, 0.556, 0.552, 0.665, 0.892, 0.941, 0.937, 0.933, 0.928,
0.924, 0.843, 0.812, 0.765, 0.729, 0.693, 0.672, 0.644, 0.617,
0.611, 0.591, 0.578, 0.569, 0.559, 0.55, 0.54, 0.538, 0.536,
0.534, 0.532, 0.53, 0.528, 0.526, 0.524, 0.521)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -100L)) -> water_data
library(tidyverse)
# First, enumerate all the contiguous periods without precipitation
water_data |>
# flag when it rained
mutate(rained = Precipitation > 0) |>
# apply 'cryptid visit' approach to numbering dry periods
mutate(dry_period = if_else(!rained, cumsum(rained), NA)) |>
mutate(dry_period = factor(dry_period) |> as.numeric()) -> dry_periods
dry_periods |> head()
#> # A tibble: 6 × 6
#> Date Time Precipitation Discharge rained dry_period
#> <chr> <chr> <dbl> <dbl> <lgl> <dbl>
#> 1 2010-05-01 0:00:00 0 0.299 FALSE 1
#> 2 2010-05-01 0:15:00 0 0.302 FALSE 1
#> 3 2010-05-01 0:30:00 0 0.305 FALSE 1
#> 4 2010-05-01 0:45:00 0.2 0.308 TRUE NA
#> 5 2010-05-01 1:00:00 0.2 0.312 TRUE NA
#> 6 2010-05-01 1:15:00 0 0.317 FALSE 2
# Next, identify all the dry periods that lasted at least four hours
dry_periods |>
# add column that enumerates the observations, place it before 'Date' column
mutate(observation = row_number(), .before = Date) |>
# find when each dry period begins and ends
group_by(dry_period) |>
mutate(
# set start value to earliest index for dry periods, NA for rainy ones
period_start = if_else(!rained, min(observation), NA),
# similarly for end value
period_end = if_else(!rained, max(observation), NA),
) |> # view()
# we no longer need to operate period-wise, so can ungroup table
ungroup() |>
# calculate length of periods (in 15-minute intervals)
mutate(period_length = period_end - period_start + 1) |> # view()
# flag when period is at least four hours
mutate(in_long_period = period_length >= 16) |>
# flag 'dry spells' of four hours or more
mutate(in_dry_spell = in_long_period & !rained) |>
# remove any columns that are no longer needed
select(!contains(c('period'))) -> dry_spells
dry_spells |> head()
#> # A tibble: 6 × 7
#> observation Date Time Precipitation Discharge rained in_dry_spell
#> <int> <chr> <chr> <dbl> <dbl> <lgl> <lgl>
#> 1 1 2010-05-01 0:00:00 0 0.299 FALSE FALSE
#> 2 2 2010-05-01 0:15:00 0 0.302 FALSE FALSE
#> 3 3 2010-05-01 0:30:00 0 0.305 FALSE FALSE
#> 4 4 2010-05-01 0:45:00 0.2 0.308 TRUE FALSE
#> 5 5 2010-05-01 1:00:00 0.2 0.312 TRUE FALSE
#> 6 6 2010-05-01 1:15:00 0 0.317 FALSE FALSE
# Now, notice that rain events are exactly the periods that fill the gaps
# before or after the dry spells, with one exception: if the data starts with a
# dry period that isn't long enough to be a dry spell, it should also be considered
# a dry spell so that it's not included as part of the following rain event.
dry_spells |>
# any observations that occur before any precipitation has been recorded
# should be flagged as part of a dry spell
mutate(in_dry_spell = in_dry_spell | cumsum(rained) == 0) |>
# remove any columns that are no longer needed
select(!contains(c('rain'))) -> dry_spells
dry_spells |> head()
#> # A tibble: 6 × 6
#> observation Date Time Precipitation Discharge in_dry_spell
#> <int> <chr> <chr> <dbl> <dbl> <lgl>
#> 1 1 2010-05-01 0:00:00 0 0.299 TRUE
#> 2 2 2010-05-01 0:15:00 0 0.302 TRUE
#> 3 3 2010-05-01 0:30:00 0 0.305 TRUE
#> 4 4 2010-05-01 0:45:00 0.2 0.308 FALSE
#> 5 5 2010-05-01 1:00:00 0.2 0.312 FALSE
#> 6 6 2010-05-01 1:15:00 0 0.317 FALSE
# Finally, since an observation is part of a rain event exactly when it isn't
# part dry spell, we can apply 'cryptid visit' approach to numbering rain
# events, too
dry_spells |>
mutate(rain_event = if_else(!in_dry_spell, cumsum(in_dry_spell), NA)) |>
mutate(rain_event = factor(rain_event) |> as.numeric()) -> rain_events
rain_events |> head()
#> # A tibble: 6 × 7
#> observation Date Time Precipitation Discharge in_dry_spell rain_event
#> <int> <chr> <chr> <dbl> <dbl> <lgl> <dbl>
#> 1 1 2010-05-01 0:00:00 0 0.299 TRUE NA
#> 2 2 2010-05-01 0:15:00 0 0.302 TRUE NA
#> 3 3 2010-05-01 0:30:00 0 0.305 TRUE NA
#> 4 4 2010-05-01 0:45:00 0.2 0.308 FALSE 1
#> 5 5 2010-05-01 1:00:00 0.2 0.312 FALSE 1
#> 6 6 2010-05-01 1:15:00 0 0.317 FALSE 1
Created on 2024-04-15 with reprex v2.0.2