Identifying and Determining the Average Rainfall Intensity of Precipitation Events and Associated Increases in Streamflow Part 1

dromano · April 15, 2024, 7:51pm

Here is an approach that applies the code from the post How to enumerate intervals in a sequence, like periods when cryptids come to visit - #8 by dromano to enumerate both dry periods and rain events, using the data shared by you here. (Full reprex can be found at the end of this post.)

dput() output, with table saved as 'water_data' (click to access)

structure(list(Date = c("2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-02", "2010-05-02", 
"2010-05-02", "2010-05-02"), Time = c("0:00:00", "0:15:00", "0:30:00", 
"0:45:00", "1:00:00", "1:15:00", "1:30:00", "1:45:00", "2:00:00", 
"2:15:00", "2:30:00", "2:45:00", "3:00:00", "3:15:00", "3:30:00", 
"3:45:00", "4:00:00", "4:15:00", "4:30:00", "4:45:00", "5:00:00", 
"5:15:00", "5:30:00", "5:45:00", "6:00:00", "6:15:00", "6:30:00", 
"6:45:00", "7:00:00", "7:15:00", "7:30:00", "7:45:00", "8:00:00", 
"8:15:00", "8:30:00", "8:45:00", "9:00:00", "9:15:00", "9:30:00", 
"9:45:00", "10:00:00", "10:15:00", "10:30:00", "10:45:00", "11:00:00", 
"11:15:00", "11:30:00", "11:45:00", "12:00:00", "12:15:00", "12:30:00", 
"12:45:00", "13:00:00", "13:15:00", "13:30:00", "13:45:00", "14:00:00", 
"14:15:00", "14:30:00", "14:45:00", "15:00:00", "15:15:00", "15:30:00", 
"15:45:00", "16:00:00", "16:15:00", "16:30:00", "16:45:00", "17:00:00", 
"17:15:00", "17:30:00", "17:45:00", "18:00:00", "18:15:00", "18:30:00", 
"18:45:00", "19:00:00", "19:15:00", "19:30:00", "19:45:00", "20:00:00", 
"20:15:00", "20:30:00", "20:45:00", "21:00:00", "21:15:00", "21:30:00", 
"21:45:00", "22:00:00", "22:15:00", "22:30:00", "22:45:00", "23:00:00", 
"23:15:00", "23:30:00", "23:45:00", "0:00:00", "0:15:00", "0:30:00", 
"0:45:00"), Precipitation = c(0, 0, 0, 0.2, 0.2, 0, 0, 0.2, 0.4, 
0, 0, 0.2, 0, 0, 0, 0, 0.2, 0, 0, 0.6, 0.4, 0.2, 0, 0, 0, 0, 
0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0), Discharge = c(0.299, 0.302, 
0.305, 0.308, 0.312, 0.317, 0.321, 0.326, 0.33, 0.339, 0.352, 
0.38, 0.41, 0.428, 0.424, 0.419, 0.415, 0.41, 0.411, 0.412, 0.414, 
0.415, 0.416, 0.44, 0.459, 0.465, 0.495, 0.495, 0.495, 0.495, 
0.495, 0.495, 0.471, 0.453, 0.44, 0.431, 0.422, 0.423, 0.423, 
0.424, 0.424, 0.425, 0.425, 0.426, 0.427, 0.427, 0.428, 0.428, 
0.429, 0.429, 0.43, 0.43, 0.431, 0.431, 0.432, 0.432, 0.433, 
0.433, 0.434, 0.434, 0.435, 0.441, 0.471, 0.527, 0.565, 0.562, 
0.559, 0.556, 0.552, 0.665, 0.892, 0.941, 0.937, 0.933, 0.928, 
0.924, 0.843, 0.812, 0.765, 0.729, 0.693, 0.672, 0.644, 0.617, 
0.611, 0.591, 0.578, 0.569, 0.559, 0.55, 0.54, 0.538, 0.536, 
0.534, 0.532, 0.53, 0.528, 0.526, 0.524, 0.521)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -100L)) -> water_data

First, enumerate all the contiguous periods without precipitation:

library(tidyverse)

water_data |> 
  # flag when it rained
  mutate(rained = Precipitation > 0) |> 
  # apply 'cryptid visit' approach to numbering dry periods
  mutate(dry_period = if_else(!rained, cumsum(rained), NA)) |> 
  mutate(dry_period = factor(dry_period) |> as.numeric()) -> dry_periods

dry_periods |> head()
#> # A tibble: 6 × 6
#>   Date       Time    Precipitation Discharge rained dry_period
#>   <chr>      <chr>           <dbl>     <dbl> <lgl>       <dbl>
#> 1 2010-05-01 0:00:00           0       0.299 FALSE           1
#> 2 2010-05-01 0:15:00           0       0.302 FALSE           1
#> 3 2010-05-01 0:30:00           0       0.305 FALSE           1
#> 4 2010-05-01 0:45:00           0.2     0.308 TRUE           NA
#> 5 2010-05-01 1:00:00           0.2     0.312 TRUE           NA
#> 6 2010-05-01 1:15:00           0       0.317 FALSE           2

Next, identify all the dry periods that lasted at least four hours:

dry_periods |> 
  # add column that enumerates the observations, place it before 'Date' column
  mutate(observation = row_number(), .before = Date) |> 
  # find when each dry period begins and ends
  group_by(dry_period) |> 
  mutate(
    # set start value to earliest index for dry periods, NA for rainy ones
    period_start = if_else(!rained, min(observation), NA),
    # similarly for end value
    period_end = if_else(!rained, max(observation), NA),
    ) |> # view()
  # we no longer need to operate period-wise, so can ungroup table
  ungroup() |> 
  # calculate length of periods (in 15-minute intervals)
  mutate(period_length = period_end - period_start + 1) |> 
  # flag when period is at least four hours
  mutate(in_long_period = period_length >= 16) |>
  # flag 'dry spells' of four hours or more
  mutate(in_dry_spell = in_long_period & !rained) |> 
  # remove any columns that are no longer needed
  select(!contains(c('period'))) ->  dry_spells

dry_spells |> head()
#> # A tibble: 6 × 7
#>   observation Date       Time    Precipitation Discharge rained in_dry_spell
#>         <int> <chr>      <chr>           <dbl>     <dbl> <lgl>  <lgl>       
#> 1           1 2010-05-01 0:00:00           0       0.299 FALSE  FALSE       
#> 2           2 2010-05-01 0:15:00           0       0.302 FALSE  FALSE       
#> 3           3 2010-05-01 0:30:00           0       0.305 FALSE  FALSE       
#> 4           4 2010-05-01 0:45:00           0.2     0.308 TRUE   FALSE       
#> 5           5 2010-05-01 1:00:00           0.2     0.312 TRUE   FALSE       
#> 6           6 2010-05-01 1:15:00           0       0.317 FALSE  FALSE

Now, notice that rain events are exactly the periods that fill the gaps before or after the dry spells, with one exception: if the data starts with a dry period that isn't long enough to be a dry spell, it should also be considered a dry spell so that it's not included as part of the following rain event.

dry_spells |> 
  # any observations that occur before any precipitation has been recorded
  # should be flagged as part of a dry spell 
  mutate(in_dry_spell = in_dry_spell | cumsum(rained) == 0) |> 
  # remove any columns that are no longer needed
  select(!contains(c('rain'))) ->  dry_spells

dry_spells |> head()
#> # A tibble: 6 × 6
#>   observation Date       Time    Precipitation Discharge in_dry_spell
#>         <int> <chr>      <chr>           <dbl>     <dbl> <lgl>       
#> 1           1 2010-05-01 0:00:00           0       0.299 TRUE        
#> 2           2 2010-05-01 0:15:00           0       0.302 TRUE        
#> 3           3 2010-05-01 0:30:00           0       0.305 TRUE        
#> 4           4 2010-05-01 0:45:00           0.2     0.308 FALSE       
#> 5           5 2010-05-01 1:00:00           0.2     0.312 FALSE       
#> 6           6 2010-05-01 1:15:00           0       0.317 FALSE

Finally, since an observation is part of a rain event exactly when it isn't part dry spell, we can apply the 'cryptid visit' approach again, this time to numbering rain events:

dry_spells |> 
  mutate(rain_event = if_else(!in_dry_spell, cumsum(in_dry_spell), NA)) |> 
  mutate(rain_event = factor(rain_event) |> as.numeric()) -> rain_events

rain_events |> head()
#> # A tibble: 6 × 7
#>   observation Date       Time    Precipitation Discharge in_dry_spell rain_event
#>         <int> <chr>      <chr>           <dbl>     <dbl> <lgl>             <dbl>
#> 1           1 2010-05-01 0:00:00           0       0.299 TRUE                 NA
#> 2           2 2010-05-01 0:15:00           0       0.302 TRUE                 NA
#> 3           3 2010-05-01 0:30:00           0       0.305 TRUE                 NA
#> 4           4 2010-05-01 0:45:00           0.2     0.308 FALSE                 1
#> 5           5 2010-05-01 1:00:00           0.2     0.312 FALSE                 1
#> 6           6 2010-05-01 1:15:00           0       0.317 FALSE                 1

^{Created on 2024-04-15 with reprex v2.0.2}

full reprex

structure(list(Date = c("2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", "2010-05-01", 
"2010-05-01", "2010-05-01", "2010-05-01", "2010-05-02", "2010-05-02", 
"2010-05-02", "2010-05-02"), Time = c("0:00:00", "0:15:00", "0:30:00", 
"0:45:00", "1:00:00", "1:15:00", "1:30:00", "1:45:00", "2:00:00", 
"2:15:00", "2:30:00", "2:45:00", "3:00:00", "3:15:00", "3:30:00", 
"3:45:00", "4:00:00", "4:15:00", "4:30:00", "4:45:00", "5:00:00", 
"5:15:00", "5:30:00", "5:45:00", "6:00:00", "6:15:00", "6:30:00", 
"6:45:00", "7:00:00", "7:15:00", "7:30:00", "7:45:00", "8:00:00", 
"8:15:00", "8:30:00", "8:45:00", "9:00:00", "9:15:00", "9:30:00", 
"9:45:00", "10:00:00", "10:15:00", "10:30:00", "10:45:00", "11:00:00", 
"11:15:00", "11:30:00", "11:45:00", "12:00:00", "12:15:00", "12:30:00", 
"12:45:00", "13:00:00", "13:15:00", "13:30:00", "13:45:00", "14:00:00", 
"14:15:00", "14:30:00", "14:45:00", "15:00:00", "15:15:00", "15:30:00", 
"15:45:00", "16:00:00", "16:15:00", "16:30:00", "16:45:00", "17:00:00", 
"17:15:00", "17:30:00", "17:45:00", "18:00:00", "18:15:00", "18:30:00", 
"18:45:00", "19:00:00", "19:15:00", "19:30:00", "19:45:00", "20:00:00", 
"20:15:00", "20:30:00", "20:45:00", "21:00:00", "21:15:00", "21:30:00", 
"21:45:00", "22:00:00", "22:15:00", "22:30:00", "22:45:00", "23:00:00", 
"23:15:00", "23:30:00", "23:45:00", "0:00:00", "0:15:00", "0:30:00", 
"0:45:00"), Precipitation = c(0, 0, 0, 0.2, 0.2, 0, 0, 0.2, 0.4, 
0, 0, 0.2, 0, 0, 0, 0, 0.2, 0, 0, 0.6, 0.4, 0.2, 0, 0, 0, 0, 
0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0), Discharge = c(0.299, 0.302, 
0.305, 0.308, 0.312, 0.317, 0.321, 0.326, 0.33, 0.339, 0.352, 
0.38, 0.41, 0.428, 0.424, 0.419, 0.415, 0.41, 0.411, 0.412, 0.414, 
0.415, 0.416, 0.44, 0.459, 0.465, 0.495, 0.495, 0.495, 0.495, 
0.495, 0.495, 0.471, 0.453, 0.44, 0.431, 0.422, 0.423, 0.423, 
0.424, 0.424, 0.425, 0.425, 0.426, 0.427, 0.427, 0.428, 0.428, 
0.429, 0.429, 0.43, 0.43, 0.431, 0.431, 0.432, 0.432, 0.433, 
0.433, 0.434, 0.434, 0.435, 0.441, 0.471, 0.527, 0.565, 0.562, 
0.559, 0.556, 0.552, 0.665, 0.892, 0.941, 0.937, 0.933, 0.928, 
0.924, 0.843, 0.812, 0.765, 0.729, 0.693, 0.672, 0.644, 0.617, 
0.611, 0.591, 0.578, 0.569, 0.559, 0.55, 0.54, 0.538, 0.536, 
0.534, 0.532, 0.53, 0.528, 0.526, 0.524, 0.521)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -100L)) -> water_data

library(tidyverse)

# First, enumerate all the contiguous periods without precipitation
water_data |> 
  # flag when it rained
  mutate(rained = Precipitation > 0) |> 
  # apply 'cryptid visit' approach to numbering dry periods
  mutate(dry_period = if_else(!rained, cumsum(rained), NA)) |> 
  mutate(dry_period = factor(dry_period) |> as.numeric()) -> dry_periods

dry_periods |> head()
#> # A tibble: 6 × 6
#>   Date       Time    Precipitation Discharge rained dry_period
#>   <chr>      <chr>           <dbl>     <dbl> <lgl>       <dbl>
#> 1 2010-05-01 0:00:00           0       0.299 FALSE           1
#> 2 2010-05-01 0:15:00           0       0.302 FALSE           1
#> 3 2010-05-01 0:30:00           0       0.305 FALSE           1
#> 4 2010-05-01 0:45:00           0.2     0.308 TRUE           NA
#> 5 2010-05-01 1:00:00           0.2     0.312 TRUE           NA
#> 6 2010-05-01 1:15:00           0       0.317 FALSE           2

# Next, identify all the dry periods that lasted at least four hours
dry_periods |> 
  # add column that enumerates the observations, place it before 'Date' column
  mutate(observation = row_number(), .before = Date) |> 
  # find when each dry period begins and ends
  group_by(dry_period) |> 
  mutate(
    # set start value to earliest index for dry periods, NA for rainy ones
    period_start = if_else(!rained, min(observation), NA),
    # similarly for end value
    period_end = if_else(!rained, max(observation), NA),
    ) |> # view()
  # we no longer need to operate period-wise, so can ungroup table
  ungroup() |> 
  # calculate length of periods (in 15-minute intervals)
  mutate(period_length = period_end - period_start + 1) |> # view()
  # flag when period is at least four hours
  mutate(in_long_period = period_length >= 16) |>
  # flag 'dry spells' of four hours or more
  mutate(in_dry_spell = in_long_period & !rained) |> 
  # remove any columns that are no longer needed
  select(!contains(c('period'))) ->  dry_spells

dry_spells |> head()
#> # A tibble: 6 × 7
#>   observation Date       Time    Precipitation Discharge rained in_dry_spell
#>         <int> <chr>      <chr>           <dbl>     <dbl> <lgl>  <lgl>       
#> 1           1 2010-05-01 0:00:00           0       0.299 FALSE  FALSE       
#> 2           2 2010-05-01 0:15:00           0       0.302 FALSE  FALSE       
#> 3           3 2010-05-01 0:30:00           0       0.305 FALSE  FALSE       
#> 4           4 2010-05-01 0:45:00           0.2     0.308 TRUE   FALSE       
#> 5           5 2010-05-01 1:00:00           0.2     0.312 TRUE   FALSE       
#> 6           6 2010-05-01 1:15:00           0       0.317 FALSE  FALSE

# Now, notice that rain events are exactly the periods that fill the gaps
# before or after the dry spells, with one exception: if the data starts with a
# dry period that isn't long enough to be a dry spell, it should also be considered
# a dry spell so that it's not included as part of the following rain event.

dry_spells |> 
  # any observations that occur before any precipitation has been recorded
  # should be flagged as part of a dry spell 
  mutate(in_dry_spell = in_dry_spell | cumsum(rained) == 0) |> 
  # remove any columns that are no longer needed
  select(!contains(c('rain'))) ->  dry_spells

dry_spells |> head()
#> # A tibble: 6 × 6
#>   observation Date       Time    Precipitation Discharge in_dry_spell
#>         <int> <chr>      <chr>           <dbl>     <dbl> <lgl>       
#> 1           1 2010-05-01 0:00:00           0       0.299 TRUE        
#> 2           2 2010-05-01 0:15:00           0       0.302 TRUE        
#> 3           3 2010-05-01 0:30:00           0       0.305 TRUE        
#> 4           4 2010-05-01 0:45:00           0.2     0.308 FALSE       
#> 5           5 2010-05-01 1:00:00           0.2     0.312 FALSE       
#> 6           6 2010-05-01 1:15:00           0       0.317 FALSE

# Finally, since an observation is part of a rain event exactly when it isn't
# part dry spell, we can apply 'cryptid visit' approach to numbering rain
# events, too
dry_spells |> 
  mutate(rain_event = if_else(!in_dry_spell, cumsum(in_dry_spell), NA)) |> 
  mutate(rain_event = factor(rain_event) |> as.numeric()) -> rain_events

rain_events |> head()
#> # A tibble: 6 × 7
#>   observation Date       Time    Precipitation Discharge in_dry_spell rain_event
#>         <int> <chr>      <chr>           <dbl>     <dbl> <lgl>             <dbl>
#> 1           1 2010-05-01 0:00:00           0       0.299 TRUE                 NA
#> 2           2 2010-05-01 0:15:00           0       0.302 TRUE                 NA
#> 3           3 2010-05-01 0:30:00           0       0.305 TRUE                 NA
#> 4           4 2010-05-01 0:45:00           0.2     0.308 FALSE                 1
#> 5           5 2010-05-01 1:00:00           0.2     0.312 FALSE                 1
#> 6           6 2010-05-01 1:15:00           0       0.317 FALSE                 1

^{Created on 2024-04-15 with reprex v2.0.2}