Why did filtering the exact date not work?

I ran into this simple issue but could not figure out why. filter for the exact date returned nothing. A reprex is below

library(dplyr)
library(lubridate)
packageVersion("dplyr")
#> [1] '1.0.10'
packageVersion("lubridate")
#> [1] '1.9.0'

DF <- structure(list(Date = structure(c(11224.5033, 11224.5033, 11224.5033, 
                                        11224.5033, 11224.5033, 11224.5033, 11224.5033, 11224.5033, 11224.5033, 
                                        11224.5033, 11224.5033, 11224.5033, 11224.5033, 11224.5033, 11225.5057, 
                                        11225.5057, 11225.5057, 11225.5057, 11225.5057, 11225.5057, 11225.5057, 
                                        11225.5057, 11225.5057, 11225.5057, 11225.5057, 11225.5057, 11225.5057, 
                                        11225.5057), class = "Date"), 
                     Z = c(42.65, 42.34, 41.73, 41.12, 
                           40.51, 39.9, 39.29, 38.68, 38.07, 37.46, 36.85, 36.24, 35.63, 
                           35.02, 42.65, 42.34, 41.73, 41.12, 40.51, 39.9, 39.29, 38.68, 
                           38.07, 37.46, 36.85, 36.24, 35.63, 35.02), 
                     Temp = c(7.65, 7.58, 
                              7.53, 7.54, 7.54, 7.49, 7.2, 6.58, 6.67, 6.68, 6.71, 6.86, 7.26, 
                              8.38, 7.62, 7.54, 7.48, 7.48, 7.48, 7.48, 7.48, 7.45, 6.64, 6.61, 
                              6.62, 6.75, 7.11, 8.28)), 
                row.names = c(NA, -28L), class = "data.frame")
DF
#>          Date     Z Temp
#> 1  2000-09-24 42.65 7.65
#> 2  2000-09-24 42.34 7.58
#> 3  2000-09-24 41.73 7.53
#> 4  2000-09-24 41.12 7.54
#> 5  2000-09-24 40.51 7.54
#> 6  2000-09-24 39.90 7.49
#> 7  2000-09-24 39.29 7.20
#> 8  2000-09-24 38.68 6.58
#> 9  2000-09-24 38.07 6.67
#> 10 2000-09-24 37.46 6.68
#> 11 2000-09-24 36.85 6.71
#> 12 2000-09-24 36.24 6.86
#> 13 2000-09-24 35.63 7.26
#> 14 2000-09-24 35.02 8.38
#> 15 2000-09-25 42.65 7.62
#> 16 2000-09-25 42.34 7.54
#> 17 2000-09-25 41.73 7.48
#> 18 2000-09-25 41.12 7.48
#> 19 2000-09-25 40.51 7.48
#> 20 2000-09-25 39.90 7.48
#> 21 2000-09-25 39.29 7.48
#> 22 2000-09-25 38.68 7.45
#> 23 2000-09-25 38.07 6.64
#> 24 2000-09-25 37.46 6.61
#> 25 2000-09-25 36.85 6.62
#> 26 2000-09-25 36.24 6.75
#> 27 2000-09-25 35.63 7.11
#> 28 2000-09-25 35.02 8.28

str(DF)
#> 'data.frame':    28 obs. of  3 variables:
#>  $ Date: Date, format: "2000-09-24" "2000-09-24" ...
#>  $ Z   : num  42.6 42.3 41.7 41.1 40.5 ...
#>  $ Temp: num  7.65 7.58 7.53 7.54 7.54 7.49 7.2 6.58 6.67 6.68 ...

selected_date <- "2000-09-24"

DF %>% 
  filter(Date == selected_date)
#> [1] Date Z    Temp
#> <0 rows> (or 0-length row.names)

DF %>% 
  filter(Date == as.Date(selected_date))
#> [1] Date Z    Temp
#> <0 rows> (or 0-length row.names)

DF %>% 
  filter(Date == ymd(selected_date))
#> [1] Date Z    Temp
#> <0 rows> (or 0-length row.names)
1 Like

This needs to be converted to a date object.

Thank you but that was done inside filter using as.Date or ymd

Hi @RuReady, I'm try to change the Date column to character for R can recognize the filter.

I'm not the full exactly responses about this but I see that if the columns has Date format, don't run the filter.

# whit base code
DF$Date <- as.character(DF$Date) # this step is very important.
 
filter_1 <- subset(DF,Date=="2000-09-24")

# with tidyverse mode
DF %>% 
  filter(Date == "2000-09-24")

1 Like

Something odd is happening that I didn't understand.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union


DF <- structure(
  list(
    # avoid using names of functions in namespace
    Date_ = structure(c(
      11224.5033, 11224.5033, 11224.5033,
      11224.5033, 11224.5033, 11224.5033, 11224.5033, 11224.5033, 11224.5033,
      11224.5033, 11224.5033, 11224.5033, 11224.5033, 11224.5033, 11225.5057,
      11225.5057, 11225.5057, 11225.5057, 11225.5057, 11225.5057, 11225.5057,
      11225.5057, 11225.5057, 11225.5057, 11225.5057, 11225.5057, 11225.5057,
      11225.5057
    ), class = "Date"),
    Z = c(
      42.65, 42.34, 41.73, 41.12,
      40.51, 39.9, 39.29, 38.68, 38.07, 37.46, 36.85, 36.24, 35.63,
      35.02, 42.65, 42.34, 41.73, 41.12, 40.51, 39.9, 39.29, 38.68,
      38.07, 37.46, 36.85, 36.24, 35.63, 35.02
    ),
    Temp = c(
      7.65, 7.58,
      7.53, 7.54, 7.54, 7.49, 7.2, 6.58, 6.67, 6.68, 6.71, 6.86, 7.26,
      8.38, 7.62, 7.54, 7.48, 7.48, 7.48, 7.48, 7.48, 7.45, 6.64, 6.61,
      6.62, 6.75, 7.11, 8.28
    )
  ),
  row.names = c(NA, -28L), class = "data.frame"
)

selected_date <- ymd("2000-09-24")

identical(DF[1,1],selected_date)
#> [1] FALSE

DF %>% filter(Date_ == selected_date)
#> [1] Date_ Z     Temp 
#> <0 rows> (or 0-length row.names)

DF %>% mutate(Date_ = ymd(Date_)) %>% filter(Date_ == selected_date)
#>         Date_     Z Temp
#> 1  2000-09-24 42.65 7.65
#> 2  2000-09-24 42.34 7.58
#> 3  2000-09-24 41.73 7.53
#> 4  2000-09-24 41.12 7.54
#> 5  2000-09-24 40.51 7.54
#> 6  2000-09-24 39.90 7.49
#> 7  2000-09-24 39.29 7.20
#> 8  2000-09-24 38.68 6.58
#> 9  2000-09-24 38.07 6.67
#> 10 2000-09-24 37.46 6.68
#> 11 2000-09-24 36.85 6.71
#> 12 2000-09-24 36.24 6.86
#> 13 2000-09-24 35.63 7.26
#> 14 2000-09-24 35.02 8.38

selected_date <- DF[1,1]
DF %>% filter(Date_ == selected_date)
#>         Date_     Z Temp
#> 1  2000-09-24 42.65 7.65
#> 2  2000-09-24 42.34 7.58
#> 3  2000-09-24 41.73 7.53
#> 4  2000-09-24 41.12 7.54
#> 5  2000-09-24 40.51 7.54
#> 6  2000-09-24 39.90 7.49
#> 7  2000-09-24 39.29 7.20
#> 8  2000-09-24 38.68 6.58
#> 9  2000-09-24 38.07 6.67
#> 10 2000-09-24 37.46 6.68
#> 11 2000-09-24 36.85 6.71
#> 12 2000-09-24 36.24 6.86
#> 13 2000-09-24 35.63 7.26
#> 14 2000-09-24 35.02 8.38

Created on 2022-12-19 by the reprex package (v2.0.1)

But then I noticed that the Date_ objects are not integers, which means they are converted with extra bits indicating H:M:S

library(lubridate)
entry <- 11224.5033
as_date(entry)
#> [1] "2000-09-24"
as_datetime(entry)
#> [1] "1970-01-01 03:07:04 UTC"
2 Likes

Thanks for the work around! I am more interested in why this happened though

Great find! This is definitely something unusual

I should’ve figured when I saw that typeof(DF) was double.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.