Thanks for posting the data. It shows that the columns started_at, ended_at, and ride_length are all characters. I wrote the data to a csv file and read it back with read_csv() from the readr package. The columns started_at and ended_at were still characters, but ride_length was read in as an hms time value. I then used the dmy_hm() function from lubridate to convert started_at and ended_at into POSIXct date-time values. Is that what you need?
``` r
# Rebuild the ten-row sample of ride data that was posted. Every column is
# written out as its own vector so the values are easy to scan; the three
# time-related columns (started_at, ended_at, ride_length) are plain strings.
DF <- local({
  ride_id <- c(
    "954144C2F67B1932", "292E027607D218B6", "57765852588AD6E0",
    "B5B6BE44314590E6", "A4C331F2A00E79E0", "579D73BE2ED880B3",
    "EFE518CCEE333669", "315FEBB7B3F6D2EA", "EE3C4A1E66766B56",
    "1EE6C93A547A187C"
  )
  rideable_type <- c(
    "classic_bike", "classic_bike", "classic_bike", "classic_bike",
    "classic_bike", "electric_bike", "classic_bike", "classic_bike",
    "classic_bike", "electric_bike"
  )
  started_at <- c(
    "5/7/2022 8:12", "26/7/2022 12:53", "3/7/2022 13:58", "31/7/2022 17:44",
    "13/7/2022 19:49", "1/7/2022 17:04", "18/7/2022 18:11",
    "28/7/2022 20:38", "10/7/2022 22:55", "10/7/2022 9:35"
  )
  ended_at <- c(
    "5/7/2022 8:24", "26/7/2022 12:55", "3/7/2022 14:06", "31/7/2022 18:42",
    "13/7/2022 20:15", "1/7/2022 17:13", "18/7/2022 18:22",
    "28/7/2022 21:09", "10/7/2022 23:01", "10/7/2022 9:47"
  )
  start_station_name <- c(
    "ashland ave & blackhawk st",
    "buckingham fountain (temp)",
    "buckingham fountain (temp)",
    "buckingham fountain (temp)",
    "wabash ave & grand ave",
    "desplaines st & randolph st",
    "marquette ave & 89th st",
    "wabash ave & grand ave",
    "wabash ave & grand ave",
    "ashland ave & blackhawk st"
  )
  end_station_name <- c(
    "kingsbury st & kinzie st",
    "michigan ave & 8th st",
    "michigan ave & 8th st",
    "woodlawn ave & 55th st",
    "sheffield ave & wellington ave",
    "clinton st & roosevelt rd",
    "east end ave & 87th st",
    "dearborn pkwy & delaware pl",
    "dearborn pkwy & delaware pl",
    "orleans st & merchandise mart plaza"
  )
  start_lat <- c(
    41.907066, 41.86962075, 41.86962075, 41.86962075, 41.891466,
    41.88461411, 41.73366879, 41.891466, 41.891466, 41.90709305
  )
  start_lng <- c(
    -87.667252, -87.62398124, -87.62398124, -87.62398124, -87.626761,
    -87.64456356, -87.55834222, -87.626761, -87.626761, -87.6672473
  )
  end_lat <- c(
    41.88917683, 41.872773, 41.872773, 41.795264, 41.93625348,
    41.86711778, 41.73681521, 41.898969, 41.898969, 41.888243
  )
  end_lng <- c(
    -87.63850577, -87.623981, -87.623981, -87.596471, -87.6526621,
    -87.64108796, -87.58280128, -87.629912, -87.629912, -87.63639
  )
  member_casual <- c(
    "member", "casual", "casual", "casual", "member",
    "member", "member", "casual", "member", "member"
  )
  ride_length <- c(
    "00:11:45", "00:01:53", "00:07:43", "00:58:29", "00:26:18",
    "00:08:43", "00:11:29", "00:30:53", "00:05:33", "00:11:27"
  )
  day_of_week <- c(3, 3, 1, 1, 4, 6, 2, 5, 1, 1)

  # Assemble the columns and tag the result with the tibble classes, using
  # the compact row-name form for ten rows.
  structure(
    list(
      ride_id = ride_id,
      rideable_type = rideable_type,
      started_at = started_at,
      ended_at = ended_at,
      start_station_name = start_station_name,
      end_station_name = end_station_name,
      start_lat = start_lat,
      start_lng = start_lng,
      end_lat = end_lat,
      end_lng = end_lng,
      member_casual = member_casual,
      ride_length = ride_length,
      day_of_week = day_of_week
    ),
    row.names = c(NA, -10L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})
# Show the column types of the data as posted.
str(DF)
#> Classes 'tbl_df', 'tbl' and 'data.frame': 10 obs. of 13 variables:
#> $ ride_id : chr "954144C2F67B1932" "292E027607D218B6" "57765852588AD6E0" "B5B6BE44314590E6" ...
#> $ rideable_type : chr "classic_bike" "classic_bike" "classic_bike" "classic_bike" ...
#> $ started_at : chr "5/7/2022 8:12" "26/7/2022 12:53" "3/7/2022 13:58" "31/7/2022 17:44" ...
#> $ ended_at : chr "5/7/2022 8:24" "26/7/2022 12:55" "3/7/2022 14:06" "31/7/2022 18:42" ...
#> $ start_station_name: chr "ashland ave & blackhawk st" "buckingham fountain (temp)" "buckingham fountain (temp)" "buckingham fountain (temp)" ...
#> $ end_station_name : chr "kingsbury st & kinzie st" "michigan ave & 8th st" "michigan ave & 8th st" "woodlawn ave & 55th st" ...
#> $ start_lat : num 41.9 41.9 41.9 41.9 41.9 ...
#> $ start_lng : num -87.7 -87.6 -87.6 -87.6 -87.6 ...
#> $ end_lat : num 41.9 41.9 41.9 41.8 41.9 ...
#> $ end_lng : num -87.6 -87.6 -87.6 -87.6 -87.7 ...
#> $ member_casual : chr "member" "casual" "casual" "casual" ...
#> $ ride_length : chr "00:11:45" "00:01:53" "00:07:43" "00:58:29" ...
#> $ day_of_week : num 3 3 1 1 4 6 2 5 1 1
# Round-trip the data through a CSV file so readr can guess the column types.
# A temporary file keeps the example runnable on any machine; a hard-coded
# path under the home directory only works where that folder already exists.
csv_path <- tempfile(fileext = ".csv")
write.csv(DF, csv_path, row.names = FALSE)
DF_IN <- readr::read_csv(csv_path)
#> Rows: 10 Columns: 13
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (7): ride_id, rideable_type, started_at, ended_at, start_station_name, ...
#> dbl (5): start_lat, start_lng, end_lat, end_lng, day_of_week
#> time (1): ride_length
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the column types readr assigned when reading the CSV back in.
str(DF_IN)
#> spc_tbl_ [10 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
#> $ ride_id : chr [1:10] "954144C2F67B1932" "292E027607D218B6" "57765852588AD6E0" "B5B6BE44314590E6" ...
#> $ rideable_type : chr [1:10] "classic_bike" "classic_bike" "classic_bike" "classic_bike" ...
#> $ started_at : chr [1:10] "5/7/2022 8:12" "26/7/2022 12:53" "3/7/2022 13:58" "31/7/2022 17:44" ...
#> $ ended_at : chr [1:10] "5/7/2022 8:24" "26/7/2022 12:55" "3/7/2022 14:06" "31/7/2022 18:42" ...
#> $ start_station_name: chr [1:10] "ashland ave & blackhawk st" "buckingham fountain (temp)" "buckingham fountain (temp)" "buckingham fountain (temp)" ...
#> $ end_station_name : chr [1:10] "kingsbury st & kinzie st" "michigan ave & 8th st" "michigan ave & 8th st" "woodlawn ave & 55th st" ...
#> $ start_lat : num [1:10] 41.9 41.9 41.9 41.9 41.9 ...
#> $ start_lng : num [1:10] -87.7 -87.6 -87.6 -87.6 -87.6 ...
#> $ end_lat : num [1:10] 41.9 41.9 41.9 41.8 41.9 ...
#> $ end_lng : num [1:10] -87.6 -87.6 -87.6 -87.6 -87.7 ...
#> $ member_casual : chr [1:10] "member" "casual" "casual" "casual" ...
#> $ ride_length : 'hms' num [1:10] 00:11:45 00:01:53 00:07:43 00:58:29 ...
#> ..- attr(*, "units")= chr "secs"
#> $ day_of_week : num [1:10] 3 3 1 1 4 6 2 5 1 1
#> - attr(*, "spec")=
#> .. cols(
#> .. ride_id = col_character(),
#> .. rideable_type = col_character(),
#> .. started_at = col_character(),
#> .. ended_at = col_character(),
#> .. start_station_name = col_character(),
#> .. end_station_name = col_character(),
#> .. start_lat = col_double(),
#> .. start_lng = col_double(),
#> .. end_lat = col_double(),
#> .. end_lng = col_double(),
#> .. member_casual = col_character(),
#> .. ride_length = col_time(format = ""),
#> .. day_of_week = col_double()
#> .. )
#> - attr(*, "problems")=<externalptr>
library(dplyr)
library(lubridate)
# Convert both day/month/year hour:minute strings to POSIXct in one step.
# NOTE(review): dmy_hm() is called without a tz argument, so lubridate's
# default time zone applies - confirm that matches how the times were recorded.
DF_IN <- DF_IN |>
  mutate(across(c(started_at, ended_at), dmy_hm))
# Check the resulting column types.
str(DF_IN)
#> tibble [10 × 13] (S3: tbl_df/tbl/data.frame)
#> $ ride_id : chr [1:10] "954144C2F67B1932" "292E027607D218B6" "57765852588AD6E0" "B5B6BE44314590E6" ...
#> $ rideable_type : chr [1:10] "classic_bike" "classic_bike" "classic_bike" "classic_bike" ...
#> $ started_at : POSIXct[1:10], format: "2022-07-05 08:12:00" "2022-07-26 12:53:00" ...
#> $ ended_at : POSIXct[1:10], format: "2022-07-05 08:24:00" "2022-07-26 12:55:00" ...
#> $ start_station_name: chr [1:10] "ashland ave & blackhawk st" "buckingham fountain (temp)" "buckingham fountain (temp)" "buckingham fountain (temp)" ...
#> $ end_station_name : chr [1:10] "kingsbury st & kinzie st" "michigan ave & 8th st" "michigan ave & 8th st" "woodlawn ave & 55th st" ...
#> $ start_lat : num [1:10] 41.9 41.9 41.9 41.9 41.9 ...
#> $ start_lng : num [1:10] -87.7 -87.6 -87.6 -87.6 -87.6 ...
#> $ end_lat : num [1:10] 41.9 41.9 41.9 41.8 41.9 ...
#> $ end_lng : num [1:10] -87.6 -87.6 -87.6 -87.6 -87.7 ...
#> $ member_casual : chr [1:10] "member" "casual" "casual" "casual" ...
#> $ ride_length : 'hms' num [1:10] 00:11:45 00:01:53 00:07:43 00:58:29 ...
#> ..- attr(*, "units")= chr "secs"
#> $ day_of_week : num [1:10] 3 3 1 1 4 6 2 5 1 1
```

Created on 2023-08-09 with reprex v2.0.2