I have a data set column which contains data in hour_minuet_seconds (Ex-03:20:00)format. I want to fill the values in the column with the mean value of the column.
like this example?
library(dplyr)
library(lubridate)
df <- data.frame(
id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L),
time = c("07:45:00", "09:45:00", NA, "08:45:00", "23:50:00",
"07:45:00", "16:45:00")
)
df %>%
mutate(time = as.numeric(hms(time)),
time = if_else(is.na(time), mean(time, na.rm=TRUE), time),
time = seconds_to_period(time))
#> id time
#> 1 1 7H 45M 0S
#> 2 2 9H 45M 0S
#> 3 3 12H 25M 50S
#> 4 4 8H 45M 0S
#> 5 5 23H 50M 0S
#> 6 6 7H 45M 0S
#> 7 7 16H 45M 0S
Created on 2019-09-06 by the reprex package (v0.3.0.9000)
2 Likes
Another solution:
df <- data.frame(id = 1:7,
time = c("07:45:00", "09:45:00", NA, "08:45:00", "23:50:00", "07:45:00", "16:45:00"),
stringsAsFactors = FALSE)
within(data = df,
expr =
{
time <- replace(x = time,
list = is.na(x = time),
values = format(x = mean(x = as.POSIXct(x = time,
format = "%H:%M:%S"),
na.rm = TRUE),
format = "%H:%M:%S"))
})
#> id time
#> 1 1 07:45:00
#> 2 2 09:45:00
#> 3 3 12:25:50
#> 4 4 08:45:00
#> 5 5 23:50:00
#> 6 6 07:45:00
#> 7 7 16:45:00
Created on 2019-09-07 by the reprex package (v0.3.0)
1 Like
One more alternative using the lesser known chron package along with dplyr. Personally, I think this is more concise and easier to read and interpret.
library(dplyr)
df <- data.frame(
id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L),
time = c("07:45:00", "09:45:00", NA, "08:45:00", "23:50:00", "07:45:00", "16:45:00")
)
df %>%
mutate(
time = chron::times(time),
time = if_else(is.na(time), mean(time), time)
)
#> id time
#> 1 1 07:45:00
#> 2 2 09:45:00
#> 3 3 12:25:50
#> 4 4 08:45:00
#> 5 5 23:50:00
#> 6 6 07:45:00
#> 7 7 16:45:00
2 Likes
This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.