Hi,
Im trying to detect and remove outliers from a data set with categorical and numeric value.
I need simple code using R studio.
I tried the code in this topic and it used to work with me. However, now when I run the code it gives an error.
The error I get:
Error in UseMethod("slice") :
no applicable method for 'slice' applied to an object of class "data.frame"
can I get a simple code that can do the job.
Thanks.
the code I'm using:
library(tidyverse)
df_to_clean <- NYC1
#the data.frame may contain more than only number columns,
# so determine the names of the number columns
(to_do <- df_to_clean %>%
select(where(is.numeric)) %>%
names())
# calculated first and 3rd quartile
calc_quants <- function(x) {
map(
to_do,
~ {
enframe(quantile(x[[.]], probs = c(.25, .75)),
value = .
)
}
) %>% reduce(left_join)
}
(inner_quartile_df <- calc_quants(df_to_clean))
# re organise quartile info
(iq_df2 <- inner_quartile_df %>%
rename(quantile = name) %>%
pivot_longer(cols = -"quantile") %>%
group_by(quantile) %>%
group_split())
(iqr_df <- left_join(iq_df2[[1]],
iq_df2[[2]],
by = "name"
) %>%
select(name, lower = value.x, upper = value.y) %>%
mutate(
iqr = upper - lower,
low_crit = lower - iqr * 1.5,
hi_crit = upper + iqr * 1.5
))
# for each column to process, determing the rows it would omit,
# collate these
(rows_to_omit <- map(
to_do,
~ {
ovec <- pull(
df_to_clean,
.x
)
criteria <- filter(iqr_df, name == .x)
which(!between(
x = ovec, left = criteria$low_crit,
right = criteria$hi_crit
))
}
) %>% unlist() %>% sort() %>% unique())
# finish
NYC3 <- df_to_clean %>%
slice(-rows_to_omit)