Hello,
I am trying to conduct permutation test by category. In the example I am only able to do it for one category at a time.
Instead of manually doing it for each category by filtering how can I do so for all categories?
My end goal is to have a data frame with the category, the observed difference and p-values for each test.
Example:
# load packages
library('coin')
library('tidyverse')
library('purrr')
library('broom')
# set seed
set.seed(2000)
# generate data
d = tibble(value = rnorm(100),
category = sample(1:5, replace = TRUE, 100),
group = sample(c('ctrl', 'treat'), replace = TRUE, 100)) %>%
arrange(category)
# view data
d
# filter by category for category-wise permutation test of independence
d2 <- d %>% filter(category == 1)
# independence test using coin package
independence_test(value ~ as.factor(group), data = d2, distribution = approximate())
Output:
> # load packages
> library('coin')
> library('tidyverse')
> library('purrr')
> library('broom')
>
> # set seed
> set.seed(2000)
>
> # generate data
> d = tibble(value = rnorm(100),
+ category = sample(1:5, replace = TRUE, 100),
+ group = sample(c('ctrl', 'treat'), replace = TRUE, 100)) %>%
+ arrange(category)
> # view data
> d
# A tibble: 100 × 3
value category group
<dbl> <int> <chr>
1 -0.353 1 treat
2 0.916 1 ctrl
3 -0.192 1 ctrl
4 -1.17 1 treat
5 -0.207 1 ctrl
6 0.374 1 treat
7 -0.404 1 ctrl
8 1.55 1 treat
9 -1.18 1 treat
10 -0.277 1 treat
# … with 90 more rows
# ℹ Use `print(n = ...)` to see more rows
>
> # filter by category for category-wise permutation test of independence
> d2 <- d %>% filter(category == 1)
>
> # independence test using coin package
> independence_test(value ~ as.factor(group), data = d2, distribution = approximate())
Approximative General Independence Test
data: value by as.factor(group) (ctrl, treat)
Z = -0.44373, p-value = 0.6768
alternative hypothesis: two.sided
At the moment I have some notion that I should be nesting the data by category and then mapping the coin::independence_test with purr:
# nest data by category
d2 <- d %>%
group_by(category) %>%
nest()
d2
Output
> d2
# A tibble: 5 × 2
# Groups: category [5]
category data
<int> <list>
1 1 <tibble [16 × 2]>
2 2 <tibble [26 × 2]>
3 3 <tibble [22 × 2]>
4 4 <tibble [15 × 2]>
5 5 <tibble [21 × 2]>
However, I am having trouble with mapping the inputs required by coin::independence_test to map()
I would appreciate any help and insights into what I'm missing.
Edit: forgot to mention I am performing a t.test on the same data as below:
# Welch t.test
d3 <- d %>%
group_by(category, group) %>%
nest() %>%
pivot_wider(names_from = group, values_from = data) %>%
# depreciated: spread(key = treatment, value = data) %>%
mutate(
t_test = map2(ctrl, treat, ~{t.test(.x$value, .y$value) %>% tidy()}),
ctrl = map(ctrl, nrow),
treat = map(treat, nrow)
) %>%
unnest(cols = c(ctrl, treat, t_test))
d3
Ouput
> # Welch t.test
> d3 <- d %>%
+ group_by(category, group) %>%
+ nest() %>%
+ pivot_wider(names_from = group, values_from = data) %>%
+ # depreciated: spread(key = treatment, value = data) %>%
+ mutate(
+ t_test = map2(ctrl, treat, ~{t.test(.x$value, .y$value) %>% tidy()}),
+ ctrl = map(ctrl, nrow),
+ treat = map(treat, nrow)
+ ) %>%
+ unnest(cols = c(ctrl, treat, t_test))
>
> d3
# A tibble: 5 × 13
# Groups: category [5]
category treat ctrl estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alter…¹
<int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 1 10 6 -0.169 -0.193 -0.0245 -0.475 0.642 13.6 -0.934 0.596 Welch Two Sample t… two.si…
2 2 13 13 0.272 0.682 0.409 0.873 0.392 22.9 -0.374 0.918 Welch Two Sample t… two.si…
3 3 11 11 -0.478 -0.568 -0.0895 -1.22 0.238 20.0 -1.30 0.342 Welch Two Sample t… two.si…
4 4 4 11 -0.922 -0.359 0.563 -1.23 0.282 4.16 -2.97 1.12 Welch Two Sample t… two.si…
5 5 15 6 0.281 0.0919 -0.190 0.696 0.498 14.0 -0.585 1.15 Welch Two Sample t… two.si…
# … with abbreviated variable name ¹alternative
EDIT:
Editing this post to add in my eventual solution (thank you to everyone for the discussion in the answers) as the replies have been locked:
d3 <- d %>%
group_by(category) %>%
nest() %>%
mutate(
perm_test = map(.x = data, .f = ~pvalue(independence_test(.x$value ~ as.factor(.x$group), data = data)))
) %>%
unnest(cols = c(category, perm_test))
View(d3)