library(tidyverse)
# My data set has 44 factors. Some are nominal and some ordinal. I want to calculate 2-way measures of association between each pair of variables. I'll use Cramers V as an example which takes as input 2 nominal factors.
# Example data
# I'm using a data set with only 3 factors as an example. It's stored as a frequency table so I'm expanding it to 1 row per case.
many_factors <- HairEyeColor |>
as_tibble() |>
mutate(across(where(is.character), as_factor)) |>
uncount(n)
# This works for 2 named factors
rcompanion::cramerV(
x = many_factors$Hair,
y = many_factors$Eye,
digits = 2,
R = 1
)
#> Cramer V
#> 0.28
# How do I do it for all the pairs of factors in my data set?
# create a data set containing all pairs of factors
factor_names <- many_factors |>
select(where(is.factor)) |>
# Checking there are at least 2 levels of each factor in the data set2
summarise(across(everything(), \(x) n_distinct(x))) |>
names()
factor1 <- paste0("many_factors$", factor_names)
factor2 <- factor1
two_names <- crossing(factor1, factor2) |>
filter(factor1 != factor2)
two_names
#> # A tibble: 6 × 2
#> factor1 factor2
#> <chr> <chr>
#> 1 many_factors$Eye many_factors$Hair
#> 2 many_factors$Eye many_factors$Sex
#> 3 many_factors$Hair many_factors$Eye
#> 4 many_factors$Hair many_factors$Sex
#> 5 many_factors$Sex many_factors$Eye
#> 6 many_factors$Sex many_factors$Hair
# something wrong here
# calculate the measure of association for each pair
final <- two_names |>
mutate(cramerV = pmap_dbl(list(factor1, factor2),
\(factor1, factor2) rcompanion::cramerV(
x = factor1,
y = factor2,
digits = 2,
R = 1
)))
#> Error in `mutate()`:
#> ℹ In argument: `cramerV = pmap_dbl(...)`.
#> Caused by error in `pmap_dbl()`:
#> ℹ In index: 1.
#> Caused by error in `chisq.test()`:
#> ! 'x' and 'y' must have at least 2 levels
Created on 2024-02-25 with reprex v2.1.0