using a data set to pass column names to pmap - rlang problem?

library(tidyverse)


# My data set has 44 factors, some nominal and some ordinal. I want to calculate two-way measures of association between each pair of variables. As an example I'll use Cramér's V, which takes two nominal factors as input.

# Example data
# HairEyeColor has only 3 factors and is stored as a frequency table, so it is
# converted to a tibble, its character columns are turned into factors, and the
# counts in `n` are expanded to 1 row per case.
many_factors <- HairEyeColor |>
  as_tibble() |>
  mutate(across(where(is.character), \(col) as_factor(col))) |>
  uncount(weights = n)

# Cramér's V for one explicitly named pair of factors works as expected.
rcompanion::cramerV(
  x = many_factors[["Hair"]],
  y = many_factors[["Eye"]],
  digits = 2,
  R = 1
)
#> Cramer V 
#>     0.28


# How do I do it for all the pairs of factors in my data set?

# create a data set containing all pairs of factors
# Names of the factor columns that have at least 2 levels actually present in
# the data set. cramerV() goes through chisq.test(), which rejects inputs with
# fewer than 2 levels, so such columns are dropped here rather than merely
# counted.
factor_names <- many_factors |>
  select(where(\(x) is.factor(x) && nlevels(droplevels(x)) >= 2L)) |>
  names()

# Prefix every factor name with "many_factors$", giving text such as
# "many_factors$Eye"; both columns of the pair table start from the same vector.
factor1 <- paste0("many_factors$", factor_names)
factor2 <- factor1

# All ordered combinations of the two vectors, minus the self-pairs.
two_names <- filter(
  crossing(factor1, factor2),
  factor1 != factor2
)

two_names
#> # A tibble: 6 × 2
#>   factor1           factor2          
#>   <chr>             <chr>            
#> 1 many_factors$Eye  many_factors$Hair
#> 2 many_factors$Eye  many_factors$Sex 
#> 3 many_factors$Hair many_factors$Eye 
#> 4 many_factors$Hair many_factors$Sex 
#> 5 many_factors$Sex  many_factors$Eye 
#> 6 many_factors$Sex  many_factors$Hair


# Something is wrong here: this call fails with the error shown below.
# Intent: calculate the measure of association for each pair.
#
# Why it fails: the `factor1` and `factor2` columns of `two_names` hold
# character strings such as "many_factors$Eye" (built with paste0()). Each call
# to cramerV() therefore receives two length-one strings, not the factor
# columns those strings name, and chisq.test() rejects them for having fewer
# than 2 levels. A string is never evaluated as code; look the column up by its
# name instead, e.g. many_factors[["Eye"]].
final <- two_names |>
  mutate(cramerV = pmap_dbl(list(factor1, factor2),
                            # `factor1` / `factor2` here are single strings
                            # taken row by row from the two columns above.
                            \(factor1, factor2) rcompanion::cramerV(
                              x = factor1,
                              y = factor2,
                              digits = 2,
                              R = 1
                            )))
#> Error in `mutate()`:
#> ℹ In argument: `cramerV = pmap_dbl(...)`.
#> Caused by error in `pmap_dbl()`:
#> ℹ In index: 1.
#> Caused by error in `chisq.test()`:
#> ! 'x' and 'y' must have at least 2 levels

Created on 2024-02-25 with reprex v2.1.0

After your `factor_names <-` assignment, you can proceed as follows:

# Get every unordered pair of factor names as a list of length-2 character
# vectors; the outer parentheses print the result.
(factor_name_combinations <- factor_names |>
  combn(m = 2, simplify = FALSE))

# Label each pair "<first>_<second>" so its result can be identified later.
names(factor_name_combinations) <- map_chr(
  factor_name_combinations,
  \(pair) paste0(pair[[1]], "_", pair[[2]])
)

# Take a look at the named pairs.
factor_name_combinations


# Compute Cramér's V for each pair by looking the two columns up by name with
# `[[`, then reshape the named numeric vector into a name/value tibble. The
# outer parentheses print the result.
(results <- factor_name_combinations |>
  map_dbl(\(pair) {
    rcompanion::cramerV(
      x = many_factors[[pair[[1]]]],
      y = many_factors[[pair[[2]]]],
      digits = 2,
      R = 1
    )
  }) |>
  enframe())

1 Like

Brilliant! Thanks very much.
Helen

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.