Why doesn't tbl_summary work?

I have this df

>   head(df)
# A tibble: 6 × 18
  fecha_de_realizacion grupo a_usted_o_alguien_muy_cercano_han…¹ b_usted_participa_de…² c_usted_trabaja_o_es…³
  <dttm>               <chr> <chr>                               <chr>                  <chr>                 
1 2024-07-03 00:00:00  1     No                                  No                     No                    
2 2024-07-03 00:00:00  1     No                                  No                     No                    
3 2024-06-03 00:00:00  1     Si                                  No                     No                    
4 2024-07-03 00:00:00  1     No                                  No                     No                    
5 2024-07-03 00:00:00  2     No                                  No                     No                    
6 2024-03-03 00:00:00  2     Si                                  No                     No         

names(df)
[1] "fecha_de_realizacion"                                                                                                    
 [2] "grupo"                                                                                                                   
 [3] "a_usted_o_alguien_muy_cercano_han_tenido_en_los_ultimos_5_anos_algun_tipo_de_cancer_enfermedad_grave"                    
 [4] "b_usted_participa_de_alguna_institucion_asociacion_ong_o_fundacion_vinculadas_a_la_lucha_con_el_cancer"                  
 [5] "c_usted_trabaja_o_es_parte_de_alguna_empresa_farmaceutica"                                                               
 [6] "d_participa_o_trabaja_en_el_instituto_nacional_del_cancer_o_en_otra_institucion_del_sistema_de_salud"                    
 [7] "p2_usted_es"                                                                                                             
 [8] "p3_cual_es_su_ocupacion_actual_a_que_se_dedica"                                                                          
 [9] "p4_cual_es_el_maximo_nivel_educativo_alcanzado_por_ud"                                                                   
[10] "y_este_nivel_lo_completo"                                                                                                
[11] "p5_que_cobertura_de_salud_posee"                                                                                         
[12] "otra_opcion"                                                                                                             
[13] "p6_alguna_vez_le_realizaron_un_papanicolaou_pap"                                                                         
[14] "p7_alguna_vez_realizo_el_test_de_hpv_vph"                                                                                
[15] "p8_recuerda_cuando_realizo_el_ultimo_control_ginecologico"                                                               
[16] "p10_utiliza_internet_en_el_telefono_celular"                                                                             
[17] "p11_con_que_frecuencia_utiliza_usted_internet_en_su_celular"                                                             
[18] "p14_y_utiliza_apps_de_temas_de_salud_por_ejemplo_registrar_la_menstruacion_para_hacer_ejercicio_o_control_del_peso_otras"

And I tried this code:

df %>%
  mutate_all(as.factor) %>% 
  tbl_summary(by= grupo,
              missing = "always",
              statistic = list(all_categorical() ~ "{n} {p}"),
              missing_text= "Casos perdidos",
              digits = list(all_categorical() ~ c(0,1)))

And it gave me this error:

Error en names(df) <- repaired_names(c(names2(dimnames(x)), n), repair_hint = TRUE, : 
  'names' attribute [3] must be the same length as the vector [2]

I edited the post to add a reprex:

> dput(df)
structure(list(fecha_de_realizacion = structure(c(1719964800, 
1719964800, 1717372800, 1719964800, 1719964800, 1709424000, 1719964800, 
1719964800, 1719964800, 1719964800, 1719964800, 1720051200, 1720051200, 
NA, 1720051200, 1720051200, 1720051200, NA, 1720051200, 1720051200, 
1720051200, 1720051200, NA, NA, NA, NA, 1720051200, NA), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), grupo = c("1", "1", "1", "1", "2", "2", "2", "2", 
"2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4", 
"4", "4", "4", "4", "4", "4", "4"), a_usted_o_alguien_muy_cercano_han_tenido_en_los_ultimos_5_anos_algun_tipo_de_cancer_enfermedad_grave = c("No", 
"No", "Si", "No", "No", "Si", "No", "Si", "Si", "No", NA, "No", 
"No", "No lo sabe", "No", "Si", "No", "No", "No", "No", "No", 
"No", "No", "Si", "No", "No", "No", "No contesta"), b_usted_participa_de_alguna_institucion_asociacion_ong_o_fundacion_vinculadas_a_la_lucha_con_el_cancer = c("No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "Si", "No", "No", "No", "No", 
"No", "No", "No", "Si", "No"), c_usted_trabaja_o_es_parte_de_alguna_empresa_farmaceutica = c("No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "Si", "No", "No", "No", "No", 
"No", "No", "No", "No", "No"), d_participa_o_trabaja_en_el_instituto_nacional_del_cancer_o_en_otra_institucion_del_sistema_de_salud = c("No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No", NA, NA, 
"No", "No", "No", "No", "No", "No", "Si", "No", "No", "No", "No", 
"No", "No", "No", "No", "No"), p2_usted_es = c("Mujer", "Mujer", 
"Mujer", "Mujer", "Mujer", "Mujer", "Mujer", "Mujer", "Mujer", 
"Mujer", "Mujer", "Mujer", "Mujer", "Mujer", "Mujer", "Mujer", 
"Mujer", "Mujer", "Mujer", "Mujer", "Mujer", "Mujer", "Mujer", 
"Mujer", "Mujer", "Mujer", "Mujer", "Mujer"), p3_cual_es_su_ocupacion_actual_a_que_se_dedica = c("Desempleada", 
"Limpieza", "Desocupada. Ama de casa", "Monotributista autonomo", 
"Masajista y Cocinera", NA, "Ama de casa", "Empleada de comercio", 
"Empleada", "Empleada domestica", "Niñera, vendedora Natura", 
"Ama de casa", "Cuidadora de adultos mayores", "Trabajo casa de familia", 
"Empleada domestica", "Trabajo por mi cuenta", "Empleada", "Vendedora de cosméticos", 
"Empleada domestica", "Vendedora por mi cuenta", "Comerciante. Ama de casa", 
"Chofer aplicación", "Desempleada", "Estetica/Manicura", "Repostera", 
"Independiente artesana", "Costurera", "Ama de casa"), p4_cual_es_el_maximo_nivel_educativo_alcanzado_por_ud = c("Primario/EGB", 
"Primario/EGB", "Secundario/Polimodal", "Primario/EGB", "Secundario/Polimodal", 
"Secundario/Polimodal", "Secundario/Polimodal", "Secundario/Polimodal", 
"Secundario/Polimodal", "Secundario/Polimodal", "Secundario/Polimodal", 
"Primario/EGB", "Primario/EGB", "Primario/EGB", "Primario/EGB", 
"Primario/EGB", "Secundario/Polimodal", "Primario/EGB", "Primario/EGB", 
"Secundario/Polimodal", "Secundario/Polimodal", "Secundario/Polimodal", 
"Secundario/Polimodal", "Secundario/Polimodal", "Secundario/Polimodal", 
"Secundario/Polimodal", "Secundario/Polimodal", "Secundario/Polimodal"
), y_este_nivel_lo_completo = c("Si", "Si", "No", "Si", "No", 
NA, "No", "Si", "No", "No", "Si", NA, NA, "Si", "No", NA, "Si", 
"Si", NA, "No", "Si", "No", "Si", "Si", "Si", "Si", "No", "No"
), p5_que_cobertura_de_salud_posee = c("Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Obra social Empresa de salud prepaga, plan hospitalario privado", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico", 
"Utiliza el hospital publico, salita, centro de salud publico"
), otra_opcion = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA), p6_alguna_vez_le_realizaron_un_papanicolaou_pap = c("Sí", 
"Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", 
"Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", 
"Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "No lo sabe"
), p7_alguna_vez_realizo_el_test_de_hpv_vph = c("Sí", "Sí", 
"No lo sabe", "Sí", "Sí", "Sí", "No lo sabe", "No lo sabe", 
"No", "No", "No", "No", "No", "Sí", "Sí", "Sí", "No", "Sí", 
"No", "Sí", "No", "No", "No", "No", "Sí", "No", "Sí", "No lo sabe"
), p8_recuerda_cuando_realizo_el_ultimo_control_ginecologico = c("Ultimo año", 
"Nunca realizo", "Ultimo año", "Ultimo año", "Ultimo año", 
"Ultimo año", "Mas de un año", "Mas de un año", "Mas de un año", 
"Mas de un año", "Ultimo año", "Mas de un año", "Mas de un año", 
"Ultimo año", "Mas de un año", "Ultimo año", "Ultimo año", 
"Ultimo año", "Mas de un año", "Ultimo año", "Mas de un año", 
"Mas de un año", "Ultimo año", "Mas de un año", "Ultimo año", 
"Mas de un año", "Ultimo año", "Ultimo año"), p10_utiliza_internet_en_el_telefono_celular = c("Sí", 
"Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", 
"Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", 
"Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí", "Sí"
), p11_con_que_frecuencia_utiliza_usted_internet_en_su_celular = c("Todos los días", 
"Todos los días", "Todos los días", "Todos los días", "Todos los días", 
"Todos los días", "Todos los días", "Todos los días", "Todos los días", 
"Todos los días", "Todos los días", "Todos los días", "Mas los días de la semana (L a V)", 
"Todos los días", "Todos los días", "Todos los días", "Todos los días", 
"Todos los días", "Todos los días", "Todos los días", "Todos los días", 
"Todos los días", "Todos los días", "Todos los días", "Todos los días", 
"Todos los días", "Todos los días", "Todos los días"), p14_y_utiliza_apps_de_temas_de_salud_por_ejemplo_registrar_la_menstruacion_para_hacer_ejercicio_o_control_del_peso_otras = c("No", 
"No lo sabe", "Sí", "No", "Sí", "Sí", "Sí", "No", "Sí", 
"Sí", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
"Sí", "No", "Sí", "No", "No", "No", "Sí", "No", "No")), row.names = c(NA, 
-28L), class = c("tbl_df", "tbl", "data.frame"))

Does anyone know why?
If I remove some variables from the df it works, but that doesn't make sense because tbl_summary should work with any number of variables.

This code works on a data.frame I made up.
I have no idea how to change my data.frame to make it fail...

Could you provide a reprex?

To do this, share the output of the following code:

dput(head(df, 10))

Hi, I just edited the post to add a reprex.

Hi, just did it! So now there is a reprex

Have you loaded the {gtsummary} package?

I think this is because the value for otra_opcion is always missing. I can't be certain but excluding that column yields code that works. Here's a much simpler example to show this. I recommend reporting this as a bug.

library(tidyverse)
library(gtsummary)

mydf <- tibble(
  Group=rep(1:4, each=20),
  CatValid=as.factor(sample(letters[1:4], 80, replace=TRUE)),
  CatNA=factor(NA)
)

mydf %>%
  select(-CatNA) %>%
  tbl_summary(by="Group")
Characteristic 1, N = 20¹ 2, N = 20¹ 3, N = 20¹ 4, N = 20¹
CatValid
a 5 (25%) 1 (5.0%) 3 (15%) 7 (35%)
b 5 (25%) 4 (20%) 7 (35%) 5 (25%)
c 6 (30%) 5 (25%) 6 (30%) 4 (20%)
d 4 (20%) 10 (50%) 4 (20%) 4 (20%)
¹ n (%)

mydf %>%
  tbl_summary(by="Group")
#> Error in names(df) <- repaired_names(c(names2(dimnames(x)), n), repair_hint = TRUE, : 'names' attribute [3] must be the same length as the vector [2]

Created on 2024-09-13 with reprex v2.1.0

Thank you @StatSteph for the minimal reprex and posting an issue to the gtsummary GH repo.

@juandmaz You can read more details here (Bug Report: Variable with all NA levels causes error · Issue #1973 · ddsjoberg/gtsummary · GitHub) about the issue. Essentially, gtsummary will not summarize factor variables when either the levels are empty or any of the levels are NA.

You'll need to specify the factor levels to get a column of all NAs to tabulate (otherwise, there are no levels to count).

library(gtsummary)
set.seed(8675309)

mydf <- dplyr::tibble(
  Group=rep(1:4, each=20),
  CatValid=as.factor(sample(letters[1:4], 80, replace=TRUE)),
  CatNA=factor(NA, levels = c("Hello", "There"))
)

tbl_summary(mydf, by = Group) |> 
  bold_labels() |> 
  as_kable()
Characteristic 1 N = 20 2 N = 20 3 N = 20 4 N = 20
CatValid
a 5 (25%) 3 (15%) 4 (20%) 4 (20%)
b 4 (20%) 8 (40%) 6 (30%) 4 (20%)
c 8 (40%) 4 (20%) 6 (30%) 6 (30%)
d 3 (15%) 5 (25%) 4 (20%) 6 (30%)
CatNA
Hello 0 (NA%) 0 (NA%) 0 (NA%) 0 (NA%)
There 0 (NA%) 0 (NA%) 0 (NA%) 0 (NA%)
Unknown 20 20 20 20

Created on 2024-09-13 with reprex v2.1.0

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.