# Describing survey results with unequal number of observations

Hello,

I am trying to generate very simple general descriptive statistics on how children rated the taste of a drink. Responses to the survey question range from 0 to 10, but it is getting complicated because each child answered a different number of surveys over the course of a week-long period.

For each participant, I would like to get the percentage of times they used each response (e.g. 1: 10%, 2: 0%, 3: 20%, etc.).

I was thinking of something along these lines, but something that actually works!

``````  group_by(subject_id) %>%
# Non-working sketch from the question: the intent is, per subject, the share
# of responses where the rating equals 10.
# NOTE(review): dplyr's count() takes a data frame and length() needs an
# argument, so this call fails as written; mean(SUGARY_DRINK_TASTE == 10)
# would presumably give the intended share -- confirm against the goal.
summarize(endorse_10 = count(SUGARY_DRINK_TASTE == 10)/length())

``````

For a sample of my dataset please see below:

``````taste <- structure(list(subject_id = c(28053, 28053, 28053, 28053, 28053,
28054, 28054, 28054, 28056, 28056, 28056, 28056, 28056, 28056,
28056, 28056, 28056, 28057, 28057, 28057, 28057, 28057, 28057,
28057, 28057, 28057, 28057, 28057, 28057, 28057, 28057, 28057,
28057, 28057, 28057, 28058, 28058, 28058, 28058, 28058, 28058,
28058, 28058, 28058, 28058, 28058, 28058, 28058, 28058, 28058
), SUGARY_DRINK_TASTE = c(10, 0, 10, 10, 10, 9, 10, 1, 9, 10,
10, 6, 10, 8, 10, 10, 4, 8, 7, 8, 7, 8, 9, 9, 9, 9, 7, 10, 10,
0, 7, 8, 7, 10, 8, 5, 10, 8, 8, 10, 10, 10, 10, 6, 7, 10, 10,
6, 10, 10)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -50L), groups = structure(list(subject_id = c(28053,
28054, 28056, 28057, 28058), .rows = structure(list(1:5, 6:8,
9:17, 18:35, 36:50), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .drop = TRUE))
``````

Is this what you are trying to get?

``````taste <- structure(list(subject_id = c(28053, 28053, 28053, 28053, 28053,
28054, 28054, 28054, 28056, 28056, 28056, 28056, 28056, 28056,
28056, 28056, 28056, 28057, 28057, 28057, 28057, 28057, 28057,
28057, 28057, 28057, 28057, 28057, 28057, 28057, 28057, 28057,
28057, 28057, 28057, 28058, 28058, 28058, 28058, 28058, 28058,
28058, 28058, 28058, 28058, 28058, 28058, 28058, 28058, 28058),
SUGARY_DRINK_TASTE = c(10, 0, 10, 10, 10, 9, 10, 1, 9, 10,
10, 6, 10, 8, 10, 10, 4, 8, 7, 8, 7, 8, 9, 9, 9, 9, 7, 10, 10,
0, 7, 8, 7, 10, 8, 5, 10, 8, 8, 10, 10, 10, 10, 6, 7, 10, 10,
6, 10, 10)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -50L),
groups = structure(list(subject_id = c(28053, 28054, 28056, 28057, 28058),
.rows = structure(list(1:5, 6:8, 9:17, 18:35, 36:50), ptype = integer(0),
class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -5L), .drop = TRUE))
library(dplyr)

# Tally how often each subject gave each rating: one row per observed
# subject/rating pair, with both grouping columns kept on the result.
COUNTS <- taste %>%
  group_by(subject_id, SUGARY_DRINK_TASTE) %>%
  summarise(n = n(), .groups = "keep")

# Regroup by subject only, so that sum(n) is that subject's total number of
# responses and Frac is the share of them given to each rating. This makes
# subjects with different numbers of surveys comparable.
Frac <- COUNTS %>%
  group_by(subject_id) %>%
  mutate(Frac = n / sum(n))
Frac
#> # A tibble: 20 × 4
#> # Groups:   subject_id [5]
#>    subject_id SUGARY_DRINK_TASTE     n   Frac
#>         <dbl>              <dbl> <int>  <dbl>
#>  1      28053                  0     1 0.2
#>  2      28053                 10     4 0.8
#>  3      28054                  1     1 0.333
#>  4      28054                  9     1 0.333
#>  5      28054                 10     1 0.333
#>  6      28056                  4     1 0.111
#>  7      28056                  6     1 0.111
#>  8      28056                  8     1 0.111
#>  9      28056                  9     1 0.111
#> 10      28056                 10     5 0.556
#> 11      28057                  0     1 0.0556
#> 12      28057                  7     5 0.278
#> 13      28057                  8     5 0.278
#> 14      28057                  9     4 0.222
#> 15      28057                 10     3 0.167
#> 16      28058                  5     1 0.0667
#> 17      28058                  6     2 0.133
#> 18      28058                  7     1 0.0667
#> 19      28058                  8     2 0.133
#> 20      28058                 10     9 0.6
``````

Created on 2022-04-13 by the reprex package (v0.2.1)

This is exactly what I had in mind, thank you very much!

Do you know the best way for me to add a "0" proportion for every response between 0 and 10, even if the participant never used that response?

So in the example above, for participant 28053, ideally I would like: response 0, Frac = 0.2; response 1, Frac = 0; response 2, Frac = 0; ... response 10, Frac = 0.8.

The complete() function from tidyr can do that.

``````taste <- structure(list(subject_id = c(28053, 28053, 28053, 28053, 28053,
28054, 28054, 28054, 28056, 28056, 28056, 28056, 28056, 28056,
28056, 28056, 28056, 28057, 28057, 28057, 28057, 28057, 28057,
28057, 28057, 28057, 28057, 28057, 28057, 28057, 28057, 28057,
28057, 28057, 28057, 28058, 28058, 28058, 28058, 28058, 28058,
28058, 28058, 28058, 28058, 28058, 28058, 28058, 28058, 28058),
SUGARY_DRINK_TASTE = c(10, 0, 10, 10, 10, 9, 10, 1, 9, 10,
10, 6, 10, 8, 10, 10, 4, 8, 7, 8, 7, 8, 9, 9, 9, 9, 7, 10, 10,
0, 7, 8, 7, 10, 8, 5, 10, 8, 8, 10, 10, 10, 10, 6, 7, 10, 10,
6, 10, 10)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -50L),
groups = structure(list(subject_id = c(28053, 28054, 28056, 28057, 28058),
.rows = structure(list(1:5, 6:8, 9:17, 18:35, 36:50), ptype = integer(0),
class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -5L), .drop = TRUE))

library(dplyr)
library(tidyr)

# Store the rating as a factor with all eleven levels (0-10), so ratings that
# never occur in the data are still known values when the table is completed.
taste <- taste %>%
  mutate(SUGARY_DRINK_TASTE = factor(SUGARY_DRINK_TASTE, levels = 0:10))

# Tally the observed subject/rating pairs, drop the grouping, then add a row
# with n = 0 for every subject/rating combination that was not observed.
COUNTS <- taste %>%
  group_by(subject_id, SUGARY_DRINK_TASTE) %>%
  count() %>%
  ungroup() %>%
  complete(subject_id, SUGARY_DRINK_TASTE, fill = list(n = 0))

# Per subject, convert the tallies into the share of that subject's responses.
Frac <- COUNTS %>%
  group_by(subject_id) %>%
  mutate(Frac = n / sum(n))
Frac
#> # A tibble: 55 × 4
#> # Groups:   subject_id [5]
#>    subject_id SUGARY_DRINK_TASTE     n  Frac
#>         <dbl> <fct>              <int> <dbl>
#>  1      28053 0                      1   0.2
#>  2      28053 1                      0   0
#>  3      28053 2                      0   0
#>  4      28053 3                      0   0
#>  5      28053 4                      0   0
#>  6      28053 5                      0   0
#>  7      28053 6                      0   0
#>  8      28053 7                      0   0
#>  9      28053 8                      0   0
#> 10      28053 9                      0   0
``````

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.