# Computing the mean for the rest of the group with mutate

I want to compute the mean of X for the rest of the group (Gr). I think the obvious code should be:

``````          mutate(mean_rest_of_group = mean(X[-row_number()], na.rm = TRUE))
``````

But it yields Nas. This is my code and results:

``````library(tidyverse)
set.seed(123)
datos <- data.frame(idi = 1:9, Gr = rep(letters[1:3], each = 3),
X = rpois(9,5), stringsAsFactors = F)
datos\$X[datos\$X==8] <- NA
datos <- datos |>
group_by(Gr) |>
mutate(Media_Companeros = mean(X[-row_number()], na.rm = TRUE),
Num = X[row_number()])
datos
#> # A tibble: 9 × 5
#> # Groups:   Gr [3]
#>     idi Gr        X Media_Companeros   Num
#>   <int> <chr> <int>            <dbl> <int>
#> 1     1 a         4              NaN     4
#> 2     2 a         7              NaN     7
#> 3     3 a         4              NaN     4
#> 4     4 b        NA              NaN    NA
#> 5     5 b         9              NaN     9
#> 6     6 b         2              NaN     2
#> 7     7 c         5              NaN     5
#> 8     8 c        NA              NaN    NA
#> 9     9 c         5              NaN     5
``````

between us humans what does rest of the group mean ?
I suppose from your code I could guess you want the mean for the group that the present row belongs to but leaving out any contribution from the present row itself ?

It means that, for example, in a class, I wan to compute for each student the mean of the rest of the class
Thanks

Excuse me nirgrahamuk. Yes, I think you interpreted me properly.

I think this approach is fairly explicit.

``````library(tidyverse)
set.seed(123)
datos <- data.frame(
idi = 1:9, Gr = rep(letters[1:3], each = 3),
X = rpois(9, 5), stringsAsFactors = F
)
datos\$X[datos\$X == 8] <- NA

(co_groups <- datos |>
rowwise() |>
group_map(.f = \(x, ...)filter(
datos,
Gr == x\$Gr,
idi != x\$idi
)) |> enframe(
name = NULL,
value = "co_groups"
))

(datos2 <- tibble(bind_cols(datos, co_groups)))

(datos3 <- mutate(rowwise(datos2),
Media_Companeros = mean(co_groups\$X,
na.rm = TRUE)
) |>
ungroup())``````

Thank you very much. It appears to work, obviously it is more complex.

What did you intend by this in your original attempt, since it just duplicates the `X` column?

IF we usu {data.table} rather than {dplyr}, that is {tidyverse}, I think we can do this.

``````
set.seed(123)
DT<- data.table(
idi = 1:9, Gr = rep(letters[1:3], each = 3),
X = rpois(9, 5), stringsAsFactors = F
)

DT[8, X := NA]

DT[, mean(X), by = idi]
DT

``````

Here's another approach that uses `rowwise()`, like @nirgrahamuk's solution, but with list columns:

``````library(tidyverse)
set.seed(123)
datos <- data.frame(idi = 1:9, Gr = rep(letters[1:3], each = 3),
X = rpois(9,5), stringsAsFactors = F)
datos\$X[datos\$X==8] <- NA
``````
``````# step 1: add group-related columns
datos |>
group_by(Gr) |>
mutate(
# create a list column with vector of group members
class = list(X),
# add an in-group id column
id_in_group = row_number()
) |>
ungroup() |>
# convert to row-based table for later row-based calculations
rowwise() |>
# convert class values to strings, for ease of viewing results
mutate(
class_txt =
# use -1 to encode NA as string
list(if_else(class |> is.na(), -1, class)),
class_txt =
# coerce class vector to character
class_txt |> str_flatten(collapse = ', ')
) -> datos_temp
``````
``````datos_temp
#> # A tibble: 9 × 6
#> # Rowwise:
#>     idi Gr        X class     id_in_group class_txt
#>   <int> <chr> <int> <list>          <int> <chr>
#> 1     1 a         4 <int [3]>           1 4, 7, 4
#> 2     2 a         7 <int [3]>           2 4, 7, 4
#> 3     3 a         4 <int [3]>           3 4, 7, 4
#> 4     4 b        NA <int [3]>           1 -1, 9, 2
#> 5     5 b         9 <int [3]>           2 -1, 9, 2
#> 6     6 b         2 <int [3]>           3 -1, 9, 2
#> 7     7 c         5 <int [3]>           1 5, -1, 5
#> 8     8 c        NA <int [3]>           2 5, -1, 5
#> 9     9 c         5 <int [3]>           3 5, -1, 5
``````
``````# step 2: exclude individual from group
datos_temp |>
# remove individual from group vector
mutate(
rest = list(class[-id_in_group])
) |>
# convert class values to strings, for ease of viewing results
mutate(
rest_txt =
# use -1 to encode NA as string
list(if_else(rest |> is.na(), -1, rest)),
rest_txt =
# coerce class vector to character
rest_txt |> str_flatten(collapse = ', ')
) -> datos_temp
``````
``````datos_temp
#> # A tibble: 9 × 8
#> # Rowwise:
#>     idi Gr        X class     id_in_group class_txt rest      rest_txt
#>   <int> <chr> <int> <list>          <int> <chr>     <list>    <chr>
#> 1     1 a         4 <int [3]>           1 4, 7, 4   <int [2]> 7, 4
#> 2     2 a         7 <int [3]>           2 4, 7, 4   <int [2]> 4, 4
#> 3     3 a         4 <int [3]>           3 4, 7, 4   <int [2]> 4, 7
#> 4     4 b        NA <int [3]>           1 -1, 9, 2  <int [2]> 9, 2
#> 5     5 b         9 <int [3]>           2 -1, 9, 2  <int [2]> -1, 2
#> 6     6 b         2 <int [3]>           3 -1, 9, 2  <int [2]> -1, 9
#> 7     7 c         5 <int [3]>           1 5, -1, 5  <int [2]> -1, 5
#> 8     8 c        NA <int [3]>           2 5, -1, 5  <int [2]> 5, 5
#> 9     9 c         5 <int [3]>           3 5, -1, 5  <int [2]> 5, -1
``````
``````# step 3: calculate average of rest
datos_temp |>
# calculate mean of rest
mutate(avg_rest = list(mean(rest, na.rm = T))) |>
# remove unneeded columns
select(!(class:rest)) -> datos_temp

datos_temp
#> # A tibble: 9 × 5
#> # Rowwise:
#>     idi Gr        X rest_txt avg_rest
#>   <int> <chr> <int> <chr>    <list>
#> 1     1 a         4 7, 4     <dbl [1]>
#> 2     2 a         7 4, 4     <dbl [1]>
#> 3     3 a         4 4, 7     <dbl [1]>
#> 4     4 b        NA 9, 2     <dbl [1]>
#> 5     5 b         9 -1, 2    <dbl [1]>
#> 6     6 b         2 -1, 9    <dbl [1]>
#> 7     7 c         5 -1, 5    <dbl [1]>
#> 8     8 c        NA 5, 5     <dbl [1]>
#> 9     9 c         5 5, -1    <dbl [1]>
``````
``````# extract value of avg_rest
datos_temp |>
# since each avg_rest value is a single number, can use unnest to extract
unnest(avg_rest)
#> # A tibble: 9 × 5
#>     idi Gr        X rest_txt avg_rest
#>   <int> <chr> <int> <chr>       <dbl>
#> 1     1 a         4 7, 4          5.5
#> 2     2 a         7 4, 4          4
#> 3     3 a         4 4, 7          5.5
#> 4     4 b        NA 9, 2          5.5
#> 5     5 b         9 -1, 2         2
#> 6     6 b         2 -1, 9         9
#> 7     7 c         5 -1, 5         5
#> 8     8 c        NA 5, 5          5
#> 9     9 c         5 5, -1         5
``````

All together from the beginning:

``````datos |>
group_by(Gr) |>
mutate(
# create a list column with vector of group members
class = list(X),
# add an in-group id column
id_in_Gr = row_number()
) |>
ungroup() |>
# convert to row-based table for later row-based calculations
rowwise() |>
# remove individual from group vector
mutate(rest = list(class[-id_in_Gr])) |>
# calculate mean of rest
mutate(avg_rest = list(mean(rest, na.rm = T))) |>
# remove unneeded columns
select(!(class:rest)) |>
# since each avg_rest value is a single number, can use unnest to extract
unnest(avg_rest)
#> # A tibble: 9 × 4
#>     idi Gr        X avg_rest
#>   <int> <chr> <int>    <dbl>
#> 1     1 a         4      5.5
#> 2     2 a         7      4
#> 3     3 a         4      5.5
#> 4     4 b        NA      5.5
#> 5     5 b         9      2
#> 6     6 b         2      9
#> 7     7 c         5      5
#> 8     8 c        NA      5
#> 9     9 c         5      5
``````

Created on 2024-04-05 with reprex v2.0.2

``````library(tidyverse)
set.seed(123)
datos <- data.frame(idi = 1:9, Gr = rep(letters[1:3], each = 3),
X = rpois(9,5), stringsAsFactors = F)
datos\$X[datos\$X==8] <- NA

# step 1: add group-related columns
datos |>
group_by(Gr) |>
mutate(
# create a list column with vector of group members
class = list(X),
# add an in-group id column
id_in_group = row_number()
) |>
ungroup() |>
# convert to row-based table for later row-based calculations
rowwise() |>
# convert class values to strings, for ease of viewing results
mutate(
class_txt =
# use -1 to encode NA as string
list(if_else(class |> is.na(), -1, class)),
class_txt =
# coerce class vector to character
class_txt |> str_flatten(collapse = ', ')
) -> datos_temp

datos_temp
#> # A tibble: 9 × 6
#> # Rowwise:
#>     idi Gr        X class     id_in_group class_txt
#>   <int> <chr> <int> <list>          <int> <chr>
#> 1     1 a         4 <int [3]>           1 4, 7, 4
#> 2     2 a         7 <int [3]>           2 4, 7, 4
#> 3     3 a         4 <int [3]>           3 4, 7, 4
#> 4     4 b        NA <int [3]>           1 -1, 9, 2
#> 5     5 b         9 <int [3]>           2 -1, 9, 2
#> 6     6 b         2 <int [3]>           3 -1, 9, 2
#> 7     7 c         5 <int [3]>           1 5, -1, 5
#> 8     8 c        NA <int [3]>           2 5, -1, 5
#> 9     9 c         5 <int [3]>           3 5, -1, 5

# step 2: exclude individual from group
datos_temp |>
# remove individual from group vector
mutate(
rest = list(class[-id_in_group])
) |>
# convert class values to strings, for ease of viewing results
mutate(
rest_txt =
# use -1 to encode NA as string
list(if_else(rest |> is.na(), -1, rest)),
rest_txt =
# coerce class vector to character
rest_txt |> str_flatten(collapse = ', ')
) -> datos_temp

datos_temp
#> # A tibble: 9 × 8
#> # Rowwise:
#>     idi Gr        X class     id_in_group class_txt rest      rest_txt
#>   <int> <chr> <int> <list>          <int> <chr>     <list>    <chr>
#> 1     1 a         4 <int [3]>           1 4, 7, 4   <int [2]> 7, 4
#> 2     2 a         7 <int [3]>           2 4, 7, 4   <int [2]> 4, 4
#> 3     3 a         4 <int [3]>           3 4, 7, 4   <int [2]> 4, 7
#> 4     4 b        NA <int [3]>           1 -1, 9, 2  <int [2]> 9, 2
#> 5     5 b         9 <int [3]>           2 -1, 9, 2  <int [2]> -1, 2
#> 6     6 b         2 <int [3]>           3 -1, 9, 2  <int [2]> -1, 9
#> 7     7 c         5 <int [3]>           1 5, -1, 5  <int [2]> -1, 5
#> 8     8 c        NA <int [3]>           2 5, -1, 5  <int [2]> 5, 5
#> 9     9 c         5 <int [3]>           3 5, -1, 5  <int [2]> 5, -1

# step 3: calculate average of rest
datos_temp |>
# calculate mean of rest
mutate(avg_rest = list(mean(rest, na.rm = T))) |>
# remove unneeded columns
select(!(class:rest)) -> datos_temp

datos_temp
#> # A tibble: 9 × 5
#> # Rowwise:
#>     idi Gr        X rest_txt avg_rest
#>   <int> <chr> <int> <chr>    <list>
#> 1     1 a         4 7, 4     <dbl [1]>
#> 2     2 a         7 4, 4     <dbl [1]>
#> 3     3 a         4 4, 7     <dbl [1]>
#> 4     4 b        NA 9, 2     <dbl [1]>
#> 5     5 b         9 -1, 2    <dbl [1]>
#> 6     6 b         2 -1, 9    <dbl [1]>
#> 7     7 c         5 -1, 5    <dbl [1]>
#> 8     8 c        NA 5, 5     <dbl [1]>
#> 9     9 c         5 5, -1    <dbl [1]>

# extract value of avg_rest
datos_temp |>
# since each avg_rest value is a single number, can use unnest to extract
unnest(avg_rest)
#> # A tibble: 9 × 5
#>     idi Gr        X rest_txt avg_rest
#>   <int> <chr> <int> <chr>       <dbl>
#> 1     1 a         4 7, 4          5.5
#> 2     2 a         7 4, 4          4
#> 3     3 a         4 4, 7          5.5
#> 4     4 b        NA 9, 2          5.5
#> 5     5 b         9 -1, 2         2
#> 6     6 b         2 -1, 9         9
#> 7     7 c         5 -1, 5         5
#> 8     8 c        NA 5, 5          5
#> 9     9 c         5 5, -1         5

# all together from the beginning
datos |>
group_by(Gr) |>
mutate(
# create a list column with vector of group members
class = list(X),
# add an in-group id column
id_in_Gr = row_number()
) |>
ungroup() |>
# convert to row-based table for later row-based calculations
rowwise() |>
# remove individual from group vector
mutate(rest = list(class[-id_in_Gr])) |>
# calculate mean of rest
mutate(avg_rest = list(mean(rest, na.rm = T))) |>
# remove unneeded columns
select(!(class:rest)) |>
# since each avg_rest value is a single number, can use unnest to extract
unnest(avg_rest)
#> # A tibble: 9 × 4
#>     idi Gr        X avg_rest
#>   <int> <chr> <int>    <dbl>
#> 1     1 a         4      5.5
#> 2     2 a         7      4
#> 3     3 a         4      5.5
#> 4     4 b        NA      5.5
#> 5     5 b         9      2
#> 6     6 b         2      9
#> 7     7 c         5      5
#> 8     8 c        NA      5
#> 9     9 c         5      5
``````

Created on 2024-04-05 with reprex v2.0.2

Thanks for your code. In fact I use a minus sign `Num = X[-row_number()]` trying to remove the observation in the mean calculation to get the mean for the rest of the group. But I don't understand the problem with this code.

Here's similar situation: Suppose we use the built-in `letters` vector to make the following table,

``````library(tidyverse)
tibble(row = 1:3, ltr = letters[row])
#> # A tibble: 3 × 2
#>     row ltr
#>   <int> <chr>
#> 1     1 a
#> 2     2 b
#> 3     3 c
``````

and suppose we want to use the row numbers to delete the corresponding letter from the string "abc".

We can use the function `str_flatten()` convert character vectors to strings:

``````letters
#>  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
#> [20] "t" "u" "v" "w" "x" "y" "z"
letters %>% str_flatten()
#> [1] "abcdefghijklmnopqrstuvwxyz"
``````

and so, following your example, we might try:

``````tibble(row = 1:3, ltr = letters[row]) %>%
mutate(abc_minus_ltr = str_flatten(ltr[-row]))
#> # A tibble: 3 × 3
#>     row ltr   abc_minus_ltr
#>   <int> <chr> <chr>
#> 1     1 a     ""
#> 2     2 b     ""
#> 3     3 c     ""
``````

To understand why this doesn't work, note that the column-creation argument

``````abc_minus_ltr = str_flatten(ltr[-row])
``````

behaves like an assignment statement.

In other words, if we have the vectors `row` and `ltr`

``````row <- 1:3
ltr <- letters[row]
ltr
#> [1] "a" "b" "c"
``````

the inner calculation, `ltr[-row]`, removes all of the elements of the vector `ltr`, which results in a character vector of length 0:

``````ltr[-row]
#> character(0)
``````

so that the outer function, `str_flatten()`, creates a corresponding string of length 0, namely the empty string, "":

``````str_flatten(ltr[-row])
#> [1] ""
``````

Now, the `mutate()` function recycles this single empty string to create a vector with as many elements as the table has rows:

``````str_flatten(ltr[-row]) %>% rep(3)
#> [1] "" "" ""
``````

and assigns this vector to the new column `abc_minus_ltr`:

``````tibble(row = 1:3, ltr = letters[row]) %>%
mutate(abc_minus_ltr = str_flatten(ltr[-row]))
#> # A tibble: 3 × 3
#>     row ltr   abc_minus_ltr
#>   <int> <chr> <chr>
#> 1     1 a     ""
#> 2     2 b     ""
#> 3     3 c     ""
``````

In other words, the key issue is that the statement `ltr[-row]` removes all indices contained in the vector `row` instead of removing each index one at a time.

To do that, we could use a list column to allow each row to contain a copy of the content of the `ltr` column:

``````tibble(row = 1:3, ltr = letters[row]) %>%
mutate(all_ltrs = list(ltr))
#> # A tibble: 3 × 3
#>     row ltr   all_ltrs
#>   <int> <chr> <list>
#> 1     1 a     <chr [3]>
#> 2     2 b     <chr [3]>
#> 3     3 c     <chr [3]>
``````

and then split the table into many one-row subtables with the function `rowwise()`, so that in each subtable, the column `row` consists of a single index.

This allows us both to flatten each of the `all_ltrs` vector values separately for each row, and also to remove each row's index from that vector and then flatten the result:

``````tibble(row = 1:3, ltr = letters[row]) %>%
mutate(all_ltrs = list(ltr)) %>%
rowwise() %>%
mutate(all_ltrs_chr = str_flatten(all_ltrs)) %>%
mutate(abc_minus_ltr = str_flatten(all_ltrs[-row]))
#> # A tibble: 3 × 5
#> # Rowwise:
#>     row ltr   all_ltrs  all_ltrs_chr abc_minus_ltr
#>   <int> <chr> <list>    <chr>        <chr>
#> 1     1 a     <chr [3]> abc          bc
#> 2     2 b     <chr [3]> abc          ac
#> 3     3 c     <chr [3]> abc          ab
``````

Created on 2024-04-09 with reprex v2.0.2

full reprex
``````library(tidyverse)
tibble(row = 1:3, ltr = letters[1:3])
#> # A tibble: 3 × 2
#>     row ltr
#>   <int> <chr>
#> 1     1 a
#> 2     2 b
#> 3     3 c

letters
#>  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
#> [20] "t" "u" "v" "w" "x" "y" "z"
letters %>% str_flatten()
#> [1] "abcdefghijklmnopqrstuvwxyz"

tibble(row = 1:3, ltr = letters[row]) %>%
mutate(abc_minus_ltr = str_flatten(ltr[-row]))
#> # A tibble: 3 × 3
#>     row ltr   abc_minus_ltr
#>   <int> <chr> <chr>
#> 1     1 a     ""
#> 2     2 b     ""
#> 3     3 c     ""

# abc_minus_ltr = str_flatten(ltr[-row])

row <- 1:3
ltr <- letters[row]
ltr
#> [1] "a" "b" "c"

ltr[-row]
#> character(0)

str_flatten(ltr[-row])
#> [1] ""

str_flatten(ltr[-row]) %>% rep(3)
#> [1] "" "" ""

tibble(row = 1:3, ltr = letters[row]) %>%
mutate(abc_minus_ltr = str_flatten(ltr[-row]))
#> # A tibble: 3 × 3
#>     row ltr   abc_minus_ltr
#>   <int> <chr> <chr>
#> 1     1 a     ""
#> 2     2 b     ""
#> 3     3 c     ""

tibble(row = 1:3, ltr = letters[row]) %>%
mutate(all_ltrs = list(ltr))
#> # A tibble: 3 × 3
#>     row ltr   all_ltrs
#>   <int> <chr> <list>
#> 1     1 a     <chr [3]>
#> 2     2 b     <chr [3]>
#> 3     3 c     <chr [3]>

tibble(row = 1:3, ltr = letters[row]) %>%
mutate(all_ltrs = list(ltr)) %>%
rowwise() %>%
mutate(all_ltrs_chr = str_flatten(all_ltrs)) %>%
mutate(abc_minus_ltr = str_flatten(all_ltrs[-row]))
#> # A tibble: 3 × 5
#> # Rowwise:
#>     row ltr   all_ltrs  all_ltrs_chr abc_minus_ltr
#>   <int> <chr> <list>    <chr>        <chr>
#> 1     1 a     <chr [3]> abc          bc
#> 2     2 b     <chr [3]> abc          ac
#> 3     3 c     <chr [3]> abc          ab
``````

Created on 2024-04-09 with reprex v2.0.2

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.