Computing the mean for the rest of the group with mutate

I want to compute the mean of X for the rest of the group (Gr). I think the obvious code should be:

          mutate(mean_rest_of_group = mean(X[-row_number()], na.rm = TRUE))

But it yields Nas. This is my code and results:

library(tidyverse)
set.seed(123)
datos <- data.frame(idi = 1:9, Gr = rep(letters[1:3], each = 3), 
                       X = rpois(9,5), stringsAsFactors = F)
datos$X[datos$X==8] <- NA
datos <- datos |>    
     group_by(Gr) |>
     mutate(Media_Companeros = mean(X[-row_number()], na.rm = TRUE),  
            Num = X[row_number()])  
datos
#> # A tibble: 9 × 5
#> # Groups:   Gr [3]
#>     idi Gr        X Media_Companeros   Num
#>   <int> <chr> <int>            <dbl> <int>
#> 1     1 a         4              NaN     4
#> 2     2 a         7              NaN     7
#> 3     3 a         4              NaN     4
#> 4     4 b        NA              NaN    NA
#> 5     5 b         9              NaN     9
#> 6     6 b         2              NaN     2
#> 7     7 c         5              NaN     5
#> 8     8 c        NA              NaN    NA
#> 9     9 c         5              NaN     5

between us humans what does rest of the group mean ?
I suppose from your code I could guess you want the mean for the group that the present row belongs to but leaving out any contribution from the present row itself ?

It means that, for example, in a class, I wan to compute for each student the mean of the rest of the class
Thanks

Excuse me nirgrahamuk. Yes, I think you interpreted me properly.

I think this approach is fairly explicit.

library(tidyverse)
set.seed(123)
datos <- data.frame(
  idi = 1:9, Gr = rep(letters[1:3], each = 3),
  X = rpois(9, 5), stringsAsFactors = F
)
datos$X[datos$X == 8] <- NA

(co_groups <- datos |>
  rowwise() |>
  group_map(.f = \(x, ...)filter(
    datos,
    Gr == x$Gr,
    idi != x$idi
  )) |> enframe(
    name = NULL,
    value = "co_groups"
  ))

(datos2 <- tibble(bind_cols(datos, co_groups)))

(datos3 <- mutate(rowwise(datos2),
  Media_Companeros = mean(co_groups$X,
    na.rm = TRUE)
) |>
  ungroup())

Thank you very much. It appears to work, obviously it is more complex.

What did you intend by this in your original attempt, since it just duplicates the X column?

IF we usu {data.table} rather than {dplyr}, that is {tidyverse}, I think we can do this.


set.seed(123)
DT<- data.table(
  idi = 1:9, Gr = rep(letters[1:3], each = 3),
  X = rpois(9, 5), stringsAsFactors = F
)

DT[8, X := NA]

DT[, mean(X), by = idi]
DT

Here's another approach that uses rowwise(), like @nirgrahamuk's solution, but with list columns:

library(tidyverse)
set.seed(123)
datos <- data.frame(idi = 1:9, Gr = rep(letters[1:3], each = 3), 
                    X = rpois(9,5), stringsAsFactors = F)
datos$X[datos$X==8] <- NA
# step 1: add group-related columns
datos |> 
  group_by(Gr) |>
  mutate(
    # create a list column with vector of group members
    class = list(X),
    # add an in-group id column
    id_in_group = row_number()
  ) |> 
  ungroup() |> 
  # convert to row-based table for later row-based calculations
  rowwise() |> 
  # convert class values to strings, for ease of viewing results
  mutate(
    class_txt = 
      # use -1 to encode NA as string
      list(if_else(class |> is.na(), -1, class)),
    class_txt =
      # coerce class vector to character
      class_txt |> str_flatten(collapse = ', ')
  ) -> datos_temp
datos_temp
#> # A tibble: 9 × 6
#> # Rowwise: 
#>     idi Gr        X class     id_in_group class_txt
#>   <int> <chr> <int> <list>          <int> <chr>    
#> 1     1 a         4 <int [3]>           1 4, 7, 4  
#> 2     2 a         7 <int [3]>           2 4, 7, 4  
#> 3     3 a         4 <int [3]>           3 4, 7, 4  
#> 4     4 b        NA <int [3]>           1 -1, 9, 2 
#> 5     5 b         9 <int [3]>           2 -1, 9, 2 
#> 6     6 b         2 <int [3]>           3 -1, 9, 2 
#> 7     7 c         5 <int [3]>           1 5, -1, 5 
#> 8     8 c        NA <int [3]>           2 5, -1, 5 
#> 9     9 c         5 <int [3]>           3 5, -1, 5
# step 2: exclude individual from group
datos_temp |> 
  # remove individual from group vector
  mutate(
    rest = list(class[-id_in_group])
  ) |> 
  # convert class values to strings, for ease of viewing results
  mutate(
    rest_txt = 
      # use -1 to encode NA as string
      list(if_else(rest |> is.na(), -1, rest)),
    rest_txt =
      # coerce class vector to character
      rest_txt |> str_flatten(collapse = ', ')
  ) -> datos_temp
datos_temp
#> # A tibble: 9 × 8
#> # Rowwise: 
#>     idi Gr        X class     id_in_group class_txt rest      rest_txt
#>   <int> <chr> <int> <list>          <int> <chr>     <list>    <chr>   
#> 1     1 a         4 <int [3]>           1 4, 7, 4   <int [2]> 7, 4    
#> 2     2 a         7 <int [3]>           2 4, 7, 4   <int [2]> 4, 4    
#> 3     3 a         4 <int [3]>           3 4, 7, 4   <int [2]> 4, 7    
#> 4     4 b        NA <int [3]>           1 -1, 9, 2  <int [2]> 9, 2    
#> 5     5 b         9 <int [3]>           2 -1, 9, 2  <int [2]> -1, 2   
#> 6     6 b         2 <int [3]>           3 -1, 9, 2  <int [2]> -1, 9   
#> 7     7 c         5 <int [3]>           1 5, -1, 5  <int [2]> -1, 5   
#> 8     8 c        NA <int [3]>           2 5, -1, 5  <int [2]> 5, 5    
#> 9     9 c         5 <int [3]>           3 5, -1, 5  <int [2]> 5, -1
# step 3: calculate average of rest
datos_temp |> 
  # calculate mean of rest
  mutate(avg_rest = list(mean(rest, na.rm = T))) |> 
  # remove unneeded columns
  select(!(class:rest)) -> datos_temp

datos_temp
#> # A tibble: 9 × 5
#> # Rowwise: 
#>     idi Gr        X rest_txt avg_rest 
#>   <int> <chr> <int> <chr>    <list>   
#> 1     1 a         4 7, 4     <dbl [1]>
#> 2     2 a         7 4, 4     <dbl [1]>
#> 3     3 a         4 4, 7     <dbl [1]>
#> 4     4 b        NA 9, 2     <dbl [1]>
#> 5     5 b         9 -1, 2    <dbl [1]>
#> 6     6 b         2 -1, 9    <dbl [1]>
#> 7     7 c         5 -1, 5    <dbl [1]>
#> 8     8 c        NA 5, 5     <dbl [1]>
#> 9     9 c         5 5, -1    <dbl [1]>
# extract value of avg_rest
datos_temp |> 
  # since each avg_rest value is a single number, can use unnest to extract
  unnest(avg_rest)
#> # A tibble: 9 × 5
#>     idi Gr        X rest_txt avg_rest
#>   <int> <chr> <int> <chr>       <dbl>
#> 1     1 a         4 7, 4          5.5
#> 2     2 a         7 4, 4          4  
#> 3     3 a         4 4, 7          5.5
#> 4     4 b        NA 9, 2          5.5
#> 5     5 b         9 -1, 2         2  
#> 6     6 b         2 -1, 9         9  
#> 7     7 c         5 -1, 5         5  
#> 8     8 c        NA 5, 5          5  
#> 9     9 c         5 5, -1         5

All together from the beginning:

datos |> 
  group_by(Gr) |> 
  mutate(
    # create a list column with vector of group members
    class = list(X),
    # add an in-group id column
    id_in_Gr = row_number()
  ) |>   
  ungroup() |> 
  # convert to row-based table for later row-based calculations
  rowwise() |> 
  # remove individual from group vector
  mutate(rest = list(class[-id_in_Gr])) |> 
  # calculate mean of rest
  mutate(avg_rest = list(mean(rest, na.rm = T))) |> 
  # remove unneeded columns
  select(!(class:rest)) |> 
  # since each avg_rest value is a single number, can use unnest to extract
  unnest(avg_rest)
#> # A tibble: 9 × 4
#>     idi Gr        X avg_rest
#>   <int> <chr> <int>    <dbl>
#> 1     1 a         4      5.5
#> 2     2 a         7      4  
#> 3     3 a         4      5.5
#> 4     4 b        NA      5.5
#> 5     5 b         9      2  
#> 6     6 b         2      9  
#> 7     7 c         5      5  
#> 8     8 c        NA      5  
#> 9     9 c         5      5

Created on 2024-04-05 with reprex v2.0.2

full reprex (click here to open)
library(tidyverse)
set.seed(123)
datos <- data.frame(idi = 1:9, Gr = rep(letters[1:3], each = 3), 
                    X = rpois(9,5), stringsAsFactors = F)
datos$X[datos$X==8] <- NA

# step 1: add group-related columns
datos |> 
  group_by(Gr) |>
  mutate(
    # create a list column with vector of group members
    class = list(X),
    # add an in-group id column
    id_in_group = row_number()
  ) |> 
  ungroup() |> 
  # convert to row-based table for later row-based calculations
  rowwise() |> 
  # convert class values to strings, for ease of viewing results
  mutate(
    class_txt = 
      # use -1 to encode NA as string
      list(if_else(class |> is.na(), -1, class)),
    class_txt =
      # coerce class vector to character
      class_txt |> str_flatten(collapse = ', ')
  ) -> datos_temp

datos_temp
#> # A tibble: 9 × 6
#> # Rowwise: 
#>     idi Gr        X class     id_in_group class_txt
#>   <int> <chr> <int> <list>          <int> <chr>    
#> 1     1 a         4 <int [3]>           1 4, 7, 4  
#> 2     2 a         7 <int [3]>           2 4, 7, 4  
#> 3     3 a         4 <int [3]>           3 4, 7, 4  
#> 4     4 b        NA <int [3]>           1 -1, 9, 2 
#> 5     5 b         9 <int [3]>           2 -1, 9, 2 
#> 6     6 b         2 <int [3]>           3 -1, 9, 2 
#> 7     7 c         5 <int [3]>           1 5, -1, 5 
#> 8     8 c        NA <int [3]>           2 5, -1, 5 
#> 9     9 c         5 <int [3]>           3 5, -1, 5

# step 2: exclude individual from group
datos_temp |> 
  # remove individual from group vector
  mutate(
    rest = list(class[-id_in_group])
  ) |> 
  # convert class values to strings, for ease of viewing results
  mutate(
    rest_txt = 
      # use -1 to encode NA as string
      list(if_else(rest |> is.na(), -1, rest)),
    rest_txt =
      # coerce class vector to character
      rest_txt |> str_flatten(collapse = ', ')
  ) -> datos_temp

datos_temp
#> # A tibble: 9 × 8
#> # Rowwise: 
#>     idi Gr        X class     id_in_group class_txt rest      rest_txt
#>   <int> <chr> <int> <list>          <int> <chr>     <list>    <chr>   
#> 1     1 a         4 <int [3]>           1 4, 7, 4   <int [2]> 7, 4    
#> 2     2 a         7 <int [3]>           2 4, 7, 4   <int [2]> 4, 4    
#> 3     3 a         4 <int [3]>           3 4, 7, 4   <int [2]> 4, 7    
#> 4     4 b        NA <int [3]>           1 -1, 9, 2  <int [2]> 9, 2    
#> 5     5 b         9 <int [3]>           2 -1, 9, 2  <int [2]> -1, 2   
#> 6     6 b         2 <int [3]>           3 -1, 9, 2  <int [2]> -1, 9   
#> 7     7 c         5 <int [3]>           1 5, -1, 5  <int [2]> -1, 5   
#> 8     8 c        NA <int [3]>           2 5, -1, 5  <int [2]> 5, 5    
#> 9     9 c         5 <int [3]>           3 5, -1, 5  <int [2]> 5, -1

# step 3: calculate average of rest
datos_temp |> 
  # calculate mean of rest
  mutate(avg_rest = list(mean(rest, na.rm = T))) |> 
  # remove unneeded columns
  select(!(class:rest)) -> datos_temp

datos_temp
#> # A tibble: 9 × 5
#> # Rowwise: 
#>     idi Gr        X rest_txt avg_rest 
#>   <int> <chr> <int> <chr>    <list>   
#> 1     1 a         4 7, 4     <dbl [1]>
#> 2     2 a         7 4, 4     <dbl [1]>
#> 3     3 a         4 4, 7     <dbl [1]>
#> 4     4 b        NA 9, 2     <dbl [1]>
#> 5     5 b         9 -1, 2    <dbl [1]>
#> 6     6 b         2 -1, 9    <dbl [1]>
#> 7     7 c         5 -1, 5    <dbl [1]>
#> 8     8 c        NA 5, 5     <dbl [1]>
#> 9     9 c         5 5, -1    <dbl [1]>

# extract value of avg_rest
datos_temp |> 
  # since each avg_rest value is a single number, can use unnest to extract
  unnest(avg_rest)
#> # A tibble: 9 × 5
#>     idi Gr        X rest_txt avg_rest
#>   <int> <chr> <int> <chr>       <dbl>
#> 1     1 a         4 7, 4          5.5
#> 2     2 a         7 4, 4          4  
#> 3     3 a         4 4, 7          5.5
#> 4     4 b        NA 9, 2          5.5
#> 5     5 b         9 -1, 2         2  
#> 6     6 b         2 -1, 9         9  
#> 7     7 c         5 -1, 5         5  
#> 8     8 c        NA 5, 5          5  
#> 9     9 c         5 5, -1         5

# all together from the beginning
datos |> 
  group_by(Gr) |> 
  mutate(
    # create a list column with vector of group members
    class = list(X),
    # add an in-group id column
    id_in_Gr = row_number()
  ) |>   
  ungroup() |> 
  # convert to row-based table for later row-based calculations
  rowwise() |> 
  # remove individual from group vector
  mutate(rest = list(class[-id_in_Gr])) |> 
  # calculate mean of rest
  mutate(avg_rest = list(mean(rest, na.rm = T))) |> 
  # remove unneeded columns
  select(!(class:rest)) |> 
  # since each avg_rest value is a single number, can use unnest to extract
  unnest(avg_rest)
#> # A tibble: 9 × 4
#>     idi Gr        X avg_rest
#>   <int> <chr> <int>    <dbl>
#> 1     1 a         4      5.5
#> 2     2 a         7      4  
#> 3     3 a         4      5.5
#> 4     4 b        NA      5.5
#> 5     5 b         9      2  
#> 6     6 b         2      9  
#> 7     7 c         5      5  
#> 8     8 c        NA      5  
#> 9     9 c         5      5

Created on 2024-04-05 with reprex v2.0.2

Thanks for your code. In fact I use a minus sign Num = X[-row_number()] trying to remove the observation in the mean calculation to get the mean for the rest of the group. But I don't understand the problem with this code.

Here's similar situation: Suppose we use the built-in letters vector to make the following table,

library(tidyverse)
tibble(row = 1:3, ltr = letters[row])
#> # A tibble: 3 × 2
#>     row ltr  
#>   <int> <chr>
#> 1     1 a    
#> 2     2 b    
#> 3     3 c

and suppose we want to use the row numbers to delete the corresponding letter from the string "abc".

We can use the function str_flatten() convert character vectors to strings:

letters
#>  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
#> [20] "t" "u" "v" "w" "x" "y" "z"
letters %>% str_flatten()
#> [1] "abcdefghijklmnopqrstuvwxyz"

and so, following your example, we might try:

tibble(row = 1:3, ltr = letters[row]) %>% 
  mutate(abc_minus_ltr = str_flatten(ltr[-row]))
#> # A tibble: 3 × 3
#>     row ltr   abc_minus_ltr
#>   <int> <chr> <chr>        
#> 1     1 a     ""           
#> 2     2 b     ""           
#> 3     3 c     ""

To understand why this doesn't work, note that the column-creation argument

abc_minus_ltr = str_flatten(ltr[-row])

behaves like an assignment statement.

In other words, if we have the vectors row and ltr

row <- 1:3
ltr <- letters[row]
ltr
#> [1] "a" "b" "c"

the inner calculation, ltr[-row], removes all of the elements of the vector ltr, which results in a character vector of length 0:

ltr[-row]
#> character(0)

so that the outer function, str_flatten(), creates a corresponding string of length 0, namely the empty string, "":

str_flatten(ltr[-row])
#> [1] ""

Now, the mutate() function recycles this single empty string to create a vector with as many elements as the table has rows:

str_flatten(ltr[-row]) %>% rep(3)
#> [1] "" "" ""

and assigns this vector to the new column abc_minus_ltr:

tibble(row = 1:3, ltr = letters[row]) %>% 
  mutate(abc_minus_ltr = str_flatten(ltr[-row]))
#> # A tibble: 3 × 3
#>     row ltr   abc_minus_ltr
#>   <int> <chr> <chr>        
#> 1     1 a     ""           
#> 2     2 b     ""           
#> 3     3 c     ""

In other words, the key issue is that the statement ltr[-row] removes all indices contained in the vector row instead of removing each index one at a time.

To do that, we could use a list column to allow each row to contain a copy of the content of the ltr column:

tibble(row = 1:3, ltr = letters[row]) %>% 
  mutate(all_ltrs = list(ltr))
#> # A tibble: 3 × 3
#>     row ltr   all_ltrs 
#>   <int> <chr> <list>   
#> 1     1 a     <chr [3]>
#> 2     2 b     <chr [3]>
#> 3     3 c     <chr [3]>

and then split the table into many one-row subtables with the function rowwise(), so that in each subtable, the column row consists of a single index.

This allows us both to flatten each of the all_ltrs vector values separately for each row, and also to remove each row's index from that vector and then flatten the result:

tibble(row = 1:3, ltr = letters[row]) %>% 
  mutate(all_ltrs = list(ltr)) %>% 
  rowwise() %>% 
  mutate(all_ltrs_chr = str_flatten(all_ltrs)) %>% 
  mutate(abc_minus_ltr = str_flatten(all_ltrs[-row]))
#> # A tibble: 3 × 5
#> # Rowwise: 
#>     row ltr   all_ltrs  all_ltrs_chr abc_minus_ltr
#>   <int> <chr> <list>    <chr>        <chr>        
#> 1     1 a     <chr [3]> abc          bc           
#> 2     2 b     <chr [3]> abc          ac           
#> 3     3 c     <chr [3]> abc          ab

Created on 2024-04-09 with reprex v2.0.2

full reprex
library(tidyverse)
tibble(row = 1:3, ltr = letters[1:3])
#> # A tibble: 3 × 2
#>     row ltr  
#>   <int> <chr>
#> 1     1 a    
#> 2     2 b    
#> 3     3 c

letters
#>  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
#> [20] "t" "u" "v" "w" "x" "y" "z"
letters %>% str_flatten()
#> [1] "abcdefghijklmnopqrstuvwxyz"

tibble(row = 1:3, ltr = letters[row]) %>% 
  mutate(abc_minus_ltr = str_flatten(ltr[-row]))
#> # A tibble: 3 × 3
#>     row ltr   abc_minus_ltr
#>   <int> <chr> <chr>        
#> 1     1 a     ""           
#> 2     2 b     ""           
#> 3     3 c     ""

# abc_minus_ltr = str_flatten(ltr[-row])

row <- 1:3
ltr <- letters[row]
ltr
#> [1] "a" "b" "c"

ltr[-row]
#> character(0)

str_flatten(ltr[-row])
#> [1] ""

str_flatten(ltr[-row]) %>% rep(3)
#> [1] "" "" ""

tibble(row = 1:3, ltr = letters[row]) %>% 
  mutate(abc_minus_ltr = str_flatten(ltr[-row]))
#> # A tibble: 3 × 3
#>     row ltr   abc_minus_ltr
#>   <int> <chr> <chr>        
#> 1     1 a     ""           
#> 2     2 b     ""           
#> 3     3 c     ""

tibble(row = 1:3, ltr = letters[row]) %>% 
  mutate(all_ltrs = list(ltr))
#> # A tibble: 3 × 3
#>     row ltr   all_ltrs 
#>   <int> <chr> <list>   
#> 1     1 a     <chr [3]>
#> 2     2 b     <chr [3]>
#> 3     3 c     <chr [3]>

tibble(row = 1:3, ltr = letters[row]) %>% 
  mutate(all_ltrs = list(ltr)) %>% 
  rowwise() %>% 
  mutate(all_ltrs_chr = str_flatten(all_ltrs)) %>% 
  mutate(abc_minus_ltr = str_flatten(all_ltrs[-row]))
#> # A tibble: 3 × 5
#> # Rowwise: 
#>     row ltr   all_ltrs  all_ltrs_chr abc_minus_ltr
#>   <int> <chr> <list>    <chr>        <chr>        
#> 1     1 a     <chr [3]> abc          bc           
#> 2     2 b     <chr [3]> abc          ac           
#> 3     3 c     <chr [3]> abc          ab

Created on 2024-04-09 with reprex v2.0.2

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.