Second highest value

jynusmac · May 30, 2023, 11:35am

This dataframe:

df <- data.frame(
  stringsAsFactors = FALSE,
          District = c("Dob", "Dab", "Dec", "Doi", "Dun", "Das"),
                PBC = c(254L, 21L, 93L, 1601L, 136L, 169L),
                PTC = c(65L, 147L, 27L, 1321L, 239L, 22L),
                PCC = c(242L, 0L, 0L, 374L, 1L, 69L),
                PKL = c(0L, 23L, 0L, 529L, 0L, 0L)
)

I want to obtain the second highest value in each row and assign it to a new column. I use:

MyFunc <- function(R,Pos){
        tmp <- sort(R,decreasing = TRUE)
        colnames(df)[which(R==tmp[Pos])]
}

df |> rowwise() |> 
        mutate(A = MyFunc(c_across(PBC:PKL),Pos = 2))

But I don't get the desired result. I get

  District   PBC   PTC   PCC   PKL A       
  <chr>    <int> <int> <int> <int> <chr>   
1 Dob        254    65   242     0 PTC     
2 Dab         21   147     0    23 PCC     
3 Dec         93    27     0     0 PBC     
4 Doi       1601  1321   374   529 PBC     
5 Dun        136   239     1     0 District
6 Das        169    22    69     0 PTC

where A needs to be PCC in row 1, PKL on row 2, PTC on row 4, PCC in row 5.....

scottyd22 · May 30, 2023, 1:01pm

Below is one approach that produces the desired output, which uses the tidyverse.

library(tidyverse)

# data frame of second highest values
second_highest = df |>
  mutate(row = row_number()) |>
  pivot_longer(c(-District, -row)) |>
  group_by(row) |>
  arrange(desc(value)) |>
  filter(row_number() == 2) |>
  ungroup()

# join to original data frame
out = df |>
  mutate(row = row_number()) |>
  left_join(
    second_highest |> distinct(row, A = name)
  ) |>
  select(-row)
#> Joining with `by = join_by(row)`

out
#>   District  PBC  PTC PCC PKL   A
#> 1      Dob  254   65 242   0 PCC
#> 2      Dab   21  147   0  23 PKL
#> 3      Dec   93   27   0   0 PTC
#> 4      Doi 1601 1321 374 529 PTC
#> 5      Dun  136  239   1   0 PBC
#> 6      Das  169   22  69   0 PCC

Created on 2023-05-30 with reprex v2.0.2

nirgrahamuk · May 30, 2023, 2:07pm

I've created a solution which hews as close as possible to your original approach.

library(tidyverse)
df <- data.frame(
  stringsAsFactors = FALSE,
  District = c("Dob", "Dab", "Dec", "Doi", "Dun", "Das"),
  PBC = c(254L, 21L, 93L, 1601L, 136L, 169L),
  PTC = c(65L, 147L, 27L, 1321L, 239L, 22L),
  PCC = c(242L, 0L, 0L, 374L, 1L, 69L),
  PKL = c(0L, 23L, 0L, 529L, 0L, 0L)
)

MyFunc <- function(R,Pos){
  r_res <-  R |>
    mutate(tmp= sort(c_across(where(is.numeric)),decreasing = TRUE)[Pos])
  colnames(R)[which(R==r_res$tmp, arr.ind=TRUE)[,2]]
}

df |>  rowwise() |>
  mutate(A = MyFunc(pick(PBC:PKL),Pos = 2)) |> 
  ungroup()

system · June 6, 2023, 2:07pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.