Second highest value

This dataframe:

df <- data.frame(
  stringsAsFactors = FALSE,
          District = c("Dob", "Dab", "Dec", "Doi", "Dun", "Das"),
                PBC = c(254L, 21L, 93L, 1601L, 136L, 169L),
                PTC = c(65L, 147L, 27L, 1321L, 239L, 22L),
                PCC = c(242L, 0L, 0L, 374L, 1L, 69L),
                PKL = c(0L, 23L, 0L, 529L, 0L, 0L)
)

I want to obtain the second highest value in each row and assign it to a new column. I use:

MyFunc <- function(R,Pos){
        tmp <- sort(R,decreasing = TRUE)
        colnames(df)[which(R==tmp[Pos])]
}

df |> rowwise() |> 
        mutate(A = MyFunc(c_across(PBC:PKL),Pos = 2))

But I don't get the desired result. I get

  District   PBC   PTC   PCC   PKL A       
  <chr>    <int> <int> <int> <int> <chr>   
1 Dob        254    65   242     0 PTC     
2 Dab         21   147     0    23 PCC     
3 Dec         93    27     0     0 PBC     
4 Doi       1601  1321   374   529 PBC     
5 Dun        136   239     1     0 District
6 Das        169    22    69     0 PTC     

where A needs to be PCC in row 1, PKL on row 2, PTC on row 4, PCC in row 5.....

Below is one approach that produces the desired output, which uses the tidyverse.

library(tidyverse)

# data frame of second highest values
second_highest = df |>
  mutate(row = row_number()) |>
  pivot_longer(c(-District, -row)) |>
  group_by(row) |>
  arrange(desc(value)) |>
  filter(row_number() == 2) |>
  ungroup()

# join to original data frame
out = df |>
  mutate(row = row_number()) |>
  left_join(
    second_highest |> distinct(row, A = name)
  ) |>
  select(-row)
#> Joining with `by = join_by(row)`

out
#>   District  PBC  PTC PCC PKL   A
#> 1      Dob  254   65 242   0 PCC
#> 2      Dab   21  147   0  23 PKL
#> 3      Dec   93   27   0   0 PTC
#> 4      Doi 1601 1321 374 529 PTC
#> 5      Dun  136  239   1   0 PBC
#> 6      Das  169   22  69   0 PCC

Created on 2023-05-30 with reprex v2.0.2

I've created a solution which hews as close as possible to your original approach.

library(tidyverse)
df <- data.frame(
  stringsAsFactors = FALSE,
  District = c("Dob", "Dab", "Dec", "Doi", "Dun", "Das"),
  PBC = c(254L, 21L, 93L, 1601L, 136L, 169L),
  PTC = c(65L, 147L, 27L, 1321L, 239L, 22L),
  PCC = c(242L, 0L, 0L, 374L, 1L, 69L),
  PKL = c(0L, 23L, 0L, 529L, 0L, 0L)
)

MyFunc <- function(R,Pos){
  r_res <-  R |>
    mutate(tmp= sort(c_across(where(is.numeric)),decreasing = TRUE)[Pos])
  colnames(R)[which(R==r_res$tmp, arr.ind=TRUE)[,2]]
}

df |>  rowwise() |>
  mutate(A = MyFunc(pick(PBC:PKL),Pos = 2)) |> 
  ungroup()
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.