6

I have the same request as in this question: For each row return the column name of the largest value

However I need the name of ALL columns holding the maximum value per row. All solutions provided in the link above cause problems if I have two identical max values per row.

df <- data.frame(V1=c(2,8,1,5 , 6),V2=c(7,3,5, 7 , 3),V3=c(7,6,4, 5, 1))

df
  V1 V2 V3
1  2  7  7
2  8  3  6
3  1  5  4
4  5  7  5
5  6  3  1

Now, I would like to extract the column names which have the max value. At best as an additional column in the df. Somewhat like this:

> df
  V1 V2 V3  Max
1  2  7  7  V3_V2
2  8  3  6  V1
3  1  5  4  V2
4  5  7  5  V2
5  6  3  1  V1

Thank you!

Linda Espey
  • 145
  • 5

4 Answers4

7

You can use rowwise in dplyr and get the column names of a row that have maximum value.

library(dplyr)

df %>%
  rowwise() %>%
  mutate(Max = paste0(names(.)[c_across() == max(c_across())], collapse = '_'))

#     V1    V2    V3 Max  
#  <dbl> <dbl> <dbl> <chr>
#1     2     7     7 V2_V3
#2     8     3     6 V1   
#3     1     5     4 V2   
#4     5     7     5 V2   
#5     6     3     1 V1   

In base R, you could use apply -

df$Max <- apply(df, 1, function(x) paste0(names(df)[x == max(x)],collapse = '_'))
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
1
df <- data.frame(V1=c(2,8,1,5 , 6),V2=c(7,3,5, 7 , 3),V3=c(7,6,4, 5, 1))
df <- df %>%rowwise() %>% mutate(maxx=max(V1,V2,V3)) 
df$MAX <- 0
for (k in 1:nrow(df)) {
  if(length(which(df[k,1:3]==df$maxx[k]))==2){
    df$MAX[k]=paste0(colnames(df)[which(df[k,1:3]==df$maxx[k])[1]],"_",colnames(df)[which(df[k,1:3]==df$maxx[k])[2]])
  }else{
    df$MAX[k]=colnames(df)[which(df[k,1:3]==df$maxx[k])[1]]
  }
}

df$maxx <- NULL
> df
# A tibble: 5 x 4
# Rowwise: 
     V1    V2    V3 MAX  
  <dbl> <dbl> <dbl> <chr>
1     2     7     7 V2_V3
2     8     3     6 V1   
3     1     5     4 V2   
4     5     7     5 V2   
5     6     3     1 V1   
Daman deep
  • 631
  • 3
  • 14
1

We could use pmap from tidyverse

library(dplyr)
library(purrr)
library(stringr)
df <- df %>% 
   mutate(Max = select(cur_data(), everything()) %>%
                   pmap_chr(~  {v1 <- c(...)
               str_c(names(v1)[v1 == max(v1)], collapse="_")}))

-output

df
  V1 V2 V3   Max
1  2  7  7 V2_V3
2  8  3  6    V1
3  1  5  4    V2
4  5  7  5    V2
5  6  3  1    V1
akrun
  • 874,273
  • 37
  • 540
  • 662
1

You can also use this:

library(dplyr)
library(tidyr)

df %>%
  mutate(id = row_number()) %>%
  pivot_longer(!id, names_to = "V", values_to = "Val") %>%
  group_by(id) %>%
  slice_max(Val) %>%
  summarise(Val = paste(V, collapse = "-")) %>%
  select(Val) %>%
  bind_cols(df) %>%
  relocate(Val, .after = last_col())

# A tibble: 5 x 4
     V1    V2    V3 Val  
  <dbl> <dbl> <dbl> <chr>
1     2     7     7 V2-V3
2     8     3     6 V1   
3     1     5     4 V2   
4     5     7     5 V2   
5     6     3     1 V1   
Anoushiravan R
  • 21,622
  • 3
  • 18
  • 41