2

Below is my data.frame. I would like to find the mode of each of the memory columns (MEMORY1 through MEMORY8).

> dput(d)
structure(list(MEMORY1 = c(5.5, 7, 1.5, 6, 4.5, 4.5, 5, 4, 1, 
5.5, 2.5, 4.5, 2.5, 5.5, 4, 1, 4, 5, 2.5, 5.5), MEMORY2 = c(5.5, 
3, 1.5, 6, 4.5, 4.5, 5, 4, 5, 5.5, 6.5, 4.5, 2.5, 5.5, 4, 7, 
8, 5, 6.5, 5.5), MEMORY3 = c(5.5, 3, 4.5, 2, 4.5, 4.5, 5, 4, 
5, 1.5, 6.5, 4.5, 6.5, 5.5, 4, 7, 4, 5, 6.5, 5.5), MEMORY4 = c(1.5, 
3, 4.5, 2, 1, 4.5, 5, 4, 5, 5.5, 2.5, 4.5, 2.5, 1.5, 4, 2, 4, 
5, 2.5, 1.5), MEMORY5 = c(5.5, 3, 4.5, 6, 4.5, 4.5, 5, 1, 5, 
5.5, 6.5, 4.5, 6.5, 5.5, 4, 4, 4, 5, 2.5, 1.5), MEMORY6 = c(5.5, 
7, 7.5, 6, 8, 4.5, 5, 7.5, 5, 5.5, 6.5, 4.5, 6.5, 5.5, 4, 4, 
4, 5, 2.5, 5.5), MEMORY7 = c(1.5, 3, 4.5, 2, 4.5, 4.5, 1, 4, 
5, 1.5, 2.5, 4.5, 6.5, 1.5, 4, 7, 4, 1, 6.5, 5.5), MEMORY8 = c(5.5, 
7, 7.5, 6, 4.5, 4.5, 5, 7.5, 5, 5.5, 2.5, 4.5, 2.5, 5.5, 8, 4, 
4, 5, 6.5, 5.5)), .Names = c("MEMORY1", "MEMORY2", "MEMORY3", 
"MEMORY4", "MEMORY5", "MEMORY6", "MEMORY7", "MEMORY8"), row.names = c(492L, 
509L, 510L, 518L, 519L, 522L, 527L, 533L, 535L, 542L, 543L, 557L, 
558L, 560L, 567L, 569L, 578L, 581L, 582L, 584L), class = "data.frame")

Please ignore the first unnamed column as that is not relevant here.

> d
    MEMORY1 MEMORY2 MEMORY3 MEMORY4 MEMORY5 MEMORY6 MEMORY7 MEMORY8
492     5.5     5.5     5.5     1.5     5.5     5.5     1.5     5.5
509     7.0     3.0     3.0     3.0     3.0     7.0     3.0     7.0
510     1.5     1.5     4.5     4.5     4.5     7.5     4.5     7.5
518     6.0     6.0     2.0     2.0     6.0     6.0     2.0     6.0
519     4.5     4.5     4.5     1.0     4.5     8.0     4.5     4.5
522     4.5     4.5     4.5     4.5     4.5     4.5     4.5     4.5
527     5.0     5.0     5.0     5.0     5.0     5.0     1.0     5.0
533     4.0     4.0     4.0     4.0     1.0     7.5     4.0     7.5
535     1.0     5.0     5.0     5.0     5.0     5.0     5.0     5.0
542     5.5     5.5     1.5     5.5     5.5     5.5     1.5     5.5
543     2.5     6.5     6.5     2.5     6.5     6.5     2.5     2.5
557     4.5     4.5     4.5     4.5     4.5     4.5     4.5     4.5
558     2.5     2.5     6.5     2.5     6.5     6.5     6.5     2.5
560     5.5     5.5     5.5     1.5     5.5     5.5     1.5     5.5
567     4.0     4.0     4.0     4.0     4.0     4.0     4.0     8.0
569     1.0     7.0     7.0     2.0     4.0     4.0     7.0     4.0
578     4.0     8.0     4.0     4.0     4.0     4.0     4.0     4.0
581     5.0     5.0     5.0     5.0     5.0     5.0     1.0     5.0
582     2.5     6.5     6.5     2.5     2.5     2.5     6.5     6.5
584     5.5     5.5     5.5     1.5     1.5     5.5     5.5     5.5

If I were to tabulate the values for MEMORY1, I get the following:

> table(d$MEMORY1)

  1 1.5 2.5   4 4.5   5 5.5   6   7 
  2   1   3   3   3   2   4   1   1 

So I can see that 5.5 is the mode here, and I tried as.numeric(names(table(d$MEMORY1))[which.max(table(d$MEMORY1))]), which did return 5.5. However, this is pretty clunky. How can I apply it to all 8 columns of my data.frame? I want a resulting vector containing 8 modes (one for each column). What's an elegant way to do this?

Adrian
  • 9,229
  • 24
  • 74
  • 132

1 Answer

4

This post provides an elegant function to determine the mode, so all you need to do is apply it to each column of your data frame.

# Return the most frequent value (the statistical mode) of a vector.
#
# Args:
#   x:     An atomic vector or factor.
#   na.rm: If TRUE, missing values are dropped before counting. Defaults to
#          FALSE, in which case NA is counted like any other value and can
#          itself be returned as the mode.
#
# Returns:
#   A length-one value of the same type as `x`. Ties are broken in favour of
#   the value that occurs first in `x`. An empty input yields NA.
Mode <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  ux <- unique(x)
  # match() maps each element to its position in `ux`; tabulate() counts how
  # often each position occurs, and which.max() picks the first maximum.
  ux[which.max(tabulate(match(x, ux)))]
}

# Compute the mode of every column. vapply() iterates over the data frame
# column by column, so no column is coerced to another type (apply() would
# first convert the whole frame to a matrix), and numeric(1) guarantees that
# the result is a named numeric vector with one entry per column.
vapply(d, Mode, numeric(1))

Yields:

MEMORY1 MEMORY2 MEMORY3 MEMORY4 MEMORY5 MEMORY6 MEMORY7 MEMORY8 
    5.5     5.5     4.5     1.5     4.5     5.5     4.5     5.5 
Community
  • 1
  • 1
Ritchie Sacramento
  • 29,890
  • 4
  • 48
  • 56