0

I have a data.frame like this:

24.8     
23.2     
22.8        
22.5     
22.5      
22.4     
22.4      
22.4      
22.3     
22.2     
22.2      
22.2      
22      
21.9      
21.9     
21.8    

I would like to add a column giving the number of times each value occurs, to get the following output:

24.8      1      
23.2      1      
22.8      1      
22.5      2      
22.5      2     
22.4      3       
22.4      3        
22.4      3        
22.3      1     
22.2      3          
22.2      3         
22.2      3        
22        1                
21.9      2           
21.9      2         
21.8      1

How can this be done? In other words, since 24.8 occurs once, it gets the value 1; since 22.5 occurs twice, it gets the value 2; and so on.

Matt Dowle
  • 58,872
  • 22
  • 166
  • 224
Bfu38
  • 1,081
  • 1
  • 8
  • 17

3 Answers

11

You can use ave() as follows:

# Example data: one numeric column `x` holding the values from the question.
myData <- data.frame(
  x = c(24.8, 23.2, 22.8, 22.5, 22.5, 22.4, 22.4, 22.4,
        22.3, 22.2, 22.2, 22.2, 22, 21.9, 21.9, 21.8)
)

# Group `x` by its own values and replace every element with the size of the
# group it belongs to, i.e. the number of times that value occurs.
myData[["Index"]] <- with(myData, ave(x, x, FUN = length))
myData
#       x Index
# 1  24.8     1
# 2  23.2     1
# 3  22.8     1
# 4  22.5     2
# 5  22.5     2
# 6  22.4     3
# 7  22.4     3
# 8  22.4     3
# 9  22.3     1
# 10 22.2     3
# 11 22.2     3
# 12 22.2     3
# 13 22.0     1
# 14 21.9     2
# 15 21.9     2
# 16 21.8     1

You can also use the data.table package as follows:

# The snippet needs the data.table package attached: it provides data.table(),
# key(), the `:=` operator and the `.N` symbol used below.
library(data.table)

# Build the table keyed on `x`. The printed output below is in ascending order
# of `x` rather than in the original input order.
myData2 <- data.table(x = c(24.8, 23.2, 22.8, 22.5, 22.5, 22.4, 22.4, 22.4,
                            22.3, 22.2, 22.2, 22.2, 22, 21.9, 21.9, 21.8),
                      key = "x")
# A `data.table` noob approach
# myData2[, Index := lapply(.SD, length), by = key(myData2)][]
# Or a better approach, as suggested by @Roland
# `.N` is the number of rows in the current group; `:=` adds it as a new
# column `Index` to myData2 by reference (no reassignment needed).
myData2[, Index := .N, by = key(myData2)]
print(myData2)
#        x Index
#  1: 21.8     1
#  2: 21.9     2
#  3: 21.9     2
#  4: 22.0     1
#  5: 22.2     3
#  6: 22.2     3
#  7: 22.2     3
#  8: 22.3     1
#  9: 22.4     3
# 10: 22.4     3
# 11: 22.4     3
# 12: 22.5     2
# 13: 22.5     2
# 14: 22.8     1
# 15: 23.2     1
# 16: 24.8     1
Roland
  • 127,288
  • 10
  • 191
  • 288
A5C1D2H2I1M1N2O1R2T1
  • 190,393
  • 28
  • 405
  • 485
6

This could be done with merge and table:

# Example data: one numeric column `V1` holding the values from the question.
dat <- data.frame(V1 = c(24.8, 23.2, 22.8, 22.5, 22.5, 22.4, 22.4, 22.4,
                         22.3, 22.2, 22.2, 22.2, 22,   21.9, 21.9, 21.8))

# table() counts the occurrences of each distinct value; as.data.frame() turns
# those counts into a two-column data frame (Var1 = value, Freq = count), which
# is then joined back onto the original rows via V1 == Var1.
# `sort = FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned. It skips sorting on the key; note that ?merge documents the
# resulting row order as unspecified.
dat_freq <- merge(dat, as.data.frame(table(dat$V1)),
                  by.x = "V1", by.y = "Var1", sort = FALSE)
dat_freq

#      V1 Freq
# 1  24.8    1
# 2  23.2    1
# 3  22.8    1
# 4  22.5    2
# 5  22.5    2
# 6  22.4    3
# 7  22.4    3
# 8  22.4    3
# 9  22.3    1
# 10 22.2    3
# 11 22.2    3
# 12 22.2    3
# 13 22.0    1
# 14 21.9    2
# 15 21.9    2
# 16 21.8    1
Sven Hohenstein
  • 80,497
  • 17
  • 145
  • 168
3

Or use package plyr:

library(plyr)

# Example data: one numeric column `a` holding the values from the question.
a <- c(24.8, 23.2, 22.8, 22.5, 22.5, 22.4, 22.4, 22.4,
       22.3, 22.2, 22.2, 22.2, 22, 21.9, 21.9, 21.8)
df <- data.frame(a)

# Split `df` by the distinct values of `a`, and within each piece use
# transform() to add a `freq` column holding the number of rows in that piece.
# As the output below shows, the result comes back ordered by `a`, not in the
# original row order.
ddply(df, ~a, transform, freq = length(a))

#       a freq
# 1  21.8    1
# 2  21.9    2
# 3  21.9    2
# 4  22.0    1
# 5  22.2    3
# 6  22.2    3
# 7  22.2    3
# 8  22.3    1
# 9  22.4    3
# 10 22.4    3
# 11 22.4    3
# 12 22.5    2
# 13 22.5    2
# 14 22.8    1
# 15 23.2    1
# 16 24.8    1
Roland
  • 127,288
  • 10
  • 191
  • 288