0

Suppose I have a data set:

x y
1 a
1 a
1 a
1 a
2 a
2 a
2 b 
3 c
3 e

How do I delete the rows whose x value occurs more than 3 times (e.g. the rows with x = 1, which appears 4 times)?

r2evans
  • 141,215
  • 6
  • 77
  • 149
damien
  • 69
  • 4

1 Answer

1

base R

# Keep only the rows whose x value occurs at most 3 times.
# ave() returns a vector of the same type as its first argument, so the
# counts are computed over seq_along(dat$x) instead of dat$x itself: that
# keeps them integer even when x is character (where the counts would become
# strings and "10" < 4 would be compared alphabetically) or a factor (which
# cannot hold the counts).
# drop = FALSE keeps the result a data frame even if dat has a single column.
# Note: ave() does not group positions where x is NA; handle missing values
# separately if x can contain them.
dat[ave(seq_along(dat$x), dat$x, FUN = length) < 4L, , drop = FALSE]
#   x y
# 5 2 a
# 6 2 a
# 7 2 b
# 8 3 c
# 9 3 e

dplyr

library(dplyr)
# Group the rows by x, keep only the groups that have at most 3 rows
# (n() is the number of rows in the current group), then remove the
# grouping so later operations act on the whole table again.
ungroup(
  filter(
    group_by(dat, x),
    n() <= 3L
  )
)
# # A tibble: 5 x 2
#       x y    
#   <int> <chr>
# 1     2 a    
# 2     2 a    
# 3     2 b    
# 4     3 c    
# 5     3 e    

data.table

library(data.table)
# Convert dat to a data.table and, for each x group, return the whole group
# (.SD) when it has at most 3 rows (.N is the group size) and no rows
# otherwise. The result comes back grouped by x, with x as the first column.
# The trailing [] makes sure the result is printed.
as.data.table(dat)[, .SD[.N <= 3L], by = "x"][]
#        x      y
#    <int> <char>
# 1:     2      a
# 2:     2      a
# 3:     2      b
# 4:     3      c
# 5:     3      e
r2evans
  • 141,215
  • 6
  • 77
  • 149