0

I have data like this:

# Example data frame with two factor columns, `from` and `to` (18 rows).
# Levels are listed explicitly so their order does not depend on how the
# current locale sorts strings.
df <- data.frame(
  from = factor(
    c(
      "A0A0A6YXQ7", "A0A0A6YXW8", "A0A0A6YXX6", "A0A0A6YXZ1", "A0A0A6YY28",
      "A0A0A6YY47", "A0A0A6YY78", "A0A0A6YY91", "A0A023J6K5", "A0A023J6L7",
      "A0A023J6M1", "A0A067XG53", "A0A067XKM5", "A0A067XKP8", "A0A067XKR4",
      "A0A067XKR4", "A0A067XKW4", "A0A067XKW4"
    ),
    levels = c(
      "A0A023J6K5", "A0A023J6L7", "A0A023J6M1", "A0A067XG53", "A0A067XKM5",
      "A0A067XKP8", "A0A067XKR4", "A0A067XKW4", "A0A0A6YXQ7", "A0A0A6YXW8",
      "A0A0A6YXX6", "A0A0A6YXZ1", "A0A0A6YY28", "A0A0A6YY47", "A0A0A6YY78",
      "A0A0A6YY91"
    )
  ),
  to = factor(
    c(
      "Myo1f", "Pak2", "Arhgap15", "Igtp", "pol", "MumuTL", "MumuTL",
      "MumuTL", "ND4", "ND1", "ND4", "Cask", "ND1", "COXI", "ND4F", "ND4",
      "ND1", "ND1F"
    ),
    levels = c(
      "Arhgap15", "Cask", "COXI", "Igtp", "MumuTL", "Myo1f", "ND1", "ND1F",
      "ND4", "ND4F", "Pak2", "pol"
    )
  )
)

I want to make the `from` column unique. In the `from` column, two strings are not unique: A0A067XKR4 and A0A067XKW4. For each of these, I want to combine the corresponding `to` values into a single string separated by `;`, so the results will be ND4F;ND4 and ND1;ND1F.

         from       to
1  A0A0A6YXQ7    Myo1f
2  A0A0A6YXW8     Pak2
3  A0A0A6YXX6 Arhgap15
4  A0A0A6YXZ1     Igtp
5  A0A0A6YY28      pol
6  A0A0A6YY47   MumuTL
7  A0A0A6YY78   MumuTL
8  A0A0A6YY91   MumuTL
9  A0A023J6K5      ND4
10 A0A023J6L7      ND1
11 A0A023J6M1      ND4
12 A0A067XG53     Cask
13 A0A067XKM5      ND1
14 A0A067XKP8     COXI
15 A0A067XKR4     ND4F;ND4
17 A0A067XKW4     ND1;ND1F

This shows which ones are not unique:

# Values of `from` that repeat an earlier row: duplicated() flags the second
# and later occurrences of each value. Uses `df`, the data frame defined
# above (the original referred to an undefined `df2`).
df$from[duplicated(df$from)]
nik
  • 2,500
  • 5
  • 21
  • 48

1 Answers1

0

A simple option is to use the `aggregate` function, though other options exist.

    # For each distinct `from`, join its `to` values into one ";"-separated string.
    aggregate(to ~ from, data = df, FUN = function(x) paste(x, collapse = ";"))
Rahul Pant
  • 707
  • 5
  • 7