0

I am trying to convert a dataframe with many IDs some of which are associated to multiple names to one where each ID occurs once and the names are all contained in the second column (see example below). I have been trying this for a while now but somehow can not figure out how to do it. Any advice?


df<-data.frame("ID"=c("a","a","b","b","c","d","d"),
"name"=c("John","Lisa", "Hank", "Peter", "Anne", "Lisa", "Mic")

#desired outcome:

df<-data.frame("ID"=c("a", "b", "c", "d"),
"names"=c("John;Lisa", "Hank;Peter", "Anne", "Lisa;Mic")

ktm
  • 67
  • 6

1 Answers1

1

tidyverse

library(tidyverse)

df <- data.frame(
  "ID" = c("a", "a", "b", "b", "c", "d", "d"),
  "name" = c("John", "Lisa", "Hank", "Peter", "Anne", "Lisa", "Mic")
)

df %>% 
  group_by(ID) %>% 
  summarise(name = str_c(name, collapse = ";"), .groups = "drop")
#> # A tibble: 4 x 2
#>   ID    name      
#>   <chr> <chr>     
#> 1 a     John;Lisa 
#> 2 b     Hank;Peter
#> 3 c     Anne      
#> 4 d     Lisa;Mic

data.table

library(data.table)

setDT(df)[, list(name = paste(name, collapse = ";")), by = ID][]
#>    ID       name
#> 1:  a  John;Lisa
#> 2:  b Hank;Peter
#> 3:  c       Anne
#> 4:  d   Lisa;Mic

base

aggregate(name ~ ID, data = df, FUN = function(x) paste(x, collapse = ";"))
#>   ID       name
#> 1  a  John;Lisa
#> 2  b Hank;Peter
#> 3  c       Anne
#> 4  d   Lisa;Mic

Created on 2023-01-20 with reprex v2.0.2

Yuriy Saraykin
  • 8,390
  • 1
  • 7
  • 14