0

Say I have the following data.frame:

# Example input: `letters` holds one or more comma-separated entries per row,
# and `value` is the number attached to that row.
df <- data.frame(
  letters = c("a, b", "a", "b", "a, c"),
  value = c(1, 2, 3, 4)
)
df
#>   letters value
#> 1    a, b     1
#> 2       a     2
#> 3       b     3
#> 4    a, c     4

which I want to split/melt into:

#>   letters value
#> 1       a     1
#> 2       b     1
#> 3       a     2
#> 4       b     3
#> 5       a     4 
#> 6       c     4 

so that there is a single record for each letter.

JohnCoene
  • 2,107
  • 1
  • 14
  • 31

4 Answers

5

You can try `strsplit`: split the strings on `,` and then repeat the `value` field once for each resulting piece.

# Split each entry of `letters` on commas; `char` is a list holding one
# character vector of pieces per row of `df`.
char <- strsplit(as.character(df$letters), ",")
# Flatten the pieces and repeat each `value` once per piece of its row.
# trimws() removes the space that follows each comma (otherwise the result
# contains " b" and " c" rather than "b" and "c"); vapply() guarantees an
# integer count per row, unlike sapply().
data.frame(
  letter = trimws(unlist(char)),
  value = rep(df$value, vapply(char, length, integer(1)))
)


   letter value
#1      a     1
#2      b     1
#3      a     2
#4      b     3
#5      a     4
#6      c     4

Following @docendo discimus's suggestion in the comments, for faster results you could try:

# Split each entry of `letters` on literal commas. fixed = TRUE skips the
# regex engine, and is spelled out in full because `T` can be reassigned.
char <- strsplit(as.character(df$letters), ",", fixed = TRUE)
# Flatten the pieces and repeat each `value` once per piece of its row.
# trimws() removes the space that follows each comma, so the letters come out
# as "b" and "c" rather than " b" and " c".
data.frame(
  letter = trimws(unlist(char)),
  value = rep(df$value, lengths(char))
)
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • 3
    You could use `fixed = TRUE` for faster string splitting and `lengths(char)` instead of `sapply(char, FUN=length)` – talat Apr 22 '16 at 05:55
4
library(dplyr)
library(tidyr)

# Example input: `letters` holds one or more comma-separated entries per row.
df <- data.frame(letters = c("a, b", "a", "b", "a, c"), value = c(1, 2, 3, 4))

# Replace `letters` with a list-column holding the pieces of each entry ...
with_parts <- mutate(df, letters = strsplit(as.character(letters), ", "))
# ... then expand that list-column so every piece gets its own row.
unnest(with_parts, letters)

Source: local data frame [6 x 2]

  value letters
  (dbl)   (chr)
1     1       a
2     1       b
3     2       a
4     3       b
5     4       a
6     4       c
inscaven
  • 2,514
  • 19
  • 29
  • 1
    Nice answer - you could also use `strsplit(as.character(letters), ", ", fixed=TRUE)` for faster string splitting – talat Apr 22 '16 at 05:58
3

We can use cSplit

 library(splitstackshape)
 # Split the "letters" column on ", " and stack the pieces in long format,
 # giving one row per letter.
 cSplit(df, splitCols = "letters", sep = ", ", direction = "long")
#   letters value
#1:       a     1
#2:       b     1
#3:       a     2
#4:       b     3
#5:       a     4
#6:       c     4
akrun
  • 874,273
  • 37
  • 540
  • 662
2

You can also do this step by step:

# Example input: `letters` holds one or more comma-separated entries per row.
df <- data.frame(letters = c("a, b", "a", "b", "a, c"), value = c(1, 2, 3, 4))

# Split every entry on literal commas; `parts` is a list with one character
# vector of pieces per row of `df`.
parts <- strsplit(as.character(df$letters), split = ",", fixed = TRUE)

# Flatten the pieces and strip the space that follows each comma.
l <- trimws(unlist(parts))

# Repeat each value once per piece of its row. Counting the pieces (instead
# of assuming exactly two whenever a comma is present) keeps `x` aligned with
# `l` for entries with any number of comma-separated items.
x <- rep(df$value, lengths(parts))

df <- data.frame(l = l, x = x)
Koundy
  • 5,265
  • 3
  • 24
  • 37