1

I have the following info:

M_PT

     CEDIS       |              PLAZA        
 9999999021-1    |   10MDA
 9999999021-2    |   10CAN
 9999999012-1    |   10GUD','10CLJ
 9999999012-2    |   10DZV
 9999999025-1    |   10LPB','10HHM','10OBR','10HER
 9999999025-2    |   10DCU

And I would like to end up with the following:

      CEDIS     |  PLAZA       
  9999999021-1  |   10MDA
  9999999021-2  |   10CAN
  9999999012-1  |   10GUD
  9999999012-1  |   10CLJ
  9999999012-2  |   10DZV
  9999999025-1  |   10LPB
  9999999025-1  |   10HHM
  9999999025-1  |   10OBR
  9999999025-1  |   10HER
  9999999025-2  |   10DCU

I tried to do the following but I failed miserably:

> vec  <- rep(NA,length(unlist(strsplit(M_PT[,"PLAZA"],split="','"))))
> j <- 0
> 
> for(i in 1:nrow(M_PT)){
+ 
+    if(nchar(M_PT[i,"PLAZA"]) == 5){
+    
+         vec[i] <- paste(M_PT[i,"CEDIS"],M_PT[i,"PLAZA"],sep="-")
+       
+          }else{
+ 
+         for(j in 1:sum(nchar(gsub("','","",M_PT[i,"PLAZA"])) / 5)){
+         
+         vec[i + ifelse(j == 1, 0, j - 1)] <- paste(M_PT[i,"CEDIS"], 
unlist(strsplit(M_PT[i,"PLAZA"],split="','"))[j],sep="-")
+ 
+     }
+   }
+ }

Any idea on the solution please?

Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
Max Molina
  • 73
  • 6
  • I think this question has been answered here https://stackoverflow.com/questions/13773770/split-comma-separated-column-into-separate-rows and here https://stackoverflow.com/questions/15347282/split-delimited-strings-in-a-column-and-insert-as-new-rows – hpesoj626 May 07 '18 at 00:25
  • I already fixed the structure of the post, It should be understandable now. – Max Molina May 07 '18 at 00:28
  • If you don't want to use package, you can adapt the first answer in the linked duplicated post like this: `s <- strsplit(dat$PLAZA, split = ","); dat <- data.frame(CEDIS = rep(dat$CEDIS, sapply(s, length)), PLAZA = unlist(s)); dat` – hpesoj626 May 07 '18 at 01:05

2 Answers2

4

We can use separate_rows from the package.

library(dplyr)
library(tidyr)

dat2 <- dat %>% separate_rows("PLAZA")
dat2
#           CEDIS PLAZA
# 1  9999999021-1 10MDA
# 2  9999999021-2 10CAN
# 3  9999999012-1 10GUD
# 4  9999999012-1 10CLJ
# 5  9999999012-2 10DZV
# 6  9999999025-1 10LPB
# 7  9999999025-1 10HHM
# 8  9999999025-1 10OBR
# 9  9999999025-1 10HER
# 10 9999999025-2 10DCU

DATA

dat <- read.table(text = "   CEDIS        PLAZA        
1 '9999999021-1'                             10MDA
                  2 '9999999021-2'                             10CAN
                  3 '9999999012-1'                     '10GUD, 10CLJ'
                  4 '9999999012-2'                             10DZV
                  5 '9999999025-1'       '10LPB, 10HHM, 10OBR, 10HER'
                  6 '9999999025-2'                             10DCU",
                  header = TRUE, stringsAsFactors = FALSE)
www
  • 38,575
  • 12
  • 48
  • 84
3

By using unnest , data from www.

library(dplyr)
library(tidyr)
dat %>%
  transform(COLUMN.NAME.2 = strsplit(COLUMN.NAME.2, ",")) %>%
  unnest(COLUMN.NAME.2)

    COLUMN.NAME COLUMN.NAME.2
1  9999999021-1         10MDA
2  9999999021-2         10CAN
3  9999999012-1         10GUD
4  9999999012-1         10CLJ
5  9999999012-2         10DZV
6  9999999025-1         10LPB
7  9999999025-1         10HHM
8  9999999025-1         10OBR
9  9999999025-1         10HER
10 9999999025-2         10DCU
BENY
  • 317,841
  • 20
  • 164
  • 234