2

I want to repeat the part of the string between ] and ; once for each of the elements separated by ; inside the preceding []. So the desired output for [A1, AB11; A2, AB22] I1, C1 would be [A1, AB11] I1, C1; [A2, AB22] I1, C1. Below, df1 is the input and df2 is the desired result. Any hints on where to start? Thanks.

# Sample input: each "[...]" group may hold several ";"-separated entries,
# followed by the text that belongs to every entry of that group.
df1 <- data.frame(
  String = c(
    "[A1, AB11; A2, AB22] I1, C1; [A3, AB33] I3, C1",
    "[A4, AB44] I4, C4; [A5, AB55; A6, AB66; A7, AB77] I7, C7"
  )
)
df1

                                                    String
1           [A1, AB11; A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55; A6, AB66; A7, AB77] I7, C7


# Desired result: one "[entry] suffix" item per bracket entry.
df2 <- data.frame(
  String = c(
    "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1",
    "[A4, AB44] I4, C4; [A5, AB55] I7, C7;[A6, AB66] I7, C7; [A7, AB77] I7, C7"
  )
)

df2

                                                                     String
1                   [A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55] I7, C7;[A6, AB66] I7, C7; [A7, AB77] I7, C7
MYaseen208
  • 22,666
  • 37
  • 165
  • 309

3 Answers3

2

Here's a base R solution:

# Expand every "[a; b; ...] suffix" group into "[a] suffix; [b] suffix; ...".
# A sentinel ";" is appended so the last group's suffix also ends in ";".
# strsplit() on the brackets then leaves the bracket contents at the even
# positions and the text following each group at the next odd position.
vapply(strsplit(paste0(df1$String, ";"), "\\[|\\]"), function(x) {
  for (i in seq_along(x)) {
    if (i %% 2 == 0) {
      # Trim blanks after the suffix's ";" so items are joined by "; " only.
      # The suffix is used as a regex replacement below, so a literal
      # backslash in it would need escaping.
      suffix <- sub("\\s+$", "", x[i + 1])
      # Swallow the blanks after each inner ";" as well; otherwise they end
      # up inside the next opening bracket ("[ A2, AB22]").
      x[i] <- paste0("[", gsub(";\\s*", paste0("]", suffix, " ["), x[i]), "]")
    }
  }
  # Drop the sentinel ";" again.
  sub(";$", "", paste(x, collapse = ""))
}, character(1))
#> [1] "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1"
#> [2] "[A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7"
Allan Cameron
  • 147,086
  • 7
  • 49
  • 87
2

I had tried something similar in the past, and thought it might be interesting to adapt using glue and unglue packages.

The initial strsplit separates by semicolon, ignoring the semicolons between the brackets.

The unglue will separate out what is between brackets that gets repeated, and what is appended outside the brackets, for each row.

library(glue)
library(unglue)
library(purrr)

# Format bracket content(s) `inside` followed by the trailing text `outside`
# as "[inside] outside" using glue interpolation.
my_fun <- function(inside, outside) {
  template <- "[{inside}] {outside}"
  glue(template)
}

# For every input string:
#   1. split on "; " while skipping semicolons that sit inside "[...]"
#      (the (*SKIP)(*F) branch discards bracketed matches),
#   2. let unglue separate the bracket contents (Inside) from the trailing
#      text (Outside),
#   3. emit one "[entry] Outside" item per "; "-separated entry of Inside.
# as.character() keeps strsplit() working when String is a factor, which is
# what data.frame() produces by default before R 4.0.
vapply(
  strsplit(
    as.character(df1$String),
    "\\[[^]]*\\](*SKIP)(*F)|;\\s",
    perl = TRUE
  ),
  function(x) {
    ud <- unglue_data(x, patterns = "[{Inside}] {Outside}")
    # One character vector of entries per bracket group.
    entries <- strsplit(ud[["Inside"]], "; ", fixed = TRUE)
    # my_fun() is vectorised over the entries; Outside is recycled per group.
    expanded <- map2(entries, ud[["Outside"]], my_fun)
    paste(unlist(expanded), collapse = "; ")
  },
  character(1)
)

Output

[1] "[A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1"                   
[2] "[A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7"
Ben
  • 28,684
  • 5
  • 23
  • 45
1

Not the tidiest solution; however, it uses stringr:

# Assumes stringr and purrr are attached (and %>% is available).
# 1. Split each string at the ";" that precedes an opening bracket.
# 2. str_match() yields a matrix per string: column 2 holds the bracket
#    contents, column 3 the text that follows the bracket.
# 3. Emit one "[entry] suffix" item per ";"-separated bracket entry.
str_split(df1$String, ";(?= *\\[)") %>%
  map(str_match, "\\[(.+?)\\] (.+)") %>%
  map(function(m) {
    # Keep the str_split() result as a list of character vectors. Wrapping it
    # in paste0() would deparse multi-entry groups into the literal text
    # 'c("A1, AB11", "A2, AB22")'.
    entries <- str_split(m[, 2], "; ?")
    expanded <- map2(entries, m[, 3], ~ paste0("[", .x, "] ", .y))
    paste(unlist(expanded), collapse = "; ")
  })

A somewhat nicer solution:

# Assumes dplyr, tidyr, purrr and stringr are attached.
# One row per "[...] suffix" group, then split each group into
# (text before "[", bracket entries, suffix), rebuild "[entry] suffix" for
# every entry and glue the pieces back together per original String.
# Note: group_by(String) returns one row per distinct String.
as_tibble(df1) %>%
  mutate(splits = str_split(String, "; *(?=\\[)")) %>%
  unnest_longer(col = splits) %>%
  mutate(
    splits = map(str_split(splits, "\\[|\\] ?"), str_split, "; ?")
  ) %>%
  # The list elements are unnamed, so names_sep is supplied to get the
  # predictable columns splits_1, splits_2, splits_3 instead of relying on
  # auto-repaired names (newer tidyr errors without names_sep).
  unnest_wider(splits, names_sep = "_") %>%
  mutate(
    val = map2_chr(
      splits_2, splits_3,
      ~ paste0("[", .x, "] ", .y, collapse = "; ")
    )
  ) %>%
  group_by(String) %>%
  summarise(val = paste0(val, collapse = "; "))
# A tibble: 2 x 2
  String                             val                                        
  <fct>                              <chr>
1 [A1, AB11; A2, AB22] I1, C1; [A3,… [A1, AB11] I1, C1; [A2, AB22] I1, C1; [A3, AB33] I3, C1
2 [A4, AB44] I4, C4; [A5, AB55; A6,… [A4, AB44] I4, C4; [A5, AB55] I7, C7; [A6, AB66] I7, C7; [A7, AB77] I7, C7
Abdessabour Mtk
  • 3,895
  • 2
  • 14
  • 21