0

I have the following dataframe (sour):

"sequence" "support"
"1" "<{\"OV188\"}>" 0.628465804066543
"2" "<{\"OV191\"}>" 0.584103512014787
"3" "<{\"OV194\"}>" 0.584103512014787
"4" "<{\"OV195\"}>" 0.680221811460259
"5" "<{\"OV197\"}>" 0.584103512014787
"6" "<{\"OV200\"}>" 0.56007393715342
"7" "<{\"OV188\"},{\"OV200\"}>" 0.56007393715342
"8" "<{\"OV191\"},{\"OV197\"}>" 0.584103512014787
"9" "<{\"OV194\"},{\"OV197\"}>" 0.584103512014787
"10" "<{\"OV195\"},{\"OV197\"}>" 0.584103512014787
"11" "<{\"OV194\"},{\"OV195\"},{\"OV197\"}>" 0.584103512014787
"12" "<{\"OV191\"},{\"OV195\"},{\"OV197\"}>" 0.584103512014787
"13" "<{\"OV191\"},{\"OV194\"},{\"OV195\"},{\"OV197\"}>" 0.584103512014787
"14" "<{\"OV191\"},{\"OV194\"},{\"OV197\"}>" 0.584103512014787
"15" "<{\"OV191\"},{\"OV195\"}>" 0.584103512014787
"16" "<{\"OV194\"},{\"OV195\"}>" 0.584103512014787
"17" "<{\"OV191\"},{\"OV194\"},{\"OV195\"}>" 0.584103512014787
"18" "<{\"OV191\"},{\"OV194\"}>" 0.584103512014787

The required result is the following format:

"sequence" "support"
"1" "<{OV188}>" 0.628465804066543
"2" "<{OV191}>" 0.584103512014787
"3" "<{OV194}>" 0.584103512014787
"4" "<{OV195}>" 0.680221811460259
"5" "<{OV197}>" 0.584103512014787
"6" "<{OV200}>" 0.56007393715342
"7" "<{OV188},{OV200}>" 0.56007393715342
"8" "<{OV191},{OV197}>" 0.584103512014787
"9" "<{OV194},{OV197}>" 0.584103512014787
"10" "<{OV195},{OV197}>" 0.584103512014787
"11" "<{OV194},{OV195},{OV197}>" 0.584103512014787
"12" "<{OV191},{OV195},{OV197}>" 0.584103512014787
"13" "<{OV191},{OV194},{OV195},{OV197}>" 0.584103512014787
"14" "<{OV191},{OV194},{OV197}>" 0.584103512014787
"15" "<{OV191},{OV195}>" 0.584103512014787
"16" "<{OV194},{OV195}>" 0.584103512014787
"17" "<{OV191},{OV194},{OV195}>" 0.584103512014787
"18" "<{OV191},{OV194}>" 0.584103512014787

I use the following code for it:

# Work on a copy of `sour`; the modified copy is written back on the last line.
a<-sour
# In each column name, replace the first "X.<text>." match with just <text>.
names(a) <- sub("X\\.(.*)\\.", "\\1",  names(a))
# NOTE(review): sub() replaces only the first match in each string, and the
# pattern has a single greedy group (.*) that runs from the first `"` after
# `<{` to the last `"` before `}>`. Only the outermost pair of quotes is
# therefore dropped; multi-item values keep their interior `"},{"` quotes.
# Removing every quote would need a global replacement, e.g.
# gsub('"', "", a$sequence, fixed = TRUE).
a$sequence <- sub('<\\{"(.*)"\\}>', "<{\\1}>", a$sequence)
sour<-a

However, I still get unwanted characters when there is more than one value in the "sequence" column, as seen here (from row 7 onward):

"sequence" "support"
"1" "<{OV188}>" 0.628465804066543
"2" "<{OV191}>" 0.584103512014787
"3" "<{OV194}>" 0.584103512014787
"4" "<{OV195}>" 0.680221811460259
"5" "<{OV197}>" 0.584103512014787
"6" "<{OV200}>" 0.56007393715342
"7" "<{OV188\"},{\"OV200}>" 0.56007393715342
"8" "<{OV191\"},{\"OV197}>" 0.584103512014787
"9" "<{OV194\"},{\"OV197}>" 0.584103512014787
"10" "<{OV195\"},{\"OV197}>" 0.584103512014787
"11" "<{OV194\"},{\"OV195\"},{\"OV197}>" 0.584103512014787
"12" "<{OV191\"},{\"OV195\"},{\"OV197}>" 0.584103512014787
"13" "<{OV191\"},{\"OV194\"},{\"OV195\"},{\"OV197}>" 0.584103512014787
"14" "<{OV191\"},{\"OV194\"},{\"OV197}>" 0.584103512014787
"15" "<{OV191\"},{\"OV195}>" 0.584103512014787
"16" "<{OV194\"},{\"OV195}>" 0.584103512014787
"17" "<{OV191\"},{\"OV194\"},{\"OV195}>" 0.584103512014787
"18" "<{OV191\"},{\"OV194}>" 0.584103512014787

What do I have to change in the code so that rows 7 to 18 come out without the quotation marks and backslashes, as in rows 1-6?

> dput (sour)
structure(list(sequence = c("<{\"OV188\"}>", "<{\"OV191\"}>", 
"<{\"OV194\"}>", "<{\"OV195\"}>", "<{\"OV197\"}>", "<{\"OV200\"}>", 
"<{\"OV188\"},{\"OV200\"}>", "<{\"OV191\"},{\"OV197\"}>", "<{\"OV194\"},{\"OV197\"}>", 
"<{\"OV195\"},{\"OV197\"}>", "<{\"OV194\"},{\"OV195\"},{\"OV197\"}>", 
"<{\"OV191\"},{\"OV195\"},{\"OV197\"}>", "<{\"OV191\"},{\"OV194\"},{\"OV195\"},{\"OV197\"}>", 
"<{\"OV191\"},{\"OV194\"},{\"OV197\"}>", "<{\"OV191\"},{\"OV195\"}>", 
"<{\"OV194\"},{\"OV195\"}>", "<{\"OV191\"},{\"OV194\"},{\"OV195\"}>", 
"<{\"OV191\"},{\"OV194\"}>"), support = c(0.628465804066543, 
0.584103512014787, 0.584103512014787, 0.680221811460259, 0.584103512014787, 
0.56007393715342, 0.56007393715342, 0.584103512014787, 0.584103512014787, 
0.584103512014787, 0.584103512014787, 0.584103512014787, 0.584103512014787, 
0.584103512014787, 0.584103512014787, 0.584103512014787, 0.584103512014787, 
0.584103512014787)), .Names = c("sequence", "support"), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18"))
Avi
  • 2,247
  • 4
  • 30
  • 52
  • Please post a `dput` of your data. – SabDeM Nov 19 '15 at 22:01
  • @SabDeM, I added the dput of the sour. at the end of the question. – Avi Nov 19 '15 at 22:03
  • it does not have the `\` nor the `/`, i mean the `dput`. – SabDeM Nov 19 '15 at 22:05
  • It has the \"OV188\" for each character and I would like to remove them as seen in the example. – Avi Nov 19 '15 at 22:07
  • 1
    If you are just trying to remove the backslashes, you don't have to, they aren't really there. It's like the kid in the Matrix that bends the spoon, you just have to realize that there is no spoon. – Pierre L Nov 19 '15 at 22:32
  • I would like to get an output as seen in the example. Without the special characters. – Avi Nov 19 '15 at 22:34
  • I have a code that needs the result format as an input in order to work. – Avi Nov 19 '15 at 22:37
  • If you want output without escaped interior quote characters, then use `cat`. I repeat Pierre's comment.... there is no spoon. – IRTFM Nov 20 '15 at 00:37

1 Answers1

1

How about

library(dplyr)
library(stringi)

# Remove every literal double-quote character from `sequence`, then wrap each
# cleaned value in one pair of literal double quotes with shQuote(type = "cmd").
# The transformed data frame is returned, not assigned back to `sour`.
sour %>%
  mutate(
    sequence = shQuote(
      stri_replace_all_fixed(sequence, '"', ''),
      type = "cmd"
    )
  )

I would also recommend unnesting sequence.

library(tidyr)

# Split each sequence string on "," into its items, give every item its own
# row, then reduce each item to the number it contains (e.g. <{"OV191"} -> 191).
sour %>%
  mutate(sequence = sequence %>% stri_split_fixed(",")) %>%
  unnest(sequence) %>%
  # tidyr::extract_numeric() is deprecated; this is its base R equivalent:
  # drop everything except digits, "." and "-", then convert to numeric.
  mutate(sequence = as.numeric(gsub("[^0-9.-]+", "", sequence)))
bramtayl
  • 4,004
  • 2
  • 11
  • 18
  • It appeared that removing the double-quotes was not the questioner's goal. He just didn't realize that there were no backslashes. – IRTFM Nov 20 '15 at 00:40
  • Thanks @bramtayl. The solution is very nice, but note that the required result has outer quotation marks, which your solution eliminates. I need, for example, "1" "<{OV188}>", but with your solution I get 1 <{OV188}> – Avi Nov 20 '15 at 04:24
  • I suppose you can just add back in literal quotations around the edges. See edit. – bramtayl Nov 20 '15 at 05:10
  • @bramtayl, Thanks a lot! – Avi Nov 20 '15 at 05:49