I am trying to extract both a letter (should be K or Y) and all digits between that letter and the pattern (XO44_TMT6)
and put extracted values in two separate columns (Mod.residue
and Mod.position.in.pep
), but failed to get what I want.
Below are my codes and data frame. Can anyone explain why my codes failed and how to fix that?
Thanks so much!
My data frame:
structure(list(Modifications = c("Y9(XO44_TMT6)", "Y9(XO44_TMT6)",
"Y9(XO44_TMT6)", "Y9(XO44_TMT6)", "Y9(XO44_TMT6)", "Y9(XO44_TMT6)",
"Y9(XO44_TMT6)", "Y8(XO44_TMT6)", "Y8(XO44_TMT6)", "Y8(XO44_TMT6)",
"Y8(XO44_TMT6)", "Y8(XO44_TMT6)", "Y8(XO44_TMT6)", "Y8(XO44_TMT6)",
"Y8(XO44_TMT6)", "Y8(XO44_TMT6)", "Y8(XO44_TMT6)", "Y8(XO44_TMT6)",
"Y8(XO44_TMT6)", "Y8(XO44_TMT6)", "Y8(XO44_TMT6)", "Y8(XO44_TMT6)",
"Y8(XO44_TMT6)", "Y7(XO44_TMT6); M9(Oxidation)", "Y7(XO44_TMT6); M8(Oxidation)",
"Y7(XO44_TMT6); M8(Oxidation)", "Y7(XO44_TMT6); C9(Carbamidomethyl); C18(Carbamidomethyl)",
"Y7(XO44_TMT6); C15(Carbamidomethyl)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)",
"Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)",
"Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)",
"Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)",
"Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)",
"Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)",
"Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)", "Y7(XO44_TMT6)",
"Y6(XO44_TMT6); C23(Carbamidomethyl)", "Y6(XO44_TMT6); C12(Carbamidomethyl)",
"Y6(XO44_TMT6); C12(Carbamidomethyl)", "Y6(XO44_TMT6)", "Y6(XO44_TMT6)",
"Y6(XO44_TMT6)", "Y6(XO44_TMT6)", "Y6(XO44_TMT6)", "Y6(XO44_TMT6)",
"Y6(XO44_TMT6)", "Y6(XO44_TMT6)", "Y6(XO44_TMT6)", "Y6(XO44_TMT6)",
"Y6(XO44_TMT6)", "Y6(XO44_TMT6)", "Y5(XO44_TMT6)", "Y5(XO44_TMT6)",
"Y5(XO44_TMT6)", "Y5(XO44_TMT6)", "Y5(XO44_TMT6)", "Y5(XO44_TMT6)",
"Y5(XO44_TMT6)", "Y5(XO44_TMT6)", "Y5(XO44_TMT6)", "Y5(XO44_TMT6)",
"Y5(XO44_TMT6)", "Y5(XO44_TMT6)", "Y5(XO44_TMT6)", "Y5(XO44_TMT6)",
"Y5(XO44_TMT6)", "Y5(XO44_TMT6)", "Y5(XO44_TMT6)", "Y4(XO44_TMT6); C29(Carbamidomethyl)",
"Y4(XO44_TMT6); C13(Carbamidomethyl)", "Y4(XO44_TMT6)", "Y4(XO44_TMT6)",
"Y4(XO44_TMT6)", "Y4(XO44_TMT6)", "Y4(XO44_TMT6)", "Y4(XO44_TMT6)",
"Y4(XO44_TMT6)", "Y4(XO44_TMT6)", "Y4(XO44_TMT6)", "Y4(XO44_TMT6)",
"Y4(XO44_TMT6)", "Y3(XO44_TMT6); M5(Oxidation)", "Y3(XO44_TMT6); C11(Carbamidomethyl)",
"Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)",
"Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)",
"Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)",
"Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)",
"Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)",
"Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)",
"Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)", "Y3(XO44_TMT6)",
"Y29(XO44_TMT6)", "Y25(XO44_TMT6)", "Y25(XO44_TMT6)", "Y25(XO44_TMT6)",
"Y23(XO44_TMT6)", "Y22(XO44_TMT6)", "Y22(XO44_TMT6)", "Y22(XO44_TMT6)",
"Y22(XO44_TMT6)", "Y22(XO44_TMT6)", "Y22(XO44_TMT6)", "Y22(XO44_TMT6)",
"Y21(XO44_TMT6)", "Y20(XO44_TMT6)", "Y20(XO44_TMT6)", "Y20(XO44_TMT6)",
"Y20(XO44_TMT6)", "Y20(XO44_TMT6)", "Y20(XO44_TMT6)", "Y20(XO44_TMT6)",
"Y20(XO44_TMT6)", "Y2(XO44_TMT6); C8(Carbamidomethyl)", "Y2(XO44_TMT6); C19(Carbamidomethyl)",
"Y2(XO44_TMT6); C15(Carbamidomethyl)", "Y2(XO44_TMT6); C15(Carbamidomethyl)",
"Y2(XO44_TMT6); C15(Carbamidomethyl)", "Y2(XO44_TMT6); C15(Carbamidomethyl)",
"Y2(XO44_TMT6); C15(Carbamidomethyl)", "Y2(XO44_TMT6)", "Y2(XO44_TMT6)",
"Y2(XO44_TMT6)", "Y2(XO44_TMT6)", "Y2(XO44_TMT6)", "Y2(XO44_TMT6)",
"Y2(XO44_TMT6)", "Y2(XO44_TMT6)", "Y2(XO44_TMT6)", "Y2(XO44_TMT6)",
"Y2(XO44_TMT6)", "Y2(XO44_TMT6)", "Y2(XO44_TMT6)", "Y2(XO44_TMT6)",
"Y2(XO44_TMT6)", "Y2(XO44_TMT6)", "Y19(XO44_TMT6)", "Y19(XO44_TMT6)",
"Y19(XO44_TMT6)", "Y19(XO44_TMT6)", "Y19(XO44_TMT6)", "Y18(XO44_TMT6)",
"Y18(XO44_TMT6)", "Y18(XO44_TMT6)", "Y18(XO44_TMT6)", "Y18(XO44_TMT6)",
"Y18(XO44_TMT6)", "Y18(XO44_TMT6)", "Y17(XO44_TMT6)", "Y17(XO44_TMT6)",
"Y17(XO44_TMT6)", "Y17(XO44_TMT6)", "Y17(XO44_TMT6)", "Y16(XO44_TMT6)",
"Y16(XO44_TMT6)", "Y16(XO44_TMT6)", "Y16(XO44_TMT6)", "Y16(XO44_TMT6)",
"Y16(XO44_TMT6)", "Y16(XO44_TMT6)", "Y16(XO44_TMT6)", "Y16(XO44_TMT6)",
"Y16(XO44_TMT6)", "Y15(XO44_TMT6); C16(Carbamidomethyl)", "Y15(XO44_TMT6)",
"Y15(XO44_TMT6)", "Y15(XO44_TMT6)", "Y15(XO44_TMT6)", "Y15(XO44_TMT6)",
"Y15(XO44_TMT6)", "Y15(XO44_TMT6)", "Y15(XO44_TMT6)", "Y15(XO44_TMT6)",
"Y14(XO44_TMT6); C15(Carbamidomethyl)", "Y14(XO44_TMT6); C15(Carbamidomethyl)",
"Y14(XO44_TMT6)", "Y14(XO44_TMT6)", "Y14(XO44_TMT6)", "Y13(XO44_TMT6)",
"Y13(XO44_TMT6)", "Y13(XO44_TMT6)", "Y12(XO44_TMT6); C14(Carbamidomethyl)",
"Y12(XO44_TMT6)", "Y12(XO44_TMT6)", "Y12(XO44_TMT6)", "Y12(XO44_TMT6)",
"Y12(XO44_TMT6)", "Y12(XO44_TMT6)", "Y12(XO44_TMT6)", "Y12(XO44_TMT6)",
"Y12(XO44_TMT6)", "Y12(XO44_TMT6)", "Y12(XO44_TMT6)", "Y12(XO44_TMT6)",
"Y11(XO44_TMT6)", "Y11(XO44_TMT6)", "Y11(XO44_TMT6)", "Y11(XO44_TMT6)",
"Y11(XO44_TMT6)", "Y11(XO44_TMT6)", "Y11(XO44_TMT6)", "Y10(XO44_TMT6)",
"Y10(XO44_TMT6)", "Y10(XO44_TMT6)", "Y10(XO44_TMT6)", "Y10(XO44_TMT6)",
"Y10(XO44_TMT6)", "Y10(XO44_TMT6)", "Y10(XO44_TMT6)", "Y10(XO44_TMT6)",
"Y10(XO44_TMT6)", "Y10(XO44_TMT6)", "Y10(XO44_TMT6)", "Y1(XO44_TMT6); C9(Carbamidomethyl)",
"Y1(XO44_TMT6); C15(Carbamidomethyl)", "Y1(XO44_TMT6); C15(Carbamidomethyl)",
"Y1(XO44_TMT6); C15(Carbamidomethyl)", "Y1(XO44_TMT6); C15(Carbamidomethyl)",
"Y1(XO44_TMT6); C11(Carbamidomethyl)", "Y1(XO44_TMT6); C11(Carbamidomethyl)",
"Y1(XO44_TMT6); C11(Carbamidomethyl)", "Y1(XO44_TMT6)", "Y1(XO44_TMT6)",
"Y1(XO44_TMT6)", "Y1(XO44_TMT6)", "Y1(XO44_TMT6)", "Y1(XO44_TMT6)",
"Y1(XO44_TMT6)", "Y1(XO44_TMT6)", "Y1(XO44_TMT6)", "Y1(XO44_TMT6)",
"Y1(XO44_TMT6)", "Y1(XO44_TMT6)", "Y1(XO44_TMT6)", "Y1(XO44_TMT6)",
"Y1(XO44_TMT6)", "N-Term(Prot)(Met-loss+Acetyl); Y9(XO44_TMT6)",
"N-Term(Prot)(Met-loss+Acetyl); Y9(XO44_TMT6)", "N-Term(Prot)(Met-loss+Acetyl); Y9(XO44_TMT6)",
"N-Term(Prot)(Met-loss+Acetyl); Y9(XO44_TMT6)", "N-Term(Prot)(Met-loss+Acetyl); Y8(XO44_TMT6)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6); C17(Carbamidomethyl)",
"N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6)", "N-Term(Prot)(Met-loss+Acetyl); Y7(XO44_TMT6)",
"N-Term(Prot)(Met-loss+Acetyl); K4(XO44_TMT6)", "N-Term(Prot)(Met-loss); Y8(XO44_TMT6)",
"N-Term(Prot)(Met-loss); Y8(XO44_TMT6)", "N-Term(Prot)(Met-loss); Y8(XO44_TMT6)",
"N-Term(Prot)(Met-loss); Y8(XO44_TMT6)"), Mod.residue = c("9",
"9", "9", "9", "9", "9", "9", "8", "8", "8", "8", "8", "8", "8",
"8", "8", "8", "8", "8", "8", "8", "8", "8", "7", "7", "7", "7",
"7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7",
"7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7",
"7", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6",
"6", "6", "6", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5", "5", "5", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "4", "4", "4", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "9", "5",
"5", "5", "3", "2", "2", "2", "2", "2", "2", "2", "1", "0", "0",
"0", "0", "0", "0", "0", "0", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "9", "9", "9", "9", "9", "8", "8", "8", "8", "8",
"8", "8", "7", "7", "7", "7", "7", "6", "6", "6", "6", "6", "6",
"6", "6", "6", "6", "5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "4", "4", "4", "4", "4", "3", "3", "3", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "1", "1", "1", "1",
"1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "9",
"9", "9", "9", "8", "7", "7", "7", "7", "7", "7", "7", "7", "7",
"7", "7", "7", "7", "7", "7", "7", "7", "4", "8", "8", "8", "8"
), Mod.position.in.pep = c("", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "")), row.names = c(NA, -300L), class = "data.frame")
My codes:
df <- df.test %>%
mutate(Mod.residue = gsub(".*(\\w{1})\\d*\\(XO44_TMT6)\\;*\\s*.*", "\\1", Modifications),
Mod.position.in.pep = gsub(".*\\w{1}(\\d*)\\(XO44_TMT6\\)\\;*\\s*.*", "\\1", Modifications)
)