Here is the code that I came up with:
from contextlib import redirect_stdout
from io import StringIO
from re import compile, DEBUG, error, MULTILINE, VERBOSE
def unescape(pattern: str, flags: int = 0) -> str:
    """Remove any escape that does not change the regex meaning.

    Each backslash in *pattern* is tentatively deleted, scanning from right
    to left. The deletion is kept only when the ``re.DEBUG`` dump of the
    shortened pattern is identical to the dump of the original pattern.

    Args:
        pattern: Regular expression source text.
        flags: ``re`` flags to compile with; ``DEBUG`` is always added.

    Returns:
        The pattern with every redundant backslash removed.

    Raises:
        re.error: If *pattern* itself does not compile.
    """
    def debug_dump(candidate: str) -> str:
        # DEBUG compilation prints to stdout, so capture it.
        buffer = StringIO()
        with redirect_stdout(buffer):
            compile(candidate, DEBUG | flags)
        return buffer.getvalue()

    original_debug = debug_dump(pattern)
    # Walk right to left: deleting the character at ``index`` only shifts
    # characters to its right, so the indices still to be visited stay valid.
    # The range stops at 0, so an empty pattern is handled and the last
    # character is never re-examined through a negative index.
    for index in range(len(pattern) - 1, -1, -1):
        if pattern[index] != '\\':
            continue
        removed_escape = pattern[:index] + pattern[index + 1:]
        try:
            candidate_debug = debug_dump(removed_escape)
        except error:
            # Without this backslash the pattern is invalid; keep it.
            continue
        if candidate_debug == original_debug:
            pattern = removed_escape
    return pattern
def print_unescaped_raw(regex: str, flags: int = 0):
    """Print *regex*, stripped of redundant escapes, as a raw-string literal.

    The pattern is first passed through ``unescape`` with the given
    *flags*. Single quotes are then backslash-escaped and newline
    characters are written as ``\\n`` before the text is wrapped in
    ``r'...'`` and printed.
    """
    body = unescape(regex, flags)
    body = body.replace("'", r'\'')
    body = body.replace('\n', r'\n')
    print(f"r'{body}'")
print_unescaped_raw(r'\{\"*')  # prints: r'{"*'
One can also use sre_parse.parse
directly, but the SubPatterns and tuples in the result may contain nested SubPatterns, and SubPattern instances don't have an __eq__
method defined, so a recursive comparison subroutine might be required.
P.S.
Unfortunately, this method does not work with the regex module because in regex
you get different debug output for escaped characters:
regex.compile(r'{', regex.DEBUG)
LITERAL MATCH '{'
regex.compile(r'\{', regex.DEBUG)
CHARACTER MATCH '{'
Unlike re, which gives the same output for both:
re.compile(r'{', re.DEBUG)
LITERAL 123
re.compile(r'\{', re.DEBUG)
LITERAL 123