In the below code, there is a simple, very efficient and well tested answer to this question. The code has comments explaining everything in it.
I promise it's not as scary as it looks - it's actually only 13 lines of code! The rest are all comments, docs and assertions
def split_including_delimiters(input: str, delimiter: str):
"""
Splits an input string, while including the delimiters in the output
Unlike str.split, we can use an empty string as a delimiter
Unlike str.split, the output will not have any extra empty strings
Conequently, len(''.split(delimiter))== 0 for all delimiters,
whereas len(input.split(delimiter))>0 for all inputs and delimiters
INPUTS:
input: Can be any string
delimiter: Can be any string
EXAMPLES:
>>> split_and_keep_delimiter('Hello World ! ',' ')
ans = ['Hello ', 'World ', ' ', '! ', ' ']
>>> split_and_keep_delimiter("Hello**World**!***", "**")
ans = ['Hello', '**', 'World', '**', '!', '**', '*']
EXAMPLES:
assert split_and_keep_delimiter('-xx-xx-','xx') == ['-', 'xx', '-', 'xx', '-'] # length 5
assert split_and_keep_delimiter('xx-xx-' ,'xx') == ['xx', '-', 'xx', '-'] # length 4
assert split_and_keep_delimiter('-xx-xx' ,'xx') == ['-', 'xx', '-', 'xx'] # length 4
assert split_and_keep_delimiter('xx-xx' ,'xx') == ['xx', '-', 'xx'] # length 3
assert split_and_keep_delimiter('xxxx' ,'xx') == ['xx', 'xx'] # length 2
assert split_and_keep_delimiter('xxx' ,'xx') == ['xx', 'x'] # length 2
assert split_and_keep_delimiter('x' ,'xx') == ['x'] # length 1
assert split_and_keep_delimiter('' ,'xx') == [] # length 0
assert split_and_keep_delimiter('aaa' ,'xx') == ['aaa'] # length 1
assert split_and_keep_delimiter('aa' ,'xx') == ['aa'] # length 1
assert split_and_keep_delimiter('a' ,'xx') == ['a'] # length 1
assert split_and_keep_delimiter('' ,'' ) == [] # length 0
assert split_and_keep_delimiter('a' ,'' ) == ['a'] # length 1
assert split_and_keep_delimiter('aa' ,'' ) == ['a', '', 'a'] # length 3
assert split_and_keep_delimiter('aaa' ,'' ) == ['a', '', 'a', '', 'a'] # length 5
"""
# Input assertions
assert isinstance(input,str), "input must be a string"
assert isinstance(delimiter,str), "delimiter must be a string"
if delimiter:
# These tokens do not include the delimiter, but are computed quickly
tokens = input.split(delimiter)
else:
# Edge case: if the delimiter is the empty string, split between the characters
tokens = list(input)
# The following assertions are always true for any string input and delimiter
# For speed's sake, we disable this assertion
# assert delimiter.join(tokens) == input
output = tokens[:1]
for token in tokens[1:]:
output.append(delimiter)
if token:
output.append(token)
# Don't let the first element be an empty string
if output[:1]==['']:
del output[0]
# The only case where we should have an empty string in the output is if it is our delimiter
# For speed's sake, we disable this assertion
# assert delimiter=='' or '' not in output
# The resulting strings should be combinable back into the original string
# For speed's sake, we disable this assertion
# assert ''.join(output) == input
return output