This problem piqued my interest, so I wrote an overly generic solution.
Here's a function that
- aligns any number of sequences
- works on iterators, so it can efficiently handle long (or infinite) sequences
- supports repeated values
- is compatible with both Python 2 and 3 (although I'd use the signature
`align_iterables(*inputs, missing_value=None)`, with a keyword-only argument,
if I didn't care about historical Python versions)
import itertools
def align_iterables(inputs, missing=None):
    """Align sorted iterables.

    Yields tuples with one slot per iterable in `inputs`. Each tuple is
    built around the smallest value currently at the head of any
    iterable: slots whose iterable has that value at its head receive
    it, and every other slot receives `missing`.

    Each iterable is expected to be sorted in ascending order, and the
    items must support `<` (used by `min`) and `==`. Repeated values
    are consumed one per yielded tuple. Only one item of look-ahead is
    kept per iterable, so infinite iterables can be aligned lazily.

    Example: list(align_iterables(['bc', 'bf', '', 'abf'])) gives:

        (None, None, None, 'a')
        ('b', 'b', None, 'b')
        ('c', None, None, None)
        (None, 'f', None, 'f')
    """
    # Unique end-of-input marker appended to every iterable, so that
    # next() below never raises StopIteration inside this generator.
    End = object()
    iterators = [itertools.chain(i, [End]) for i in inputs]
    values = [next(i) for i in iterators]
    while not all(v is End for v in values):
        smallest = min(v for v in values if v is not End)
        # Check identity against the sentinel *before* comparing by value:
        # `End == item` would fall back to the item's own __eq__, which
        # may raise or answer True for an object it does not know.
        matches = [v is not End and v == smallest for v in values]
        yield tuple(v if hit else missing
                    for v, hit in zip(values, matches))
        # Advance only the iterators whose head value was just emitted.
        values = [next(i) if hit else v
                  for i, v, hit in zip(iterators, values, matches)]
# An adapter for this question's problem:
def align_two_lists(list1, list2, missing="MISSING"):
    """Align two sorted lists and return them as a pair of padded lists.

    Returns `[aligned1, aligned2]`, two lists of equal length in which
    `missing` fills every position where the corresponding input has no
    value matching the other one. Two empty inputs give `[[], []]`.
    """
    rows = align_iterables([list1, list2], missing=missing)
    # Transpose the stream of (value1, value2) rows into two columns.
    columns = [list(column) for column in zip(*rows)]
    # With no rows at all the transpose is empty, so supply the two
    # empty columns explicitly.
    return columns if columns else [[], []]
# A set of tests for the question's problem:
if __name__ == '__main__':
    # Self-test, run only when the file is executed as a script.

    # Two-list cases with '_' as the filler: one list a subsequence of
    # the other, one list empty, interleaved values, repeated values.
    assert align_two_lists('abcef', 'abcdef', '_') == [['a', 'b', 'c', '_', 'e', 'f'], ['a', 'b', 'c', 'd', 'e', 'f']]
    assert align_two_lists('a', 'abcdef', '_') == [['a', '_', '_', '_', '_', '_'], ['a', 'b', 'c', 'd', 'e', 'f']]
    assert align_two_lists('abcdef', 'a', '_') == [['a', 'b', 'c', 'd', 'e', 'f'], ['a', '_', '_', '_', '_', '_']]
    assert align_two_lists('', 'abcdef', '_') == [['_', '_', '_', '_', '_', '_'], ['a', 'b', 'c', 'd', 'e', 'f']]
    assert align_two_lists('abcdef', '', '_') == [['a', 'b', 'c', 'd', 'e', 'f'], ['_', '_', '_', '_', '_', '_']]
    assert align_two_lists('ace', 'abcdef', '_') == [['a', '_', 'c', '_', 'e', '_'], ['a', 'b', 'c', 'd', 'e', 'f']]
    assert align_two_lists('bdf', 'ace', '_') == [['_', 'b', '_', 'd', '_', 'f'], ['a', '_', 'c', '_', 'e', '_']]
    assert align_two_lists('ace', 'bdf', '_') == [['a', '_', 'c', '_', 'e', '_'], ['_', 'b', '_', 'd', '_', 'f']]
    assert align_two_lists('aaacd', 'acd', '_') == [['a', 'a', 'a', 'c', 'd'], ['a', '_', '_', 'c', 'd']]
    assert align_two_lists('acd', 'aaacd', '_') == [['a', '_', '_', 'c', 'd'], ['a', 'a', 'a', 'c', 'd']]
    assert align_two_lists('', '', '_') == [[], []]

    # The example from the question, using the default filler.
    list1 = ["datetimeA", "datetimeB", "datetimeD", "datetimeE"]
    list2 = ["datetimeB", "datetimeC", "datetimeD", "datetimeF"]
    new_list1 = ["datetimeA", "datetimeB", "MISSING", "datetimeD", "datetimeE", "MISSING"]
    new_list2 = ["MISSING", "datetimeB", "datetimeC", "datetimeD", "MISSING", "datetimeF"]
    assert align_two_lists(list1, list2) == [new_list1, new_list2]

    # And some extra tests:
    # More than two iterables at once, one of them empty. The complete
    # output is compared as a list: pairing expected and actual rows
    # with zip() would stop at the shorter side and let a generator
    # that yields too few rows pass unnoticed.
    assert list(align_iterables(['bc', 'bf', '', 'abf'])) == [
        (None, None, None, 'a'),
        ('b', 'b', None, 'b'),
        ('c', None, None, None),
        (None, 'f', None, 'f')]
    assert list(align_iterables([])) == []

    # And infinite generators: only a bounded prefix can be checked, so
    # take exactly as many rows as are expected.
    first_rows = list(itertools.islice(
        align_iterables([itertools.count(step=2), itertools.count()],
                        missing='X'),
        5))
    assert first_rows == [
        (0, 0),
        ('X', 1),
        (2, 2),
        ('X', 3),
        (4, 4)]