I think "without creating temporary objects" is impossible, especially since "everything is an object" in Python.
You could get O(1) space / number of objects if you implement some sorting algorithm yourself, though if you want O(n log n) time and stability, it's difficult. If you don't care about stability (seems likely, since you say you want to sort by a
but then actually sort by a
, b
and c
), heapsort is reasonably easy:
def sort_together_heapsort(a, b, c):
n = len(a)
def swap(i, j):
a[i], a[j] = a[j], a[i]
b[i], b[j] = b[j], b[i]
c[i], c[j] = c[j], c[i]
def siftdown(i):
while (kid := 2*i+1) < n:
imax = kid if a[kid] > a[i] else i
kid += 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
return
swap(i, imax)
i = imax
for i in range(n // 2)[::-1]:
siftdown(i)
while n := n - 1:
swap(0, n)
siftdown(0)
Anyway, if someone's interested in just saving some amount of memory, that can be done by decorating in-place (building tuples and storing them in a
):
def sort_together_decorate_in_a(a, b, c):
for i, a[i] in enumerate(zip(a, b, c)):
pass
a.sort()
for i, [a[i], b[i], c[i]] in enumerate(a):
pass
Or if you trust that list.sort
will ask for keys for the elements in order (at least in CPython it does, already did so when the key
parameter was introduced 18 years ago, and I suspect will keep doing so):
def sort_together_iter_key(a, b, c):
it = iter(a)
b.sort(key=lambda _: next(it))
it = iter(a)
c.sort(key=lambda _: next(it))
a.sort()
Testing memory and time with three lists of 100,000 elements:
15,072,520 bytes 152 ms sort_together_sorted_zip
15,072,320 bytes 166 ms sort_together_sorted_zip_2
14,272,576 bytes 152 ms sort_together_sorted_zip_X
6,670,708 bytes 126 ms sort_together_decorate_in_a
6,670,772 bytes 177 ms sort_together_decorate_in_first_X
5,190,212 bytes 342 ms sort_multi_by_a_guest_X
1,597,400 bytes 100 ms sort_together_iter_key
1,597,448 bytes 102 ms sort_together_iter_key_X
744 bytes 1584 ms sort_together_heapsort
704 bytes 1663 ms sort_together_heapsort_X
168 bytes 1326 ms sort_together_heapsort_opti
188 bytes 1512 ms sort_together_heapsort_opti_X
Note:
- The second solution is a shortened/improved version of yours, no need for temporary variables and conversions to lists.
- The solutions with
_X
suffix are versions that take arbitrarily many lists as parameters.
- The @a_guest is from their answer. Runtime-wise it currently benefits from my data being random, as that doesn't expose that solution's worst case complexity O(m * n²), where m is the number of lists and n is the length of each list.
Testing memory and time with ten lists of 100,000 elements:
19,760,808 bytes 388 ms sort_together_sorted_zip_X
12,159,100 bytes 425 ms sort_together_decorate_in_first_X
5,190,292 bytes 1249 ms sort_multi_by_a_guest_X
1,597,528 bytes 393 ms sort_together_iter_key_X
704 bytes 4186 ms sort_together_heapsort_X
188 bytes 4032 ms sort_together_heapsort_opti_X
The whole code (Try it online!):
import tracemalloc as tm
from random import random
from timeit import timeit
def sort_together_sorted_zip(a, b, c):
a_sorted, b_sorted, c_sorted = map(list, zip(*sorted(zip(a, b, c))))
a[:] = a_sorted
b[:] = b_sorted
c[:] = c_sorted
def sort_together_sorted_zip_2(a, b, c):
a[:], b[:], c[:] = zip(*sorted(zip(a, b, c)))
def sort_together_sorted_zip_X(*lists):
sorteds = zip(*sorted(zip(*lists)))
for lst, lst[:] in zip(lists, sorteds):
pass
def sort_together_decorate_in_a(a, b, c):
for i, a[i] in enumerate(zip(a, b, c)):
pass
a.sort()
for i, [a[i], b[i], c[i]] in enumerate(a):
pass
def sort_together_decorate_in_first_X(*lists):
first = lists[0]
for i, first[i] in enumerate(zip(*lists)):
pass
first.sort()
for i, values in enumerate(first):
for lst, lst[i] in zip(lists, values):
pass
def sort_together_iter_key(a, b, c):
it = iter(a)
b.sort(key=lambda _: next(it))
it = iter(a)
c.sort(key=lambda _: next(it))
a.sort()
def sort_together_iter_key_X(*lists):
for lst in lists[1:]:
it = iter(lists[0])
lst.sort(key=lambda _: next(it))
lists[0].sort()
def sort_together_heapsort(a, b, c):
n = len(a)
def swap(i, j):
a[i], a[j] = a[j], a[i]
b[i], b[j] = b[j], b[i]
c[i], c[j] = c[j], c[i]
def siftdown(i):
while (kid := 2*i+1) < n:
imax = kid if a[kid] > a[i] else i
kid += 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
return
swap(i, imax)
i = imax
for i in range(n // 2)[::-1]:
siftdown(i)
while n := n - 1:
swap(0, n)
siftdown(0)
def sort_together_heapsort_X(*lists):
a = lists[0]
n = len(a)
def swap(i, j):
for lst in lists:
lst[i], lst[j] = lst[j], lst[i]
def siftdown(i):
while (kid := 2*i+1) < n:
imax = kid if a[kid] > a[i] else i
kid += 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
return
swap(i, imax)
i = imax
for i in range(n // 2)[::-1]:
siftdown(i)
while n := n - 1:
swap(0, n)
siftdown(0)
def sort_together_heapsort_opti(a, b, c):
# Avoid inner functions and range-loop to minimize memory.
# Makes it faster, too. But duplicates code. Not recommended.
n = len(a)
i0 = n // 2 - 1
while i0 >= 0:
i = i0
while (kid := 2*i+1) < n:
imax = kid if a[kid] > a[i] else i
kid += 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
break
a[i], a[imax] = a[imax], a[i]
b[i], b[imax] = b[imax], b[i]
c[i], c[imax] = c[imax], c[i]
i = imax
i0 -= 1
while n := n - 1:
a[0], a[n] = a[n], a[0]
b[0], b[n] = b[n], b[0]
c[0], c[n] = c[n], c[0]
i = 0
while (kid := 2*i+1) < n:
imax = kid if a[kid] > a[i] else i
kid += 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
break
a[i], a[imax] = a[imax], a[i]
b[i], b[imax] = b[imax], b[i]
c[i], c[imax] = c[imax], c[i]
i = imax
def sort_together_heapsort_opti_X(*lists):
# Avoid inner functions and range-loop to minimize memory.
# Makes it faster, too. But duplicates code. Not recommended.
a = lists[0]
n = len(a)
i0 = n // 2 - 1
while i0 >= 0:
i = i0
while (kid := 2*i+1) < n:
imax = kid if a[kid] > a[i] else i
kid += 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
break
for lst in lists:
lst[i], lst[imax] = lst[imax], lst[i]
i = imax
i0 -= 1
while n := n - 1:
for lst in lists:
lst[0], lst[n] = lst[n], lst[0]
i = 0
while (kid := 2*i+1) < n:
imax = kid if a[kid] > a[i] else i
kid += 1
if kid < n and a[kid] > a[imax]:
imax = kid
if imax == i:
break
for lst in lists:
lst[i], lst[imax] = lst[imax], lst[i]
i = imax
def sort_multi_by_a_guest_X(a, *lists):
indices = list(range(len(a)))
indices.sort(key=lambda i: a[i])
a.sort()
for lst in lists:
for i, j in enumerate(indices):
while j < i:
j = indices[j]
lst[i], lst[j] = lst[j], lst[i]
funcs = [
sort_together_sorted_zip,
sort_together_sorted_zip_2,
sort_together_sorted_zip_X,
sort_together_decorate_in_a,
sort_together_decorate_in_first_X,
sort_multi_by_a_guest_X,
sort_together_iter_key,
sort_together_iter_key_X,
sort_together_heapsort,
sort_together_heapsort_X,
sort_together_heapsort_opti,
sort_together_heapsort_opti_X,
]
n = 100000
a0 = [random() for _ in range(n)]
b0 = [x + 1 for x in a0]
c0 = [x + 2 for x in a0]
for _ in range(3):
for func in funcs:
a, b, c = a0[:], b0[:], c0[:]
time = timeit(lambda: func(a, b, c), number=1)
assert a == sorted(a0)
assert b == sorted(b0)
assert c == sorted(c0)
a, b, c = a0[:], b0[:], c0[:]
tm.start()
func(a, b, c)
memory = tm.get_traced_memory()[1]
tm.stop()
print(f'{memory:10,} bytes {int(time * 1e3):4} ms {func.__name__}')
print()