I have code like below
def yield_multiple():
for prime in prime_list:
for multiple in range(prime+prime, end, prime):
yield multiple
And I use this to get the prime numbers
multiple_set = set(yield_multiple())
result = [v for v in candidate_list if v not in multiple_set]
And I meet the memory error when the set is very large, so I was thinking to use this to save the memory
result = [v for v in candidate_list if v not in yield_multiple()]
But this will get the wrong result. So, How to avoid memory error to get the prime numbers correctly?
Here's my improved solution without too much memory to use.
import math
import sys
import time
from mpi4py import MPI
import eratosthenes
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
TAG_RESULT = 0
n = sys.argv[1]
if n.isdigit():
start_time = time.time()
n = int(n)
sqrt_n = int(math.sqrt(n))
task_per_block = int(math.ceil((n - 1) / size))
begin = 2 + rank * task_per_block
end = begin + task_per_block if begin + task_per_block <= n + 1 else n + 1
if rank == 0:
begin = sqrt_n if sqrt_n < end else begin
sieve_list = [True] * (end - begin)
prime_list = eratosthenes.sieve(sqrt_n)
if rank == 0:
result = sum(prime_list)
for prime in prime_list:
start = begin if begin % prime == 0 else (int(begin / prime) + 1) * prime
for multiple in range(start, end, prime):
sieve_list[multiple - begin] = False
result += sum(i + begin for i, v in enumerate(sieve_list) if v)
result_received = 0
while result_received < size - 1:
data = comm.recv(source=MPI.ANY_SOURCE, tag=TAG_RESULT)
result += data
result_received += 1
print(result)
print(time.time() - start_time)
else:
for prime in prime_list:
start = begin if begin % prime == 0 else (int(begin / prime) + 1) * prime
for multiple in range(start, end, prime):
sieve_list[multiple - begin] = False
result = sum(i + begin for i, v in enumerate(sieve_list) if v)
comm.send(result, dest=0, tag=TAG_RESULT)