You did not specify what platform you are running under or how large the data string being written will be. If you are running under Linux and the data is not too large, explicit locking is not necessary. See this post. But whether the writes are atomic or you have to do explicit locking, since all processes would be writing to the same file, no real parallelism is gained in outputting the data. For that reason I would find it simpler to have a single writer.
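For completeness, here is a minimal sketch of what the explicit-locking alternative would look like (the helper name locked_write, the use of append mode, and the reuse of the demo filename below are my own choices for illustration): every worker does its own writing, serialized by a shared multiprocessing.Lock passed through the pool initializer.

from multiprocessing import Pool, Lock, cpu_count

def init_pool_processes(l) -> None:
    # Make the shared lock available as a global in each pool process:
    global lock
    lock = l

def locked_write(a_string: str) -> None:
    result = a_string.upper() + '\n'  # stand-in for the real CPU work
    # Only one process at a time may append to the file:
    with lock:
        with open("my_enemy_list!.txt", "a") as f:
            f.write(result)

if __name__ == '__main__':
    open("my_enemy_list!.txt", "w").close()  # start with an empty file
    lock = Lock()
    with Pool(cpu_count(), initializer=init_pool_processes, initargs=(lock,)) as pool:
        pool.map(locked_write, [f'string{i}' for i in range(1, 101)])

As you can see, the workers spend part of their time waiting on one another, which is why a single writer is the approach I would actually take: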
If the order in which the strings are written does not matter, I would use the following code:
from multiprocessing import Pool, Queue, cpu_count

def init_pool_processes(q: Queue) -> None:
    # Make the queue available as a global in each pool process:
    global queue
    queue = q

def some_function(a_string: str) -> None:
    ...  # Perform some CPU-intensive operations yielding result
    result = a_string.upper() + '\n'  # for demo purposes
    queue.put(result)

def writer() -> None:
    with open("my_enemy_list!.txt", "w") as f:
        # Loop until the None sentinel is retrieved:
        for result in iter(queue.get, None):
            f.write(result)

def main():
    a = [f'string{i}' for i in range(1, 101)]
    queue = Queue()
    with Pool(cpu_count() + 1, initializer=init_pool_processes, initargs=(queue,)) as pool:
        async_result = pool.apply_async(writer)
        pool.map(some_function, a)
        # Tell the writer there is no more data coming:
        queue.put(None)
        # Wait for the writer to complete:
        async_result.get()

if __name__ == '__main__':
    main()
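Note that the pool is sized cpu_count() + 1 because the writer task submitted with apply_async occupies one pool process for its entire lifetime, leaving cpu_count() processes for some_function. Passing the queue via the pool initializer is also deliberate: a multiprocessing.Queue cannot be passed as an argument to a pool task, and a module-level global would not survive the spawn start method used by default on Windows and macOS.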
If the order does matter, then:
from multiprocessing import Pool, cpu_count

def some_function(a_string: str) -> str:
    ...  # Perform some CPU-intensive operations yielding result
    result = a_string.upper() + '\n'  # for demo purposes
    return result

def compute_chunksize(iterable_size: int, pool_size: int) -> int:
    # Split the iterable into roughly 4 chunks per pool process:
    chunksize, remainder = divmod(iterable_size, 4 * pool_size)
    if remainder:
        chunksize += 1
    return chunksize

def main():
    a = [f'string{i}' for i in range(1, 101)]
    iterable_size = len(a)
    pool_size = cpu_count()
    chunksize = compute_chunksize(iterable_size, pool_size)
    with Pool(pool_size) as pool, \
            open("my_enemy_list!.txt", "w") as f:
        # imap (unlike imap_unordered) returns results in input order:
        for result in pool.imap(some_function, a, chunksize=chunksize):
            f.write(result)

if __name__ == '__main__':
    main()
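The compute_chunksize helper reproduces the heuristic that Pool.map applies internally: divide the iterable into roughly 4 chunks per pool process. imap and imap_unordered default to a chunksize of 1, so for a long iterable computing a larger chunksize like this substantially reduces the inter-process communication overhead.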