Issue
Multiprocessing messes up logging to file:
- Lines already written may be removed
- New lines may not be written
- Order of lines may be incorrect
Logging works fine if I don't use multiprocessing.
I read that I can use a QueueHandler, but I want to understand why writing logging to some handler still messes up another handler.
Code to reproduce
Set USE_MP=False
or uncomment exit()
to verify that logging works fine as long as multiprocessing lines are not executed.
import logging
import multiprocessing
import time
from multiprocessing import Pool
from typing import Dict, List
USE_MP = True
logger = logging.getLogger()
logger.addHandler(logging.FileHandler(filename="test.log", mode="w"))
logger.setLevel(logging.DEBUG)
mplogger = multiprocessing.log_to_stderr()
mplogger.addHandler(logging.StreamHandler())
mplogger.setLevel(logging.DEBUG)
def time_consuming_function(file_name):
logger.info(f"Running time_consuming_function with {file_name}")
time.sleep(1)
return file_name, file_name
def mp(file_names: List[str]) -> Dict[str, str]:
logger.info(f"Running mp...")
with Pool() as p:
return {
file_name: file_name
for file_name, file_name in p.imap_unordered(
time_consuming_function, file_names
)
}
def non_mp(file_names: List[str]) -> Dict[str, str]:
logger.info(f"Running non-mp...")
return {
file_name: file_name
for file_name, file_name in map(time_consuming_function, file_names)
}
def main():
logger.info("Start run...")
file_names = list("ABCDE")
# exit()
if USE_MP:
mp(file_names)
else:
non_mp(file_names)
logger.info("End run.")
if __name__ == "__main__":
main()
Python version: 3.7