I am learning how to work with Python's multiprocessing. Below are two virtually identical blocks of code: one is single-threaded and single-process, the other uses multiprocessing. Both work on the same 38910 records of a gettext PO file containing English and Vietnamese translations, yet the multiprocessing version is far slower than the single-process one:
- single process:
# Execution took: 4.229461978015024 second(s)
- multiprocessing:
# Execution took: 35.94734842295293 second(s).
That is nearly 9 times slower.
Here is the multiprocessing code:
#!/usr/bin/env python3
import re
from matcher import MatcherRecord
import os
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
from babel.messages import Message
from sphinx_intl import catalog as c
from collections import OrderedDict
from translation_finder import TranslationFinder
from pattern_utils import PatternUtils as pu
from definition import Definitions as df
import time


class PatternFoundResult:
    def __init__(self):
        self.found_dict: OrderedDict = None

    def updateFoundResult(self, found_dict: OrderedDict):
        self.found_dict = found_dict
        is_found = len(found_dict) > 0
        if not is_found:
            return
        print(found_dict)

def task001(arg):
    def genericAbbrevFormat(orig_txt: str, trans_txt: str, is_reverse=False):
        def replaceAbbrev(entry):
            loc = entry[0]
            mm: MatcherRecord = entry[1]
            sub_list = mm.getSubEntriesAsList()
            (abbrev_loc, abbrev_txt) = sub_list[1]
            changing_txt = replaceAbbrev.txt
            try:
                (exp_loc, exp_txt) = sub_list[3]
            except Exception as e:
                (exp_loc, exp_txt) = sub_list[2]
            abbrev_txt = f'[{abbrev_txt} - {exp_txt}]'
            changed_txt = pu.jointText(changing_txt, abbrev_txt, loc)
            replaceAbbrev.txt = changed_txt
            return True

        abbrev_dict = pu.patternMatchAll(df.ABBR_WITH_PRE_POST_QUOTES, trans_txt, is_reversed=True)
        has_abbrev_embedded = (len(abbrev_dict) > 0)
        if has_abbrev_embedded:
            replaceAbbrev.txt = trans_txt
            list(filter(replaceAbbrev, abbrev_dict.items()))
            trans_txt = replaceAbbrev.txt

        orig_txt = df.squareBracket(orig_txt)
        is_tran = (trans_txt is not None)
        if not is_tran:
            format_string = f':abbr:`{orig_txt} ()`'
            return format_string

        trans_txt = df.squareBracket(trans_txt)
        if is_reverse:
            format_string = f':abbr:`{orig_txt} ({trans_txt})`'
        else:
            format_string = f':abbr:`{trans_txt} ({orig_txt})`'
        return format_string
    def isGlossary(m: Message):
        check_string = 'manual/glossary/index'
        locations = m.locations
        is_glossary = False
        for loc in locations:
            # check the filename part of each (filename, lineno) location,
            # not the locations list itself
            is_glossary = (check_string in loc[0])
            if is_glossary:
                break
        return is_glossary

    def formatFoundEntry(entry):
        mm: MatcherRecord = None
        (loc, mm) = entry
        en_txt = mm.getComponent(2, 1)
        vn_txt = tf.isInDict(en_txt)
        has_tran = (vn_txt is not None)
        if has_tran:
            in_catalog = tf.isEnGoesFirst(en_txt)
            is_en_coming_first = (in_catalog or is_glossary)
            abbrev_txt = genericAbbrevFormat(en_txt, vn_txt, is_reverse=is_en_coming_first)
            front_filler = mm.getComponent(1, 1)
            back_filler = mm.getComponent(3, 1)
            ast_txt = f'{front_filler}{abbrev_txt}{back_filler}'
            return ast_txt
        else:
            return en_txt

    pat: re.Pattern = None
    m: Message = None
    tf: TranslationFinder = None
    (index, m, tf, pat, is_simple) = arg
    en_txt = m.id
    is_glossary = isGlossary(m)
    is_repeat = tf.isRepeat(en_txt)
    found_dict = pu.patternMatchAll(pat, en_txt)
    is_found = len(found_dict) > 0
    if is_found:
        result_string_list = list(map(formatFoundEntry, found_dict.items()))
    else:
        result_string_list = []
    return result_string_list

if __name__ == "__main__":
    time_start = time.perf_counter()
    is_debug = False
    home_dev = os.environ['DEV']
    input_path = os.path.join(home_dev, "current_blender_manual_merge_flat_0001.po")
    input_cat = c.load_po(input_path)

    # share one TranslationFinder instance with the workers through a manager proxy
    BaseManager.register('TranslationFinder', TranslationFinder)
    manager = BaseManager()
    manager.start()
    tf = manager.TranslationFinder()

    pat: re.Pattern = df.QUOTEDTEXT_UNTRANSLATED_PATTERN
    is_simple = True
    result_handler = PatternFoundResult()
    with Pool() as pool:
        m: Message = None
        # submit one task per PO record
        for (index, m) in enumerate(input_cat):
            arg = (index, m, tf, pat, is_simple)
            pool.apply_async(task001, args=[arg], callback=result_handler.updateFoundResult)
        pool.close()
        pool.join()
    print(f'Execution took: {time.perf_counter() - time_start} second(s).')
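In case it helps, here is a stripped-down, self-contained sketch of the same dispatch pattern, with everything project-specific replaced by placeholders (DummyFinder stands in for TranslationFinder, task for task001, and the records are plain strings, so these names are illustrative only). Only the shape is kept: a BaseManager proxy handed to one apply_async call per record:
#!/usr/bin/env python3
# Minimal sketch of the multiprocessing dispatch pattern above, with placeholder classes.
import time
from multiprocessing import Pool
from multiprocessing.managers import BaseManager


class DummyFinder:
    # stand-in for TranslationFinder: one cheap method call per record
    def isInDict(self, txt):
        return txt.upper()


class ResultHandler:
    # stand-in for PatternFoundResult
    def update(self, result_list):
        self.last = result_list


def task(arg):
    (index, txt, finder) = arg
    # every call on 'finder' goes through the manager proxy, i.e. IPC per record
    return [finder.isInDict(txt)]


if __name__ == "__main__":
    records = [f'record {i}' for i in range(38910)]

    BaseManager.register('DummyFinder', DummyFinder)
    manager = BaseManager()
    manager.start()
    finder = manager.DummyFinder()  # proxy object; the real instance lives in the manager process

    handler = ResultHandler()
    start = time.perf_counter()
    with Pool() as pool:
        for (index, txt) in enumerate(records):
            # one apply_async per record; the proxy is pickled and sent with every task
            pool.apply_async(task, args=[(index, txt, finder)], callback=handler.update)
        pool.close()
        pool.join()
    print(f'Execution took: {time.perf_counter() - start} second(s).')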
And here is the single-threaded version, showing just the main section:
if __name__ == "__main__":
    time_start = time.perf_counter()
    is_debug = False
    home_dev = os.environ['DEV']
    input_path = os.path.join(home_dev, "current_blender_manual_merge_flat_0001.po")
    input_cat = c.load_po(input_path)

    result_handler = PatternFoundResult()
    tf = TranslationFinder()  # plain in-process instance, no manager proxy
    pat: re.Pattern = df.QUOTEDTEXT_UNTRANSLATED_PATTERN
    is_simple = True
    for (index, m) in enumerate(input_cat):
        arg = (index, m, tf, pat, is_simple)
        result_string_list = task001(arg)
        result_handler.updateFoundResult(result_string_list)
    print(f'Execution took: {time.perf_counter() - time_start} second(s) - records: {len(input_cat)}')
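For a like-for-like comparison, here is the single-process counterpart of the stripped-down sketch above, using the same placeholder names:
#!/usr/bin/env python3
# Single-process counterpart of the minimal sketch, same placeholder names.
import time


class DummyFinder:
    def isInDict(self, txt):
        return txt.upper()


def task(arg):
    (index, txt, finder) = arg
    # plain in-process method call: no proxy, no pickling, no IPC
    return [finder.isInDict(txt)]


if __name__ == "__main__":
    records = [f'record {i}' for i in range(38910)]
    finder = DummyFinder()

    start = time.perf_counter()
    results = []
    for (index, txt) in enumerate(records):
        results.append(task((index, txt, finder)))
    print(f'Execution took: {time.perf_counter() - start} second(s) - records: {len(records)}')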
Could you please tell me why the multiprocessing version is so much slower, and where I went wrong?