1

The idea is that I have some very big dictionaries that will go through some loops and I want to use Python's multiprocessing in order to get this job done faster.

I have defined 2 functions.

import sys, collections
import multiprocessing
from timeit import itertools

def getReoccurences(g, dups, genes, synonyms):
    #dups = collections.OrderedDict()
        # check if our gene exists in the synonym dict
    if g in synonyms:
        dups[g] = []
        for synonym in synonyms[g]:
            if synonym in genes:
                # got one reoccurence.
                dups[g].append(synonym)
                del genes[synonym]
    #return dups

def cleanFromUnofficialName(g, replaced, dups, genes, synonyms):
    for official_gene in synonyms:
        list_of_synonyms = synonyms[official_gene]
        if g in list_of_synonyms:
            if g not in synonyms:
                #We got a gene that is listed with the unofficial name.
                replaced[official_gene] = g
                #sys.stdout.write("\t%s was listed with the unofficial name %s. Replaced\n" % (official_gene, g))
                if g not in genes:
                    sys.stdout.write("\tDouble occurence: %s ignoring 2nd occurence.\n" %  g)
                else:
                    del genes[g]            # remove that gene from the list
                genes[official_gene] = []   # put the official in it's place

#main func
def clean(genes, synomyms)
    # lists to join the processes later on...
    #############################
    getReoccuring_processes = []
    cleanFromUnofficialName_processes = []
    # http://stackoverflow.com/questions/6832554/python-multiprocessing-how-do-i-share-a-dict-among-multiple-processes
    manager1 = multiprocessing.Manager()
    dups = manager1.dict()

    manager2 = multiprocessing.Manager()
    replaced = manager2.dict()

    for g in genes:
        p1 = multiprocessing.Process(target=getReoccurences, args=(g, dups, genes, synomyms))
        p1.start()
        getReoccuring_processes.append(p1)

    for g in genes:
        p2 = multiprocessing.Process(target=cleanFromUnofficialName_processes, args=(g, replaced, dups, genes, synomyms))
        p2.start()
        cleanFromUnofficialName_processes.append(p2)


    #join the processes
    for p1, p2 in itertools.izip(getReoccuring_processes, cleanFromUnofficialName_processes):
        p1.join()
        p2.join()

but when I do

for dup in dups:
        print dup

I get TypeError: 'list' object is not callable

Question is: How can I edit this dictionaries with multiprocessing and still be able to print their values?

Pavlos Panteliadis
  • 1,495
  • 1
  • 15
  • 25

0 Answers0