
Is there a way to find memory leaks in Cython programs? I have the Cython code below. How can I detect what exactly is causing a memory leak? Would the following code cause one? Is there a way to make sure that there are no memory leaks?

cdef extern from "stdlib.h":
    double drand48()
import cython
import numpy as np
cimport numpy as np


def draw_topic(double[:] topic_distribution):
  cdef double td_sum = 0
  cdef double rand_01 = drand48()
  cdef double accumulated_weight = 0
  cdef long drawn_topic = len(topic_distribution) - 1
  cdef long i
  for i in range(len(topic_distribution)):
    td_sum += topic_distribution[i]
  # Walk the cumulative distribution until it passes the uniform draw.
  for i in range(len(topic_distribution)):
    accumulated_weight += topic_distribution[i] / td_sum
    if rand_01 <= accumulated_weight:
      drawn_topic = i
      break
  return drawn_topic


@cython.boundscheck(False)
def gs_iterate_once(double[:,:] doc_topic,
                    double[:,:] topic_word,
                    double[:] topic_distribution,
                    double[:] topic_probabilities,
                    unsigned int[:,:] doc_word_topic,
                    int num_topics):
  cdef unsigned int doc_id
  cdef unsigned int word_id
  cdef unsigned int topic_id
  cdef unsigned int new_topic
  cdef long i, j
  for i in range(doc_word_topic.shape[0]):
    doc_id = doc_word_topic[i, 0]
    word_id = doc_word_topic[i, 1]
    topic_id = doc_word_topic[i, 2]

    doc_topic[doc_id, topic_id] -= 1
    topic_word[topic_id, word_id] -= 1
    topic_distribution[topic_id] -= 1

    for j in range(num_topics):
      # 1. For every topic, get the current topic distribution for the
      #    document from doc_topic.
      # 2. From topic_word get current distribution of that word across
      #    topics.
      # Get the un-normalized probability of a word being assigned new topic
      # ==> element-wise (1 * 2 / topic_distribution)
      topic_probabilities[j] = (doc_topic[doc_id, j] * topic_word[j, word_id]) / topic_distribution[j]

    new_topic = draw_topic(topic_probabilities)

    doc_topic[doc_id, new_topic] += 1
    topic_word[new_topic, word_id] += 1
    topic_distribution[new_topic] += 1
    # Set the new topic
    doc_word_topic[i, 2] = new_topic
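
For the detection part of the question, the standard-library tracemalloc module can show which Python-level allocations grow across repeated calls. A minimal, self-contained sketch; function_under_test is just a stand-in here, not part of the code above, so swap in a call into the compiled module:

import tracemalloc

def function_under_test():
    # Stand-in for a call into the compiled Cython module,
    # e.g. gs_iterate_once(...) on real inputs.
    return [float(i) for i in range(1000)]

tracemalloc.start()

snap1 = tracemalloc.take_snapshot()
for _ in range(1000):
    function_under_test()
snap2 = tracemalloc.take_snapshot()

# Allocations that grow with the iteration count rise to the top.
for stat in snap2.compare_to(snap1, 'lineno')[:10]:
    print(stat)

Note that tracemalloc only sees allocations made through Python's allocator; a leak in raw C malloc calls inside an extension needs a tool like valgrind instead.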
  • What makes you think you have a memory leak? And have you run `gc.collect()` to clear up any circular references? – DavidW Jun 04 '17 at 11:31
  • Yep, thanks, I looked again and there was no memory leak. When exactly does Python garbage collect? From the memory utilization graphs it is unclear. – vin Jun 05 '17 at 04:15
  • Mostly immediately, as soon as no variables point at the memory. It then looks for circular references less frequently. https://stackoverflow.com/questions/9449489/when-are-objects-garbage-collected-in-python – DavidW Jun 05 '17 at 04:32
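
A minimal sketch of the check suggested in the comments: force a collection between batches of work and watch the process's peak memory. The resource module is in the standard library but Unix-only, and the batch body below is a placeholder:

import gc
import resource

def peak_rss():
    # Peak resident set size so far (kilobytes on Linux, bytes on macOS).
    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

for batch in range(5):
    # ... run a large batch of gs_iterate_once calls here ...
    gc.collect()  # break any circular references before measuring
    print("after batch %d: peak RSS %d" % (batch, peak_rss()))

If the figure keeps climbing batch after batch even with gc.collect(), memory really is being retained; a number that levels off after the first batch usually just means the allocator is holding on to freed pages rather than leaking.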

0 Answers