
Suppose I have a rate matrix R and I want to factorize it into matrices U and V with TensorFlow.

Without batching it is a simple problem and can be solved with the following code:

# define Variables
u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
v = tf.Variable(np.random.rand(output_dim, R_dim_2), dtype=tf.float32, name='v')

# predict rate by multiplication 
predicted_R = tf.matmul(tf.cast(u, tf.float32), tf.cast(v, tf.float32))

#cost function and train step
cost = tf.reduce_sum(tf.reduce_sum(tf.abs(tf.sub(predicted_R, R))))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    init = tf.initialize_all_variables()
    sess.run(init)
    for i in range(no_epochs):
        _, this_cost = sess.run([train_step, cost])
        print 'cost: ', this_cost

I decided to solve this problem with batch updates. My solution was to pass in the indices of U and V that I want to use for predicting the rate matrix R and to update just those selected parts. Here is my code (just read the comments if it takes too much time):

# define variables
u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
v = tf.Variable(np.random.rand(output_dim, R_dim_2), dtype=tf.float32, name='v')
idx1 = tf.placeholder(tf.int32, shape=batch_size1, name='idx1')
idx2 = tf.placeholder(tf.int32, shape=batch_size2, name='idx2')

# get current U and current V by slicing U and V
cur_u = tf.Variable(tf.gather(u, idx1), dtype=tf.float32, name='cur_u')
cur_v = tf.transpose(v)
cur_v = tf.gather(cur_v, idx2)
cur_v = tf.Variable(tf.transpose(cur_v), dtype=tf.float32, name='cur_v')

# predict rate by multiplication 
predicted_R = tf.matmul(tf.cast(cur_u, tf.float32), tf.cast(cur_v, tf.float32))

# get needed rate from rate matrix by slicing it
cur_rate = tf.gather(R, idx1)
cur_rate = tf.transpose(cur_rate)
cur_rate = tf.gather(cur_rate, idx2)
cur_rate = tf.transpose(cur_rate)

#cost function and train step
cost = tf.reduce_sum(tf.reduce_sum(tf.abs(tf.sub(predicted_R, cur_rate))))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    # initialize variables 

    init_new_vars_op = tf.initialize_variables([v, u])
    sess.run(init_new_vars_op)

    init = tf.initialize_all_variables()
    rand_idx = np.sort(np.random.randint(0, R_dim_1, batch_size1))
    rand_idx2 = np.sort(np.random.randint(0, R_dim_2, batch_size2))
    sess.run(init, feed_dict={idx1: rand_idx, idx2: rand_idx2})

    for i in range(no_epochs):
        with tf.Graph().as_default():
            rand_idx1 = np.random.randint(0, R_dim_1, batch_size1)
            rand_idx2 = np.random.randint(0, R_dim_2, batch_size2)
            _, this_cost, tmp_u, tmp_v, tmp_cur_u, tmp_cur_v = sess.run([train_step, cost, u, v, cur_u, cur_v], feed_dict={idx1: rand_idx1, idx2: rand_idx2})
            print this_cost

            #update U and V with computed current U and current V
            tmp_u = np.array(tmp_u)
            tmp_u[rand_idx] = tmp_cur_u
            u = tf.assign(u, tmp_u)

            tmp_v = np.array(tmp_v)
            tmp_v[:, rand_idx2] = tmp_cur_v
            v = tf.assign(v, tmp_v)

But I have a memory leak right at `u = tf.assign(u, tmp_u)` and `v = tf.assign(v, tmp_v)`. I applied this but it didn't help.
There was another solution that applies the update to just a subset of U and V, like this, but I ran into lots of other errors with it, so please stay focused on how to solve my memory-leak problem.
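
For reference, that subset-update idea (presumably something along the lines of tf.scatter_update) would look roughly like the sketch below; the new_rows placeholder and the example sizes are made up for the illustration, so treat it as a sketch of the approach rather than the code I actually tried:

# rough sketch: overwrite only selected rows of u inside the graph
import numpy as np
import tensorflow as tf

R_dim_1, output_dim, batch_size1 = 6, 3, 2   # example sizes for the sketch

u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
idx1 = tf.placeholder(tf.int32, shape=[batch_size1], name='idx1')
new_rows = tf.placeholder(tf.float32, shape=[batch_size1, output_dim], name='new_rows')

# built once, outside any loop: scatter_update writes new_rows into the
# rows of u selected by idx1, leaving all other rows untouched
update_u_rows = tf.scatter_update(u, idx1, new_rows)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    sess.run(update_u_rows,
             feed_dict={idx1: [0, 3],
                        new_rows: np.zeros((batch_size1, output_dim), dtype=np.float32)})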
Sorry for my long question, and thanks for reading it.

  • You're creating a new assign op in every loop iteration, similar to what is happening in http://stackoverflow.com/questions/36245481/tensorflow-slow-performance-when-getting-gradients-at-inputs and http://stackoverflow.com/questions/36230559/processing-time-gets-longer-and-longer-after-each-iteration-tensorflow/36233277#36233277 – etarion Oct 05 '16 at 16:02
  • I know that's my problem, but I can't just "build the graph at the start and in the training loop only execute it" as you said in http://stackoverflow.com/questions/36230559/processing-time-gets-longer-and-longer-after-each-iteration-tensorflow/36233277#36233277 – Kibo Oct 05 '16 at 16:16
  • Modifying the graph between run calls will increase memory usage, and is quite slow. Every time the graph is modified, it has to encode the whole graph and copy it over, so something like `for i....sess.run(a.assign_add)` has quadratic complexity. I wrote a wrapper called imperative to deal with this problem -- https://github.com/yaroslavvb/imperative – Yaroslav Bulatov Oct 05 '16 at 16:20
  • Would you please tell me what I should do in my code? https://github.com/yaroslavvb/imperative/ is so sparsely documented – Kibo Oct 05 '16 at 16:35

1 Answer


I just solved this problem by sending the updated values of U and V in as placeholders and then assigning U and V to these passed parameters, so the created graph stays the same across iterations. Here is the code:

# define variables
u = tf.Variable(np.random.rand(R_dim_1, output_dim), dtype=tf.float32, name='u')
v = tf.Variable(np.random.rand(output_dim, R_dim_2), dtype=tf.float32, name='v')
idx1 = tf.placeholder(tf.int32, shape=batch_size1, name='idx1')
idx2 = tf.placeholder(tf.int32, shape=batch_size2, name='idx2')

#define new place holder for changed values of U and V
last_u = tf.placeholder(tf.float32, shape=[R_dim_1, output_dim], name='last_u')
last_v = tf.placeholder(tf.float32, shape=[output_dim, R_dim_2], name='last_v')

#set U and V to updated ones
change_u = tf.assign(u, last_u)
change_v = tf.assign(v, last_v)

# get current U and current V by slicing U and V
cur_u = tf.Variable(tf.gather(u, idx1), dtype=tf.float32, name='cur_u')
cur_v = tf.transpose(v)
cur_v = tf.gather(cur_v, idx2)
cur_v = tf.Variable(tf.transpose(cur_v), dtype=tf.float32, name='cur_v')

# predict rate by multiplication 
predicted_R = tf.matmul(tf.cast(cur_u, tf.float32), tf.cast(cur_v, tf.float32))

# get needed rate from rate matrix by slicing it
cur_rate = tf.gather(R, idx1)
cur_rate = tf.transpose(cur_rate)
cur_rate = tf.gather(cur_rate, idx2)
cur_rate = tf.transpose(cur_rate)

#cost function and train step
cost = tf.reduce_sum(tf.reduce_sum(tf.abs(tf.sub(predicted_R, cur_rate))))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    tmp_u = initial_u
    tmp_v = initial_v

    # initialize variables  
    init_new_vars_op = tf.initialize_variables([v, u])
    sess.run(init_new_vars_op, feed_dict={last_u: tmp_u, last_v: tmp_v})

    init = tf.initialize_all_variables()
    rand_idx = np.sort(np.random.randint(0, R_dim_1, batch_size1))
    rand_idx2 = np.sort(np.random.randint(0, R_dim_2, batch_size2))
    sess.run(init, feed_dict={idx1: rand_idx, idx2: rand_idx2})

    for i in range(no_epochs):
        with tf.Graph().as_default():
            rand_idx1 = np.random.randint(0, R_dim_1, batch_size1)
            rand_idx2 = np.random.randint(0, R_dim_2, batch_size2)
            _, this_cost, tmp_u, tmp_v, tmp_cur_u, tmp_cur_v, _, _ = \
                sess.run([train_step, cost, u, v, cur_u, cur_v, change_u, change_v],
                         feed_dict={idx1: rand_idx1, idx2: rand_idx2, last_u: tmp_u, last_v: tmp_v})
            print this_cost

            # find new values of  U and current V but don't assign to them
            tmp_u = np.array(tmp_u)
            tmp_u[rand_idx] = tmp_cur_u

            tmp_v = np.array(tmp_v)
            tmp_v[:, rand_idx2] = tmp_cur_v
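
The important point is that change_u, change_v and all the other ops are created once, before the training loop; inside the loop the graph is never modified and only the fed values (idx1, idx2, last_u, last_v) change, so memory use stays constant across iterations. For contrast, the leaking pattern boils down to something like this minimal sketch (not taken from my real code):

import numpy as np
import tensorflow as tf

x = tf.Variable(np.zeros(3), dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for i in range(1000):
        # BAD: every iteration adds a new assign op (plus a new constant)
        # to the graph, so the graph keeps growing and memory climbs
        sess.run(tf.assign(x, np.random.rand(3).astype(np.float32)))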
Kibo