Following the answer here: Tensorflow - matmul of input matrix with batch data
I compared the tf.scan-based results with the tf.matmul-based results. As far as I can see, the results should be identical, but I'm consistently getting different results. I also compared against Keras' K.dot, which provides the same functionality, as a reference.
I'd appreciate any explanation of why that is, or of what my mistake is.
Attached is the full MWE, with two results, evaluated on two separate computers (with different GPUs).
import tensorflow as tf
import keras.backend as K
from keras.layers import Lambda, Input
import numpy as np
# Problem sizes: inner (contracted) dimension, output dimension, batch size.
na, nb, mb = 100, 10000, 10

# Symbolic input; each sample in the batch has shape (1, na).
v = Input(shape=(1, na))
# Right-hand matrix of shape (na, nb); its value is fed in when the graph runs.
e = tf.placeholder(tf.float32, shape=(na, nb))
def dot_a(v):
    """Return the product of `v` and the module-level matrix `e` via `K.dot`.

    This is the Keras-backend variant that the script compares the other
    two implementations against.
    """
    return K.dot(v, e)
# Zero-filled (1, nb) float32 array handed to tf.scan as its initializer.
# The step function below never reads the accumulator, so the values do not
# matter; only the shape and dtype of one step's output are conveyed.
initer = np.zeros((1, nb), dtype=np.float32)


def dot_b(v):
    """Return the product of `v` and `e`, one leading-axis slice at a time.

    `tf.scan` walks over the first axis of `v` and applies `tf.matmul(x, e)`
    to every slice `x` independently; the running accumulator is ignored.
    """
    return tf.scan(
        lambda _unused_acc, x: tf.matmul(x, e),
        v,
        initializer=tf.constant(initer),
    )
def dot_c(v):
    """Return the product of `v` and `e` using a single `tf.matmul`.

    `v` is flattened to rows of length `na`, multiplied by the module-level
    matrix `e`, and the result is reshaped to (-1, 1, nb).
    """
    rows = tf.reshape(v, [-1, na])
    product = tf.matmul(rows, e)
    return tf.reshape(product, [-1, 1, nb])
# Apply each implementation to the same symbolic input through a Lambda layer.
mul1 = Lambda(dot_a)(v)  # K.dot
mul2 = Lambda(dot_b)(v)  # tf.scan, slice by slice
mul3 = Lambda(dot_c)(v)  # one tf.matmul on the flattened batch

# inputs
v0 = np.random.random((mb, 1, na)).astype(np.float32)
e0 = np.random.random((na, nb)).astype(np.float32)
v0 = np.round(v0, decimals=2)
e0 = np.round(e0, decimals=2)

# Evaluate all three outputs in one run so they see identical inputs; the
# context manager closes the session afterwards.
with tf.Session() as sess:
    out1, out2, out3 = sess.run([mul1, mul2, mul3], feed_dict={v: v0, e: e0})

# out2 comes from the tf.scan variant and out3 from the tf.matmul variant, so
# each difference is labelled with the implementation it actually measures.
# %-formatting keeps the output identical under Python 2 and Python 3.
print('norm(out, out-scan) %s' % np.linalg.norm(out1 - out2))
print('norm(out, out-matmul) %s' % np.linalg.norm(out1 - out3))
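Output on computer 1 (note that in both listings below the two labels are interchanged relative to what was computed: the first value is the difference between the K.dot and tf.scan results, the second is the difference between the K.dot and tf.matmul results):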
norm(out, out-matmul) 0.000715436
norm(out, out-scan) 0.0
Output on computer 2:
norm(out, out-matmul) 0.000511952
norm(out, out-scan) 0.0