Replace tf.math.divide with int(/) to remove dim_per_head from the TF graph (#14600)

Co-authored-by: yis <yis@graphcore.ai>
yis11178 2021-12-02 13:13:42 +00:00 committed by GitHub
parent 43f953cc2e
commit 96cc02b51b


@@ -170,7 +170,7 @@ class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
        k_length = shape_list(key)[1]
        # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured'
        # assert key.size() == value.size()
-       dim_per_head = tf.math.divide(self.dim, self.n_heads)
+       dim_per_head = int(self.dim / self.n_heads)
        dim_per_head = tf.cast(dim_per_head, dtype=tf.int32)
        mask_reshape = [bs, 1, 1, k_length]
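
For context, below is a minimal sketch (not the transformers code itself) of why the change matters: `tf.math.divide` produces a float tensor, so `dim_per_head` becomes an op in the TF graph and still needs a `tf.cast`, whereas plain Python `int(/)` is evaluated eagerly at trace time and stays a constant. The values `dim=768` and `n_heads=12` are assumed here purely for illustration.

```python
import tensorflow as tf

dim, n_heads = 768, 12  # illustrative values, not read from any config

# Before: tf.math.divide returns a float64 scalar tensor, so dim_per_head is a
# graph node and must be cast before it can be used as an integer dimension.
dim_per_head_graph = tf.math.divide(dim, n_heads)           # tf.Tensor(64.0, dtype=float64)
dim_per_head_graph = tf.cast(dim_per_head_graph, tf.int32)  # tf.Tensor(64, dtype=int32)

# After: Python division wrapped in int() is computed once in Python, so
# dim_per_head is a plain constant and adds no extra ops to the TF graph.
dim_per_head_const = int(dim / n_heads)                      # 64 (Python int)

print(dim_per_head_graph, dim_per_head_const)
```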