From 4cdb7ee51db6ec544382b75cda3685ae39c4ec8f Mon Sep 17 00:00:00 2001 From: yujun <50394665+JunnYu@users.noreply.github.com> Date: Wed, 14 Jul 2021 05:18:54 +0800 Subject: [PATCH] fix #11724 (#11897) --- src/transformers/models/marian/modeling_tf_marian.py | 7 ++++--- src/transformers/models/pegasus/modeling_tf_pegasus.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index f0bfea8c549..8dddcfa6331 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -151,11 +151,12 @@ class TFMarianSinusoidalPositionalEmbedding(tf.keras.layers.Layer): position_enc = np.array( [[pos / np.power(10000, 2 * (j // 2) / dim) for j in range(dim)] for pos in range(n_pos)] ) + table = np.zeros_like(position_enc) # index 0 is all zero - position_enc[:, 0 : dim // 2] = np.sin(position_enc[:, 0::2]) - position_enc[:, dim // 2 :] = np.cos(position_enc[:, 1::2]) + table[:, 0 : dim // 2] = np.sin(position_enc[:, 0::2]) + table[:, dim // 2 :] = np.cos(position_enc[:, 1::2]) # convert to tensor - table = tf.convert_to_tensor(position_enc) + table = tf.convert_to_tensor(table) tf.stop_gradient(table) return table diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index f71878ecaee..ced69871f6f 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -152,11 +152,12 @@ class TFPegasusSinusoidalPositionalEmbedding(tf.keras.layers.Layer): position_enc = np.array( [[pos / np.power(10000, 2 * (j // 2) / dim) for j in range(dim)] for pos in range(n_pos)] ) + table = np.zeros_like(position_enc) # index 0 is all zero - position_enc[:, 0 : dim // 2] = np.sin(position_enc[:, 0::2]) - position_enc[:, dim // 2 :] = np.cos(position_enc[:, 1::2]) + table[:, 0 : dim // 2] = np.sin(position_enc[:, 0::2]) + table[:, dim // 2 :] = np.cos(position_enc[:, 1::2]) # convert to tensor - table = tf.convert_to_tensor(position_enc) + table = tf.convert_to_tensor(table) tf.stop_gradient(table) return table