Mirror of https://github.com/huggingface/transformers.git (synced 2025-07-31 02:02:21 +06:00)
[TF 2.2 compat] use tf.VariableAggregation.ONLY_FIRST_REPLICA (#4283)
* Fix the issue to properly run the accumulator with TF 2.2
* Apply style
* Fix training_args_tf for TF 2.2
* Fix the TF training args when only one GPU is available
* Remove the fixed version of TF in setup.py
parent cffbb3d8ed
commit 94b57bf796
setup.py | 6 +++---
@@ -67,8 +67,8 @@ extras = {}
 extras["mecab"] = ["mecab-python3"]
 extras["sklearn"] = ["scikit-learn"]
-extras["tf"] = ["tensorflow<=2.1.0"]
-extras["tf-cpu"] = ["tensorflow-cpu<=2.1.0"]
+extras["tf"] = ["tensorflow"]
+extras["tf-cpu"] = ["tensorflow-cpu"]
 extras["torch"] = ["torch"]
 
 extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]
@@ -81,7 +81,7 @@ extras["quality"] = [
     "isort @ git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort",
     "flake8",
 ]
-extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow<=2.1.0", "torch"]
+extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow", "torch"]
 
 setup(
     name="transformers",
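The relaxed pins map straight to pip's extras syntax. A minimal sketch of the setuptools pattern in play (hypothetical package name and version, illustrative only, not the project's actual setup.py):

# sketch_setup.py -- illustrative only
from setuptools import setup

extras = {}
extras["tf"] = ["tensorflow"]          # this commit: was "tensorflow<=2.1.0"
extras["tf-cpu"] = ["tensorflow-cpu"]  # this commit: was "tensorflow-cpu<=2.1.0"

setup(
    name="example-package",  # hypothetical
    version="0.0.1",         # hypothetical
    extras_require=extras,   # enables e.g. `pip install "example-package[tf]"`
)

With the upper bound removed, `pip install "transformers[tf]"` resolves the newest available TensorFlow, which at the time of this commit includes 2.2.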
src/transformers/optimization_tf.py

@@ -204,7 +204,10 @@ class GradientAccumulator(object):
         """Number of accumulated steps."""
         if self._accum_steps is None:
             self._accum_steps = tf.Variable(
-                tf.constant(0, dtype=tf.int64), trainable=False, synchronization=tf.VariableSynchronization.ON_READ,
+                tf.constant(0, dtype=tf.int64),
+                trainable=False,
+                synchronization=tf.VariableSynchronization.ON_READ,
+                aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
             )
 
         return self._accum_steps.value()
@@ -223,7 +226,10 @@ class GradientAccumulator(object):
             self._gradients.extend(
                 [
                     tf.Variable(
-                        tf.zeros_like(gradient), trainable=False, synchronization=tf.VariableSynchronization.ON_READ,
+                        tf.zeros_like(gradient),
+                        trainable=False,
+                        synchronization=tf.VariableSynchronization.ON_READ,
+                        aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
                    )
                     for gradient in gradients
                 ]
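Why the new keyword: under tf.distribute, a variable created with synchronization=ON_READ keeps a separate value on each replica, and TF 2.2 became strict about declaring how those values combine when the variable is touched in cross-replica context; ONLY_FIRST_REPLICA simply exposes replica 0's value. A minimal standalone sketch of the pattern the diff adopts (not the GradientAccumulator class itself):

import tensorflow as tf

# Create an ON_READ variable under a strategy scope; the aggregation tells
# tf.distribute which value to expose when reading across replicas.
strategy = tf.distribute.MirroredStrategy()  # fine on a single CPU/GPU too
with strategy.scope():
    accum_steps = tf.Variable(
        tf.constant(0, dtype=tf.int64),
        trainable=False,
        synchronization=tf.VariableSynchronization.ON_READ,
        aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
    )

print(int(accum_steps.value()))  # reads the first replica's value: 0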
src/transformers/training_args_tf.py

@@ -56,9 +56,11 @@ class TFTrainingArguments(TrainingArguments):
             strategy = tf.distribute.experimental.TPUStrategy(tpu)
         elif len(gpus) == 0:
             strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
+        elif len(gpus) == 1:
+            strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
         elif len(gpus) > 1:
             # If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0`
-            strategy = tf.distribute.MirroredStrategy(gpus)
+            strategy = tf.distribute.MirroredStrategy()
         else:
             raise ValueError("Cannot find the proper strategy please check your environment properties.")
 
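The new single-GPU branch avoids spinning up MirroredStrategy's cross-replica machinery when there is nothing to mirror, and dropping the explicit device list lets `CUDA_VISIBLE_DEVICES` decide which GPUs MirroredStrategy sees. A minimal sketch of the same dispatch (assuming tf.config.list_physical_devices for device discovery; the real class above also handles TPUs):

import tensorflow as tf

# Pick a distribution strategy based on the visible devices.
gpus = tf.config.list_physical_devices("GPU")
if len(gpus) == 0:
    strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
elif len(gpus) == 1:
    # One device: no need for MirroredStrategy's collective ops.
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
else:
    # No device list passed: MirroredStrategy() mirrors across every
    # visible GPU, so CUDA_VISIBLE_DEVICES selects the subset.
    strategy = tf.distribute.MirroredStrategy()

print(strategy.num_replicas_in_sync)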