diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py index 622080d4135..de5d3eeef47 100644 --- a/src/transformers/integrations/deepspeed.py +++ b/src/transformers/integrations/deepspeed.py @@ -228,6 +228,11 @@ class HfTrainerDeepSpeedConfig(HfDeepSpeedConfig): elif hasattr(model.config, "hidden_sizes"): # if there are many hidden sizes pick the largest one hidden_size = max(model.config.hidden_sizes) + elif hasattr(model.config, "text_config") and hasattr(model.config.text_config, "hidden_size"): + hidden_size = model.config.text_config.hidden_size + elif hasattr(model.config, "text_config") and hasattr(model.config.text_config, "hidden_sizes"): + # if there are many hidden sizes pick the largest one + hidden_size = max(model.config.text_config.hidden_sizes) else: raise ValueError( "The model's config file has neither `hidden_size` nor `hidden_sizes` entry, "