mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 04:40:06 +06:00

* update exampel * update * push the converted diff files for testing and ci * correct one example * fix class attributes and docstring * nits * oups * fixed config! * update * nitd * class attributes are not matched against the other, this is missing * fixed overwriting self.xxx now onto the attributes I think * partial fix, now order with docstring * fix docstring order? * more fixes * update * fix missing docstrings! * examples don't all work yet * fixup * nit * updated * hick * update * delete * update * update * update * fix * all default * no local import * fix more diff * some fix related to "safe imports" * push fixed * add helper! * style * add a check * all by default * add the * update * FINALLY! * nit * fix config dependencies * man that is it * fix fix * update diffs * fix the last issue * re-default to all * alll the fixes * nice * fix properties vs setter * fixup * updates * update dependencies * make sure to install what needs to be installed * fixup * quick fix for now * fix! * fixup * update * update * updates * whitespaces * nit * fix * simplify everything, and make it file agnostic (should work for image processors) * style * finish fixing all import issues * fixup * empty modeling should not be written! * Add logic to find who depends on what * update * cleanup * update * update gemma to support positions * some small nits * this is the correct docstring for gemma2 * fix merging of docstrings * update * fixup * update * take doc into account * styling * update * fix hidden activation * more fixes * final fixes! * fixup * fixup instruct blip video * update * fix bugs * align gemma2 with the rest as well * updats * revert * update * more reversiom * grind * more * arf * update * order will matter * finish del stuff * update * rename to modular * fixup * nits * update makefile * fixup * update order of the checks! 
* fix * fix docstring that has a call inside * fiix conversion check * style * add some initial documentation * update * update doc * some fixup * updates * yups * Mostly todo gimme a minut * update * fixup * revert some stuff * Review docs for the modular transformers (#33472) Docs * good update * fixup * mmm current updates lead to this code * okay, this fixes it * cool * fixes * update * nit * updates * nits * fix doc * update * revert bad changes * update * updates * proper update * update * update? * up * update * cool * nits * nits * bon bon * fix * ? * minimise changes * update * update * update * updates? * fixed gemma2 * kind of a hack * nits * update * remove `diffs` in favor of `modular` * fix make fix copies --------- Co-authored-by: Lysandre Debut <hi@lysand.re>
98 lines
4.3 KiB
Python
98 lines
4.3 KiB
Python
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# This file was automatically generated from <path_to_diff_file.py>.
# Do NOT edit this file manually as any edits will be overwritten by the generation of
# the file from the diff. If any change should be done, please apply the change to the
# diff.py file directly. One of our CI checks enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
from ...configuration_utils import PretrainedConfig
|
|
from ...modeling_rope_utils import rope_config_validation
|
|
|
|
|
|
class MyNewModel2Config(PretrainedConfig):
    r"""
    Configuration class registered under `model_type = "my_new_model2"`.

    Configuration objects inherit from [`PretrainedConfig`]. Read the documentation from [`PretrainedConfig`] for
    more information on the arguments that are forwarded to it.

    Unless stated otherwise, every argument below is stored unchanged as an instance attribute of the same name.

    Args:
        vocab_size (*optional*, defaults to 32000):
            Stored as `self.vocab_size`.
        hidden_size (*optional*, defaults to 4096):
            Stored as `self.hidden_size`. Also used to derive `head_dim` when `head_dim` is not given.
        intermediate_size (*optional*, defaults to 11008):
            Stored as `self.intermediate_size`.
        num_hidden_layers (*optional*, defaults to 32):
            Stored as `self.num_hidden_layers`.
        num_attention_heads (*optional*, defaults to 32):
            Stored as `self.num_attention_heads`. Also used as the fallback for `num_key_value_heads` and to derive
            `head_dim` when `head_dim` is not given.
        num_key_value_heads (*optional*):
            Stored as `self.num_key_value_heads`. When `None`, the value of `num_attention_heads` is used instead
            (kept for backward compatibility).
        hidden_act (*optional*, defaults to `"silu"`):
            Stored as `self.hidden_act`.
        max_position_embeddings (*optional*, defaults to 2048):
            Stored as `self.max_position_embeddings`.
        initializer_range (*optional*, defaults to 0.02):
            Stored as `self.initializer_range`.
        rms_norm_eps (*optional*, defaults to 1e-6):
            Stored as `self.rms_norm_eps`.
        use_cache (*optional*, defaults to `True`):
            Stored as `self.use_cache`.
        pad_token_id (*optional*):
            Forwarded to `PretrainedConfig.__init__`.
        bos_token_id (*optional*, defaults to 1):
            Forwarded to `PretrainedConfig.__init__`.
        eos_token_id (*optional*, defaults to 2):
            Forwarded to `PretrainedConfig.__init__`.
        pretraining_tp (*optional*, defaults to 1):
            Stored as `self.pretraining_tp`.
        tie_word_embeddings (*optional*, defaults to `False`):
            Forwarded to `PretrainedConfig.__init__`.
        rope_theta (*optional*, defaults to 10000.0):
            Stored as `self.rope_theta`.
        rope_scaling (*optional*):
            Stored as `self.rope_scaling`. When it is not `None` and contains the legacy key `"type"`, that value
            is copied to the key `"rope_type"`. The object passed in is updated in place, not copied.
        attention_bias (*optional*, defaults to `False`):
            Stored as `self.attention_bias`.
        attention_dropout (*optional*, defaults to 0.0):
            Stored as `self.attention_dropout`.
        mlp_bias (*optional*, defaults to `False`):
            Stored as `self.mlp_bias`.
        head_dim (*optional*):
            Stored as `self.head_dim`. When `None`, `hidden_size // num_attention_heads` is used instead.
        kwargs (*optional*):
            Remaining keyword arguments, forwarded to `PretrainedConfig.__init__`.

    The rotary position embedding settings are checked with `rope_config_validation(self)` before
    `PretrainedConfig.__init__` is called.
    """

    model_type = "my_new_model2"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=None,
        hidden_act="silu",
        max_position_embeddings=2048,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=1,
        eos_token_id=2,
        pretraining_tp=1,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        head_dim=None,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # for backward compatibility: an unset key/value head count falls back to the
        # number of attention heads
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.pretraining_tp = pretraining_tp
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.mlp_bias = mlp_bias
        # An explicit head_dim wins; otherwise derive it from the hidden size and head count.
        self.head_dim = head_dim if head_dim is not None else self.hidden_size // self.num_attention_heads
        # Validate the correctness of rotary position embeddings parameters
        # BC: if there is a 'type' field, move it to 'rope_type'.
        # NOTE: this writes into the caller-supplied `rope_scaling` object (no copy is made).
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
        rope_config_validation(self)

        # The token ids and the embedding-tying flag are handled by the base class,
        # together with any extra keyword arguments.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
|