diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py
index 68fd6434ee8..507046ea3c3 100644
--- a/utils/check_config_attributes.py
+++ b/utils/check_config_attributes.py
@@ -33,7 +33,7 @@ CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING
 
 SPECIAL_CASES_TO_ALLOW = {
     # 'max_position_embeddings' is not used in modeling file, but needed for eval frameworks like Huggingface's lighteval (https://github.com/huggingface/lighteval/blob/af24080ea4f16eaf1683e353042a2dfc9099f038/src/lighteval/models/base_model.py#L264).
-    # periods and offsers are not used in modeling file, but used in the configuration file to define `layers_block_type` and `layers_num_experts`.
+    # periods and offsets are not used in modeling file, but used in the configuration file to define `layers_block_type` and `layers_num_experts`.
     "BambaConfig": [
         "attn_layer_indices",
     ],
diff --git a/utils/check_copies.py b/utils/check_copies.py
index c62a192c107..0dffa79a327 100644
--- a/utils/check_copies.py
+++ b/utils/check_copies.py
@@ -245,7 +245,7 @@ def _sanity_check_splits(splits_1, splits_2, is_class, filename):
             ["block_without_name", "block_with_name"],
         ]:
             raise ValueError(
-                f"""Class defined in {filename} doesn't have the expected stucture.
+                f"""Class defined in {filename} doesn't have the expected structure.
                 See the docstring of `_sanity_check_splits` in the file `utils/check_copies.py`""",
             )
@@ -652,7 +652,7 @@ def is_copy_consistent(filename: str, overwrite: bool = False, buffer: dict = No
     Returns:
         `Optional[List[Tuple[str, int]]]`: If `overwrite=False`, returns the list of differences as tuples `(str, int)`
-            with the name of the object having a diff and the line number where theere is the first diff.
+            with the name of the object having a diff and the line number where there is the first diff.
     """
     base_path = TRANSFORMERS_PATH if not filename.startswith("tests") else MODEL_TEST_PATH
diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py
index 43dfdc9a497..81ab0dea0d4 100644
--- a/utils/check_docstrings.py
+++ b/utils/check_docstrings.py
@@ -683,7 +683,7 @@ def replace_default_in_arg_description(description: str, default: Any) -> str:
     Args:
         description (`str`): The description of an argument in a docstring to process.
-        default (`Any`): The default value that whould be in the docstring of that argument.
+        default (`Any`): The default value that would be in the docstring of that argument.
 
     Returns:
         `str`: The description updated with the new default value.
@@ -906,7 +906,7 @@ def match_docstring_with_signature(obj: Any) -> Optional[Tuple[str, str]]:
 
 def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str):
     """
-    Fixes the docstring of an object by replacing its arguments documentaiton by the one matched with the signature.
+    Fixes the docstring of an object by replacing its arguments documentation by the one matched with the signature.
 
     Args:
         obj (`Any`):
diff --git a/utils/check_inits.py b/utils/check_inits.py
index 840bad086dd..95e5a48a0fb 100644
--- a/utils/check_inits.py
+++ b/utils/check_inits.py
@@ -16,7 +16,7 @@
 Utility that checks the custom inits of Transformers are well-defined: Transformers uses init files that delay the
 import of an object to when it's actually needed. This is to avoid the main init importing all models, which would
 make the line `import transformers` very slow when the user has all optional dependencies installed. The inits with
-delayed imports have two halves: one definining a dictionary `_import_structure` which maps modules to the name of the
+delayed imports have two halves: one defining a dictionary `_import_structure` which maps modules to the name of the
 objects in each module, and one in `TYPE_CHECKING` which looks like a normal init for type-checkers. The goal of this
 script is to check the objects defined in both halves are the same.
@@ -363,7 +363,7 @@ def check_submodules():
     if len(module_not_registered) > 0:
         list_of_modules = "\n".join(f"- {module}" for module in module_not_registered)
         raise ValueError(
-            "The following submodules are not properly registed in the main init of Transformers:\n"
+            "The following submodules are not properly registered in the main init of Transformers:\n"
             f"{list_of_modules}\n"
             "Make sure they appear somewhere in the keys of `_import_structure` with an empty list as value."
         )
diff --git a/utils/custom_init_isort.py b/utils/custom_init_isort.py
index 82bf07ce43a..7d3635c71b5 100644
--- a/utils/custom_init_isort.py
+++ b/utils/custom_init_isort.py
@@ -16,7 +16,7 @@
 Utility that sorts the imports in the custom inits of Transformers. Transformers uses init files that delay the
 import of an object to when it's actually needed. This is to avoid the main init importing all models, which would
 make the line `import transformers` very slow when the user has all optional dependencies installed. The inits with
-delayed imports have two halves: one definining a dictionary `_import_structure` which maps modules to the name of the
+delayed imports have two halves: one defining a dictionary `_import_structure` which maps modules to the name of the
 objects in each module, and one in `TYPE_CHECKING` which looks like a normal init for type-checkers. `isort` or `ruff`
 properly sort the second half which looks like traditionl imports, the goal of this script is to sort the first half.
diff --git a/utils/deprecate_models.py b/utils/deprecate_models.py
index add8da74d93..db449ef00a0 100644
--- a/utils/deprecate_models.py
+++ b/utils/deprecate_models.py
@@ -344,7 +344,7 @@ def deprecate_models(models):
         print("Removing #Copied from statements from model's files")
         remove_copied_from_statements(model)
 
-        # Move the model file to deprecated: src/transfomers/models/model -> src/transformers/models/deprecated/model
+        # Move the model file to deprecated: src/transformers/models/model -> src/transformers/models/deprecated/model
         print("Moving model files to deprecated for model")
         move_model_files_to_deprecated(model)
diff --git a/utils/modular_model_converter.py b/utils/modular_model_converter.py
index efb8ac00475..0c0e2ac4a06 100644
--- a/utils/modular_model_converter.py
+++ b/utils/modular_model_converter.py
@@ -257,7 +257,7 @@ def is_full_docstring(new_docstring: str) -> bool:
     """Check if `new_docstring` is a full docstring, or if it is only part of a docstring that should then
     be merged with the existing old one.
     """
-    # libcst returns the docstrinbgs with litteral `r"""` quotes in front
+    # libcst returns the docstrings with literal `r"""` quotes in front
     new_docstring = new_docstring.split('"""', 1)[1]
     # The docstring contains Args definition, so it is self-contained
     if re.search(r"\n\s*Args:\n", new_docstring):
@@ -1141,7 +1141,7 @@ def append_new_import_node(
 def get_needed_imports(body: dict[str, dict], all_imports: list[cst.CSTNode]) -> list[cst.CSTNode]:
     """Get all the imports needed in the `body`, from the list of `all_imports`.
     `body` is a dict with the following structure `{str: {"insert_idx": int, "node": cst.CSTNode}}`.
-    Note: we need to use `isinstance` on scope assignements, m.matches apparently does not work here yet!
+    Note: we need to use `isinstance` on scope assignments, m.matches apparently does not work here yet!
     """
     new_body = [k[1]["node"] for k in sorted(body.items(), key=lambda x: x[1]["insert_idx"])]
     wrapper = MetadataWrapper(cst.Module(body=all_imports + new_body))
@@ -1615,7 +1615,7 @@ def get_class_node_and_dependencies(
 
 
 def create_modules(modular_mapper: ModularFileMapper) -> dict[str, cst.Module]:
-    """Create all the new modules based on visiting the modular file. It replaces all classes as necesary."""
+    """Create all the new modules based on visiting the modular file. It replaces all classes as necessary."""
     files = defaultdict(dict)
     current_file_indices = defaultdict(lambda: 0)
diff --git a/utils/split_doctest_jobs.py b/utils/split_doctest_jobs.py
index 0735298f312..d8c97f57401 100644
--- a/utils/split_doctest_jobs.py
+++ b/utils/split_doctest_jobs.py
@@ -49,7 +49,7 @@ if __name__ == "__main__":
         "--num_splits",
         type=int,
         default=1,
-        help="the number of splits into which the (flat) list of direcotry/file paths will be split. This has effect only if `only_return_keys` is `True`.",
+        help="the number of splits into which the (flat) list of directory/file paths will be split. This has effect only if `only_return_keys` is `True`.",
     )
 
     args = parser.parse_args()
diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py
index 40749044260..f41634d6127 100644
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -185,7 +185,7 @@ def keep_doc_examples_only(content: str) -> str:
 def get_all_tests() -> List[str]:
     """
     Walks the `tests` folder to return a list of files/subfolders. This is used to split the tests to run when using
-    paralellism. The split is:
+    parallelism. The split is:
 
     - folders under `tests`: (`tokenization`, `pipelines`, etc) except the subfolder `models` is excluded.
     - folders under `tests/models`: `bert`, `gpt2`, etc.
@@ -854,7 +854,7 @@ def print_tree_deps_of(module, all_edges=None):
 
 def init_test_examples_dependencies() -> Tuple[Dict[str, List[str]], List[str]]:
     """
-    The test examples do not import from the examples (which are just scripts, not modules) so we need som extra
+    The test examples do not import from the examples (which are just scripts, not modules) so we need some extra
     care initializing the dependency map, which is the goal of this function. It initializes the dependency map for
     example files by linking each example to the example test file for the example framework.
diff --git a/utils/update_metadata.py b/utils/update_metadata.py
index 8e4a7e3fe53..d2023ff4679 100755
--- a/utils/update_metadata.py
+++ b/utils/update_metadata.py
@@ -132,7 +132,7 @@ def camel_case_split(identifier: str) -> List[str]:
         identifier (`str`): The camel-cased name to parse.
 
     Returns:
-        `List[str]`: The list of words in the identifier (as seprated by capital letters).
+        `List[str]`: The list of words in the identifier (as separated by capital letters).
 
     Example:
@@ -215,7 +215,7 @@ def get_frameworks_table() -> pd.DataFrame:
 
 def update_pipeline_and_auto_class_table(table: Dict[str, Tuple[str, str]]) -> Dict[str, Tuple[str, str]]:
     """
-    Update the table maping models to pipelines and auto classes without removing old keys if they don't exist anymore.
+    Update the table mapping models to pipelines and auto classes without removing old keys if they don't exist anymore.
 
     Args:
         table (`Dict[str, Tuple[str, str]]`):