mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-03 12:50:06 +06:00

* Support `flash_attn_3`. Implements fwd and tests for Flash Attention 3 https://github.com/Dao-AILab/flash-attention/commits/main/hopper - Includes checks for dropout>0 and ALiBi in `modeling_utils.PreTrainedModel._check_and_enable_flash_attn_3` (Dropout will likely be supported soon, so this will need to be updated, along with `modeling_flash_attention_utils._flash_attention_forward` at the `if _IS_FLASH_ATTN_3_AVAILABLE: ...` branch). An example Llama implementation is included in `modeling_llama.py`, but other models would still need to be updated. Based on https://github.com/huggingface/transformers/pull/36190 which has model implementations and examples which could be merged * Add tests for Flash Attention 2 and 3 parity * ci fix * FA2 compatibility - `_prepare_flash_attention_from_position_ids` -> `prepare_fa2_from_position_ids` - Remove bettertransformer check in Flash Attention 3 - Merge tests - Add licensing * ci fix * Test naming consistency * ci fix * Deprecation warning for `prepare_fa2_from_position_ids` * ci fix
63 lines
1.8 KiB
TOML
63 lines
1.8 KiB
TOML
# Coverage measurement: track the `transformers` package and leave out any
# path matching `*/convert_*` as well as `__main__.py` files.
[tool.coverage.run]
source = ["transformers"]
omit = [
    "*/convert_*",
    "*/__main__.py",
]

# Coverage reporting: source lines matching any pattern below are left out of
# the report.
# NOTE(review): "raise" and "except" are unanchored patterns, so they look like
# they match every line containing those words — confirm that breadth is
# intended.
[tool.coverage.report]
exclude_lines = [
    "pragma: no cover",
    "raise",
    "except",
    "register_parameter",
]

# Ruff base settings shared by the linter and the formatter.
[tool.ruff]
target-version = "py39"
line-length = 119

[tool.ruff.lint]
# Never enforce `E501` (line length violations).
ignore = ["C901", "E501", "E741", "F402", "F823"]
# RUF013: Checks for the use of implicit Optional
# in type annotations when the default parameter value is None.
select = ["C", "E", "F", "I", "W", "RUF013", "UP006"]
# Treat the `UP006` autofix as safe so it is applied without opting in to
# unsafe fixes.
extend-safe-fixes = ["UP006"]

# Ignore import violations in all `__init__.py` files.
# The two other entries silence unused-import (`F401`) reports for
# `file_utils.py` and for the `dummy_*.py` modules under `utils/`.
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401", "F403", "F811"]
"src/transformers/file_utils.py" = ["F401"]
"src/transformers/utils/dummy_*.py" = ["F401"]

# Import sorting: keep two blank lines after the import block and classify
# `transformers` as first-party.
[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ["transformers"]

[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"

# Like Black, indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

[tool.pytest.ini_options]
# Also collect doctests from Markdown files anywhere in the tree.
addopts = "--doctest-glob='**/*.md'"
doctest_optionflags = "NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
# Custom markers, registered here with their descriptions.
markers = [
    "flash_attn_3_test: marks tests related to flash attention 3 (deselect with '-m \"not flash_attn_3_test\"')",
    "flash_attn_test: marks tests related to flash attention (deselect with '-m \"not flash_attn_test\"')",
    "bitsandbytes: select (or deselect with `not`) bitsandbytes integration tests",
    "generate: marks tests that use the GenerationTesterMixin",
]
# Live log output during test runs, limited to WARNING and above.
log_cli = 1
log_cli_level = "WARNING"
# NOTE(review): presumably consumed by the pytest-asyncio plugin — confirm it
# is installed wherever this option is expected to take effect.
asyncio_default_fixture_loop_scope = "function"