Mirror of https://github.com/huggingface/transformers.git, synced 2025-07-24 23:08:57 +06:00

* Initial commit
* Make some fixes
* Make PT model full forward pass
* Drop TF & Flax implementation, fix copies etc.
* Add Flax model and update some corresponding stuff
* Drop some TF things
* Update config and flax local attn
* Add encoder_attention_type to config
* .
* Update docs
* Do some cleansing
* Fix some issues -> make style; add some docs
* Fix position_bias + mask addition + Update tests
* Fix repo consistency
* Fix model consistency by removing flax operation over attn_mask
* [WIP] Add PT TGlobal LongT5
* .
* [WIP] Add flax tglobal model
* [WIP] Update flax model to use the right attention type in the encoder
* Fix flax tglobal model forward pass
* Make use of global_relative_attention_bias
* Add test suites for TGlobal model
* Fix minor bugs, clean code
* Fix pt-flax equivalence, though not convinced of correctness
* Fix LocalAttn implementation to match the original impl. + update READMEs
* Few updates
* Update: [Flax] improve large model init and loading #16148
* Add ckpt conversion script according to #16853 + handle torch device placement
* Minor updates to conversion script
* Typo: AutoModelForSeq2SeqLM -> FlaxAutoModelForSeq2SeqLM
* gpu support + dtype fix
* Apply some suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

* Remove (de)parallelize stuff
* Edit shape comments
* Update README.md
* make fix-copies
* Remove caching logic for local & tglobal attention
* Apply another batch of suggestions from code review
* Add missing checkpoints
* Format converting scripts
* Drop (de)parallelize links from longT5 mdx
* Fix converting script + revert config file change
* Revert "Remove caching logic for local & tglobal attention"

  This reverts commit 2a619828f6ddc3e65bd9bb1725a12b77fa883a46.

* Stash caching logic in Flax model
* Make side relative bias used always
* Drop caching logic in PT model
* Return side bias as it was
* Drop all remaining model parallel logic
* Remove clamp statements
* Move test files to the proper place
* Update docs with new version of hf-doc-builder
* Fix test imports
* Make some minor improvements
* Add missing checkpoints to docs
* Make TGlobal model compatible with torch.onnx.export
* Replace some np.ndarray with jnp.ndarray
* Fix TGlobal for ONNX conversion + update docs
* fix _make_global_fixed_block_ids and masked neg value
* update flax model
* style and quality
* fix imports
* remove load_tf_weights_in_longt5 from init and fix copies
* add slow test for TGlobal model
* typo fix
* Drop obsolete is_parallelizable and one warning
* Update __init__ files to fix repo-consistency
* fix pipeline test
* Fix some device placements
* [wip]: Update tests -- need to generate summaries to update expected_summary
* Fix quality
* Update LongT5 model card
* Update (slow) summarization tests
* make style
* rename checkpoints
* finish
* fix flax tests

Co-authored-by: phungvanduy <pvduy23@gmail.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: patil-suraj <surajp815@gmail.com>
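The commit above adds LongT5, a T5 variant whose encoder replaces full self-attention with local or transient-global (TGlobal) attention so that much longer inputs can be processed. As a rough illustration only (not part of the commit), the sketch below shows how such a model could be loaded through the existing auto classes once merged; the checkpoint name google/long-t5-tglobal-base and the 4096-token input length are assumptions, and a raw pretrained checkpoint is not fine-tuned for summarization, so the generated text merely demonstrates the API.

    # Minimal usage sketch (assumed checkpoint name; not taken from the commit itself).
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    checkpoint = "google/long-t5-tglobal-base"  # assumption: a TGlobal LongT5 checkpoint on the Hub
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # LongT5's encoder attention is (transient-)global/local, so inputs far longer
    # than T5's usual 512 tokens can be encoded without quadratic attention cost.
    long_text = "A very long document goes here. " * 200
    inputs = tokenizer(long_text, return_tensors="pt", truncation=True, max_length=4096)

    # Generate a short output; with an un-finetuned checkpoint this only shows the call pattern.
    output_ids = model.generate(**inputs, max_new_tokens=64)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))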
Changed files:

docs/source/en/quicktour.mdx
docs/source/es/quicktour.mdx
docs/source/en/pipeline_tutorial.mdx
docs/source/en/autoclass_tutorial.mdx
docs/source/en/task_summary.mdx
docs/source/en/model_doc/speech_to_text.mdx
docs/source/en/model_doc/t5.mdx
docs/source/en/model_doc/t5v1.1.mdx
docs/source/en/model_doc/byt5.mdx
docs/source/en/model_doc/tapex.mdx
src/transformers/generation_utils.py
src/transformers/models/albert/modeling_albert.py
src/transformers/models/albert/modeling_tf_albert.py
src/transformers/models/bart/modeling_bart.py
src/transformers/models/beit/modeling_beit.py
src/transformers/models/bert/modeling_bert.py
src/transformers/models/bert/modeling_tf_bert.py
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
src/transformers/models/big_bird/modeling_big_bird.py
src/transformers/models/blenderbot/modeling_blenderbot.py
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
src/transformers/models/convnext/modeling_convnext.py
src/transformers/models/ctrl/modeling_ctrl.py
src/transformers/models/cvt/modeling_cvt.py
src/transformers/models/data2vec/modeling_data2vec_audio.py
src/transformers/models/data2vec/modeling_data2vec_vision.py
src/transformers/models/deit/modeling_deit.py
src/transformers/models/dpt/modeling_dpt.py
src/transformers/models/electra/modeling_electra.py
src/transformers/models/electra/modeling_tf_electra.py
src/transformers/models/glpn/modeling_glpn.py
src/transformers/models/gpt2/modeling_gpt2.py
src/transformers/models/gptj/modeling_gptj.py
src/transformers/models/hubert/modeling_hubert.py
src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
src/transformers/models/layoutlmv3/modeling_layoutlmv3.py
src/transformers/models/longformer/modeling_longformer.py
src/transformers/models/longformer/modeling_tf_longformer.py
src/transformers/models/longt5/modeling_longt5.py
src/transformers/models/marian/modeling_marian.py
src/transformers/models/mbart/modeling_mbart.py
src/transformers/models/mobilebert/modeling_mobilebert.py
src/transformers/models/mobilebert/modeling_tf_mobilebert.py
src/transformers/models/opt/modeling_opt.py
src/transformers/models/opt/modeling_tf_opt.py
src/transformers/models/opt/modeling_flax_opt.py
src/transformers/models/pegasus/modeling_pegasus.py
src/transformers/models/plbart/modeling_plbart.py
src/transformers/models/poolformer/modeling_poolformer.py
src/transformers/models/reformer/modeling_reformer.py
src/transformers/models/resnet/modeling_resnet.py
src/transformers/models/roberta/modeling_roberta.py
src/transformers/models/roberta/modeling_tf_roberta.py
src/transformers/models/segformer/modeling_segformer.py
src/transformers/models/sew/modeling_sew.py
src/transformers/models/sew_d/modeling_sew_d.py
src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
src/transformers/models/speech_to_text/modeling_speech_to_text.py
src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py
src/transformers/models/swin/modeling_swin.py
src/transformers/models/trocr/modeling_trocr.py
src/transformers/models/unispeech/modeling_unispeech.py
src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
src/transformers/models/van/modeling_van.py
src/transformers/models/vilt/modeling_vilt.py
src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
src/transformers/models/vit/modeling_vit.py
src/transformers/models/vit/modeling_tf_vit.py
src/transformers/models/vit_mae/modeling_vit_mae.py
src/transformers/models/wav2vec2/modeling_wav2vec2.py
src/transformers/models/wav2vec2/tokenization_wav2vec2.py
src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py
src/transformers/models/wavlm/modeling_wavlm.py
src/transformers/models/yolos/modeling_yolos.py