
22 lines · 777 B · Bash · Executable File
#!/usr/bin/env bash
# Distill facebook/bart-large-xsum into a smaller DistilBART student
# (12 encoder layers, 6 decoder layers) on the XSum summarization dataset.

# Make the parent examples directory importable for shared utilities.
export PYTHONPATH="../":"${PYTHONPATH}"

# --model_name_or_path is required by the argument parser, but its value is
# ignored here: the student is initialized from the teacher's weights.
python distillation.py \
    --teacher facebook/bart-large-xsum --data_dir xsum \
    --tokenizer_name facebook/bart-large-xsum \
    --student_decoder_layers 6 --student_encoder_layers 12 \
    --freeze_encoder --freeze_embeds \
    --learning_rate=3e-4 \
    --do_train \
    --do_predict \
    --fp16 --fp16_opt_level=O1 \
    --val_check_interval 0.1 --n_val 1000 --eval_beams 2 --length_penalty=0.5 \
    --max_target_length=60 --val_max_target_length=60 --test_max_target_length=100 \
    --model_name_or_path IGNORED \
    --alpha_hid=3. \
    --train_batch_size=16 --eval_batch_size=16 --gradient_accumulation_steps=2 \
    --sortish_sampler \
    --num_train_epochs=6 \
    --warmup_steps 500 \
    --output_dir distilbart_xsum_12_6 \
    "$@"
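The trailing "$@" forwards any extra command-line arguments to distillation.py, so individual hyperparameters can be overridden without editing the script. A minimal sketch of such an invocation, assuming the script is saved as train_distilbart_xsum.sh (the filename is an assumption, not given above):

# Override the output directory and epoch count; any other flag accepted
# by distillation.py can be appended in the same way.
./train_distilbart_xsum.sh --output_dir my_distilbart_run --num_train_epochs=3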