mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-29 01:02:25 +06:00

* 1. seqeval required by ner pl example. install from examples/requirements. 2. unrecognized arguments: save_steps * pl checkpoint callback filenotfound error: make directory and pass * #3159 pl checkpoint path difference * 1. Updated Readme for pl 2. pl script now also correctly displays logs 3. pass gpu ids compared to number of gpus * Updated results in readme * 1. updated readme 2. removing deprecated pl methods 3. finalizing scripts * comment length check * using deprecated validation_end for stable results * style related changes
40 lines
1.5 KiB
Bash
40 lines
1.5 KiB
Bash
#!/usr/bin/env bash
#
# Installs the Python dependencies needed by the steps that follow in this
# script. Run it from the directory that contains run_pl_ner.py; the
# requirements file is looked up one level above it.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails, so a failed step cannot silently feed
# empty or partial files into the later steps.
set -euo pipefail

# Install newest ptl (fetched over HTTPS rather than plain HTTP).
pip install -U git+https://github.com/PyTorchLightning/pytorch-lightning/

# for seqeval metrics import
pip install -r ../requirements.txt

# Download the train/dev/test splits of the GermEval 2014 NER data. For each
# split: drop the lines starting with "#", keep tab-separated columns 2 and 3,
# and turn the tab between them into a space. The result is written to
# <split>.txt.tmp in the current directory.
for split in train dev test; do
  # -f makes curl emit nothing on an HTTP error instead of saving the
  # server's error page as if it were data.
  curl -fL "https://sites.google.com/site/germeval2014ner/data/NER-de-${split}.tsv?attredirects=0&d=1" \
    | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > "${split}.txt.tmp"

  # An empty file means the download or filtering produced no data at all;
  # stop here rather than continue with an empty split.
  if [[ ! -s "${split}.txt.tmp" ]]; then
    printf 'error: no data downloaded for split "%s"\n' "${split}" >&2
    exit 1
  fi
done

# Fetch the preprocessing script that is run on the downloaded splits.
wget "https://raw.githubusercontent.com/stefan-it/fine-tuned-berts-seq/master/scripts/preprocess.py"

# Settings passed to preprocess.py here and to the training run later on.
export MAX_LENGTH=128
export BERT_MODEL=bert-base-multilingual-cased

# Run preprocess.py over every downloaded split, passing the model name and
# the maximum length; its stdout becomes the final <split>.txt file.
for split in train dev test; do
  python3 preprocess.py "${split}.txt.tmp" "${BERT_MODEL}" "${MAX_LENGTH}" > "${split}.txt"
done

# Build labels.txt: the sorted, de-duplicated set of non-empty values found in
# the second space-separated column of the three split files.
cut -d " " -f 2 train.txt dev.txt test.txt | grep -v "^$" | sort -u > labels.txt

# Training hyper-parameters, exported so child processes can read them.
export BATCH_SIZE=32
export NUM_EPOCHS=3
export SEED=1

# The output directory is created inside the directory the script is run from.
export OUTPUT_DIR_NAME=germeval-model
export CURRENT_DIR="${PWD}"
export OUTPUT_DIR="${CURRENT_DIR}/${OUTPUT_DIR_NAME}"

# Quoted so a working directory containing spaces still yields one directory;
# -p makes re-runs succeed when the directory already exists.
mkdir -p -- "${OUTPUT_DIR}"

# Launch run_pl_ner.py with training and prediction enabled, reading the data
# and labels.txt from the current directory. The batch size comes from the
# exported BATCH_SIZE (instead of a second hard-coded 32) so that changing the
# variable actually changes the run.
python3 run_pl_ner.py --data_dir ./ \
  --model_type bert \
  --labels ./labels.txt \
  --model_name_or_path "${BERT_MODEL}" \
  --output_dir "${OUTPUT_DIR}" \
  --max_seq_length "${MAX_LENGTH}" \
  --num_train_epochs "${NUM_EPOCHS}" \
  --train_batch_size "${BATCH_SIZE}" \
  --seed "${SEED}" \
  --do_train \
  --do_predict