#!/usr/bin/env bash # Install newest ptl. pip install -U git+http://github.com/PyTorchLightning/pytorch-lightning/ # for seqeval metrics import pip install -r ../requirements.txt curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-train.tsv?attredirects=0&d=1' \ | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-dev.tsv?attredirects=0&d=1' \ | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp curl -L 'https://sites.google.com/site/germeval2014ner/data/NER-de-test.tsv?attredirects=0&d=1' \ | grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp export MAX_LENGTH=128 export BERT_MODEL=bert-base-multilingual-cased python3 scripts/preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt python3 scripts/preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt python3 scripts/preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt export BATCH_SIZE=32 export NUM_EPOCHS=3 export SEED=1 export OUTPUT_DIR_NAME=germeval-model export CURRENT_DIR=${PWD} export OUTPUT_DIR=${CURRENT_DIR}/${OUTPUT_DIR_NAME} mkdir -p $OUTPUT_DIR # Add parent directory to python path to access lightning_base.py export PYTHONPATH="../":"${PYTHONPATH}" python3 run_pl_ner.py --data_dir ./ \ --model_type bert \ --labels ./labels.txt \ --model_name_or_path $BERT_MODEL \ --output_dir $OUTPUT_DIR \ --max_seq_length $MAX_LENGTH \ --num_train_epochs $NUM_EPOCHS \ --train_batch_size $BATCH_SIZE \ --seed $SEED \ --do_train \ --do_predict