Mirror of https://github.com/huggingface/transformers.git, synced 2025-08-01 02:31:11 +06:00
add distant debugging to run_transfo_xl
This commit is contained in:
parent b31ba23913
commit 32fea876bb
@@ -891,7 +891,7 @@ python run_openai_gpt.py \
 --train_batch_size 16 \
 ```
 
-This command run in about 10 min on a single K-80 an gives an evaluation accuracy of 86.42% (the authors reports a median accuracy with the TensorFlow code of 85.8% and the OpenAI GPT paper reports a best single run accuracy of 86.5%).
+This command runs in about 10 min on a single K-80 and gives an evaluation accuracy of about 86.4% (the authors report a median accuracy with the TensorFlow code of 85.8% and the OpenAI GPT paper reports a best single run accuracy of 86.5%).
 
 #### Evaluating the pre-trained Transformer-XL on the WikiText 103 dataset
 
@@ -902,7 +902,7 @@ This command will download a pre-processed version of the WikiText 103 dataset i
 python run_transfo_xl.py --work_dir ../log
 ```
 
-This command run in about 10 min on a single K-80 an gives an evaluation accuracy of 86.42% (the authors reports a median accuracy with the TensorFlow code of 85.8% and the OpenAI GPT paper reports a best single run accuracy of 86.5%).
+This command runs in about 1 min on a V100 and gives an evaluation perplexity of 18.22 on WikiText-103 (the authors report a perplexity of about 18.3 on this dataset with the TensorFlow code).
 
 ## Fine-tuning BERT-large on GPUs
 
@@ -60,9 +60,18 @@ def main():
                         help='do not log the eval result')
     parser.add_argument('--same_length', action='store_true',
                         help='set same length attention with masking')
+    parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
+    parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
     args = parser.parse_args()
     assert args.ext_len >= 0, 'extended context length must be non-negative'
 
+    if args.server_ip and args.server_port:
+        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
+        import ptvsd
+        print("Waiting for debugger attach")
+        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
+        ptvsd.wait_for_attach()
+
     device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
     logger.info("device: {}".format(device))
 
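For readers who want to try the distant-debugging hook added above, here is a minimal usage sketch. Only `--server_ip`, `--server_port`, and `--work_dir ../log` come from the diffs in this commit; the concrete IP address, port number, and the `pip install` step are illustrative assumptions.

```
# Illustrative only: install ptvsd on the machine that runs the script,
# then launch evaluation with the new distant-debugging flags.
pip install ptvsd
python run_transfo_xl.py --work_dir ../log \
    --server_ip 0.0.0.0 --server_port 5678   # placeholder IP/port
# The script prints "Waiting for debugger attach" and blocks in
# ptvsd.wait_for_attach() until a debugger (e.g. a VS Code "attach"
# configuration, see the link in the code comment) connects to
# server_ip:server_port.
```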