-
Notifications
You must be signed in to change notification settings - Fork 65
/
train.sh
47 lines (43 loc) · 1.26 KB
/
train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/bin/bash
# This script trains the translation model on the binarized data using fairseq.
echo `date`
exp_dir=$1 # path of the experiment directory
model_arch=${2:-"transformer_18_18"} # model architecture (defaults to `transformer_18_18`)
fairseq-train $exp_dir/final_bin \
--max-source-positions=256 \
--max-target-positions=256 \
--source-lang=SRC \
--target-lang=TGT \
--max-update=1000000 \
--save-interval-updates=2500 \
--arch=$model_arch \
--activation-fn gelu \
--criterion=label_smoothed_cross_entropy \
--label-smoothing=0.1 \
--optimizer adam \
--adam-betas "(0.9, 0.98)" \
--lr-scheduler=inverse_sqrt \
--clip-norm 1.0 \
--warmup-init-lr 1e-07 \
--lr 5e-4 \
--warmup-updates 4000 \
--dropout 0.2 \
--save-dir $exp_dir/model \
--keep-last-epochs 5 \
--keep-interval-updates 3 \
--patience 10 \
--skip-invalid-size-inputs-valid-test \
--fp16 \
--user-dir model_configs \
--update-freq=32 \
--distributed-world-size 8 \
--num-workers 24 \
--max-tokens 1024 \
--eval-bleu \
--eval-bleu-args "{\"beam\": 1, \"lenpen\": 1.0, \"max_len_a\": 1.2, \"max_len_b\": 10}" \
--eval-bleu-detok moses \
--eval-bleu-remove-bpe sentencepiece \
--eval-bleu-print-samples \
--best-checkpoint-metric bleu \
--maximize-best-checkpoint-metric \
--task translation