forked from AI4Bharat/IndicTrans2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
compute_comet_score.sh
84 lines (59 loc) · 3.45 KB
/
compute_comet_score.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash
# This script computes COMET metrics and also performs significance testing on
# the evaluation set, where each subdirectory `eng_Latn-<lang>` contains an
# En-X pair.
#
# Usage: compute_comet_score.sh <devtest_data_dir> [model_name]
#   devtest_data_dir  path to the evaluation directory
#   model_name        name of the model checkpoint passed to `comet-score`
#                     (default: Unbabel/wmt22-comet-da)
#
# Files read inside each `<devtest_data_dir>/eng_Latn-<lang>` directory:
#   test.eng_Latn, test.<lang>                      sources / references
#   test.<lang>.pred.<sys>, test.eng_Latn.pred.<sys> system predictions
# Results are written to the `comet/` subdirectory of the same directory.
#
# Exit status: 0 on success, 2 on a usage error, 1 if the evaluation directory
# is missing or any mkdir / comet-score / comet-compare invocation failed.

# predefined list of languages supported by COMET
langs=(asm_Beng ben_Beng guj_Gujr hin_Deva kan_Knda mal_Mlym mar_Deva ory_Orya pan_Guru tam_Taml tel_Telu urd_Arab)

# we predefine a set of systems which we consider for evaluation
# feel free to change the below line in case you want to add or remove any system
system=(google azure nllb mbart50 m2m100 it1 it2)

# Runs `comet-score` for one system in one translation direction.
# Does nothing (and succeeds) when the system has no prediction file.
# Arguments:
#   $1 - language-pair directory (<devtest_data_dir>/eng_Latn-<lang>)
#   $2 - source language code
#   $3 - target language code
#   $4 - system name (suffix of the prediction file)
#   $5 - COMET model name
# Outputs: writes <pair_dir>/comet/<src>_<tgt>_<sys>_comet.txt
score_system() {
  local pair_dir=$1 src_lang=$2 tgt_lang=$3 sys_name=$4 model=$5
  local src_fname="$pair_dir/test.$src_lang"
  local pred_fname="$pair_dir/test.$tgt_lang.pred.$sys_name"
  local ref_fname="$pair_dir/test.$tgt_lang"
  local out_fname="$pair_dir/comet/${src_lang}_${tgt_lang}_${sys_name}_comet.txt"

  [[ -f "$pred_fname" ]] || return 0

  echo "${src_lang}-${tgt_lang}"
  # Compute COMET scores using the `comet-score`
  if ! comet-score -s "$src_fname" -t "$pred_fname" -r "$ref_fname" \
    --gpus 1 --model "$model" --quiet --only_system > "$out_fname"; then
    echo "warning: comet-score failed for $pred_fname" >&2
    return 1
  fi
}

# Runs `comet-compare` over every prediction file of one translation direction.
# Arguments:
#   $1 - language-pair directory (<devtest_data_dir>/eng_Latn-<lang>)
#   $2 - source language code
#   $3 - target language code
# Outputs: writes <pair_dir>/comet/<src>_<tgt>_comet_stat.txt
compare_systems() {
  local pair_dir=$1 src_lang=$2 tgt_lang=$3
  local out_fname="$pair_dir/comet/${src_lang}_${tgt_lang}_comet_stat.txt"
  local -a pred_fnames

  # Keep the directory part quoted and leave only the trailing `*` to glob, so
  # paths containing whitespace survive; each match becomes one array element.
  pred_fnames=("$pair_dir/test.$tgt_lang.pred."*)

  # An unmatched glob stays literal (or expands to nothing under nullglob);
  # either way there is nothing to compare, so do not create an output file.
  [[ -e "${pred_fnames[0]}" ]] || return 0

  # Compute COMET significance scores using the `comet-compare`
  if ! comet-compare -s "$pair_dir/test.$src_lang" -t "${pred_fnames[@]}" \
    -r "$pair_dir/test.$tgt_lang" > "$out_fname"; then
    echo "warning: comet-compare failed for ${src_lang}-${tgt_lang}" >&2
    return 1
  fi
}

# Entry point: validates the arguments, then scores and compares every
# predefined system for every predefined language in both directions.
main() {
  if [[ $# -lt 1 || -z $1 ]]; then
    # Without this guard an empty path would make the script write under `/`.
    echo "Usage: ${0##*/} <devtest_data_dir> [model_name]" >&2
    return 2
  fi

  local devtest_data_dir=$1                    # path to the evaluation directory
  local model_name=${2-"Unbabel/wmt22-comet-da"} # name of the model checkpoint

  if [[ ! -d "$devtest_data_dir" ]]; then
    echo "error: evaluation directory not found: $devtest_data_dir" >&2
    return 1
  fi

  date

  local status=0 lang sys pair_dir
  # iterate over the list of predefined languages
  for lang in "${langs[@]}"; do
    pair_dir="$devtest_data_dir/eng_Latn-$lang"
    if ! mkdir -p "$pair_dir/comet"; then
      status=1
      continue
    fi

    # --------------------------------------------------------------
    # COMET score computation
    # --------------------------------------------------------------
    # iterate over the list of predefined systems
    for sys in "${system[@]}"; do
      echo "${sys}"
      # en - indic direction
      score_system "$pair_dir" eng_Latn "$lang" "$sys" "$model_name" || status=1
      # indic - en direction
      score_system "$pair_dir" "$lang" eng_Latn "$sys" "$model_name" || status=1
    done

    # --------------------------------------------------------------
    # COMET significance testing
    # --------------------------------------------------------------
    # en - indic direction
    compare_systems "$pair_dir" eng_Latn "$lang" || status=1
    # indic - en direction
    compare_systems "$pair_dir" "$lang" eng_Latn || status=1
  done

  return "$status"
}

# Must stay the last command so that its status becomes the script's exit status.
main "$@"