-
Notifications
You must be signed in to change notification settings - Fork 295
/
prepare.sh
executable file
·97 lines (75 loc) · 2.02 KB
/
prepare.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env bash
# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail
stage=-1
stop_stage=100
dl_dir=$PWD/download
lang_dir=data/lang_phone
lm_dir=data/lm
. shared/parse_options.sh || exit 1
mkdir -p $lang_dir
mkdir -p $lm_dir
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
log "dl_dir: $dl_dir"
if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
log "Stage 0: Download data"
mkdir -p $dl_dir
if [ ! -f $dl_dir/waves_yesno/.completed ]; then
lhotse download yesno $dl_dir
fi
fi
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
log "Stage 1: Prepare yesno manifest"
mkdir -p data/manifests
lhotse prepare yesno $dl_dir/waves_yesno data/manifests
fi
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
log "Stage 2: Compute fbank for yesno"
mkdir -p data/fbank
./local/compute_fbank_yesno.py
fi
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
log "Stage 3: Prepare lang"
# NOTE: "<UNK> SIL" is added for implementation convenience
# as the graph compiler code requires that there is a OOV word
# in the lexicon.
(
echo "<SIL> SIL"
echo "YES Y"
echo "NO N"
echo "<UNK> SIL"
) > $lang_dir/lexicon.txt
./local/prepare_lang.py
./local/prepare_lang_fst.py --lang-dir ./data/lang_phone --has-silence 1
fi
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
log "Stage 4: Prepare G"
# We use a unigram G
cat <<EOF > $lm_dir/G.arpa
\data\\
ngram 1=4
\1-grams:
-1 NO
-1 YES
-99 <s>
-1 </s>
\end\\
EOF
if [ ! -f $lm_dir/G.fst.txt ]; then
python3 -m kaldilm \
--read-symbol-table="$lang_dir/words.txt" \
--disambig-symbol='#0' \
$lm_dir/G.arpa > $lm_dir/G.fst.txt
fi
fi
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
log "Stage 5: Compile HLG"
if [ ! -f $lang_dir/HLG.pt ]; then
./local/compile_hlg.py --lang-dir $lang_dir
fi
fi