test.py
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright © Idiap Research Institute <[email protected]>
#
# SPDX-FileContributor: S. Pavankumar Dubagunta <[email protected]>
# SPDX-FileContributor: Mathew Magimai Doss <[email protected]>
# SPDX-FileContributor: Olivier Bornet <[email protected]>
# SPDX-FileContributor: Yannick Dayer <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-only
"""Provide a command line interface for testing a model's performance on a dataset."""
import argparse
from pathlib import Path
import keras
import numpy as np
from .rawdataset import RawDataset


def test(test_dir, model, output_dir, splice_size=25, verbose=0):
    """Score each feature and compute the accuracy of the model."""
    Path(output_dir).mkdir(exist_ok=True, parents=True)
    r = RawDataset(test_dir, splice_size=splice_size, mode="test")
    m = keras.models.load_model(model)
    spk_scores, spk_labels, spk_counts = {}, {}, {}
    for w, feat, label in r:
        pred = m.predict(feat, verbose=verbose)
        # Get the speaker ID. This is useful when each speaker has multiple utterances
        # and the results need to be calculated per speaker instead of per utterance.
        # You need to configure this line according to how the speaker ID can be
        # extracted from your data.
        # For example, the line below assumes that the basenames of the files start
        # with a speaker ID followed by an utterance ID, separated by a '_'.
        # spk = w.split('/')[-1].split('_')[0]
        # By default, we use the wav file name as the speaker ID, which means that
        # each wav file corresponds to one speaker.
        spk = w
        if spk not in spk_scores:
            spk_scores[spk] = np.sum(pred, axis=0)
            spk_counts[spk] = len(pred)
            # NOTE: Assuming the utterance labels are the same for each speaker.
            # Takes the label of the speaker's first utterance encountered.
            spk_labels[spk] = label[0]
        else:
            spk_scores[spk] += np.sum(pred, axis=0)
            spk_counts[spk] += len(pred)
    nb_correct = 0
    with (Path(output_dir) / "scores.txt").open("w") as f:
        for spk in spk_labels:
            label = spk_labels[spk]
            posterior = spk_scores[spk] / spk_counts[spk]
            prediction = np.argmax(posterior)
            print(spk, label, posterior, file=f)
            if prediction == label:
                nb_correct += 1
    accuracy = nb_correct / len(spk_labels)
    with (Path(output_dir) / "accuracy.dat").open("w") as f:
        print("accuracy", file=f)
        print(accuracy, file=f)


def main():
    """Test the trained model on a test set.

    Set the KERAS_BACKEND environment variable to torch or tensorflow.
    """
    parser = argparse.ArgumentParser(
        prog="rsclf-test",
        description=main.__doc__,
    )
    # fmt: off
    parser.add_argument(
        "--feature-dir", required=True,
        help="Path to the directory containing the features",
    )
    parser.add_argument(
        "--model-filename", required=True, help="Path to the .keras model",
    )
    parser.add_argument(
        "--output-dir", default="output-results",
        help="Output directory",
    )
    parser.add_argument(
        "--splice-size", type=int, default=25,
        help="Splice size for feature context",
    )
    parser.add_argument(
        "--verbose", type=int, default=0,
        help="Keras verbose level for fit and predict",
    )
    # fmt: on
    args = parser.parse_args()
    test(
        args.feature_dir,
        args.model_filename,
        args.output_dir,
        args.splice_size,
        args.verbose,
    )


if __name__ == "__main__":
    main()
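
# Example invocation (a minimal sketch; it assumes the package installs the
# "rsclf-test" console script declared above and that a trained .keras model
# already exists -- the directory and file names below are illustrative):
#
#   KERAS_BACKEND=tensorflow rsclf-test \
#       --feature-dir features/test \
#       --model-filename output-model/model.keras \
#       --output-dir output-results \
#       --splice-size 25
#
# The script writes per-speaker posteriors to <output-dir>/scores.txt and the
# overall accuracy to <output-dir>/accuracy.dat.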