32 using namespace kaldi;
35 typedef kaldi::uint32 uint32;
36 typedef kaldi::uint64 uint64;
38 const char *usage =
"Computes the Actual Term-Weighted Value and prints it." 41 " compute-atwv [options] <nof-trials> <ref-rspecifier>" 42 " <hyp-rspecifier> [alignment-csv-filename]\n" 44 " compute-atwv 32485.4 ark:ref.1 ark:hyp.1 ali.csv\n" 46 " compute-atwv 32485.4 ark:ref.1 ark:hyp.1\n" 49 " a) the number of trials is usually equal to the size of the searched\n" 50 " collection in seconds\n" 51 " b the ref-rspecifier/hyp-rspecifier are the kaldi IO specifiers \n" 52 " for both the reference and the hypotheses (found hits), " 53 " respectively The format is the same for both of them. Each line\n" 54 " is of the following format\n" 56 " <KW-ID> <utterance-id> <start-frame> <end-frame> <score>\n\n" 58 " KW106-189 348 459 560 0.8\n" 60 " b) the alignment-csv-filename is an optional parameter. \n" 61 " If present, the alignment i.e. detailed information about what \n" 62 " hypotheses match up with which reference entries will be \n" 63 " generated. The alignemnt file format is equivalent to \n" 64 " the alignment file produced using the F4DE tool. However, we do" 65 " not set some fields and the utterance identifiers are numeric.\n" 66 " You can use the script utils/int2sym.pl and the utterance and \n" 67 " keyword maps to convert the numerical ids into text form\n" 68 " c) the scores are expected to be probabilities. Please note that\n" 69 " the output from the kws-search is in -log(probability).\n" 70 " d) compute-atwv does not perform any score normalization (it's just\n" 71 " for scoring purposes). Without score normalization/calibration\n" 72 " the performance of the search will be quite poor.\n";
77 int frames_per_sec = 100;
81 po.Register(
"frames-per-sec", &frames_per_sec,
82 "Number of feature vector frames per second. This is used only when" 83 "writing the alignment to a file");
87 if (po.NumArgs() < 3 || po.NumArgs() > 4) {
93 KALDI_ERR <<
"The duration parameter is not a number";
96 KALDI_ERR <<
"The duration is either negative or zero";
102 std::string ref_rspecifier = po.GetArg(2),
103 hyp_rspecifier = po.GetArg(3),
104 ali_output = po.GetOptArg(4);
107 ref_reader(ref_rspecifier);
109 for (; !ref_reader.Done(); ref_reader.Next()) {
110 std::string kwid = ref_reader.Key();
111 std::vector<double> vals = ref_reader.Value();
112 if (vals.size() != 4) {
113 KALDI_ERR <<
"Incorrect format of the reference file" 114 <<
" -- 4 entries expected, " << vals.size() <<
" given!\n" 118 aligner.AddRef(inst);
122 hyp_reader(hyp_rspecifier);
124 for (; !hyp_reader.Done(); hyp_reader.Next()) {
125 std::string kwid = hyp_reader.Key();
126 std::vector<double> vals = hyp_reader.Value();
127 if (vals.size() != 4) {
128 KALDI_ERR <<
"Incorrect format of the hypotheses file" 129 <<
" -- 4 entries expected, " << vals.size() <<
" given!\n" 133 aligner.AddHyp(inst);
136 KALDI_LOG <<
"Read " << aligner.nof_hyps() <<
" hypotheses";
137 KALDI_LOG <<
"Read " << aligner.nof_refs() <<
" references";
140 if (ali_output !=
"") {
142 fs.open(ali_output.c_str(), std::fstream::out);
148 scores.AddAlignment(ali);
150 std::cout <<
"aproximate ATWV = " 151 << std::fixed << std::setprecision(4)
152 << scores.Atwv() << std::endl;
153 std::cout <<
"aproximate STWV = " 154 << std::fixed << std::setprecision(4)
155 << scores.Stwv() << std::endl;
157 float mtwv, mtwv_threshold, otwv;
158 scores.GetOracleMeasures(&mtwv, &mtwv_threshold, &otwv);
160 std::cout <<
"aproximate MTWV = " 161 << std::fixed << std::setprecision(4)
162 << mtwv << std::endl;
163 std::cout <<
"aproximate MTWV threshold = " 164 << std::fixed << std::setprecision(4)
165 << mtwv_threshold << std::endl;
166 std::cout <<
"aproximate OTWV = " 167 << std::fixed << std::setprecision(4)
168 << otwv << std::endl;
169 }
catch(
const std::exception &e) {
170 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
void WriteCsv(std::iostream &os, const float frames_per_sec)
void Register(OptionsItf *opts)
void Register(OptionsItf *opts)