167 using namespace kaldi;
168 using fst::SymbolTable;
169 using fst::VectorFst;
172 typedef kaldi::int64 int64;
177 "Finds the path having the smallest edit-distance between a lattice\n" 178 "and a reference string.\n" 180 "Usage: lattice-oracle [options] <test-lattice-rspecifier> \\\n" 181 " <reference-rspecifier> \\\n" 182 " <transcriptions-wspecifier> \\\n" 183 " [<edit-distance-wspecifier>]\n" 184 " e.g.: lattice-oracle ark:lat.1 'ark:sym2int.pl -f 2- \\\n" 185 " data/lang/words.txt <data/test/text|' ark,t:-\n" 187 "Note the --write-lattices option by which you can write out the\n" 188 "optimal path as a lattice.\n" 189 "Note: you can use this program to compute the n-best oracle WER by\n" 190 "first piping the input lattices through lattice-to-nbest and then\n" 191 "nbest-to-lattice.\n";
195 std::string word_syms_filename;
196 std::string wild_syms_rxfilename;
197 std::string wildcard_symbols;
198 std::string lats_wspecifier;
200 po.Register(
"word-symbol-table", &word_syms_filename,
201 "Symbol table for words [for debug output]");
202 po.Register(
"wildcard-symbols-list", &wild_syms_rxfilename,
"Filename " 203 "(generally rxfilename) for file containing text-form list of " 204 "symbols that don't count as errors; this option requires " 205 "--word-symbol-table. Deprecated; use --wildcard-symbols " 207 po.Register(
"wildcard-symbols", &wildcard_symbols,
208 "Colon-separated list of integer ids of symbols that " 209 "don't count as errors. Preferred alternative to deprecated " 210 "option --wildcard-symbols-list.");
211 po.Register(
"write-lattices", &lats_wspecifier,
"If supplied, write the " 212 "lattice that contains only the oracle path to the given " 217 if (po.NumArgs() != 3 && po.NumArgs() != 4) {
222 std::string lats_rspecifier = po.GetArg(1),
223 reference_rspecifier = po.GetArg(2),
224 transcriptions_wspecifier = po.GetArg(3),
225 edit_distance_wspecifier = po.GetOptArg(4);
231 Int32Writer edit_distance_writer(edit_distance_wspecifier);
234 fst::SymbolTable *word_syms = NULL;
235 if (word_syms_filename !=
"")
236 if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
237 KALDI_ERR <<
"Could not read symbol table from file " 238 << word_syms_filename;
241 if (wild_syms_rxfilename !=
"") {
242 KALDI_WARN <<
"--wildcard-symbols-list option deprecated.";
243 KALDI_ASSERT(wildcard_symbols.empty() &&
"Do not use both " 244 "--wildcard-symbols and --wildcard-symbols-list options.");
245 KALDI_ASSERT(word_syms != NULL &&
"--wildcard-symbols-list option " 246 "requires --word-symbol-table option");
249 std::vector<fst::StdArc::Label> wildcard_symbols_vec;
251 &wildcard_symbols_vec)) {
252 KALDI_ERR <<
"Expected colon-separated list of integers for " 253 <<
"--wildcard-symbols option, got: " << wildcard_symbols;
255 for (
size_t i = 0;
i < wildcard_symbols_vec.size();
i++)
256 wildcards.emplace_back(wildcard_symbols_vec[
i], 0);
259 int32 n_done = 0, n_fail = 0;
260 int32 tot_correct = 0, tot_substitutions = 0,
261 tot_insertions = 0, tot_deletions = 0, tot_words = 0;
263 for (; !lattice_reader.Done(); lattice_reader.Next()) {
264 std::string key = lattice_reader.Key();
265 const Lattice &lat = lattice_reader.Value();
266 std::cerr <<
"Lattice " << key <<
" read." << std::endl;
269 VectorFst<StdArc> lattice_fst;
271 CheckFst(lattice_fst,
"lattice_fst_", key);
274 if (!reference_reader.HasKey(key)) {
275 KALDI_WARN <<
"No reference present for utterance " << key;
279 const std::vector<int32> &reference = reference_reader.Value(key);
280 VectorFst<StdArc> reference_fst;
284 fst::Relabel(&reference_fst, wildcards, wildcards);
285 CheckFst(reference_fst,
"reference_fst_", key);
292 VectorFst<StdArc> edit_ref_fst;
293 fst::Compose(edit_distance_fst, reference_fst, &edit_ref_fst);
294 CheckFst(edit_ref_fst,
"composed_", key);
297 fst::ArcSort(&edit_ref_fst, fst::StdILabelCompare());
300 VectorFst<StdArc> result_fst;
301 fst::Compose(lattice_fst, edit_ref_fst, &result_fst);
302 CheckFst(result_fst,
"result_", key);
305 VectorFst<StdArc> best_path;
306 fst::ShortestPath(result_fst, &best_path);
307 CheckFst(best_path,
"best_path_", key);
309 if (best_path.Start() == fst::kNoStateId) {
310 KALDI_WARN <<
"Best-path failed for key " << key;
314 int32 correct, substitutions, insertions, deletions, num_words;
316 &insertions, &deletions, &num_words);
317 int32 tot_errs = substitutions + insertions + deletions;
318 if (edit_distance_wspecifier !=
"")
319 edit_distance_writer.Write(key, tot_errs);
320 KALDI_LOG <<
"%WER " << (100.*tot_errs) / num_words <<
" [ " << tot_errs
321 <<
" / " << num_words <<
", " << insertions <<
" insertions, " 322 << deletions <<
" deletions, " << substitutions <<
" sub ]";
323 tot_correct += correct;
324 tot_substitutions += substitutions;
325 tot_insertions += insertions;
326 tot_deletions += deletions;
327 tot_words += num_words;
329 std::vector<int32> oracle_words;
330 std::vector<int32> reference_words;
333 &reference_words, &weight);
334 KALDI_LOG <<
"For utterance " << key <<
", best cost " << weight;
335 if (transcriptions_wspecifier !=
"")
336 transcriptions_writer.Write(key, oracle_words);
337 if (word_syms != NULL) {
338 std::cerr << key <<
" (oracle) ";
339 for (
size_t i = 0;
i < oracle_words.size();
i++) {
340 std::string s = word_syms->Find(oracle_words[
i]);
343 <<
" not in symbol table.";
344 std::cerr << s <<
' ';
346 std::cerr <<
'\n' << key <<
" (reference) ";
347 for (
size_t i = 0; i < reference_words.size(); i++) {
348 std::string s = word_syms->Find(reference_words[i]);
351 <<
" not in symbol table.";
352 std::cerr << s <<
' ';
358 if (lats_wspecifier !=
"") {
366 fst::ArcSort(&clat, fst::ILabelCompare<CompactLatticeArc>());
367 fst::Compose(oracle_clat_mask, clat, &oracle_clat_mask);
368 fst::ShortestPath(oracle_clat_mask, &oracle_clat);
369 fst::Project(&oracle_clat, fst::PROJECT_OUTPUT);
372 if (oracle_clat.Start() == fst::kNoStateId) {
373 KALDI_WARN <<
"Failed to find the oracle path in the original " 374 <<
"lattice: " << key;
376 lats_writer.Write(key, oracle_clat);
383 int32 tot_errs = tot_substitutions + tot_deletions + tot_insertions;
385 KALDI_LOG <<
"Overall %WER " << (100.*tot_errs)/tot_words <<
" [ " 386 << tot_errs <<
" / " << tot_words <<
", " << tot_insertions
387 <<
" insertions, " << tot_deletions <<
" deletions, " 388 << tot_substitutions <<
" substitutions ]";
389 KALDI_LOG <<
"Scored " << n_done <<
" lattices, " << n_fail
390 <<
" not present in ref.";
391 }
catch(
const std::exception &e) {
392 std::cerr << e.what();
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void ReadSymbolList(const std::string &rxfilename, fst::SymbolTable *word_syms, LabelPairVector *lpairs)
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
A templated class for writing objects to an archive or script file; see The Table concept...
fst::StdVectorFst StdVectorFst
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void MakeLinearAcceptor(const std::vector< I > &labels, MutableFst< Arc > *ofst)
Creates unweighted linear acceptor from symbol sequence.
void ConvertLatticeToUnweightedAcceptor(const kaldi::Lattice &ilat, const LabelPairVector &wildcards, fst::StdVectorFst *ofst)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
bool CheckFst(const fst::StdVectorFst &fst, string name, string key)
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
fst::VectorFst< LatticeArc > Lattice
fst::VectorFst< CompactLatticeArc > CompactLattice
fst::StdArc::Weight Weight
void CreateEditDistance(const fst::StdVectorFst &fst1, const fst::StdVectorFst &fst2, fst::StdVectorFst *pfst)
std::vector< std::pair< Label, Label > > LabelPairVector
#define KALDI_ASSERT(cond)
void TopSortCompactLatticeIfNeeded(CompactLattice *clat)
Topologically sort the compact lattice if not already topologically sorted.
void CountErrors(const fst::StdVectorFst &fst, int32 *correct, int32 *substitutions, int32 *insertions, int32 *deletions, int32 *num_words)