167     using namespace kaldi;
   168     using fst::SymbolTable;
   169     using fst::VectorFst;
   172     typedef kaldi::int64 int64;
   177         "Finds the path having the smallest edit-distance between a lattice\n"   178         "and a reference string.\n"   180         "Usage: lattice-oracle [options] <test-lattice-rspecifier> \\\n"   181         "                                <reference-rspecifier> \\\n"   182         "                                <transcriptions-wspecifier> \\\n"   183         "                                [<edit-distance-wspecifier>]\n"   184         " e.g.: lattice-oracle ark:lat.1 'ark:sym2int.pl -f 2- \\\n"   185         "                       data/lang/words.txt <data/test/text|' ark,t:-\n"   187         "Note the --write-lattices option by which you can write out the\n"   188         "optimal path as a lattice.\n"   189         "Note: you can use this program to compute the n-best oracle WER by\n"   190         "first piping the input lattices through lattice-to-nbest and then\n"   191         "nbest-to-lattice.\n";
   195     std::string word_syms_filename;
   196     std::string wild_syms_rxfilename;
   197     std::string wildcard_symbols;
   198     std::string lats_wspecifier;
   200     po.Register(
"word-symbol-table", &word_syms_filename,
   201                 "Symbol table for words [for debug output]");
   202     po.Register(
"wildcard-symbols-list", &wild_syms_rxfilename, 
"Filename "   203                 "(generally rxfilename) for file containing text-form list of "   204                 "symbols that don't count as errors; this option requires "   205                 "--word-symbol-table. Deprecated; use --wildcard-symbols "   207     po.Register(
"wildcard-symbols", &wildcard_symbols,
   208                 "Colon-separated list of integer ids of symbols that "   209                 "don't count as errors.  Preferred alternative to deprecated "   210                 "option --wildcard-symbols-list.");
   211     po.Register(
"write-lattices", &lats_wspecifier, 
"If supplied, write the "   212                 "lattice that contains only the oracle path to the given "   217     if (po.NumArgs() != 3 && po.NumArgs() != 4) {
   222     std::string lats_rspecifier = po.GetArg(1),
   223         reference_rspecifier = po.GetArg(2),
   224         transcriptions_wspecifier = po.GetArg(3),
   225         edit_distance_wspecifier = po.GetOptArg(4);
   231     Int32Writer edit_distance_writer(edit_distance_wspecifier);
   234     fst::SymbolTable *word_syms = NULL;
   235     if (word_syms_filename != 
"")
   236       if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
   237         KALDI_ERR << 
"Could not read symbol table from file "   238                   << word_syms_filename;
   241     if (wild_syms_rxfilename != 
"") {
   242       KALDI_WARN << 
"--wildcard-symbols-list option deprecated.";
   243       KALDI_ASSERT(wildcard_symbols.empty() && 
"Do not use both "   244                    "--wildcard-symbols and --wildcard-symbols-list options.");
   245       KALDI_ASSERT(word_syms != NULL && 
"--wildcard-symbols-list option "   246                    "requires --word-symbol-table option");
   249       std::vector<fst::StdArc::Label> wildcard_symbols_vec;
   251                                  &wildcard_symbols_vec)) {
   252         KALDI_ERR << 
"Expected colon-separated list of integers for "   253                   << 
"--wildcard-symbols option, got: " << wildcard_symbols;
   255       for (
size_t i = 0; 
i < wildcard_symbols_vec.size(); 
i++)
   256         wildcards.emplace_back(wildcard_symbols_vec[
i], 0);
   259     int32 n_done = 0, n_fail = 0;
   260     int32 tot_correct = 0, tot_substitutions = 0,
   261           tot_insertions = 0, tot_deletions = 0, tot_words = 0;
   263     for (; !lattice_reader.Done(); lattice_reader.Next()) {
   264       std::string key = lattice_reader.Key();
   265       const Lattice &lat = lattice_reader.Value();
   266       std::cerr << 
"Lattice " << key << 
" read." << std::endl;
   269       VectorFst<StdArc> lattice_fst;
   271       CheckFst(lattice_fst, 
"lattice_fst_", key);
   274       if (!reference_reader.HasKey(key)) {
   275         KALDI_WARN << 
"No reference present for utterance " << key;
   279       const std::vector<int32> &reference = reference_reader.Value(key);
   280       VectorFst<StdArc> reference_fst;
   284       fst::Relabel(&reference_fst, wildcards, wildcards);
   285       CheckFst(reference_fst, 
"reference_fst_", key);
   292       VectorFst<StdArc> edit_ref_fst;
   293       fst::Compose(edit_distance_fst, reference_fst, &edit_ref_fst);
   294       CheckFst(edit_ref_fst, 
"composed_", key);
   297       fst::ArcSort(&edit_ref_fst, fst::StdILabelCompare());
   300       VectorFst<StdArc> result_fst;
   301       fst::Compose(lattice_fst, edit_ref_fst, &result_fst);
   302       CheckFst(result_fst, 
"result_", key);
   305       VectorFst<StdArc> best_path;
   306       fst::ShortestPath(result_fst, &best_path);
   307       CheckFst(best_path, 
"best_path_", key);
   309       if (best_path.Start() == fst::kNoStateId) {
   310         KALDI_WARN << 
"Best-path failed for key " << key;
   314         int32 correct, substitutions, insertions, deletions, num_words;
   316                     &insertions, &deletions, &num_words);
   317         int32 tot_errs = substitutions + insertions + deletions;
   318         if (edit_distance_wspecifier != 
"")
   319           edit_distance_writer.Write(key, tot_errs);
   320         KALDI_LOG << 
"%WER " << (100.*tot_errs) / num_words << 
" [ " << tot_errs
   321                   << 
" / " << num_words << 
", " << insertions << 
" insertions, "   322                   << deletions << 
" deletions, " << substitutions << 
" sub ]";
   323         tot_correct += correct;
   324         tot_substitutions += substitutions;
   325         tot_insertions += insertions;
   326         tot_deletions += deletions;
   327         tot_words += num_words;
   329         std::vector<int32> oracle_words;
   330         std::vector<int32> reference_words;
   333                                 &reference_words, &weight);
   334         KALDI_LOG << 
"For utterance " << key << 
", best cost " << weight;
   335         if (transcriptions_wspecifier != 
"")
   336           transcriptions_writer.Write(key, oracle_words);
   337         if (word_syms != NULL) {
   338           std::cerr << key << 
" (oracle) ";
   339           for (
size_t i = 0; 
i < oracle_words.size(); 
i++) {
   340             std::string s = word_syms->Find(oracle_words[
i]);
   343                   << 
" not in symbol table.";
   344             std::cerr << s << 
' ';
   346           std::cerr << 
'\n' << key << 
" (reference) ";
   347           for (
size_t i = 0; i < reference_words.size(); i++) {
   348             std::string s = word_syms->Find(reference_words[i]);
   351                         << 
" not in symbol table.";
   352             std::cerr << s << 
' ';
   358         if (lats_wspecifier != 
"") {
   366           fst::ArcSort(&clat, fst::ILabelCompare<CompactLatticeArc>());
   367           fst::Compose(oracle_clat_mask, clat, &oracle_clat_mask);
   368           fst::ShortestPath(oracle_clat_mask, &oracle_clat);
   369           fst::Project(&oracle_clat, fst::PROJECT_OUTPUT);
   372           if (oracle_clat.Start() == fst::kNoStateId) {
   373             KALDI_WARN << 
"Failed to find the oracle path in the original "   374                        << 
"lattice: " << key;
   376             lats_writer.Write(key, oracle_clat);
   383     int32 tot_errs = tot_substitutions + tot_deletions + tot_insertions;
   385     KALDI_LOG << 
"Overall %WER " << (100.*tot_errs)/tot_words << 
" [ "   386               << tot_errs << 
" / " << tot_words << 
", " << tot_insertions
   387               << 
" insertions, " << tot_deletions << 
" deletions, "   388               << tot_substitutions << 
" substitutions ]";
   389     KALDI_LOG << 
"Scored " << n_done << 
" lattices, " << n_fail
   390               << 
" not present in ref.";
   391   } 
catch(
const std::exception &e) {
   392     std::cerr << e.what();
 fst::StdArc::StateId StateId
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
void ReadSymbolList(const std::string &rxfilename, fst::SymbolTable *word_syms, LabelPairVector *lpairs)
 
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers, using the characters in delim as separators.
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
fst::StdVectorFst StdVectorFst
 
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST. 
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
void MakeLinearAcceptor(const std::vector< I > &labels, MutableFst< Arc > *ofst)
Creates unweighted linear acceptor from symbol sequence. 
 
void ConvertLatticeToUnweightedAcceptor(const kaldi::Lattice &ilat, const LabelPairVector &wildcards, fst::StdVectorFst *ofst)
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
bool CheckFst(const fst::StdVectorFst &fst, string name, string key)
 
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST. 
 
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
 
fst::VectorFst< LatticeArc > Lattice
 
fst::VectorFst< CompactLatticeArc > CompactLattice
 
fst::StdArc::Weight Weight
 
void CreateEditDistance(const fst::StdVectorFst &fst1, const fst::StdVectorFst &fst2, fst::StdVectorFst *pfst)
 
std::vector< std::pair< Label, Label > > LabelPairVector
 
#define KALDI_ASSERT(cond)
 
void TopSortCompactLatticeIfNeeded(CompactLattice *clat)
Topologically sort the compact lattice if not already topologically sorted. 
 
void CountErrors(const fst::StdVectorFst &fst, int32 *correct, int32 *substitutions, int32 *insertions, int32 *deletions, int32 *num_words)