36 fst::SymbolTable *word_syms,
37 LabelPairVector *lpairs) {
42 while (getline(ki.
Stream(), line)) {
44 std::istringstream ss(line);
46 if (ss.fail() || !ss.eof()) {
47 KALDI_ERR <<
"Bad line in symbol list: "<< line
52 KALDI_ERR <<
"Can't find symbol in symbol table: " 53 << line <<
", file is: " 56 lpairs->emplace_back(lab, 0);
64 const LabelPairVector &wildcards,
69 fst::Map(ofst, fst::RmWeightMapper<fst::StdArc>());
70 fst::Project(ofst, fst::PROJECT_OUTPUT);
71 fst::Relabel(ofst, wildcards, wildcards);
74 fst::ArcSort(ofst, fst::StdILabelCompare());
83 Weight correct_cost(0.0);
84 Weight substitution_cost(1.0);
85 Weight insertion_cost(1.0);
86 Weight deletion_cost(1.0);
89 std::vector<Label> fst1syms, fst2syms;
95 for (
size_t i = 0;
i < fst1syms.size();
i++)
96 pfst->AddArc(0, StdArc(fst1syms[
i], 0, deletion_cost, 0));
98 for (
size_t i = 0; i < fst2syms.size(); i++)
99 pfst->AddArc(0, StdArc(0, fst2syms[i], insertion_cost, 0));
102 for (
size_t i = 0; i < fst1syms.size(); i++) {
103 Label label1 = fst1syms[
i];
104 for (
size_t j = 0;
j < fst2syms.size();
j++) {
105 Label label2 = fst2syms[
j];
106 Weight cost(label1 == label2 ? correct_cost : substitution_cost);
107 pfst->AddArc(0,
StdArc(label1, label2, cost, 0));
110 pfst->SetFinal(0, Weight::One());
111 ArcSort(pfst, fst::StdOLabelCompare());
116 int32 *substitutions,
122 *correct = *substitutions = *insertions = *deletions = *num_words = 0;
125 StateId src = fst.Start();
126 while (fst.Final(src)== Weight::Zero()) {
127 for (fst::ArcIterator<fst::StdVectorFst> aiter(fst, src);
128 !aiter.Done(); aiter.Next()) {
130 if (arc.ilabel == arc.olabel && arc.ilabel != 0) {
133 }
else if (arc.ilabel == 0 && arc.olabel != 0) {
136 }
else if (arc.ilabel != 0 && arc.olabel == 0) {
138 }
else if (arc.ilabel != 0 && arc.olabel != 0) {
154 std::cerr <<
" " << name <<
" has " << numstates <<
" states" << std::endl;
155 std::stringstream ss;
156 ss << name << key <<
".fst";
158 return(fst.Start() == fst::kNoStateId);
165 int main(
int argc,
char *argv[]) {
167 using namespace kaldi;
168 using fst::SymbolTable;
169 using fst::VectorFst;
172 typedef kaldi::int64 int64;
177 "Finds the path having the smallest edit-distance between a lattice\n" 178 "and a reference string.\n" 180 "Usage: lattice-oracle [options] <test-lattice-rspecifier> \\\n" 181 " <reference-rspecifier> \\\n" 182 " <transcriptions-wspecifier> \\\n" 183 " [<edit-distance-wspecifier>]\n" 184 " e.g.: lattice-oracle ark:lat.1 'ark:sym2int.pl -f 2- \\\n" 185 " data/lang/words.txt <data/test/text|' ark,t:-\n" 187 "Note the --write-lattices option by which you can write out the\n" 188 "optimal path as a lattice.\n" 189 "Note: you can use this program to compute the n-best oracle WER by\n" 190 "first piping the input lattices through lattice-to-nbest and then\n" 191 "nbest-to-lattice.\n";
195 std::string word_syms_filename;
196 std::string wild_syms_rxfilename;
197 std::string wildcard_symbols;
198 std::string lats_wspecifier;
200 po.
Register(
"word-symbol-table", &word_syms_filename,
201 "Symbol table for words [for debug output]");
202 po.
Register(
"wildcard-symbols-list", &wild_syms_rxfilename,
"Filename " 203 "(generally rxfilename) for file containing text-form list of " 204 "symbols that don't count as errors; this option requires " 205 "--word-symbol-table. Deprecated; use --wildcard-symbols " 207 po.
Register(
"wildcard-symbols", &wildcard_symbols,
208 "Colon-separated list of integer ids of symbols that " 209 "don't count as errors. Preferred alternative to deprecated " 210 "option --wildcard-symbols-list.");
211 po.
Register(
"write-lattices", &lats_wspecifier,
"If supplied, write the " 212 "lattice that contains only the oracle path to the given " 222 std::string lats_rspecifier = po.
GetArg(1),
223 reference_rspecifier = po.
GetArg(2),
224 transcriptions_wspecifier = po.
GetArg(3),
225 edit_distance_wspecifier = po.
GetOptArg(4);
231 Int32Writer edit_distance_writer(edit_distance_wspecifier);
234 fst::SymbolTable *word_syms = NULL;
235 if (word_syms_filename !=
"")
236 if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
237 KALDI_ERR <<
"Could not read symbol table from file " 238 << word_syms_filename;
241 if (wild_syms_rxfilename !=
"") {
242 KALDI_WARN <<
"--wildcard-symbols-list option deprecated.";
243 KALDI_ASSERT(wildcard_symbols.empty() &&
"Do not use both " 244 "--wildcard-symbols and --wildcard-symbols-list options.");
245 KALDI_ASSERT(word_syms != NULL &&
"--wildcard-symbols-list option " 246 "requires --word-symbol-table option");
249 std::vector<fst::StdArc::Label> wildcard_symbols_vec;
251 &wildcard_symbols_vec)) {
252 KALDI_ERR <<
"Expected colon-separated list of integers for " 253 <<
"--wildcard-symbols option, got: " << wildcard_symbols;
255 for (
size_t i = 0;
i < wildcard_symbols_vec.size();
i++)
256 wildcards.emplace_back(wildcard_symbols_vec[
i], 0);
259 int32 n_done = 0, n_fail = 0;
260 int32 tot_correct = 0, tot_substitutions = 0,
261 tot_insertions = 0, tot_deletions = 0, tot_words = 0;
263 for (; !lattice_reader.
Done(); lattice_reader.
Next()) {
264 std::string key = lattice_reader.
Key();
266 std::cerr <<
"Lattice " << key <<
" read." << std::endl;
269 VectorFst<StdArc> lattice_fst;
271 CheckFst(lattice_fst,
"lattice_fst_", key);
274 if (!reference_reader.
HasKey(key)) {
275 KALDI_WARN <<
"No reference present for utterance " << key;
279 const std::vector<int32> &reference = reference_reader.
Value(key);
280 VectorFst<StdArc> reference_fst;
284 fst::Relabel(&reference_fst, wildcards, wildcards);
285 CheckFst(reference_fst,
"reference_fst_", key);
292 VectorFst<StdArc> edit_ref_fst;
293 fst::Compose(edit_distance_fst, reference_fst, &edit_ref_fst);
294 CheckFst(edit_ref_fst,
"composed_", key);
297 fst::ArcSort(&edit_ref_fst, fst::StdILabelCompare());
300 VectorFst<StdArc> result_fst;
301 fst::Compose(lattice_fst, edit_ref_fst, &result_fst);
302 CheckFst(result_fst,
"result_", key);
305 VectorFst<StdArc> best_path;
306 fst::ShortestPath(result_fst, &best_path);
307 CheckFst(best_path,
"best_path_", key);
309 if (best_path.Start() == fst::kNoStateId) {
310 KALDI_WARN <<
"Best-path failed for key " << key;
314 int32 correct, substitutions, insertions, deletions, num_words;
316 &insertions, &deletions, &num_words);
317 int32 tot_errs = substitutions + insertions + deletions;
318 if (edit_distance_wspecifier !=
"")
319 edit_distance_writer.
Write(key, tot_errs);
320 KALDI_LOG <<
"%WER " << (100.*tot_errs) / num_words <<
" [ " << tot_errs
321 <<
" / " << num_words <<
", " << insertions <<
" insertions, " 322 << deletions <<
" deletions, " << substitutions <<
" sub ]";
323 tot_correct += correct;
324 tot_substitutions += substitutions;
325 tot_insertions += insertions;
326 tot_deletions += deletions;
327 tot_words += num_words;
329 std::vector<int32> oracle_words;
330 std::vector<int32> reference_words;
333 &reference_words, &weight);
334 KALDI_LOG <<
"For utterance " << key <<
", best cost " << weight;
335 if (transcriptions_wspecifier !=
"")
336 transcriptions_writer.
Write(key, oracle_words);
337 if (word_syms != NULL) {
338 std::cerr << key <<
" (oracle) ";
339 for (
size_t i = 0;
i < oracle_words.size();
i++) {
340 std::string s = word_syms->Find(oracle_words[
i]);
343 <<
" not in symbol table.";
344 std::cerr << s <<
' ';
346 std::cerr <<
'\n' << key <<
" (reference) ";
347 for (
size_t i = 0;
i < reference_words.size();
i++) {
348 std::string s = word_syms->Find(reference_words[
i]);
351 <<
" not in symbol table.";
352 std::cerr << s <<
' ';
358 if (lats_wspecifier !=
"") {
366 fst::ArcSort(&clat, fst::ILabelCompare<CompactLatticeArc>());
367 fst::Compose(oracle_clat_mask, clat, &oracle_clat_mask);
368 fst::ShortestPath(oracle_clat_mask, &oracle_clat);
369 fst::Project(&oracle_clat, fst::PROJECT_OUTPUT);
372 if (oracle_clat.Start() == fst::kNoStateId) {
373 KALDI_WARN <<
"Failed to find the oracle path in the original " 374 <<
"lattice: " << key;
376 lats_writer.
Write(key, oracle_clat);
383 int32 tot_errs = tot_substitutions + tot_deletions + tot_insertions;
385 KALDI_LOG <<
"Overall %WER " << (100.*tot_errs)/tot_words <<
" [ " 386 << tot_errs <<
" / " << tot_words <<
", " << tot_insertions
387 <<
" insertions, " << tot_deletions <<
" deletions, " 388 << tot_substitutions <<
" substitutions ]";
389 KALDI_LOG <<
"Scored " << n_done <<
" lattices, " << n_fail
390 <<
" not present in ref.";
391 }
catch(
const std::exception &e) {
392 std::cerr << e.what();
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void ReadSymbolList(const std::string &rxfilename, fst::SymbolTable *word_syms, LabelPairVector *lpairs)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
fst::StdVectorFst StdVectorFst
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
void GetInputSymbols(const Fst< Arc > &fst, bool include_eps, std::vector< I > *symbols)
GetInputSymbols gets the list of symbols on the input of fst (including epsilon, if include_eps == true)
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void MakeLinearAcceptor(const std::vector< I > &labels, MutableFst< Arc > *ofst)
Creates unweighted linear acceptor from symbol sequence.
void ConvertLatticeToUnweightedAcceptor(const kaldi::Lattice &ilat, const LabelPairVector &wildcards, fst::StdVectorFst *ofst)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
bool CheckFst(const fst::StdVectorFst &fst, string name, string key)
const T & Value(const std::string &key)
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
fst::VectorFst< LatticeArc > Lattice
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
fst::VectorFst< CompactLatticeArc > CompactLattice
fst::StdArc::Weight Weight
int main(int argc, char *argv[])
int NumArgs() const
Number of positional parameters (cf. argc-1).
void CreateEditDistance(const fst::StdVectorFst &fst1, const fst::StdVectorFst &fst2, fst::StdVectorFst *pfst)
std::vector< std::pair< Label, Label > > LabelPairVector
#define KALDI_ASSERT(cond)
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
void TopSortCompactLatticeIfNeeded(CompactLattice *clat)
Topologically sort the compact lattice if not already topologically sorted.
void GetOutputSymbols(const Fst< Arc > &fst, bool include_eps, std::vector< I > *symbols)
GetOutputSymbols gets the list of symbols on the output of fst (including epsilon, if include_eps == true)
void CountErrors(const fst::StdVectorFst &fst, int32 *correct, int32 *substitutions, int32 *insertions, int32 *deletions, int32 *num_words)
std::string GetOptArg(int param) const