lattice-expand-ngram.cc
Go to the documentation of this file.
1 // latbin/lattice-expand-ngram.cc
2 
3 // Copyright 2014 Telepoint Global Hosting Service, LLC. (Author: David Snyder)
4 // See ../../COPYING for clarification regarding multiple authors
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 //
10 // http://www.apache.org/licenses/LICENSE-2.0
11 //
12 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
14 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
15 // MERCHANTABLITY OR NON-INFRINGEMENT.
16 // See the Apache 2 License for the specific language governing permissions and
17 // limitations under the License.
18 
19 #include "base/kaldi-common.h"
20 #include "util/common-utils.h"
21 #include "fstext/fstext-lib.h"
22 #include "lat/kaldi-lattice.h"
23 
24 int main(int argc, char *argv[]) {
25  try {
26  using namespace kaldi;
27  typedef kaldi::int32 int32;
28  typedef kaldi::int64 int64;
29  using fst::SymbolTable;
30  using fst::VectorFst;
32 
33  const char *usage =
34  "Expand lattices so that each arc has a unique n-label history, for\n"
35  "a specified n (defaults to 3).\n"
36  "Usage: lattice-expand-ngram [options] lattice-rspecifier "
37  "lattice-wspecifier\n"
38  "e.g.: lattice-expand-ngram --n=3 ark:lat ark:expanded_lat\n";
39 
40  ParseOptions po(usage);
41  int32 n = 3;
42 
43  std::string word_syms_filename;
44  po.Register("n", &n, "n-gram context to expand to.");
45 
46  po.Read(argc, argv);
47 
48  if (po.NumArgs() != 2) {
49  po.PrintUsage();
50  exit(1);
51  }
52 
53  KALDI_ASSERT(n > 0);
54 
55  std::string lats_rspecifier = po.GetArg(1),
56  lats_wspecifier = po.GetOptArg(2);
57 
59 
60  SequentialCompactLatticeReader lat_reader(lats_rspecifier);
61  CompactLatticeWriter lat_writer(lats_wspecifier);
62 
63  int32 n_done = 0, n_fail = 0;
64 
65  for (; !lat_reader.Done(); lat_reader.Next()) {
66  std::string key = lat_reader.Key();
67  KALDI_LOG << "Processing lattice for key " << key;
68  CompactLattice lat = lat_reader.Value();
69  CompactLattice expanded_lat;
70  ComposeDeterministicOnDemand(lat, &expand_fst, &expanded_lat);
71  if (expanded_lat.Start() == fst::kNoStateId) {
72  KALDI_WARN << "Empty lattice for utterance " << key;
73  n_fail++;
74  } else {
75  if (lat.NumStates() == expanded_lat.NumStates()) {
76  KALDI_LOG << "Lattice for key " << key
77  << " did not need to be expanded for order " << n << ".";
78  } else {
79  KALDI_LOG << "Lattice expanded from " << lat.NumStates() << " to "
80  << expanded_lat.NumStates() << " states for order " << n << ".";
81  }
82  lat_writer.Write(key, expanded_lat);
83  n_done++;
84  }
85  lat_reader.FreeCurrent();
86  }
87  KALDI_LOG << "Processed " << n_done << " lattices with " << n_fail
88  << " failures.";
89  return 0;
90  } catch(const std::exception &e) {
91  std::cerr << e.what();
92  return -1;
93  }
94 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void ComposeDeterministicOnDemand(const Fst< Arc > &fst1, DeterministicOnDemandFst< Arc > *fst2, MutableFst< Arc > *fst_composed)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int main(int argc, char *argv[])
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
struct rnnlm::@11::@12 n
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
int NumArgs() const
Number of positional parameters (c.f. argc-1).
The class UnweightedNgramFst is a DeterministicOnDemandFst whose states encode an n-gram history...
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
Definition: kaldi-lattice.h:42
#define KALDI_LOG
Definition: kaldi-error.h:153
std::string GetOptArg(int param) const