lattice-determinize-pruned-parallel.cc
Go to the documentation of this file.
1 // latbin/lattice-determinize-pruned-parallel.cc
2 
3 // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 #include "base/kaldi-common.h"
20 #include "util/common-utils.h"
21 #include "lat/kaldi-lattice.h"
23 #include "lat/lattice-functions.h"
24 #include "lat/push-lattice.h"
25 #include "lat/minimize-lattice.h"
26 #include "util/kaldi-thread.h"
27 
28 namespace kaldi {
29 
30 class DeterminizeLatticeTask {
31  public:
32  // Initializer takes ownership of "lat".
35  std::string key,
36  BaseFloat acoustic_scale,
37  BaseFloat beam,
38  bool minimize,
39  Lattice *lat,
40  CompactLatticeWriter *clat_writer,
41  int32 *num_warn):
42  opts_(opts), key_(key), acoustic_scale_(acoustic_scale), beam_(beam),
43  minimize_(minimize), lat_(lat), clat_writer_(clat_writer),
44  num_warn_(num_warn) { }
45 
46  void operator () () {
47  Invert(lat_); // to get word labels on the input side.
48  // We apply the acoustic scale before determinization and will undo it
49  // afterward, since it can affect the result.
51  if (!TopSort(lat_)) {
52  KALDI_WARN << "Could not topologically sort lattice: this probably means it"
53  " has bad properties e.g. epsilon cycles. Your LM or lexicon might "
54  "be broken, e.g. LM with epsilon cycles or lexicon with empty words.";
55  (*num_warn_)++;
56  }
57  fst::ArcSort(lat_, fst::ILabelCompare<LatticeArc>());
59  KALDI_WARN << "For key " << key_ << ", determinization did not succeed"
60  "(partial output will be pruned tighter than the specified beam.)";
61  (*num_warn_)++;
62  }
63  delete lat_; // This is no longer needed so we can delete it now;
64  lat_ = NULL;
65  fst::Connect(&det_clat_); // remove states not leading to any final state,
66  if (minimize_) {
70  }
71  // Invert the original acoustic scaling
73  &det_clat_);
74  }
76  KALDI_VLOG(2) << "Wrote lattice with " << det_clat_.NumStates()
77  << " for key " << key_;
79  }
80  private:
82  std::string key_;
85  bool minimize_;
86  Lattice *lat_; // The lattice we're working on. Owned locally.
87  CompactLattice det_clat_; // The output of our process. Will be written
88  // to clat_writer_ in the destructor.
91 
92 };
93 
94 } // namespace kaldi
95 
96 
97 int main(int argc, char *argv[]) {
98  try {
99  using namespace kaldi;
100  typedef kaldi::int32 int32;
101 
102  const char *usage =
103  "Determinize lattices, keeping only the best path (sequence of acoustic states)\n"
104  "for each input-symbol sequence. This is a version of lattice-determnize-pruned\n"
105  "that accepts the --num-threads option. These programs do pruning as part of the\n"
106  "determinization algorithm, which is more efficient and prevents blowup.\n"
107  "See http://kaldi-asr.org/doc/lattices.html for more information on lattices.\n"
108  "\n"
109  "Usage: lattice-determinize-pruned-parallel [options] lattice-rspecifier lattice-wspecifier\n"
110  " e.g.: lattice-determinize-pruned-parallel --acoustic-scale=0.1 --beam=6.0 ark:in.lats ark:det.lats\n";
111 
112  ParseOptions po(usage);
113  BaseFloat acoustic_scale = 1.0;
114  BaseFloat beam = 10.0;
115  bool minimize = false;
116  TaskSequencerConfig sequencer_config; // has --num-threads option
117  fst::DeterminizeLatticePrunedOptions determinize_config; // Options used in DeterminizeLatticePruned--
118  // this options class does not have its own Register function as it's viewed as
119  // being more part of "fst world", so we register its elements independently.
120  determinize_config.max_mem = 50000000;
121  determinize_config.max_loop = 0; // was 500000;
122 
123  po.Register("acoustic-scale", &acoustic_scale,
124  "Scaling factor for acoustic likelihoods");
125  po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling].");
126  po.Register("minimize", &minimize,
127  "If true, push and minimize after determinization");
128  determinize_config.Register(&po);
129  sequencer_config.Register(&po);
130  po.Read(argc, argv);
131 
132  if (po.NumArgs() != 2) {
133  po.PrintUsage();
134  exit(1);
135  }
136 
137  std::string lats_rspecifier = po.GetArg(1),
138  lats_wspecifier = po.GetArg(2);
139 
140 
141  // Read as regular lattice-- this is the form the determinization code
142  // accepts.
143  SequentialLatticeReader lat_reader(lats_rspecifier);
144 
145  // Write as compact lattice.
146  CompactLatticeWriter compact_lat_writer(lats_wspecifier);
147  TaskSequencer<DeterminizeLatticeTask> sequencer(sequencer_config);
148 
149  int32 n_done = 0, n_warn = 0;
150 
151  if (acoustic_scale == 0.0)
152  KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
153 
154  for (; !lat_reader.Done(); lat_reader.Next()) {
155  std::string key = lat_reader.Key();
156 
157  Lattice *lat = lat_reader.Value().Copy(); // will give ownership to "task"
158  // below
159 
160  KALDI_VLOG(2) << "Processing lattice " << key;
161 
163  determinize_config, key, acoustic_scale, beam, minimize,
164  lat, &compact_lat_writer, &n_warn);
165  sequencer.Run(task);
166  n_done++;
167  }
168  sequencer.Wait();
169  KALDI_LOG << "Done " << n_done << " lattices, had warnings on " << n_warn
170  << " of these.";
171  return (n_done != 0 ? 0 : 1);
172  } catch(const std::exception &e) {
173  std::cerr << e.what();
174  return -1;
175  }
176 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Run(C *c)
This function takes ownership of the pointer "c", and will delete it in the same sequence as Run was ...
Definition: kaldi-thread.h:190
bool DeterminizeLatticePruned(const ExpandedFst< ArcTpl< Weight > > &ifst, double beam, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, IntType > > > *ofst, DeterminizeLatticePrunedOptions opts)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
bool PushCompactLatticeStrings(MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, IntType > > > *clat)
This function pushes the transition-ids as far towards the start as they will go. ...
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
DeterminizeLatticeTask(fst::DeterminizeLatticePrunedOptions &opts, std::string key, BaseFloat acoustic_scale, BaseFloat beam, bool minimize, Lattice *lat, CompactLatticeWriter *clat_writer, int32 *num_warn)
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
bool PushCompactLatticeWeights(MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, IntType > > > *clat)
This function pushes the weights in the CompactLattice so that all states except possibly the start s...
const fst::DeterminizeLatticePrunedOptions & opts_
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
bool MinimizeCompactLattice(MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, IntType > > > *clat, float delta)
This function minimizes the compact lattice.
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
#define KALDI_WARN
Definition: kaldi-error.h:150
DeterminizeLatticeTask(const TransitionModel &trans_model, fst::DeterminizeLatticePhonePrunedOptions &opts, std::string key, BaseFloat acoustic_scale, BaseFloat beam, Lattice *lat, CompactLatticeWriter *clat_writer, int32 *num_warn)
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
const fst::DeterminizeLatticePhonePrunedOptions & opts_
#define KALDI_LOG
Definition: kaldi-error.h:153
int main(int argc, char *argv[])
void Register(OptionsItf *opts)
Definition: kaldi-thread.h:160