lattice-determinize-phone-pruned-parallel.cc
Go to the documentation of this file.
1 // latbin/lattice-determinize-phone-pruned-parallel.cc
2 
3 // Copyright 2014 Guoguo Chen
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 #include "base/kaldi-common.h"
20 #include "hmm/transition-model.h"
21 #include "lat/kaldi-lattice.h"
23 #include "lat/lattice-functions.h"
24 #include "lat/push-lattice.h"
25 #include "util/common-utils.h"
26 #include "util/kaldi-thread.h"
27 
28 namespace kaldi {
29 
31  public:
32  // Initializer takes ownership of "lat".
34  const TransitionModel &trans_model,
36  std::string key,
37  BaseFloat acoustic_scale,
38  BaseFloat beam,
39  Lattice *lat,
40  CompactLatticeWriter *clat_writer,
41  int32 *num_warn):
42  trans_model_(&trans_model), opts_(opts), key_(key),
43  acoustic_scale_(acoustic_scale), beam_(beam),
44  lat_(lat), clat_writer_(clat_writer), num_warn_(num_warn) { }
45 
46  void operator () () {
47  // We apply the acoustic scale before determinization and will undo it
48  // afterward, since it can affect the result.
50 
53  KALDI_WARN << "For key " << key_ << ", determinization did not succeed"
54  "(partial output will be pruned tighter than the specified beam.)";
55  (*num_warn_)++;
56  }
57 
58  delete lat_;
59  lat_ = NULL;
60 
61  // Invert the original acoustic scaling
63  &det_clat_);
64  }
65 
67  KALDI_VLOG(2) << "Wrote lattice with " << det_clat_.NumStates()
68  << " for key " << key_;
70  }
71  private:
74  std::string key_;
77  // The lattice we're working on. Owned locally.
79  // The output of our process. Will be written to clat_writer_ in the
80  // destructor.
84 
85 };
86 
87 } // namespace kaldi
88 
89 int main(int argc, char *argv[]) {
90  try {
91  using namespace kaldi;
92  typedef kaldi::int32 int32;
93 
94  const char *usage =
95  "Determinize lattices, keeping only the best path (sequence of\n"
96  "acoustic states) for each input-symbol sequence. This is a version\n"
97  "of lattice-determinize-phone-pruned that accepts the --num-threads\n"
98  "option. The program does phone insertion when doing a first pass\n"
99  "determinization, it then removes the inserted symbols and does a\n"
100  "second pass determinization. It also does pruning as part of the\n"
101  "determinization algorithm, which is more efficient and prevents\n"
102  "blowup.\n"
103  "\n"
104  "Usage: lattice-determinize-phone-pruned-parallel [options] \\\n"
105  " <model> <lattice-rspecifier> <lattice-wspecifier>\n"
106  " e.g.: lattice-determinize-phone-pruned-parallel \\\n"
107  " --acoustic-scale=0.1 final.mdl ark:in.lats ark:det.lats\n";
108 
109  ParseOptions po(usage);
110  BaseFloat acoustic_scale = 1.0;
111  BaseFloat beam = 10.0;
112 
113  TaskSequencerConfig sequencer_opts;
115  determinize_opts.max_mem = 50000000;
116 
117  po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic"
118  " likelihoods.");
119  po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling].");
120  determinize_opts.Register(&po);
121  sequencer_opts.Register(&po);
122  po.Read(argc, argv);
123 
124  if (po.NumArgs() != 3) {
125  po.PrintUsage();
126  exit(1);
127  }
128 
129  std::string model_rxfilename = po.GetArg(1),
130  lats_rspecifier = po.GetArg(2),
131  lats_wspecifier = po.GetArg(3);
132 
133  TransitionModel trans_model;
134  ReadKaldiObject(model_rxfilename, &trans_model);
135 
136  // Reads as regular lattice-- this is the form the determinization code
137  // accepts.
138  SequentialLatticeReader lat_reader(lats_rspecifier);
139 
140  // Writes as compact lattice.
141  CompactLatticeWriter compact_lat_writer(lats_wspecifier);
142 
143  TaskSequencer<DeterminizeLatticeTask> sequencer(sequencer_opts);
144 
145  int32 n_done = 0, n_warn = 0;
146 
147  if (acoustic_scale == 0.0)
148  KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
149 
150  for (; !lat_reader.Done(); lat_reader.Next()) {
151  std::string key = lat_reader.Key();
152 
153  // Will give ownership to "task" below.
154  Lattice *lat = lat_reader.Value().Copy();
155 
156  KALDI_VLOG(2) << "Processing lattice " << key;
157 
159  trans_model, determinize_opts, key, acoustic_scale, beam,
160  lat, &compact_lat_writer, &n_warn);
161  sequencer.Run(task);
162 
163  n_done++;
164  }
165  sequencer.Wait();
166  KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
167  << "earlier than specified by the beam on " << n_warn << " of "
168  << "these.";
169  return (n_done != 0 ? 0 : 1);
170  } catch(const std::exception &e) {
171  std::cerr << e.what();
172  return -1;
173  }
174 }
int main(int argc, char *argv[])
Determinizes lattices with phone-pruned determinization, processing them in parallel (see the --num-threads option), and writes the results as compact lattices.
Definition: chain.dox:20
void Run(C *c)
This function takes ownership of the pointer "c", and will delete it in the same sequence as Run was ...
Definition: kaldi-thread.h:190
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
#define KALDI_WARN
Definition: kaldi-error.h:150
DeterminizeLatticeTask(const TransitionModel &trans_model, fst::DeterminizeLatticePhonePrunedOptions &opts, std::string key, BaseFloat acoustic_scale, BaseFloat beam, Lattice *lat, CompactLatticeWriter *clat_writer, int32 *num_warn)
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
const fst::DeterminizeLatticePhonePrunedOptions & opts_
#define KALDI_LOG
Definition: kaldi-error.h:153
bool DeterminizeLatticePhonePrunedWrapper(const kaldi::TransitionModel &trans_model, MutableFst< kaldi::LatticeArc > *ifst, double beam, MutableFst< kaldi::CompactLatticeArc > *ofst, DeterminizeLatticePhonePrunedOptions opts)
This function is a wrapper of DeterminizeLatticePhonePruned() that works for Lattice type FSTs...
void Register(OptionsItf *opts)
Definition: kaldi-thread.h:160