lattice-boost-ali.cc
Go to the documentation of this file.
1 // latbin/lattice-boost-ali.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
21 #include "util/common-utils.h"
22 #include "fstext/fstext-lib.h"
23 #include "lat/kaldi-lattice.h"
24 #include "lat/lattice-functions.h"
25 
26 int main(int argc, char *argv[]) {
27  try {
28  typedef kaldi::int32 int32;
29  using fst::SymbolTable;
30  using fst::VectorFst;
31  using fst::StdArc;
32 
33  const char *usage =
34  "Boost graph likelihoods (decrease graph costs) by b * #frame-phone-errors\n"
35  "on each arc in the lattice. Useful for discriminative training, e.g.\n"
36  "boosted MMI. Modifies input lattices. This version takes the reference\n"
37  "in the form of alignments. Needs the model (just the transitions) to\n"
38  "transform pdf-ids to phones. Takes the --silence-phones option and these\n"
39  "phones appearing in the lattice are always assigned zero error, or with the\n"
40  "--max-silence-error option, at most this error-count per frame\n"
41  "(--max-silence-error=1 is equivalent to not specifying --silence-phones).\n"
42  "\n"
43  "Usage: lattice-boost-ali [options] model lats-rspecifier ali-rspecifier lats-wspecifier\n"
44  " e.g.: lattice-boost-ali --silence-phones=1:2:3 --b=0.05 1.mdl ark:1.lats ark:1.ali ark:boosted.lats\n";
45 
46  kaldi::BaseFloat b = 0.05;
47  kaldi::BaseFloat max_silence_error = 0.0;
48  std::string silence_phones_str;
49 
50  kaldi::ParseOptions po(usage);
51  po.Register("b", &b,
52  "Boosting factor (more -> more boosting of errors / larger margin)");
53  po.Register("max-silence", &max_silence_error,
54  "Maximum error assigned to silence phones [c.f. --silence-phones option]."
55  "0.0 -> original BMMI paper, 1.0 -> no special silence treatment.");
56  po.Register("silence-phones", &silence_phones_str,
57  "Colon-separated list of integer id's of silence phones, e.g. 46:47");
58  po.Read(argc, argv);
59 
60  if (po.NumArgs() != 4) {
61  po.PrintUsage();
62  exit(1);
63  }
64 
65  std::vector<int32> silence_phones;
66  if (!kaldi::SplitStringToIntegers(silence_phones_str, ":", false, &silence_phones))
67  KALDI_ERR << "Invalid silence-phones string " << silence_phones_str;
68  kaldi::SortAndUniq(&silence_phones);
69  if (silence_phones.empty())
70  KALDI_WARN <<"No silence phones specified, make sure this is what you intended.";
71 
72  std::string model_rxfilename = po.GetArg(1),
73  lats_rspecifier = po.GetArg(2),
74  ali_rspecifier = po.GetArg(3),
75  lats_wspecifier = po.GetArg(4);
76 
77  // Read as regular lattice and write as compact.
78  kaldi::SequentialLatticeReader lattice_reader(lats_rspecifier);
79  kaldi::RandomAccessInt32VectorReader alignment_reader(ali_rspecifier);
80  kaldi::CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
81 
83  {
84  bool binary_in;
85  kaldi::Input ki(model_rxfilename, &binary_in);
86  trans.Read(ki.Stream(), binary_in);
87  }
88 
89  int32 n_done = 0, n_err = 0, n_no_ali = 0;
90 
91  for (; !lattice_reader.Done(); lattice_reader.Next()) {
92  std::string key = lattice_reader.Key();
93  kaldi::Lattice lat = lattice_reader.Value();
94  lattice_reader.FreeCurrent();
95 
96  if (lat.Start() == fst::kNoStateId) {
97  KALDI_WARN << "Empty lattice for utterance " << key;
98  n_err++;
99  continue;
100  }
101 
102  if (b != 0.0) {
103  if (!alignment_reader.HasKey(key)) {
104  KALDI_WARN << "No alignment for utterance " << key;
105  n_no_ali++;
106  continue;
107  }
108  const std::vector<int32> &alignment = alignment_reader.Value(key);
109  if (!LatticeBoost(trans, alignment, silence_phones, b,
110  max_silence_error, &lat)) {
111  n_err++; // will already have printed warning.
112  continue;
113  }
114  }
116  ConvertLattice(lat, &clat);
117  compact_lattice_writer.Write(key, clat);
118  n_done++;
119  }
120  KALDI_LOG << "Done " << n_done << " lattices, missing alignments for "
121  << n_no_ali << ", other errors on " << n_err;
122  return (n_done != 0 ? 0 : 1);
123  } catch(const std::exception &e) {
124  std::cerr << e.what();
125  return -1;
126  }
127 }
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq&#39;s (removes duplicates) from a vector.
Definition: stl-utils.h:39
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
std::istream & Stream()
Definition: kaldi-io.cc:826
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const T & Value(const std::string &key)
void Read(std::istream &is, bool binary)
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
#define KALDI_WARN
Definition: kaldi-error.h:150
bool HasKey(const std::string &key)
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
int NumArgs() const
Number of positional parameters (c.f. argc-1).
bool LatticeBoost(const TransitionModel &trans, const std::vector< int32 > &alignment, const std::vector< int32 > &silence_phones, BaseFloat b, BaseFloat max_silence_error, Lattice *lat)
Boosts LM probabilities by b * [number of frame errors]; equivalently, adds -b*[number of frame error...
#define KALDI_LOG
Definition: kaldi-error.h:153
int main(int argc, char *argv[])