lattice-lmrescore-pruned.cc
Go to the documentation of this file.
1 // latbin/lattice-lmrescore-pruned.cc
2 
3 // Copyright 2017 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "fstext/fstext-lib.h"
24 #include "fstext/kaldi-fst-io.h"
25 #include "lm/const-arpa-lm.h"
26 #include "lat/kaldi-lattice.h"
27 #include "lat/lattice-functions.h"
28 #include "lat/compose-lattice-pruned.h"
29 
30 int main(int argc, char *argv[]) {
31  try {
32  using namespace kaldi;
33  typedef kaldi::int32 int32;
34  typedef kaldi::int64 int64;
35  using fst::SymbolTable;
36  using fst::VectorFst;
37  using fst::StdArc;
38  using fst::ReadFstKaldi;
39 
40  const char *usage =
41  "This program can be used to subtract scores from one language model and\n"
42  "add scores from another one. It uses an efficient rescoring algorithm that\n"
43  "avoids exploring the entire composed lattice. The first (negative-weight)\n"
44  "language model is expected to be an FST, e.g. G.fst; the second one can\n"
45  "either be in FST or const-arpa format. Any FST-format language models will\n"
46  "be projected on their output by this program, making it unnecessary for the\n"
47  "caller to remove disambiguation symbols.\n"
48  "\n"
49  "Usage: lattice-lmrescore-pruned [options] <lm-to-subtract> <lm-to-add> <lattice-rspecifier> <lattice-wspecifier>\n"
50  " e.g.: lattice-lmrescore-pruned --acoustic-scale=0.1 \\\n"
51  " data/lang/G.fst data/lang_fg/G.fst ark:in.lats ark:out.lats\n"
52  " or: lattice-lmrescore-pruned --acoustic-scale=0.1 --add-const-arpa=true\\\n"
53  " data/lang/G.fst data/lang_fg/G.carpa ark:in.lats ark:out.lats\n";
54 
55  ParseOptions po(usage);
56 
57  // the options for the composition include --lattice-compose-beam,
58  // --max-arcs and --growth-ratio.
59  ComposeLatticePrunedOptions compose_opts;
60  BaseFloat lm_scale = 1.0;
61  BaseFloat acoustic_scale = 1.0;
62  bool add_const_arpa = false;
63 
64  po.Register("lm-scale", &lm_scale, "Scaling factor for <lm-to-add>; its negative "
65  "will be applied to <lm-to-subtract>.");
66  po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic "
67  "probabilities (e.g. 0.1 for non-chain systems); important because "
68  "of its effect on pruning.");
69  po.Register("add-const-arpa", &add_const_arpa, "If true, <lm-to-add> is expected"
70  "to be in const-arpa format; if false it's expected to be in FST"
71  "format.");
72 
73 
74  po.Read(argc, argv);
75 
76  if (po.NumArgs() != 4) {
77  po.PrintUsage();
78  exit(1);
79  }
80 
81  std::string lm_to_subtract_rxfilename = po.GetArg(1),
82  lm_to_add_rxfilename = po.GetArg(2),
83  lats_rspecifier = po.GetArg(3),
84  lats_wspecifier = po.GetArg(4);
85 
86  KALDI_LOG << "Reading LMs...";
87  VectorFst<StdArc> *lm_to_subtract_fst = fst::ReadAndPrepareLmFst(
88  lm_to_subtract_rxfilename);
89  VectorFst<StdArc> *lm_to_add_fst = NULL;
90  ConstArpaLm const_arpa;
91  if (add_const_arpa) {
92  ReadKaldiObject(lm_to_add_rxfilename, &const_arpa);
93  } else {
94  lm_to_add_fst = fst::ReadAndPrepareLmFst(lm_to_add_rxfilename);
95  }
96  fst::BackoffDeterministicOnDemandFst<StdArc> lm_to_subtract_det_backoff(
97  *lm_to_subtract_fst);
98  fst::ScaleDeterministicOnDemandFst lm_to_subtract_det_scale(
99  -lm_scale, &lm_to_subtract_det_backoff);
100 
101 
102  fst::DeterministicOnDemandFst<StdArc> *lm_to_add_orig = NULL,
103  *lm_to_add = NULL;
104  if (add_const_arpa) {
105  lm_to_add = new ConstArpaLmDeterministicFst(const_arpa);
106  } else {
108  *lm_to_add_fst);
109  }
110  if (lm_scale != 1.0) {
111  lm_to_add_orig = lm_to_add;
112  lm_to_add = new fst::ScaleDeterministicOnDemandFst(lm_scale,
113  lm_to_add_orig);
114  }
115 
116  KALDI_LOG << "Done.";
117 
118  // We read and write as CompactLattice.
119  SequentialCompactLatticeReader clat_reader(lats_rspecifier);
120 
121  // Write as compact lattice.
122  CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
123 
124  int32 num_done = 0, num_err = 0;
125 
126  for (; !clat_reader.Done(); clat_reader.Next()) {
127  std::string key = clat_reader.Key();
128  CompactLattice &clat = clat_reader.Value();
129 
130  if (acoustic_scale != 1.0) {
131  fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &clat);
132  }
134 
135  // To avoid memory gradually increasing with time, we reconstruct the
136  // composed-LM FST for each lattice we process.
137  // It shouldn't make a difference in which order we provide the
138  // arguments to the composition; either way should work. They are both
139  // acceptors so the result is the same either way.
141  &lm_to_subtract_det_scale, lm_to_add);
142 
143  CompactLattice composed_clat;
144  ComposeCompactLatticePruned(compose_opts,
145  clat,
146  &combined_lms,
147  &composed_clat);
148 
149  if (composed_clat.NumStates() == 0) {
150  // Something went wrong. A warning will already have been printed.
151  num_err++;
152  } else {
153  if (acoustic_scale != 1.0) {
154  if (acoustic_scale == 0.0)
155  KALDI_ERR << "Acoustic scale cannot be zero.";
156  fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale),
157  &composed_clat);
158  }
159  compact_lattice_writer.Write(key, composed_clat);
160  num_done++;
161  }
162  }
163  delete lm_to_subtract_fst;
164  delete lm_to_add_fst;
165  delete lm_to_add_orig;
166  delete lm_to_add;
167 
168  KALDI_LOG << "Overall, succeeded for " << num_done
169  << " lattices, failed for " << num_err;
170  return (num_done != 0 ? 0 : 1);
171  } catch(const std::exception &e) {
172  std::cerr << e.what();
173  return -1;
174  }
175 }
This class wraps an Fst, representing a language model, using the interface for "BackoffDeterministic...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Class ScaleDeterministicOnDemandFst takes another DeterministicOnDemandFst and scales the weights (li...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
int main(int argc, char *argv[])
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
fst::VectorFst< fst::StdArc > * ReadAndPrepareLmFst(std::string rxfilename)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void ComposeCompactLatticePruned(const ComposeLatticePrunedOptions &opts, const CompactLattice &clat, fst::DeterministicOnDemandFst< fst::StdArc > *det_fst, CompactLattice *composed_clat)
Does pruned composition of a lattice 'clat' with a DeterministicOnDemandFst 'det_fst'; implements LM ...
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
void TopSortCompactLatticeIfNeeded(CompactLattice *clat)
Topologically sort the compact lattice if not already topologically sorted.
#define KALDI_LOG
Definition: kaldi-error.h:153
This class wraps a ConstArpaLm format language model with the interface defined in DeterministicOnDem...