compose-lattice-pruned.h
Go to the documentation of this file.
1 // lat/compose-lattice-pruned.h
2 
3 // Copyright 2017 Johns Hopkins University (Author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #ifndef KALDI_LAT_COMPOSE_LATTICE_PRUNED_H_
21 #define KALDI_LAT_COMPOSE_LATTICE_PRUNED_H_
22 #include <fst/fstlib.h>
23 #include <fst/fst-decl.h>
24 #include <algorithm>
25 #include <map>
26 #include <set>
27 #include <vector>
28 #include "fstext/lattice-weight.h"
29 #include "itf/options-itf.h"
30 #include "lat/kaldi-lattice.h"
31 
32 namespace kaldi {
33 
34 
35 /*
36  This header implements pruned lattice composition, via the functions
37  ComposeCompactLatticePruned (we may later add ComposeLatticePruned if
38  needed).
39 
40  ComposeCompactLatticePruned does composition of a CompactLattice with a
41  DeterministicOnDemandFst<StdArc>, producing a CompactLattice. It's
42  intended for language model rescoring of lattices.
43 
44  The scenario is that you have produced a Lattice or CompactLattice via
45  conventional decoding, and you want to replace (or partially replace) the
46  language model scores in the lattice (which will probably come from the
47  LM used to generate the HCLG.fst) with the language model scores from a
48  larger language model.
49 
50  The simpler alternative to using ComposeCompactLatticePruned is to use
51  ComposeCompactLatticeDeterministic. The advantages of ComposeCompactLatticePruned are:
52 
53  (1) The alternative might be too slow, because when you compose a lattice
54  with a high-order n-gram language model (or an RNNLM with a high-order
55  n-gram approximation) it can generate a lot more arcs than were present
56  in the original lattice.
57 
58  (2) For RNNLM rescoring, the n-gram approximation may not always
59  be choosing a very good history. In the n-gram approximation,
60  the LM score for a particular word given a history is taken
61  from a history that is the same as the desired history up to
62  the last, say, 4 words, but beyond that may differ. The
63  advantage of ComposeCompactLatticePruned over the alternative is
64  that it will often take, in a suitable sense, the "best" history
65  (instead of an arbitrary history); this happens simply because the
66  paths that are expected to be the best paths are visited first.
67 
68 
69  We now describe how you are expected to get the thing to compose with,
70  i.e. the DeterministicOnDemandFst<StdArc> that corrects the LM weights. It
71  will normally contain the LM used to create the original HCLG, with a
72  negative weight, composed with the LM you want to use, with a positive
73  weight (these weights might not be -1 and 1 if there is interpolation in the
74  picture). The LM we want to use will often be e.g. a 4-gram ARPA-type LM
75  (stored as a regular FST or, more compactly, as a .carpa file which is a
76  ConstArpaFst), or it will be some kind of RNNLM. You would use a
77  ComposeDeterministicOnDemandFst<StdArc> to combine the "base" language model
78  (with a negative weight, using either ConstArpaLm or
79  BackoffDeterministicOnDemandFst wrapped in ScaleDeterministicOnDemandFst)
80  with the RNNLM language model (the name of FST TBD, Hainan needs to write
81  this).
82 */
83 
84 
85 
86 
87 // This options class is used for ComposeCompactLatticePruned,
88 // and if in future we write a function ComposeLatticePruned, we'll
89 // use the same options class.
90 // Note: the binary that uses this may want to use an --acoustic-scale
91 // option, in case the acoustics need to be scaled down before this
92 // composition, because it will make a difference to which paths
93 // are explored in the lattice.
95  // 'lattice_compose_beam' is a beam that determines
96  // how much of a given composition space we will expand (at least,
97  // until we hit the limit imposed by 'max_arcs'). This
98  // beam is applied using heuristically-estimated expected costs
99  // to the end of the lattice, so if you specify, for example,
100  // beam=5.0, it doesn't guarantee that all paths with best-cost
101  // within 5.0 of the best path in the composed output will be
102  // retained (However, this would be exact if the LM we were
103  // rescoring with had zero costs).
105 
106  // 'max_arcs' is the maximum number of arcs that we are willing to expand per
107  // lattice; once this limit is reached, we terminate the composition (however,
108  // this limit is not applied until at least one path to a final-state has been
109  // produced).
111 
112  // 'initial_num_arcs' is the number of arcs we use on the first outer
113  // iteration of the algorithm. This is so unimportant that we do not expose
114  // it on the command line.
116 
117  // 'growth_ratio' determines how much we allow the num-arcs to grow on each
118  // outer iteration of the algorithm. 1.5 is a reasonable value; if it is set
119  // too small, too much time will be taken in RecomputePruningInfo(), and if
120  // too large, the paths searched may be less optimal than they could be (the
121  // heuristics will be less accurate).
123 
// Default constructor: initializes every option to its default value
// (lattice_compose_beam = 6.0, max_arcs = 100000, initial_num_arcs = 100,
// growth_ratio = 1.5).
124  ComposeLatticePrunedOptions(): lattice_compose_beam(6.0),
125  max_arcs(100000),
126  initial_num_arcs(100),
127  growth_ratio(1.5) { }
// Registers the configurable options with the options object 'po', under
// the names "lattice-compose-beam", "max-arcs" and "growth-ratio", each
// with its help string. 'initial_num_arcs' is not registered here, so it
// cannot be set through 'po'.
128  void Register(OptionsItf *po) {
129  po->Register("lattice-compose-beam", &lattice_compose_beam,
130  "Beam used in pruned lattice composition, which determines how "
131  "large the composed lattice may be.");
132  po->Register("max-arcs", &max_arcs, "Maximum number of arcs we allow in "
133  "any given lattice, during pruned composition (limits max size "
134  "of lattices; also see lattice-compose-beam).");
135  po->Register("growth-ratio", &growth_ratio, "Factor used in the lattice "
136  "composition algorithm; must be >1.0. Affects speed vs. "
137  "the optimality of the best-first composition.");
138  }
139 };
140 
141 
169  const ComposeLatticePrunedOptions &opts,
170  const CompactLattice &clat,
172  CompactLattice* composed_clat);
173 
174 
175 
176 
177 } // namespace kaldi
178 
179 #endif
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
kaldi::int32 int32
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
void ComposeCompactLatticePruned(const ComposeLatticePrunedOptions &opts, const CompactLattice &clat, fst::DeterministicOnDemandFst< fst::StdArc > *det_fst, CompactLattice *composed_clat)
Does pruned composition of a lattice &#39;clat&#39; with a DeterministicOnDemandFst &#39;det_fst&#39;; implements LM ...
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46