grammar-context-fst.h
Go to the documentation of this file.
1 // fstext/grammar-context-fst.h
2 
3 // Copyright 2018 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 //
20 
21 
22 #ifndef KALDI_FSTEXT_GRAMMAR_CONTEXT_FST_H_
23 #define KALDI_FSTEXT_GRAMMAR_CONTEXT_FST_H_
24 
25 /* This header defines a special form of the context FST "C" (the "C" in "HCLG")
26  that integrates with our framework for building dynamic graphs for grammars
27  that are too big to statically create, or graphs with on-the-fly pieces that
28  you want to create at recognition time without building the whole graph.
29 
30  This framework is limited to only work with models with left-biphone context.
31  (Fortunately this doesn't impact results, as our best models are all 'chain'
32  models with left biphone context).
33 
34  The main code exported from here is the class InverseLeftBiphoneContextFst,
35  which is similar to the InverseContextFst defined in context-fst.h, but
36  is limited to left-biphone context and also supports certain special
37  extensions we need to compile grammars.
38 
39  See \ref grammar (../doc/grammar.dox) for high-level
40  documentation on how this framework works.
41 */
42 
43 
44 #include <algorithm>
45 #include <string>
46 #include <vector>
47 #include <fst/fstlib.h>
48 #include <fst/fst-decl.h>
49 
50 #include "util/const-integer-set.h"
52 #include "fstext/context-fst.h"
53 
54 namespace fst {
55 
56 
69  kNontermBos = 0, // #nonterm_bos
70  kNontermBegin = 1, // #nonterm_begin
71  kNontermEnd = 2, // #nonterm_end
72  kNontermReenter = 3, // #nonterm_reenter
73  kNontermUserDefined = 4, // the lowest-numbered user-defined nonterminal, e.g. #nonterm:foo
74  // kNontermMediumNumber and kNontermBigNumber come into the encoding of
75  // nonterminal-related symbols in HCLG.fst. The only hard constraint on them
76  // is that kNontermBigNumber must be bigger than the biggest transition-id in
77  // your system, and kNontermMediumNumber must be >0. These values were chosen
78  // for ease of human inspection of numbers encoded with them.
80  kNontermBigNumber = 10000000
81 };
82 
83 
84 
85 // Returns the smallest multiple of 1000 (i.e. of kNontermMediumNumber) that
86 // is strictly greater than nonterm_phones_offset. This "encoding multiple" is
87 // used in the encoding of special symbols in HCLG; they are encoded as
88 // special_symbol =
89 // kNontermBigNumber + (nonterminal * encoding_multiple) + phone_index
90 inline int32 GetEncodingMultiple(int32 nonterm_phones_offset) {
91  int32 medium_number = static_cast<int32>(kNontermMediumNumber);
  // Integer division truncates, so adding medium_number before dividing always
  // moves up to the *next* multiple, even when nonterm_phones_offset is already
  // an exact multiple (this reasoning assumes nonterm_phones_offset >= 0).
92  return medium_number *
93  ((nonterm_phones_offset + medium_number) / medium_number);
94 }
95 
121  int32 nonterm_phones_offset,
122  const std::vector<int32> &disambig_syms,
123  const VectorFst<StdArc> &ifst,
124  VectorFst<StdArc> *ofst,
125  std::vector<std::vector<int32> > *ilabels);
126 
127 
128 
129 /*
130  InverseLeftBiphoneContextFst represents the inverse of the context FST "C" (the "C" in
131  "HCLG") which transduces from symbols representing phone context windows
132  (e.g. "a, b, c") to individual phones, e.g. "a". So InverseLeftBiphoneContextFst
133  transduces from phones to symbols representing phone context windows. The
134  point is that the inverse is deterministic, so the DeterministicOnDemandFst
135  interface is applicable, which turns out to be a convenient way to implement
136  this.
137 
138  This doesn't implement the full Fst interface, it implements the
139  DeterministicOnDemandFst interface which is much simpler and which is
140  sufficient for what we need to do with this.
141 
142  Search for "hbka.pdf" ("Speech Recognition with Weighted Finite State
143  Transducers") by M. Mohri, for more context.
144 */
145 
147 public:
148  typedef StdArc Arc;
149  typedef typename StdArc::StateId StateId;
150  typedef typename StdArc::Weight Weight;
151  typedef typename StdArc::Label Label;
152 
// Constructor (declaration only; the definition is not part of this listing).
//   nonterm_phones_offset: base integer id for the nonterminal symbols; the
//       symbol for a value n of NonterminalValues is
//       nonterm_phones_offset + n (see GetPhoneSymbolFor()).
//   phones: the set of phone-ids, typically 1, 2, .. num_phones.
//   disambig_syms: the integer ids of the disambiguation symbols (usually
//       written in text form as #0, #1, #2, etc.).
169  InverseLeftBiphoneContextFst(Label nonterm_phones_offset,
170  const std::vector<int32>& phones,
171  const std::vector<int32>& disambig_syms);
172 
// Returns the start state, which is always state 0.
211  virtual StateId Start() { return 0; }
212 
// Declaration only; the definition is not part of this listing.
// NOTE(review): presumably returns the final-weight of state s, as in the
// DeterministicOnDemandFst interface -- confirm against the implementation.
213  virtual Weight Final(StateId s);
214 
// Note: ilabel must not be epsilon.
// Declaration only; the definition is not part of this listing.
// NOTE(review): presumably returns true and fills in *arc when state s has an
// arc with input label ilabel, per the DeterministicOnDemandFst interface --
// confirm against the implementation.
216  virtual bool GetArc(StateId s, Label ilabel, Arc *arc);
217 
219 
220  // Returns a const reference to ilabel_info_, a vector<vector<int32> > with
221  // information about all the input symbols of C (i.e. all the output symbols
222  // of this inverse context FST). No copy is made. See
223  // "http://kaldi-asr.org/doc/tree_externals.html#tree_ilabel".
224  const std::vector<std::vector<int32> > &IlabelInfo() const {
225  return ilabel_info_;
226  }
227 
228  // A way to destructively obtain the ilabel-info: swaps the contents of
229  // ilabel_info_ with *vec, so afterwards this object holds whatever *vec held
  // before the call. Only do this if you are just about to destroy this object.
230  void SwapIlabelInfo(std::vector<std::vector<int32> > *vec) { ilabel_info_.swap(*vec); }
231 
232 private:
233 
235  return nonterm_phones_offset_ + static_cast<int32>(n);
236  }
237 
// Finds the label index corresponding to the context-window of phones given
// in label_info. Declaration only; the definition is not part of this listing.
// NOTE(review): presumably adds a new entry to ilabel_map_ / ilabel_info_ when
// label_info has not been seen before -- confirm against the implementation.
241  Label FindLabel(const std::vector<int32> &label_info);
242 
243 
244  // Map type to map from vectors of int32 (representing ilabel-info,
245  // see http://kaldi-asr.org/doc/tree_externals.html#tree_ilabel) to
246  // Label (the output label in this FST).
247  typedef unordered_map<std::vector<int32>, Label,
249 
250 
251  // The following three variables were also passed in by the caller:
253 
254  // 'phone_syms_' are a set of phone-ids, typically 1, 2, .. num_phones.
256 
257  // disambig_syms_ is the set of integer ids of the disambiguation symbols,
258  // usually represented in text form as #0, #1, #2, etc. These are inserted
259  // into the grammar (for #0) and the lexicon (for #1, #2, ...) in order to
260  // make the composed FSTs determinizable. They are treated "specially" by the
261  // context FST in that they are not part of the context, they are just "passed
262  // through" via self-loops. See the Mohri chapter mentioned above for more
263  // information.
265 
266 
267  // maps from vector<int32>, representing phonetic contexts of length
268  // context_width_ - 1, to Label. These are actually the output labels of this
269  // InverseContextFst (because of the "Inverse" part), but for historical
270  // reasons and because we've used the term "ilabels" in the documentation, we
271  // still call these "ilabels".
272  VectorToLabelMap ilabel_map_;
273 
274  // ilabel_info_ is the reverse map of ilabel_map_.
275  // Indexed by olabel (although we call this ilabel_info_ for historical
276  // reasons and because it is for the ilabels of C), ilabel_info_[i] gives
277  // information about the meaning of each symbol on the input of C
278  // aka the output of inv(C).
279  // See "http://kaldi-asr.org/doc/tree_externals.html#tree_ilabel".
280  std::vector<std::vector<int32> > ilabel_info_;
281 
282 };
283 
284 } // namespace fst
285 
286 
287 #endif // KALDI_FSTEXT_GRAMMAR_CONTEXT_FST_H_
fst::StdArc::StateId StateId
void SwapIlabelInfo(std::vector< std::vector< int32 > > *vec)
A hashing function-object for vectors.
Definition: stl-utils.h:216
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
fst::StdArc StdArc
virtual StateId Start()
Here is a note on the state space of InverseLeftBiphoneContextFst; see Special symbols in C...
kaldi::int32 int32
NonterminalValues
An anonymous enum to define some values for symbols used in our grammar-fst framework.
InverseLeftBiphoneContextFst(Label nonterm_phones_offset, const std::vector< int32 > &phones, const std::vector< int32 > &disambig_syms)
Constructor.
virtual bool GetArc(StateId s, Label ilabel, Arc *arc)
Note: ilabel must not be epsilon.
class DeterministicOnDemandFst is an "FST-like" base-class.
std::vector< std::vector< int32 > > ilabel_info_
const std::vector< std::vector< int32 > > & IlabelInfo() const
struct rnnlm::@11::@12 n
void ComposeContextLeftBiphone(int32 nonterm_phones_offset, const vector< int32 > &disambig_syms_in, const VectorFst< StdArc > &ifst, VectorFst< StdArc > *ofst, std::vector< std::vector< int32 > > *ilabels)
This is a variant of the function ComposeContext() which is to be used with our "grammar FST" framewo...
fst::StdArc::Label Label
fst::StdArc::Weight Weight
unordered_map< std::vector< int32 >, Label, kaldi::VectorHasher< int32 > > VectorToLabelMap
Label FindLabel(const std::vector< int32 > &label_info)
Finds the label index corresponding to this context-window of phones (likely of width context_width_)...
int32 GetEncodingMultiple(int32 nonterm_phones_offset)
kaldi::ConstIntegerSet< Label > disambig_syms_
int32 GetPhoneSymbolFor(enum NonterminalValues n)
kaldi::ConstIntegerSet< Label > phone_syms_