context-fst.h
Go to the documentation of this file.
1 // fstext/context-fst.h
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2018 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 //
21 // This file includes material from the OpenFST Library v1.2.7 available at
22 // http://www.openfst.org and released under the Apache License Version 2.0.
23 //
24 // See ../../COPYING for clarification regarding multiple authors
25 //
26 // Licensed under the Apache License, Version 2.0 (the "License");
27 // you may not use this file except in compliance with the License.
28 // You may obtain a copy of the License at
29 //
30 // http://www.apache.org/licenses/LICENSE-2.0
31 //
32 // Unless required by applicable law or agreed to in writing, software
33 // distributed under the License is distributed on an "AS IS" BASIS,
34 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35 // See the License for the specific language governing permissions and
36 // limitations under the License.
37 //
38 // Copyright 2005-2010 Google, Inc.
39 // Author: riley@google.com (Michael Riley)
40 
41 
42 
43 #ifndef KALDI_FSTEXT_CONTEXT_FST_H_
44 #define KALDI_FSTEXT_CONTEXT_FST_H_
45 
46 /* This header defines a context FST "C" (the "C" in "HCLG") which transduces
47  from symbols representing phone context windows (e.g. "a, b, c") to
48  individual phones, e.g. "a". Search for "hbka.pdf" ("Speech Recognition
49  with Weighted Finite State Transducers") by M. Mohri, for more context.
50 */
51 
52 #include <unordered_map>
53 using std::unordered_map;
54 
55 #include <algorithm>
56 #include <string>
57 #include <vector>
58 #include <fst/fstlib.h>
59 #include <fst/fst-decl.h>
60 
61 #include "util/const-integer-set.h"
63 
64 namespace fst {
65 
66 
67 
68 
// Utility function for writing ilabel-info vectors to disk.
//   os          - output stream that receives the data.
//   binary      - NOTE(review): presumably selects binary (true) vs. text
//                 (false) output format; confirm in context-fst.cc.
//   ilabel_info - the ilabel-info to write, one std::vector<int32> per entry.
70 void WriteILabelInfo(std::ostream &os, bool binary,
71  const std::vector<std::vector<int32> > &ilabel_info);
72 
// Utility function for reading ilabel-info vectors from disk.
//   is          - input stream the data is read from.
//   binary      - NOTE(review): presumably selects binary (true) vs. text
//                 (false) input format; confirm in context-fst.cc.
//   ilabel_info - output parameter (passed by pointer) that receives the
//                 ilabel-info that was read.
74 void ReadILabelInfo(std::istream &is, bool binary,
75  std::vector<std::vector<int32> > *ilabel_info);
76 
77 
// Returns a SymbolTable for the given ilabel-info. This function is mainly of
// use for printing and debugging.
//   ilabel_info     - the ilabel-info vectors to create symbols for.
//   phones_symtab   - symbol table for the phones.
//   separator       - e.g. "/".
//   disambig_prefix - e.g. "#".
// NOTE(review): a raw pointer is returned; ownership presumably passes to the
// caller -- confirm against the definition in context-fst.cc.
79 SymbolTable *CreateILabelInfoSymbolTable(const std::vector<std::vector<int32> > &ilabel_info,
80  const SymbolTable &phones_symtab,
81  std::string separator,
82  std::string disambig_prefix); // e.g. separator = "/", disambig_prefix = "#"
83 
84 
85 
// Used in the command-line tool fstcomposecontext.
//   disambig_syms    - integer ids of the disambiguation symbols.
//   context_width    - width of the phonetic context window (e.g. 3 for
//                      triphone).
//   central_position - zero-based central position in the context window
//                      (e.g. 1 for triphone).
//   ifst             - input FST, passed by non-const pointer.
//                      NOTE(review): it may be modified by the call; confirm.
//   ofst             - output FST.
//   ilabels_out      - output parameter receiving ilabel-info vectors.
//   project_ifst     - optional flag, defaults to false.
//                      NOTE(review): exact effect is not visible in this
//                      header; see context-fst.cc.
109 void ComposeContext(const std::vector<int32> &disambig_syms,
110  int32 context_width, int32 central_position,
111  VectorFst<StdArc> *ifst,
112  VectorFst<StdArc> *ofst,
113  std::vector<std::vector<int32> > *ilabels_out,
114  bool project_ifst = false);
115 
116 
// Modifies '*fst' in place using the subsequential symbol 'subseq_symbol'
// (written "$" in the Mohri chapter referenced in this file). Per the comments
// on InverseContextFst's subsequential symbol, that symbol is added to det(LG)
// as a self-loop on final states before composing with C.
// NOTE(review): the full contract lives with the definition in context-fst.cc.
131 void AddSubsequentialLoop(StdArc::Label subseq_symbol,
132  MutableFst<StdArc> *fst);
133 
134 
135 /*
136  InverseContextFst represents the inverse of the context FST "C" (the "C" in
137  "HCLG") which transduces from symbols representing phone context windows
138  (e.g. "a, b, c") to individual phones, e.g. "a". So InverseContextFst
139  transduces from phones to symbols representing phone context windows. The
140  point is that the inverse is deterministic, so the DeterministicOnDemandFst
141  interface is applicable, which turns out to be a convenient way to implement
142  this.
143 
144  This doesn't implement the full Fst interface, it implements the
145  DeterministicOnDemandFst interface which is much simpler and which is
146  sufficient for what we need to do with this.
147 
148  Search for "hbka.pdf" ("Speech Recognition with Weighted Finite State
149  Transducers") by M. Mohri, for more context.
150 */
151 
153 public:
154  typedef StdArc Arc;
155  typedef typename StdArc::StateId StateId;
156  typedef typename StdArc::Weight Weight;
157  typedef typename StdArc::Label Label;
158 
// Constructor.
//   subsequential_symbol - the symbol, written "$" in the Mohri chapter, that
//                          terminates phonetic sequences to force out the
//                          last phones-in-context.
//   phones               - the set of phone-ids, typically 1, 2, .. num_phones.
//   disambig_syms        - integer ids of the disambiguation symbols (usually
//                          written #0, #1, #2, ... in text form).
//   context_width        - width of the phonetic context ("N"), e.g. 3 for
//                          triphone, 2 for biphone, 1 for monophone.
//   central_position     - zero-based central position ("P") in the context
//                          window, e.g. 1 for triphone, 0 for monophone.
174  InverseContextFst(Label subsequential_symbol,
175  const std::vector<int32>& phones,
176  const std::vector<int32>& disambig_syms,
177  int32 context_width,
178  int32 central_position);
179 
180 
// Returns the start state, which is always state 0.
181  virtual StateId Start() { return 0; }
182 
// Returns a Weight for state 's' (presumably its final-weight, following the
// usual FST convention -- confirm). Defined in context-fst.cc.
183  virtual Weight Final(StateId s);
184 
// Looks up the arc leaving state 's' with input label 'ilabel' and writes it
// to '*arc'. Note: ilabel must not be epsilon.
// NOTE(review): the bool return presumably indicates whether such an arc
// exists -- confirm against the definition in context-fst.cc.
186  virtual bool GetArc(StateId s, Label ilabel, Arc *arc);
187 
189 
190  // Returns a const reference (no copy is made) to the vector<vector<int32> >
191  // holding information about all the input symbols of C (i.e. all the output
192  // symbols of this InverseContextFst). See
193  // "http://kaldi-asr.org/doc/tree_externals.html#tree_ilabel".
194  const std::vector<std::vector<int32> > &IlabelInfo() const {
195  return ilabel_info_;
196  }
197 
198  // A way to destructively obtain the ilabel-info: swaps the contents of
199  // ilabel_info_ with '*vec', so afterwards this object holds whatever '*vec'
// held before the call. Only do this if you are just about to destroy this
// object.
200  void SwapIlabelInfo(std::vector<std::vector<int32> > *vec) { ilabel_info_.swap(*vec); }
201 
202 private:
203 
// Returns the state-id corresponding to this vector of phones 'seq'; creates
// the state if necessary.
206  StateId FindState(const std::vector<int32> &seq);
207 
// Finds the label index corresponding to this context-window of phones
// 'label_info' (likely of width context_width_).
// NOTE(review): presumably allocates a new label if none exists yet -- confirm
// in context-fst.cc.
211  Label FindLabel(const std::vector<int32> &label_info);
212 
// Returns true if and only if 'lab' is a member of disambig_syms_.
213  inline bool IsDisambigSymbol(Label lab) { return (disambig_syms_.count(lab) != 0); }
214 
// Returns true if and only if 'lab' is a member of phone_syms_.
215  inline bool IsPhoneSymbol(Label lab) { return (phone_syms_.count(lab) != 0); }
216 
// Creates a disambiguation-symbol self-loop arc on state 's' and writes it to
// '*arc'; 'ilabel' must correspond to a disambiguation symbol.
219  inline void CreateDisambigArc(StateId s, Label ilabel, Arc *arc);
220 
// Creates an arc from 'src' to 'dst' and writes it to '*arc'. This function is
// to be called only when 'ilabel' corresponds to a phone.
// NOTE(review): 'phone_seq' presumably determines the arc's output label --
// confirm against the definition in context-fst.cc.
225  inline void CreatePhoneOrEpsArc(StateId src, StateId dst, Label ilabel,
226  const std::vector<int32> &phone_seq, Arc *arc);
227 
228 
// If '*phone_seq' is nonempty, shifts it left by one and appends 'label' to
// it.
232  inline void ShiftSequenceLeft(Label label, std::vector<int32> *phone_seq);
233 
// Utility that builds '*full_phone_sequence' from 'seq' and 'label'. It is
// described as equivalent to a short sequence of steps beginning with
// "*full_phone_sequence = seq;".
// NOTE(review): the remaining steps are not visible here; see the definition
// in context-fst.cc.
243  inline void GetFullPhoneSequence(const std::vector<int32> &seq, Label label,
244  std::vector<int32> *full_phone_sequence);
245 
246  // Map type to map from vectors of int32 (representing phonetic contexts,
247  // which will be of dimension context_width - 1) to StateId (corresponding to
248  // the state index in this FST).
249  typedef unordered_map<std::vector<int32>, StateId,
251 
252  // Map type to map from vectors of int32 (representing ilabel-info,
253  // see http://kaldi-asr.org/doc/tree_externals.html#tree_ilabel) to
254  // Label (the output label in this FST).
255  typedef unordered_map<std::vector<int32>, Label,
257 
258 
259  // Sometimes called N, context_width_ is the width of the
260  // phonetic context, e.g. 3 for triphone, 2 for biphone, one for monophone.
261  // It is a user-specified value.
263 
264  // Sometimes called P, central_position_ is the (zero-based) "central
265  // position" in the context window, meaning the phone that is "in" a certain
266  // context. The most widely used values of (context-width, central-position)
267  // are: (3,1) for triphone, (1,0) for monophone, and (2, 1) for left biphone.
268  // This is also specified by the user. As an example, in the left-biphone
269  // [ 5, 6 ], we view it as "the phone numbered 6 with the phone numbered 5 as
270  // its left-context".
272 
273  // The following three variables were also passed in by the caller:
274 
275  // 'phone_syms_' are a set of phone-ids, typically 1, 2, .. num_phones.
277 
278  // disambig_syms_ is the set of integer ids of the disambiguation symbols,
279  // usually represented in text form as #0, #1, #2, etc. These are inserted
280  // into the grammar (for #0) and the lexicon (for #1, #2, ...) in order to
281  // make the composed FSTs determinizable. They are treated "specially" by the
282  // context FST in that they are not part of the context, they are just "passed
283  // through" via self-loops. See the Mohri chapter mentioned above for more
284  // information.
286 
287  // subsequential_symbol_, represented as "$" in the Mohri chapter mentioned
288  // above, is something which terminates phonetic sequences to force out the
289  // last phones-in-context. In our implementation it's added to det(LG) as a
290  // self-loop on final states before composing with C.
291  // (c.f. AddSubsequentialLoop()).
293 
294 
295  // pseudo_eps_symbol_, which in printed form we refer to as "#-1", is a symbol that
296  // appears on the ilabels of the context transducer C, i.e. the olabels of this
297  // FST which is C's inverse. It is a symbol we introduce to solve a special problem
298  // in systems with right-context (context_width_ > central_position_ + 1) that
299  // use disambiguation symbols. It exists to prevent CLG from being nondeterminizable.
300  //
301  // The issue is that, in this case, the disambiguation symbols are shifted
302  // left w.r.t. the phones, and there becomes an ambiguity, if a disambiguation
303  // symbol appears at the start of a sequence on the input of CLG, about
304  // whether it was at the very start of the input of LG, or just after, say,
305  // the first real phone. This can lead to determinization failure under
306  // certain circumstances. If we need pseudo_eps_symbol_ to be something other
307  // than epsilon, we create a special symbol with symbol-id 1 and sequence
308  // representation (ilabels entry) [ 0 ] .
310 
311  // maps from vector<int32>, representing phonetic contexts of length
312  // context_width_ - 1, to StateId. (The states of the "C" fst correspond to
313  // phonetic contexts, but we only create them as and when they are needed).
314  VectorToStateMap state_map_;
315 
316  // The inverse of 'state_map_': gives us the phonetic context corresponding to
317  // each state-id.
318  std::vector<std::vector<int32> > state_seqs_;
319 
320  // maps from vector<int32>, representing phonetic contexts of length
321  // context_width_ - 1, to Label. These are actually the output labels of this
322  // InverseContextFst (because of the "Inverse" part), but for historical
323  // reasons and because we've used the term "ilabels" in the documentation, we
324  // still call these "ilabels").
325  VectorToLabelMap ilabel_map_;
326 
327  // ilabel_info_ is the reverse map of ilabel_map_.
328  // Indexed by olabel (although we call this ilabel_info_ for historical
329  // reasons and because it is for the ilabels of C), ilabel_info_[i] gives
330  // information about the meaning of each symbol on the input of C
331  // aka the output of inv(C).
332  // See "http://kaldi-asr.org/doc/tree_externals.html#tree_ilabel".
333  std::vector<std::vector<int32> > ilabel_info_;
334 
335 };
336 
337 } // namespace fst
338 
339 
340 #endif // KALDI_FSTEXT_CONTEXT_FST_H_
fst::StdArc::StateId StateId
const std::vector< std::vector< int32 > > & IlabelInfo() const
Definition: context-fst.h:194
void WriteILabelInfo(std::ostream &os, bool binary, const vector< vector< int32 > > &info)
Utility function for writing ilabel-info vectors to disk.
Definition: context-fst.cc:325
VectorToStateMap state_map_
Definition: context-fst.h:314
A hashing function-object for vectors.
Definition: stl-utils.h:216
StdArc::StateId StateId
Definition: context-fst.h:155
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void CreateDisambigArc(StateId s, Label ilabel, Arc *arc)
Create disambiguation-symbol self-loop arc; where 'ilabel' must correspond to a disambiguation symbol...
Definition: context-fst.cc:184
fst::StdArc StdArc
virtual StateId Start()
Definition: context-fst.h:181
void CreatePhoneOrEpsArc(StateId src, StateId dst, Label ilabel, const std::vector< int32 > &phone_seq, Arc *arc)
Creates an arc, this function is to be called only when 'ilabel' corresponds to a phone...
Definition: context-fst.cc:196
Label FindLabel(const std::vector< int32 > &label_info)
Finds the label index corresponding to this context-window of phones (likely of width context_width_)...
Definition: context-fst.cc:231
std::vector< std::vector< int32 > > ilabel_info_
Definition: context-fst.h:333
virtual Weight Final(StateId s)
Definition: context-fst.cc:109
kaldi::int32 int32
InverseContextFst(Label subsequential_symbol, const std::vector< int32 > &phones, const std::vector< int32 > &disambig_syms, int32 context_width, int32 central_position)
Constructor.
Definition: context-fst.cc:27
bool IsDisambigSymbol(Label lab)
Definition: context-fst.h:213
unordered_map< std::vector< int32 >, StateId, kaldi::VectorHasher< int32 > > VectorToStateMap
Definition: context-fst.h:250
kaldi::ConstIntegerSet< Label > phone_syms_
Definition: context-fst.h:276
StdArc::Label Label
Definition: context-fst.h:157
void ReadILabelInfo(std::istream &is, bool binary, vector< vector< int32 > > *info)
Utility function for reading ilabel-info vectors from disk.
Definition: context-fst.cc:335
VectorToLabelMap ilabel_map_
Definition: context-fst.h:325
virtual bool GetArc(StateId s, Label ilabel, Arc *arc)
Note: ilabel must not be epsilon.
Definition: context-fst.cc:129
class DeterministicOnDemandFst is an "FST-like" base-class.
unordered_map< std::vector< int32 >, Label, kaldi::VectorHasher< int32 > > VectorToLabelMap
Definition: context-fst.h:256
void GetFullPhoneSequence(const std::vector< int32 > &seq, Label label, std::vector< int32 > *full_phone_sequence)
This utility function does something equivalent to the following 3 steps: *full_phone_sequence = seq;...
Definition: context-fst.cc:93
void ShiftSequenceLeft(Label label, std::vector< int32 > *phone_seq)
If phone_seq is nonempty then this function shifts it left by one and appends 'label' to it...
Definition: context-fst.cc:85
void SwapIlabelInfo(std::vector< std::vector< int32 > > *vec)
Definition: context-fst.h:200
StateId FindState(const std::vector< int32 > &seq)
Returns the state-id corresponding to this vector of phones; creates the state if necessary...
Definition: context-fst.cc:216
void AddSubsequentialLoop(StdArc::Label subseq_symbol, MutableFst< StdArc > *fst)
Modifies an FST so that it transduces the same paths, but the input side of the paths can all have the...
Definition: context-fst.cc:297
fst::StdArc::Label Label
fst::StdArc::Weight Weight
kaldi::ConstIntegerSet< Label > disambig_syms_
Definition: context-fst.h:285
void ComposeContext(const vector< int32 > &disambig_syms_in, int32 context_width, int32 central_position, VectorFst< StdArc > *ifst, VectorFst< StdArc > *ofst, vector< vector< int32 > > *ilabels_out, bool project_ifst)
Used in the command-line tool fstcomposecontext.
Definition: context-fst.cc:246
SymbolTable * CreateILabelInfoSymbolTable(const vector< vector< int32 > > &info, const SymbolTable &phones_symtab, std::string separator, std::string initial_disambig)
The following function is mainly of use for printing and debugging.
Definition: context-fst.cc:345
StdArc::Weight Weight
Definition: context-fst.h:156
bool IsPhoneSymbol(Label lab)
Definition: context-fst.h:215
std::vector< std::vector< int32 > > state_seqs_
Definition: context-fst.h:318