training-graph-compiler.cc
Go to the documentation of this file.
1 // decoder/training-graph-compiler.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2018 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 
12 // http://www.apache.org/licenses/LICENSE-2.0
13 
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
21 #include "hmm/hmm-utils.h" // for GetHTransducer
22 
23 namespace kaldi {
24 
25 
27  const ContextDependency &ctx_dep, // Bound to the reference member ctx_dep_ below, so it must outlive this object.
28  fst::VectorFst<fst::StdArc> *lex_fst,
29  const std::vector<int32> &disambig_syms,
30  const TrainingGraphCompilerOptions &opts):
31  trans_model_(trans_model), ctx_dep_(ctx_dep), lex_fst_(lex_fst),
32  disambig_syms_(disambig_syms), opts_(opts) {
// Constructor: stores the model, context dependency, lexicon FST pointer,
// disambiguation symbols and options, validates the symbol sets, picks the
// subsequential symbol, and prepares the lexicon FST for later composition.
// Note that the lexicon FST is modified in place through lex_fst_ (it may get
// a subsequential loop added, and it is always arc-sorted on output labels).
// NOTE(review): the first line of this signature (listing line 26) is absent
// from this listing.
33  using namespace fst;
34  const std::vector<int32> &phone_syms = trans_model_.GetPhones(); // needed to create context fst.
35 
// The code below relies on phone_syms being non-empty and sorted
// (back() is used as the maximum, and binary_search needs sorted input).
36  KALDI_ASSERT(!phone_syms.empty());
37  KALDI_ASSERT(IsSortedAndUniq(phone_syms));
// NOTE(review): listing line 38 is absent here. The use of
// disambig_syms_.back() as a maximum further down presumes disambig_syms_ is
// sorted; the cross-reference list mentions SortAndUniq(), so presumably the
// missing line sorts it -- confirm against the real file.
// A symbol may not be both a phone and a disambiguation symbol.
39  for (int32 i = 0; i < disambig_syms_.size(); i++)
40  if (std::binary_search(phone_syms.begin(), phone_syms.end(),
41  disambig_syms_[i]))
42  KALDI_ERR << "Disambiguation symbol " << disambig_syms_[i]
43  << " is also a phone.";
44 
// Choose a subsequential symbol that is larger than the last phone and,
// if there are any disambiguation symbols, larger than the last of those too.
45  subsequential_symbol_ = 1 + phone_syms.back();
46  if (!disambig_syms_.empty() && subsequential_symbol_ <= disambig_syms_.back())
47  subsequential_symbol_ = 1 + disambig_syms_.back();
48 
49  {
50  int32 N = ctx_dep.ContextWidth(),
51  P = ctx_dep.CentralPosition();
// P != N-1 means the central position is not the last one in the context
// window, i.e. there is right context.
52  if (P != N-1)
53  AddSubsequentialLoop(subsequential_symbol_, lex_fst_); // This is needed for
54  // systems with right-context or we will not successfully compose
55  // with C.
56  }
57 
58  { // make sure lexicon is olabel sorted.
59  fst::OLabelCompare<fst::StdArc> olabel_comp;
60  fst::ArcSort(lex_fst_, olabel_comp);
61  }
62 }
63 
65  const std::vector<int32> &transcript,
66  fst::VectorFst<fst::StdArc> *out_fst) {
// Compiles the training graph for a single transcript given as a sequence of
// integer labels: the transcript is turned into a linear acceptor, which is
// then handed to CompileGraph().  The result is written to *out_fst and the
// return value is whatever CompileGraph() returns.
// NOTE(review): the first line of this signature (listing line 64) is absent
// from this listing.
67  using namespace fst;
68  VectorFst<StdArc> word_fst;
69  MakeLinearAcceptor(transcript, &word_fst);
70  return CompileGraph(word_fst, out_fst);
71 }
72 
// Compiles a single training graph from a word-level FST.
//   word_fst: the word-level grammar/transcript FST to compile.
//   out_fst:  must be non-NULL; receives the resulting graph (transition-ids
//             to words, per the comment on trans2word_fst below).
// Returns true; as written there is no path that returns false, problems are
// reported through KALDI_ASSERT instead.
// Pipeline: compose lexicon with word_fst, compose with the inverse context
// FST, compose with H, determinize, remove H's disambiguation symbols,
// minimize, then add self-loops.
73 bool TrainingGraphCompiler::CompileGraph(const fst::VectorFst<fst::StdArc> &word_fst,
74  fst::VectorFst<fst::StdArc> *out_fst) {
75  using namespace fst;
76  KALDI_ASSERT(lex_fst_ !=NULL);
77  KALDI_ASSERT(out_fst != NULL);
78 
79  VectorFst<StdArc> phone2word_fst;
80  // TableCompose more efficient than compose.
81  TableCompose(*lex_fst_, word_fst, &phone2word_fst, &lex_cache_);
82 
// An empty composition result (no start state) is treated as a fatal error.
83  KALDI_ASSERT(phone2word_fst.Start() != kNoStateId);
84 
85  const std::vector<int32> &phone_syms = trans_model_.GetPhones(); // needed to create context fst.
86 
87  // inv_cfst will be expanded on the fly, as needed.
// NOTE(review): listing lines 88 and 90-92 are absent, so the declaration of
// inv_cfst is incomplete in this listing; only the phone_syms argument
// survives.  Restore it from the real file before relying on this text.
89  phone_syms,
93 
94 
95  VectorFst<StdArc> ctx2word_fst;
96  ComposeDeterministicOnDemandInverse(phone2word_fst, &inv_cfst, &ctx2word_fst);
97  // now ctx2word_fst is C * LG, assuming phone2word_fst is written as LG.
98  KALDI_ASSERT(ctx2word_fst.Start() != kNoStateId);
99 
100  HTransducerConfig h_cfg;
// NOTE(review): listing line 101 is absent; presumably it configures h_cfg
// (the cross-reference list mentions transition_scale) -- confirm.
102 
103  std::vector<int32> disambig_syms_h; // disambiguation symbols on
104  // input side of H.
// GetHTransducer returns a heap-allocated FST owned by the caller; it is
// released by the "delete H" at the end of this function.
105  VectorFst<StdArc> *H = GetHTransducer(inv_cfst.IlabelInfo(),
106  ctx_dep_,
107  trans_model_,
108  h_cfg,
109  &disambig_syms_h);
110 
// trans2word_fst is just an alias for *out_fst, so every step from here on
// writes directly into the caller's output FST.
111  VectorFst<StdArc> &trans2word_fst = *out_fst; // transition-id to word.
112  TableCompose(*H, ctx2word_fst, &trans2word_fst);
113 
114  KALDI_ASSERT(trans2word_fst.Start() != kNoStateId);
115 
116  // Epsilon-removal and determinization combined. This will fail if not determinizable.
117  DeterminizeStarInLog(&trans2word_fst);
118 
119  if (!disambig_syms_h.empty()) {
120  RemoveSomeInputSymbols(disambig_syms_h, &trans2word_fst);
121  // Full epsilon removal is not done after this phase, as it is
122  // a little slow; only the local variant is run, and only if opts_.rm_eps is set.
123  if (opts_.rm_eps)
124  RemoveEpsLocal(&trans2word_fst);
125  }
126 
127 
128  // Encoded minimization.
129  MinimizeEncoded(&trans2word_fst);
130 
// An empty disambiguation-symbol list is passed to AddSelfLoops; presumably
// because H's disambiguation symbols were already removed above -- confirm.
131  std::vector<int32> disambig;
132  bool check_no_self_loops = true;
133  AddSelfLoops(trans_model_,
134  disambig,
// NOTE(review): listing line 135 is absent; going by the AddSelfLoops
// signature it would be the self_loop_scale argument -- confirm.
136  opts_.reorder,
137  check_no_self_loops,
138  &trans2word_fst);
139 
// NOTE(review): H is only released here, so it would leak if any step between
// GetHTransducer and this line throws -- confirm whether that can happen.
140  delete H;
141  return true;
142 }
143 
144 
146  const std::vector<std::vector<int32> > &transcripts,
147  std::vector<fst::VectorFst<fst::StdArc>*> *out_fsts) {
// Batch version of CompileGraphFromText(): builds one linear acceptor per
// transcript, passes them all to CompileGraphs(), and returns its result.
// The temporary word-level FSTs are allocated and freed inside this function;
// the FSTs placed in *out_fsts are not freed here.
// NOTE(review): the first line of this signature (listing line 145) is absent
// from this listing.
148  using namespace fst;
149  std::vector<const VectorFst<StdArc>* > word_fsts(transcripts.size());
150  for (size_t i = 0; i < transcripts.size(); i++) {
151  VectorFst<StdArc> *word_fst = new VectorFst<StdArc>();
152  MakeLinearAcceptor(transcripts[i], word_fst);
153  word_fsts[i] = word_fst;
154  }
155  bool ans = CompileGraphs(word_fsts, out_fsts);
// Release the temporary acceptors.
// NOTE(review): they would leak if CompileGraphs() throws -- confirm whether
// that can happen.
156  for (size_t i = 0; i < transcripts.size(); i++)
157  delete word_fsts[i];
158  return ans;
159 }
160 
162  const std::vector<const fst::VectorFst<fst::StdArc>* > &word_fsts,
163  std::vector<fst::VectorFst<fst::StdArc>* > *out_fsts) {
164 
// Batch version of CompileGraph(): compiles one training graph per entry of
// word_fsts, building the inverse context FST and the H transducer only once
// and reusing them for every graph.
//   word_fsts: word-level FSTs to compile; each pointer is dereferenced.
//   out_fsts:  must be non-NULL and empty on entry; on return it holds one
//              heap-allocated FST per input.  They are not freed in this
//              function, so presumably the caller owns them -- confirm.
// Returns true; as written there is no path that returns false, problems are
// reported through KALDI_ASSERT instead.
// NOTE(review): the first line of this signature (listing line 161) is absent
// from this listing.
165  using namespace fst;
166  KALDI_ASSERT(lex_fst_ !=NULL);
167  KALDI_ASSERT(out_fsts != NULL && out_fsts->empty());
168  out_fsts->resize(word_fsts.size(), NULL);
169  if (word_fsts.empty()) return true;
170 
171  const std::vector<int32> &phone_syms = trans_model_.GetPhones(); // needed to create context fst.
172 
173  // inv_cfst will be expanded on the fly, as needed.
// NOTE(review): listing lines 174 and 176-178 are absent, so the declaration
// of inv_cfst is incomplete in this listing; only the phone_syms argument
// survives.  Restore it from the real file before relying on this text.
175  phone_syms,
179 
// First pass: build the context-dependent-phone-to-word FST for every input.
// This has to finish for all inputs before H is built, because H is created
// from inv_cfst.IlabelInfo() after this loop.
180  for (size_t i = 0; i < word_fsts.size(); i++) {
181  VectorFst<StdArc> phone2word_fst;
182  // TableCompose more efficient than compose.
183  TableCompose(*lex_fst_, *(word_fsts[i]), &phone2word_fst, &lex_cache_);
184 
185  KALDI_ASSERT(phone2word_fst.Start() != kNoStateId &&
186  "Perhaps you have words missing in your lexicon?");
187 
188  VectorFst<StdArc> ctx2word_fst;
189  ComposeDeterministicOnDemandInverse(phone2word_fst, &inv_cfst, &ctx2word_fst);
190  // now ctx2word_fst is C * LG, assuming phone2word_fst is written as LG.
191  KALDI_ASSERT(ctx2word_fst.Start() != kNoStateId);
192 
193  (*out_fsts)[i] = ctx2word_fst.Copy(); // For now this contains the FST with symbols
194  // representing phones-in-context.
195  }
196 
197  HTransducerConfig h_cfg;
// NOTE(review): listing line 198 is absent; presumably it configures h_cfg
// (the cross-reference list mentions transition_scale) -- confirm.
199 
200  std::vector<int32> disambig_syms_h;
// GetHTransducer returns a heap-allocated FST owned by the caller; it is
// released by the "delete H" at the end of this function.
201  VectorFst<StdArc> *H = GetHTransducer(inv_cfst.IlabelInfo(),
202  ctx_dep_,
203  trans_model_,
204  h_cfg,
205  &disambig_syms_h);
206 
// Second pass: turn each stored phones-in-context FST into the final graph,
// following the same steps as CompileGraph().
207  for (size_t i = 0; i < out_fsts->size(); i++) {
208  VectorFst<StdArc> &ctx2word_fst = *((*out_fsts)[i]);
209  VectorFst<StdArc> trans2word_fst;
210  TableCompose(*H, ctx2word_fst, &trans2word_fst);
211 
212  DeterminizeStarInLog(&trans2word_fst);
213 
214  if (!disambig_syms_h.empty()) {
215  RemoveSomeInputSymbols(disambig_syms_h, &trans2word_fst);
// Only the local (partial) epsilon removal is run, and only on request.
216  if (opts_.rm_eps)
217  RemoveEpsLocal(&trans2word_fst);
218  }
219 
220  // Encoded minimization.
221  MinimizeEncoded(&trans2word_fst);
222 
// An empty disambiguation-symbol list is passed to AddSelfLoops; presumably
// because H's disambiguation symbols were already removed above -- confirm.
223  std::vector<int32> disambig;
224  bool check_no_self_loops = true;
225  AddSelfLoops(trans_model_,
226  disambig,
// NOTE(review): listing line 227 is absent; going by the AddSelfLoops
// signature it would be the self_loop_scale argument -- confirm.
228  opts_.reorder,
229  check_no_self_loops,
230  &trans2word_fst);
231 
232  KALDI_ASSERT(trans2word_fst.Start() != kNoStateId);
233 
// Overwrite the intermediate FST stored in the first pass with the final
// graph; the allocation made by Copy() above is reused.
234  *((*out_fsts)[i]) = trans2word_fst;
235  }
236 
// NOTE(review): H is only released here, so it would leak if any step between
// GetHTransducer and this line throws -- confirm whether that can happen.
237  delete H;
238  return true;
239 }
240 
241 
242 } // end namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual int32 ContextWidth() const
ContextWidth() returns the value N (e.g.
Definition: context-dep.h:61
const ContextDependency & ctx_dep_
void TableCompose(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const TableComposeOptions &opts=TableComposeOptions())
const std::vector< int32 > & GetPhones() const
Returns a sorted, unique list of phones.
void RemoveEpsLocal(MutableFst< Arc > *fst)
RemoveEpsLocal remove some (but not necessarily all) epsilons in an FST, using an algorithm that is g...
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void AddSelfLoops(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool reorder, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
For context, see AddSelfLoops().
Definition: hmm-utils.cc:602
fst::TableComposeCache< fst::Fst< fst::StdArc > > lex_cache_
void MinimizeEncoded(VectorFst< Arc > *fst, float delta=kDelta)
Definition: fstext-utils.h:114
kaldi::int32 int32
const TransitionModel & trans_model_
void SortAndUniq(std::vector< T > *vec)
Sorts a vector and uniq's it (removes duplicates).
Definition: stl-utils.h:39
fst::VectorFst< fst::StdArc > * lex_fst_
void ComposeDeterministicOnDemandInverse(const Fst< Arc > &right, DeterministicOnDemandFst< Arc > *left, MutableFst< Arc > *fst_composed)
This function does '*fst_composed = Compose(Inverse(*fst2), fst1)'. Note that the arguments are revers...
void DeterminizeStarInLog(VectorFst< StdArc > *fst, float delta, bool *debug_ptr, int max_states)
void MakeLinearAcceptor(const std::vector< I > &labels, MutableFst< Arc > *ofst)
Creates unweighted linear acceptor from symbol sequence.
Configuration class for the GetHTransducer() function; see The HTransducerConfig configuration class ...
Definition: hmm-utils.h:36
bool CompileGraphFromText(const std::vector< int32 > &transcript, fst::VectorFst< fst::StdArc > *out_fst)
TrainingGraphCompiler(const TransitionModel &trans_model, const ContextDependency &ctx_dep, fst::VectorFst< fst::StdArc > *lex_fst, const std::vector< int32 > &disambig_syms, const TrainingGraphCompilerOptions &opts)
virtual int32 CentralPosition() const
Central position P of the phone context, in 0-based numbering, e.g.
Definition: context-dep.h:62
#define KALDI_ERR
Definition: kaldi-error.h:147
void AddSubsequentialLoop(StdArc::Label subseq_symbol, MutableFst< StdArc > *fst)
Modifies an FST so that it transduces the same paths, but the input side of the paths can all have the...
Definition: context-fst.cc:297
bool CompileGraphsFromText(const std::vector< std::vector< int32 > > &word_grammar, std::vector< fst::VectorFst< fst::StdArc > *> *out_fsts)
bool CompileGraph(const fst::VectorFst< fst::StdArc > &word_grammar, fst::VectorFst< fst::StdArc > *out_fst)
fst::VectorFst< fst::StdArc > * GetHTransducer(const std::vector< std::vector< int32 > > &ilabel_info, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const HTransducerConfig &config, std::vector< int32 > *disambig_syms_left)
Returns the H transducer; result owned by caller.
Definition: hmm-utils.cc:254
BaseFloat transition_scale
Transition log-prob scale, see Scaling of transition and acoustic probabilities.
Definition: hmm-utils.h:40
TrainingGraphCompilerOptions opts_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
bool CompileGraphs(const std::vector< const fst::VectorFst< fst::StdArc > *> &word_fsts, std::vector< fst::VectorFst< fst::StdArc > *> *out_fsts)
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
void RemoveSomeInputSymbols(const std::vector< I > &to_remove, MutableFst< Arc > *fst)
RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from the input side of the FST...