kws-functions2.cc
Go to the documentation of this file.
1 // kws/kws-functions2.cc
2 
3 // Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "lat/lattice-functions.h"
22 #include "kws/kws-functions.h"
25 
26 // this file implements things in kws-functions.h; it's an overflow from
27 // kws-functions.cc (we split it up for compilation speed and to avoid
28 // generating too-large object files on cygwin).
29 
30 namespace kaldi {
31 
32 
33 // This function replaces a symbol with epsilon wherever it appears
34 // (fst must be an acceptor).
35 template<class Arc>
36 static void ReplaceSymbolWithEpsilon(typename Arc::Label symbol,
37  fst::VectorFst<Arc> *fst) {
38  typedef typename Arc::StateId StateId;
39  for (StateId s = 0; s < fst->NumStates(); s++) {
40  for (fst::MutableArcIterator<fst::VectorFst<Arc> > aiter(fst, s);
41  !aiter.Done(); aiter.Next()) {
42  Arc arc = aiter.Value();
43  KALDI_ASSERT(arc.ilabel == arc.olabel);
44  if (arc.ilabel == symbol) {
45  arc.ilabel = 0;
46  arc.olabel = 0;
47  aiter.SetValue(arc);
48  }
49  }
50  }
51 }
52 
53 void DoFactorMerging(KwsProductFst *factor_transducer,
54  KwsLexicographicFst *index_transducer) {
55  using namespace fst;
57 
58  // Encode the transducer first
59  EncodeMapper<KwsProductArc> encoder(kEncodeLabels, ENCODE);
60  Encode(factor_transducer, &encoder);
61 
62 
63  // We want DeterminizeStar to remove epsilon arcs, so turn whatever it encoded
64  // epsilons as, into actual epsilons.
65  {
66  KwsProductArc epsilon_arc(0, 0, KwsProductWeight::One(), 0);
67  Label epsilon_label = encoder(epsilon_arc).ilabel;
68  ReplaceSymbolWithEpsilon(epsilon_label, factor_transducer);
69  }
70 
71 
72  MaybeDoSanityCheck(*factor_transducer);
73 
74  // Use DeterminizeStar
75  KALDI_VLOG(2) << "DoFactorMerging: determinization...";
76  KwsProductFst dest_transducer;
77  DeterminizeStar(*factor_transducer, &dest_transducer);
78 
79  MaybeDoSanityCheck(dest_transducer);
80 
81  // Commenting the minimization out, as it moves states/arcs in a way we don't
82  // want in some rare cases. For example, if we have two arcs from starting
83  // state, which have same words on the input side, but different cluster IDs
84  // on the output side, it may make the two arcs sharing a common final arc,
85  // which will cause problem in the factor disambiguation stage (we will not
86  // be able to add disambiguation symbols for both paths). We do a final step
87  // optimization anyway so commenting this out shouldn't matter too much.
88  // KALDI_VLOG(2) << "DoFactorMerging: minimization...";
89  // Minimize(&dest_transducer);
90 
91  MaybeDoSanityCheck(dest_transducer);
92 
93  Decode(&dest_transducer, encoder);
94 
95  Map(dest_transducer, index_transducer, KwsProductFstToKwsLexicographicFstMapper());
96 }
97 
99  using namespace fst;
101 
102  StateId ns = index_transducer->NumStates();
103  for (StateId s = 0; s < ns; s++) {
104  for (MutableArcIterator<KwsLexicographicFst>
105  aiter(index_transducer, s); !aiter.Done(); aiter.Next()) {
106  KwsLexicographicArc arc = aiter.Value();
107  if (index_transducer->Final(arc.nextstate) != KwsLexicographicWeight::Zero())
108  arc.ilabel = s;
109  else
110  arc.olabel = 0;
111  aiter.SetValue(arc);
112  }
113  }
114 }
115 
117  int32 max_states,
118  bool allow_partial) {
119  using namespace fst;
120  KwsLexicographicFst ifst = *index_transducer;
121  EncodeMapper<KwsLexicographicArc> encoder(kEncodeLabels, ENCODE);
122  Encode(&ifst, &encoder);
123  KALDI_VLOG(2) << "OptimizeFactorTransducer: determinization...";
124  if (allow_partial) {
125  DeterminizeStar(ifst, index_transducer, kDelta, NULL, max_states, true);
126  } else {
127  try {
128  DeterminizeStar(ifst, index_transducer, kDelta, NULL, max_states,
129  false);
130  } catch(const std::exception &e) {
131  KALDI_WARN << e.what();
132  *index_transducer = ifst;
133  }
134  }
135  KALDI_VLOG(2) << "OptimizeFactorTransducer: minimization...";
136  Minimize(index_transducer, static_cast<KwsLexicographicFst *>(NULL), fst::kDelta, true);
137  Decode(index_transducer, encoder);
138 }
139 
140 } // end namespace kaldi
fst::StdArc::StateId StateId
fst::StdArc::Label Label
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
fst::VectorFst< KwsProductArc > KwsProductFst
Definition: kaldi-kws.h:49
Lattice::StateId StateId
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
kaldi::int32 int32
StdLStdLStdArc KwsLexicographicArc
Definition: kaldi-kws.h:45
fst::VectorFst< KwsLexicographicArc > KwsLexicographicFst
Definition: kaldi-kws.h:46
void DoFactorMerging(KwsProductFst *factor_transducer, KwsLexicographicFst *index_transducer)
static void ReplaceSymbolWithEpsilon(typename Arc::Label symbol, fst::VectorFst< Arc > *fst)
#define KALDI_WARN
Definition: kaldi-error.h:150
fst::StdArc::Label Label
void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
LogXStdXStdprimeArc KwsProductArc
Definition: kaldi-kws.h:48
void DoFactorDisambiguation(KwsLexicographicFst *index_transducer)
void OptimizeFactorTransducer(KwsLexicographicFst *index_transducer, int32 max_states, bool allow_partial)
bool DeterminizeStar(F &ifst, MutableFst< typename F::Arc > *ofst, float delta, bool *debug_ptr, int max_states, bool allow_partial)
This function implements the normal version of DeterminizeStar, in which the output strings are repre...