kws-functions.h
Go to the documentation of this file.
1 // kws/kws-functions.h
2 
3 // Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_KWS_KWS_FUNCTIONS_H_
22 #define KALDI_KWS_KWS_FUNCTIONS_H_
23 
24 #include <vector>
25 
26 #include "lat/kaldi-lattice.h"
27 #include "kws/kaldi-kws.h"
28 
29 namespace kaldi {
30 
31 // We store the time information of the arc into class "Interval". "Interval"
32 // has a public function "int32 Overlap(Interval interval)" which takes in
33 // another interval and returns the overlap of that interval and the current
34 // interval.
35 class Interval {
36  public:
37  Interval() {}
38  Interval(int32 start, int32 end) : start_(start), end_(end) {}
39  Interval(const Interval &interval) : start_(interval.Start()), end_(interval.End()) {}
40  int32 Overlap(Interval interval) {
41  return std::max<int32>(0, std::min(end_, interval.end_) -
42  std::max(start_, interval.start_));
43  }
44  int32 Start() const {return start_;}
45  int32 End() const {return end_;}
46  ~Interval() {}
47 
48  private:
51 };
52 
53 // Ordering predicate for the Interval class defined above: returns true if
54 // interval i1 is in front of interval i2, and false otherwise. Only the
55 // declaration lives in this header; the definition is not visible here.
56 bool CompareInterval(const Interval &i1,
57  const Interval &i2);
58 
59 // This function clusters the arcs with same word id and overlapping time-spans.
60 // Examples of clusters:
61 // 0 1 a a (0.1s ~ 0.5s) and 2 3 a a (0.2s ~ 0.4s) are within the same cluster;
62 // 0 1 a a (0.1s ~ 0.5s) and 5 6 b b (0.2s ~ 0.4s) are in different clusters;
63 // 0 1 a a (0.1s ~ 0.5s) and 7 8 a a (0.9s ~ 1.4s) are also in different clusters.
64 // It puts disambiguating symbols in the olabels, leaving the words on the
65 // ilabels.
67  const std::vector<int32> &state_times);
68 
69 // This function contains two steps: weight pushing and factor generation. The
70 // original ShortestDistance() is not very efficient, so the weight pushing
71 // and shortest path are done manually by computing the alphas and betas. The
72 // factor generation step expands the lattice to the LXTXT' semiring, with an
73 // additional start state and end state (and corresponding arcs) added.
74 bool CreateFactorTransducer(const CompactLattice &clat,  // read-only input
75  const std::vector<int32> &state_times,
76  int32 utterance_id,
77  KwsProductFst *factor_transducer);  // NOTE(review): looks like the output
78 
79 // Removes the arcs with long silence, where "long" means an arc whose number
80 // of frames exceeds the given max_silence_frames. This filtering is done
81 // because the gap between adjacent words in a keyword must be <= 0.5 second.
82 // Arcs created in the factor generation step must not be removed, so the
83 // "search area" is limited to the original arcs that existed before factor
84 // generation. The arcs are removed from *factor_transducer itself.
85 void RemoveLongSilences(int32 max_silence_frames,
86  const std::vector<int32> &state_times,
87  KwsProductFst *factor_transducer);
88 
89 // Factor merging step: encodes input and output, then applies weighted
90 // epsilon removal, determinization and minimization. Modifies factor_transducer.
91 void DoFactorMerging(KwsProductFst *factor_transducer,
92  KwsLexicographicFst *index_transducer);  // NOTE(review): presumably the result
93 
94 // Factor disambiguation step: removes the cluster ids from the non-final arcs
95 // and inserts disambiguation symbols for the final arcs of *index_transducer.
96 void DoFactorDisambiguation(KwsLexicographicFst *index_transducer);
97 
98 // Optimization step: performs encoded determinization and minimization.
99 void OptimizeFactorTransducer(KwsLexicographicFst *index_transducer,
100  int32 max_states,  // NOTE(review): looks like a state-count limit -- confirm
101  bool allow_partial);  // NOTE(review): semantics not documented here
102 
103 // The following two functions will, if GetVerboseLevel() >= 2, check that the
104 // cost of the second-best path in the transducer is not negative, and print
105 // some associated debugging info if GetVerboseLevel() >= 3. The best path in
106 // the transducer will typically be for the empty word sequence, and it may
107 // have negative cost (i.e. probability more than one), but the second-best
108 // one should not have negative cost. A warning will be printed if
109 // GetVerboseLevel() >= 2 and a substantially negative cost is found.
110 void MaybeDoSanityCheck(const KwsProductFst &factor_transducer);
111 void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer);
112 
113 
114 // This Mapper class is used in some of the internals; we have to declare it
115 // in the header because, for the sake of compilation time, we split up the
116 // implementation into two .cc files.
118  public:
123 
125 // Maps a FromArc to a ToArc: labels and nextstate are copied, weight rebuilt.
126  inline ToArc operator()(const FromArc &arc) const {
127  return ToArc(arc.ilabel,
128  arc.olabel,
129  (arc.weight == FromWeight::Zero() ?
130  ToWeight::Zero() :  // a Zero source weight maps to the target Zero
131  ToWeight(arc.weight.Value1().Value(),  // first component kept as is
132  StdLStdWeight(arc.weight.Value2().Value1().Value(),  // second component
133  arc.weight.Value2().Value2().Value()))),  // repacked as StdLStdWeight
134  arc.nextstate);
135  }
136 // Final-state action reported by this mapper: fst::MAP_NO_SUPERFINAL.
137  fst::MapFinalAction FinalAction() const { return fst::MAP_NO_SUPERFINAL; }
138 // Input-symbols action reported by this mapper: fst::MAP_COPY_SYMBOLS.
139  fst::MapSymbolsAction InputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS; }
140 // Output-symbols action reported by this mapper: fst::MAP_COPY_SYMBOLS.
141  fst::MapSymbolsAction OutputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS;}
142 // Returns the property bits it is given, unchanged.
143  uint64 Properties(uint64 props) const { return props; }
144 };
145 
146 
147 
148 } // namespace kaldi
149 
150 
151 #endif // KALDI_KWS_KWS_FUNCTIONS_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
fst::VectorFst< KwsProductArc > KwsProductFst
Definition: kaldi-kws.h:49
fst::MapSymbolsAction OutputSymbolsAction() const
bool CompareInterval(const Interval &i1, const Interval &i2)
bool ClusterLattice(CompactLattice *clat, const std::vector< int32 > &state_times)
Interval(int32 start, int32 end)
Definition: kws-functions.h:38
kaldi::int32 int32
LogXStdXStdprimeWeight KwsProductWeight
Definition: kaldi-kws.h:47
Interval(const Interval &interval)
Definition: kws-functions.h:39
fst::LexicographicWeight< TropicalWeight, TropicalWeight > StdLStdWeight
Definition: kaldi-kws.h:34
ToArc operator()(const FromArc &arc) const
int32 Overlap(Interval interval)
Definition: kws-functions.h:40
StdLStdLStdArc KwsLexicographicArc
Definition: kaldi-kws.h:45
fst::VectorFst< KwsLexicographicArc > KwsLexicographicFst
Definition: kaldi-kws.h:46
void DoFactorMerging(KwsProductFst *factor_transducer, KwsLexicographicFst *index_transducer)
int32 Start() const
Definition: kws-functions.h:44
bool CreateFactorTransducer(const CompactLattice &clat, const std::vector< int32 > &state_times, int32 utterance_id, KwsProductFst *factor_transducer)
fst::MapSymbolsAction InputSymbolsAction() const
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer)
void RemoveLongSilences(int32 max_silence_frames, const std::vector< int32 > &state_times, KwsProductFst *factor_transducer)
StdLStdLStdWeight KwsLexicographicWeight
Definition: kaldi-kws.h:44
LogXStdXStdprimeArc KwsProductArc
Definition: kaldi-kws.h:48
void DoFactorDisambiguation(KwsLexicographicFst *index_transducer)
int32 End() const
Definition: kws-functions.h:45
void OptimizeFactorTransducer(KwsLexicographicFst *index_transducer, int32 max_states, bool allow_partial)