online-endpoint.h
Go to the documentation of this file.
1 // online2/online-endpoint.h
2 
3 // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_ONLINE2_ONLINE_ENDPOINT_H_
22 #define KALDI_ONLINE2_ONLINE_ENDPOINT_H_
23 
24 #include <string>
25 #include <vector>
26 #include <deque>
27 
28 #include "matrix/matrix-lib.h"
29 #include "util/common-utils.h"
30 #include "base/kaldi-error.h"
31 #include "feat/feature-functions.h"
32 #include "feat/feature-mfcc.h"
33 #include "feat/feature-plp.h"
34 #include "itf/online-feature-itf.h"
35 #include "lat/kaldi-lattice.h"
36 #include "hmm/transition-model.h"
39 
40 namespace kaldi {
43 
44 
93  // The values set in the initializer will probably never be used.
94  OnlineEndpointRule(bool must_contain_nonsilence = true,
95  BaseFloat min_trailing_silence = 1.0,
96  BaseFloat max_relative_cost = std::numeric_limits<BaseFloat>::infinity(),
97  BaseFloat min_utterance_length = 0.0):
98  must_contain_nonsilence(must_contain_nonsilence),
99  min_trailing_silence(min_trailing_silence),
100  max_relative_cost(max_relative_cost),
101  min_utterance_length(min_utterance_length) { }
102 
103  void Register(OptionsItf *opts) {
104  opts->Register("must-contain-nonsilence", &must_contain_nonsilence,
105  "If true, for this endpointing rule to apply there must"
106  "be nonsilence in the best-path traceback.");
107  opts->Register("min-trailing-silence", &min_trailing_silence,
108  "This endpointing rule requires duration of trailing silence"
109  "(in seconds) to be >= this value.");
110  opts->Register("max-relative-cost", &max_relative_cost,
111  "This endpointing rule requires relative-cost of final-states"
112  " to be <= this value (describes how good the probability "
113  "of final-states is).");
114  opts->Register("min-utterance-length", &min_utterance_length,
115  "This endpointing rule requires utterance-length (in seconds) "
116  "to be >= this value.");
117  };
118  // for convenience add this RegisterWithPrefix function, because
119  // we'll be registering this as a config with several different
120  // prefixes.
121  void RegisterWithPrefix(const std::string &prefix, OptionsItf *opts) {
122  ParseOptions po_prefix(prefix, opts);
123  this->Register(&po_prefix);
124  }
125 };
126 
128  std::string silence_phones;
129 
136 
151 
153  rule1(false, 5.0, std::numeric_limits<BaseFloat>::infinity(), 0.0),
154  rule2(true, 0.5, 2.0, 0.0),
155  rule3(true, 1.0, 8.0, 0.0),
156  rule4(true, 2.0, std::numeric_limits<BaseFloat>::infinity(), 0.0),
157  rule5(false, 0.0, std::numeric_limits<BaseFloat>::infinity(), 20.0) { }
158 
159  void Register(OptionsItf *opts) {
160  opts->Register("endpoint.silence-phones", &silence_phones, "List of phones "
161  "that are considered to be silence phones by the "
162  "endpointing code.");
163  rule1.RegisterWithPrefix("endpoint.rule1", opts);
164  rule2.RegisterWithPrefix("endpoint.rule2", opts);
165  rule3.RegisterWithPrefix("endpoint.rule3", opts);
166  rule4.RegisterWithPrefix("endpoint.rule4", opts);
167  rule5.RegisterWithPrefix("endpoint.rule5", opts);
168  }
169 };
170 
171 
172 
173 
174 
175 
179 bool EndpointDetected(const OnlineEndpointConfig &config,
180  int32 num_frames_decoded,
181  int32 trailing_silence_frames,
182  BaseFloat frame_shift_in_seconds,
183  BaseFloat final_relative_cost);
184 
185 
191 template <typename FST, typename DEC>
193  const std::string &silence_phones,
194  const DEC &decoder);
195 
196 
199 template <typename FST>
200 bool EndpointDetected(
201  const OnlineEndpointConfig &config,
202  const TransitionModel &tmodel,
203  BaseFloat frame_shift_in_seconds,
204  const LatticeFasterOnlineDecoderTpl<FST> &decoder);
205 
208 template <typename FST>
209 bool EndpointDetected(
210  const OnlineEndpointConfig &config,
211  const TransitionModel &tmodel,
212  BaseFloat frame_shift_in_seconds,
214 
215 
216 
217 
219 
220 } // namespace kaldi
221 
222 
223 
224 #endif // KALDI_ONLINE2_ONLINE_ENDPOINT_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
OnlineEndpointRule rule1
e.g.
LatticeIncrementalOnlineDecoderTpl is as LatticeIncrementalDecoderTpl but also supports an efficient ...
OnlineEndpointRule rule2
rule2 times out after 0.5 seconds of silence if we reached the final-state with good probability (rel...
kaldi::int32 int32
bool EndpointDetected(const OnlineEndpointConfig &config, int32 num_frames_decoded, int32 trailing_silence_frames, BaseFloat frame_shift_in_seconds, BaseFloat final_relative_cost)
This function returns true if this set of endpointing rules thinks we should terminate decoding...
OnlineEndpointRule rule5
rule5 times out after the utterance is 20 seconds long, regardless of anything else.
void Register(OptionsItf *opts)
OnlineEndpointRule rule4
rule4 times out after 2.0 seconds of silence after decoding something, even if we did not reach a fin...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
OnlineEndpointRule(bool must_contain_nonsilence=true, BaseFloat min_trailing_silence=1.0, BaseFloat max_relative_cost=std::numeric_limits< BaseFloat >::infinity(), BaseFloat min_utterance_length=0.0)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void RegisterWithPrefix(const std::string &prefix, OptionsItf *opts)
int32 TrailingSilenceLength(const TransitionModel &tmodel, const std::string &silence_phones_str, const DEC &decoder)
returns the number of frames of trailing silence in the best-path traceback (not using final-probs)...
LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also supports an efficient way to get...
void Register(OptionsItf *opts)
This header contains a simple facility for endpointing, that should be used in conjunction with the "...
OnlineEndpointRule rule3
rule3 times out after 1.0 seconds of silence if we reached the final-state with OK probability (relat...