21 #ifndef KALDI_ONLINE2_ONLINE_ENDPOINT_H_ 22 #define KALDI_ONLINE2_ONLINE_ENDPOINT_H_ 96 BaseFloat max_relative_cost = std::numeric_limits<BaseFloat>::infinity(),
98 must_contain_nonsilence(must_contain_nonsilence),
99 min_trailing_silence(min_trailing_silence),
100 max_relative_cost(max_relative_cost),
101 min_utterance_length(min_utterance_length) { }
104 opts->
Register(
"must-contain-nonsilence", &must_contain_nonsilence,
105 "If true, for this endpointing rule to apply there must" 106 "be nonsilence in the best-path traceback.");
107 opts->
Register(
"min-trailing-silence", &min_trailing_silence,
108 "This endpointing rule requires duration of trailing silence" 109 "(in seconds) to be >= this value.");
110 opts->
Register(
"max-relative-cost", &max_relative_cost,
111 "This endpointing rule requires relative-cost of final-states" 112 " to be <= this value (describes how good the probability " 113 "of final-states is).");
114 opts->
Register(
"min-utterance-length", &min_utterance_length,
115 "This endpointing rule requires utterance-length (in seconds) " 116 "to be >= this value.");
153 rule1(false, 5.0, std::numeric_limits<
BaseFloat>::infinity(), 0.0),
154 rule2(true, 0.5, 2.0, 0.0),
155 rule3(true, 1.0, 8.0, 0.0),
156 rule4(true, 2.0, std::numeric_limits<
BaseFloat>::infinity(), 0.0),
157 rule5(false, 0.0, std::numeric_limits<
BaseFloat>::infinity(), 20.0) { }
160 opts->
Register(
"endpoint.silence-phones", &silence_phones,
"List of phones " 161 "that are considered to be silence phones by the " 162 "endpointing code.");
180 int32 num_frames_decoded,
181 int32 trailing_silence_frames,
191 template <
typename FST,
typename DEC>
193 const std::string &silence_phones,
199 template <
typename FST>
208 template <
typename FST>
224 #endif // KALDI_ONLINE2_ONLINE_ENDPOINT_ This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
OnlineEndpointRule rule1
e.g.
LatticeIncrementalOnlineDecoderTpl is as LatticeIncrementalDecoderTpl but also supports an efficient ...
OnlineEndpointRule rule2
rule2 times out after 0.5 seconds of silence if we reached the final-state with good probability (rel...
bool EndpointDetected(const OnlineEndpointConfig &config, int32 num_frames_decoded, int32 trailing_silence_frames, BaseFloat frame_shift_in_seconds, BaseFloat final_relative_cost)
This function returns true if this set of endpointing rules thinks we should terminate decoding...
OnlineEndpointRule rule5
rule5 times out after the utterance is 20 seconds long, regardless of anything else.
bool must_contain_nonsilence
void Register(OptionsItf *opts)
OnlineEndpointRule rule4
rule4 times out after 2.0 seconds of silence after decoding something, even if we did not reach a fin...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
OnlineEndpointRule(bool must_contain_nonsilence=true, BaseFloat min_trailing_silence=1.0, BaseFloat max_relative_cost=std::numeric_limits< BaseFloat >::infinity(), BaseFloat min_utterance_length=0.0)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void RegisterWithPrefix(const std::string &prefix, OptionsItf *opts)
BaseFloat max_relative_cost
BaseFloat min_utterance_length
int32 TrailingSilenceLength(const TransitionModel &tmodel, const std::string &silence_phones_str, const DEC &decoder)
returns the number of frames of trailing silence in the best-path traceback (not using final-probs)...
LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also supports an efficient way to get...
void Register(OptionsItf *opts)
std::string silence_phones
This header contains a simple facility for endpointing, that should be used in conjunction with the "...
OnlineEndpointRule rule3
rule3 times out after 1.0 seconds of silence if we reached the final-state with OK probability (relat...
BaseFloat min_trailing_silence