27 const std::string &rule_name,
31 bool contains_nonsilence = (utterance_length > trailing_silence);
38 KALDI_VLOG(2) <<
"Endpointing rule " << rule_name <<
" activated: " 39 << (contains_nonsilence ?
"true" :
"false" ) <<
',' 40 << trailing_silence <<
',' << relative_cost <<
',' 47 int32 num_frames_decoded,
48 int32 trailing_silence_frames,
51 KALDI_ASSERT(num_frames_decoded >= trailing_silence_frames);
53 BaseFloat utterance_length = num_frames_decoded * frame_shift_in_seconds,
54 trailing_silence = trailing_silence_frames * frame_shift_in_seconds;
57 trailing_silence, final_relative_cost, utterance_length))
60 trailing_silence, final_relative_cost, utterance_length))
63 trailing_silence, final_relative_cost, utterance_length))
66 trailing_silence, final_relative_cost, utterance_length))
69 trailing_silence, final_relative_cost, utterance_length))
74 template <
typename FST,
typename DEC>
76 const std::string &silence_phones_str,
78 std::vector<int32> silence_phones;
80 KALDI_ERR <<
"Bad --silence-phones option in endpointing config: " 81 << silence_phones_str;
82 std::sort(silence_phones.begin(), silence_phones.end());
84 "Duplicates in --silence-phones option in endpointing config");
86 "Endpointing requires nonempty --endpoint.silence-phones option");
89 bool use_final_probs =
false;
90 typename DEC::BestPathIterator iter =
91 decoder.BestPathEnd(use_final_probs, NULL);
92 int32 num_silence_frames = 0;
93 while (!iter.Done()) {
96 iter = decoder.TraceBackBestPath(iter, &arc);
97 if (arc.ilabel != 0) {
99 if (silence_set.count(phone) != 0) {
100 num_silence_frames++;
106 return num_silence_frames;
109 template <
typename FST>
120 trailing_silence_frames = TrailingSilenceLength<FST, LatticeFasterOnlineDecoderTpl<FST>>(tmodel,
124 return EndpointDetected(config, num_frames_decoded, trailing_silence_frames,
125 frame_shift_in_seconds, final_relative_cost);
128 template <
typename FST>
139 trailing_silence_frames = TrailingSilenceLength<FST, LatticeIncrementalOnlineDecoderTpl<FST>>(tmodel,
143 return EndpointDetected(config, num_frames_decoded, trailing_silence_frames,
144 frame_shift_in_seconds, final_relative_cost);
152 bool EndpointDetected<fst::Fst<fst::StdArc> >(
160 bool EndpointDetected<fst::GrammarFst>(
167 bool EndpointDetected<fst::Fst<fst::StdArc> >(
175 bool EndpointDetected<fst::GrammarFst>(
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
fst::ArcTpl< LatticeWeight > LatticeArc
OnlineEndpointRule rule1
e.g.
LatticeIncrementalOnlineDecoderTpl is as LatticeIncrementalDecoderTpl but also supports an efficient ...
OnlineEndpointRule rule2
rule2 times out after 0.5 seconds of silence if we reached the final-state with good probability (rel...
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
bool EndpointDetected(const OnlineEndpointConfig &config, int32 num_frames_decoded, int32 trailing_silence_frames, BaseFloat frame_shift_in_seconds, BaseFloat final_relative_cost)
This function returns true if this set of endpointing rules thinks we should terminate decoding...
OnlineEndpointRule rule5
rule5 times out after the utterance is 20 seconds long, regardless of anything else.
bool must_contain_nonsilence
OnlineEndpointRule rule4
rule4 times out after 2.0 seconds of silence after decoding something, even if we did not reach a fin...
static bool RuleActivated(const OnlineEndpointRule &rule, const std::string &rule_name, BaseFloat trailing_silence, BaseFloat relative_cost, BaseFloat utterance_length)
BaseFloat max_relative_cost
int32 NumFramesDecoded() const
BaseFloat min_utterance_length
int32 TrailingSilenceLength(const TransitionModel &tmodel, const std::string &silence_phones_str, const DEC &decoder)
returns the number of frames of trailing silence in the best-path traceback (not using final-probs)...
LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also supports an efficient way to get...
BaseFloat FinalRelativeCost() const
FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives more information.
std::string silence_phones
int32 NumFramesDecoded() const
Returns the number of frames decoded so far.
BaseFloat FinalRelativeCost() const
FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives more information.
#define KALDI_ASSERT(cond)
This header contains a simple facility for endpointing, that should be used in conjunction with the "...
OnlineEndpointRule rule3
rule3 times out after 1.0 seconds of silence if we reached the final-state with OK probability (relat...
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
int32 TransitionIdToPhone(int32 trans_id) const
BaseFloat min_trailing_silence