online-nnet3-incremental-decoding.h
Go to the documentation of this file.
1 // online2/online-nnet3-incremental-decoding.h
2 
3 // Copyright 2019 Zhehuai Chen
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_ONLINE2_ONLINE_NNET3_INCREMENTAL_DECODING_H_
22 #define KALDI_ONLINE2_ONLINE_NNET3_INCREMENTAL_DECODING_H_
23 
24 #include <string>
25 #include <vector>
26 #include <deque>
27 
29 #include "matrix/matrix-lib.h"
30 #include "util/common-utils.h"
31 #include "base/kaldi-error.h"
32 #include "itf/online-feature-itf.h"
36 #include "hmm/transition-model.h"
37 #include "hmm/posterior.h"
38 
39 namespace kaldi {
42 
43 
50 template <typename FST>
52  public:
53 
54  // Constructor. This class does not take ownership of the pointer 'features'
55  // and will not deallocate it; it remains owned externally.
57  const TransitionModel &trans_model,
59  const FST &fst,
60  OnlineNnet2FeaturePipeline *features);
61 
// Initializes the decoding and sets the frame offset of the underlying
// decodable object. 'frame_offset' defaults to 0 when omitted. Only the
// declaration appears here; the definition is not part of this listing.
66  void InitDecoding(int32 frame_offset = 0);
67 
// Advances the decoding as far as we can. Takes no arguments and returns
// nothing; only the declaration appears here.
69  void AdvanceDecoding();
70 
// Finalizes decoding by forwarding straight to decoder_.FinalizeDecoding();
// this wrapper adds no logic of its own.
74  void FinalizeDecoding() { decoder_.FinalizeDecoding(); }
75 
// Returns decoder_.NumFramesDecoded() unchanged.
// NOTE(review): presumably the number of frames decoded so far -- confirm
// against the decoder class, which is not visible in this listing.
76  int32 NumFramesDecoded() const { return decoder_.NumFramesDecoded(); }
77 
// Returns decoder_.NumFramesInLattice() unchanged. GetLattice() uses this
// value as the lower bound of its 'num_frames_to_include' argument.
78  int32 NumFramesInLattice() const { return decoder_.NumFramesInLattice(); }
79 
80  /* Gets the lattice. The output lattice has any acoustic scaling in it
81  (which will typically be desirable in an online-decoding context); if you
82  want an un-scaled lattice, scale it using ScaleLattice() with the inverse
83  of the acoustic weight.
84 
85  @param [in] num_frames_to_include The number of frames you want
86  to be included in the lattice. Must be in the range
87  [NumFramesInLattice().. NumFramesDecoded()]. If you
88  make it a few frames less than NumFramesDecoded(), it
89  will save significant computation.
90  @param [in] use_final_probs True if you want the lattice to
91  contain final-probs (if at least one state was final
92  on the most recently decoded frame). Must be false
93  if num_frames_to_include < NumFramesDecoded().
94  Must be true if you have previously called
95  FinalizeDecoding().
 @return The const reference produced by decoder_.GetLattice() for the
 same two arguments. This wrapper only forwards the call; it does
 not itself check the constraints listed above.
96  */
97  const CompactLattice &GetLattice(int32 num_frames_to_include,
98  bool use_final_probs = false) {
99  return decoder_.GetLattice(num_frames_to_include, use_final_probs);
100  }
101 
102 
103 
104 
105 
// Outputs an FST corresponding to the single best path through the current
// lattice. The result is written through 'best_path'; the method is const.
// NOTE(review): 'end_of_utterance' presumably selects whether final-probs
// are taken into account -- confirm against the definition, which is not
// part of this listing.
110  void GetBestPath(bool end_of_utterance,
111  Lattice *best_path) const;
112 
113 
// Calls EndpointDetected from online-endpoint.h with the required arguments,
// using the endpointing options in 'config'. The transition model kept by
// this class is needed by that endpointing code.
// NOTE(review): presumably returns true when an endpoint has been detected --
// confirm against online-endpoint.h.
116  bool EndpointDetected(const OnlineEndpointConfig &config);
117 
119 
121  private:
122 
124 
125  // this is remembered from the constructor; it's ultimately
126  // derived from calling FrameShiftInSeconds() on the feature pipeline.
128 
129  // we need to keep a reference to the transition model around only because
130  // it's needed by the endpointing code.
132 
134 
136 
137 };
138 
139 
141 
143 
144 } // namespace kaldi
145 
146 
147 
148 #endif // KALDI_ONLINE2_ONLINE_NNET3_INCREMENTAL_DECODING_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
SingleUtteranceNnet3IncrementalDecoderTpl(const LatticeIncrementalDecoderConfig &decoder_opts, const TransitionModel &trans_model, const nnet3::DecodableNnetSimpleLoopedInfo &info, const FST &fst, OnlineNnet2FeaturePipeline *features)
const CompactLattice & GetLattice(int32 num_frames_to_include, bool use_final_probs=false)
LatticeIncrementalOnlineDecoderTpl is as LatticeIncrementalDecoderTpl but also supports an efficient ...
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
kaldi::int32 int32
This file contains a different version of the feature-extraction pipeline in online-feature-pipeline...
void InitDecoding(int32 frame_offset=0)
Initializes the decoding and sets the frame offset of the underlying decodable object.
void AdvanceDecoding()
Advances the decoding as far as we can.
SingleUtteranceNnet3IncrementalDecoderTpl< fst::Fst< fst::StdArc > > SingleUtteranceNnet3IncrementalDecoder
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
const LatticeIncrementalOnlineDecoderTpl< FST > & Decoder() const
The normal decoder, lattice-faster-decoder.h, sometimes has an issue when doing real-time application...
bool EndpointDetected(const OnlineEndpointConfig &config)
This function calls EndpointDetected from online-endpoint.h, with the required arguments.
You will instantiate this class when you want to decode a single utterance using the online-decoding ...
OnlineNnet2FeaturePipeline is a class that's responsible for putting together the various parts of th...
When you instantiate class DecodableNnetSimpleLooped, you should give it a const reference to this cl...
void GetBestPath(bool end_of_utterance, Lattice *best_path) const
Outputs an FST corresponding to the single best path through the current lattice. ...