20 #ifndef KALDI_NNET3_NNET_AM_DECODABLE_SIMPLE_H_ 21 #define KALDI_NNET3_NNET_AM_DECODABLE_SIMPLE_H_ 57 extra_left_context(0),
58 extra_right_context(0),
59 extra_left_context_initial(-1),
60 extra_right_context_final(-1),
61 frame_subsampling_factor(1),
64 debug_computation(false) {
69 opts->
Register(
"extra-left-context", &extra_left_context,
70 "Number of frames of additional left-context to add on top " 71 "of the neural net's inherent left context (may be useful in " 73 opts->
Register(
"extra-right-context", &extra_right_context,
74 "Number of frames of additional right-context to add on top " 75 "of the neural net's inherent right context (may be useful in " 77 opts->
Register(
"extra-left-context-initial", &extra_left_context_initial,
78 "If >= 0, overrides the --extra-left-context value at the " 79 "start of an utterance.");
80 opts->
Register(
"extra-right-context-final", &extra_right_context_final,
81 "If >= 0, overrides the --extra-right-context value at the " 82 "end of an utterance.");
83 opts->
Register(
"frame-subsampling-factor", &frame_subsampling_factor,
84 "Required if the frame-rate of the output (e.g. in 'chain' " 85 "models) is less than the frame-rate of the original " 87 opts->
Register(
"acoustic-scale", &acoustic_scale,
88 "Scaling factor for acoustic log-likelihoods (caution: is a no-op " 89 "if set in the program nnet3-compute");
90 opts->
Register(
"frames-per-chunk", &frames_per_chunk,
91 "Number of frames in each chunk that is separately evaluated " 92 "by the neural net. Measured before any subsampling, if the " 93 "--frame-subsampling-factor options is used (i.e. counts " 95 opts->
Register(
"debug-computation", &debug_computation,
"If true, turn on " 96 "debug for the actual computation (very verbose!)");
100 optimize_config.
Register(&optimization_opts);
104 compute_config.
Register(&compute_opts);
108 static bool warned_frames_per_chunk =
false;
109 if (frame_subsampling_factor < 1 || frames_per_chunk < 1) {
110 KALDI_ERR <<
"--frame-subsampling-factor and " 111 <<
"--frames-per-chunk must be > 0";
114 int32 n =
Lcm(frame_subsampling_factor, nnet_modulus);
116 if (frames_per_chunk % n != 0) {
118 int32 new_frames_per_chunk = n * ((frames_per_chunk + n - 1) / n);
119 if (!warned_frames_per_chunk) {
120 warned_frames_per_chunk =
true;
121 if (nnet_modulus == 1) {
123 KALDI_LOG <<
"Increasing --frames-per-chunk from " << frames_per_chunk
124 <<
" to " << new_frames_per_chunk
125 <<
" to make it a multiple of " 126 <<
"--frame-subsampling-factor=" 129 KALDI_LOG <<
"Increasing --frames-per-chunk from " << frames_per_chunk
130 <<
" to " << new_frames_per_chunk <<
" due to " 131 <<
"--frame-subsampling-factor=" << frame_subsampling_factor
133 <<
"nnet shift-invariance modulus = " << nnet_modulus;
136 frames_per_chunk = new_frames_per_chunk;
188 int32 online_ivector_period = 1);
206 if (subsampled_frame < current_log_post_subsampled_offset_ ||
207 subsampled_frame >= current_log_post_subsampled_offset_ +
208 current_log_post_.NumRows())
209 EnsureFrameIsComputed(subsampled_frame);
210 return current_log_post_(subsampled_frame -
211 current_log_post_subsampled_offset_,
219 void EnsureFrameIsComputed(
int32 subsampled_frame);
225 void DoNnetComputation(
int32 input_t_start,
228 int32 output_t_start,
229 int32 num_subsampled_frames);
236 void GetCurrentIvector(
int32 output_t_start,
237 int32 num_output_frames,
244 int32 GetIvectorDim()
const;
324 int32 online_ivector_period = 1,
331 return decodable_nnet_.NumFrames();
338 return (frame == NumFramesReady() - 1);
396 int32 online_ivector_period = 1);
402 return decodable_nnet_->NumFrames();
409 return (frame == NumFramesReady() - 1);
432 #endif // KALDI_NNET3_NNET_AM_DECODABLE_SIMPLE_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void Register(OptionsItf *opts)
const TransitionModel & trans_model_
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
CachingOptimizingCompiler compiler_
const MatrixBase< BaseFloat > * online_ivector_feats_
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
NnetSimpleComputationOptions()
Base class which provides matrix operations not involving resizing or allocation. ...
DecodableNnetSimple * decodable_nnet_
This class enables you to do the compilation and optimization in one call, and also ensures that if t...
Matrix< BaseFloat > * online_ivectors_copy_
CachingOptimizingCompiler & compiler_
int32 extra_left_context_initial
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
CachingOptimizingCompilerOptions compiler_config
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
I Lcm(I m, I n)
Returns the least common multiple of two integers.
DecodableNnetSimple decodable_nnet_
const MatrixBase< BaseFloat > & feats_
int32 online_ivector_period_
void CheckAndFixConfigs(int32 nnet_modulus)
const VectorBase< BaseFloat > * ivector_
int32 current_log_post_subsampled_offset_
int32 nnet_right_context_
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
CuVector< BaseFloat > log_priors_
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
int32 extra_right_context_final
virtual int32 NumIndices() const
Returns the number of states in the acoustic model (they will be indexed one-based, i.e.
void Register(OptionsItf *opts)
const TransitionModel & trans_model_
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
void Register(OptionsItf *opts)
NnetSimpleComputationOptions opts_
Vector< BaseFloat > * ivector_copy_
int32 extra_right_context
~DecodableAmNnetSimpleParallel()
A class representing a vector.
NnetComputeOptions compute_config
#define KALDI_ASSERT(cond)
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
CachingOptimizingCompiler compiler_
int32 num_subsampled_frames_
Matrix< BaseFloat > current_log_post_
Provides a vector abstraction class.
NnetOptimizeOptions optimize_config
Matrix< BaseFloat > * feats_copy_
BaseFloat GetOutput(int32 subsampled_frame, int32 pdf_id)
int32 frame_subsampling_factor