35 int32 online_ivector_period):
38 output_dim_(nnet_.OutputDim(
"output")),
41 ivector_(ivector), online_ivector_feats_(online_ivectors),
42 online_ivector_period_(online_ivector_period),
44 current_log_post_subsampled_offset_(0) {
50 KALDI_ASSERT(!(ivector != NULL && online_ivectors != NULL));
51 KALDI_ASSERT(!(online_ivectors != NULL && online_ivector_period <= 0 &&
52 "You need to set the --online-ivector-period option!"));
65 int32 online_ivector_period,
67 compiler_(am_nnet.GetNnet(), opts.optimize_config, opts.compiler_config),
68 decodable_nnet_(opts, am_nnet.GetNnet(), am_nnet.Priors(),
69 feats, compiler != NULL ? compiler : &
compiler_,
70 ivector, online_ivectors,
71 online_ivector_period),
72 trans_model_(trans_model) {
79 int32 transition_id) {
86 return ivector_->Dim();
87 else if (online_ivector_feats_ != NULL)
88 return online_ivector_feats_->NumCols();
95 subsampled_frame < num_subsampled_frames_);
96 int32 feature_dim = feats_.NumCols(),
97 ivector_dim = GetIvectorDim(),
98 nnet_input_dim = nnet_.InputDim(
"input"),
99 nnet_ivector_dim = std::max<int32>(0, nnet_.InputDim(
"ivector"));
100 if (feature_dim != nnet_input_dim)
101 KALDI_ERR <<
"Neural net expects 'input' features with dimension " 102 << nnet_input_dim <<
" but you provided " 104 if (ivector_dim != std::max<int32>(0, nnet_.InputDim(
"ivector")))
105 KALDI_ERR <<
"Neural net expects 'ivector' features with dimension " 106 << nnet_ivector_dim <<
" but you provided " << ivector_dim;
108 int32 current_subsampled_frames_computed = current_log_post_.NumRows(),
109 current_subsampled_offset = current_log_post_subsampled_offset_;
110 KALDI_ASSERT(subsampled_frame < current_subsampled_offset ||
111 subsampled_frame >= current_subsampled_offset +
112 current_subsampled_frames_computed);
116 int32 subsampling_factor = opts_.frame_subsampling_factor,
117 subsampled_frames_per_chunk = opts_.frames_per_chunk / subsampling_factor,
118 start_subsampled_frame = subsampled_frame,
119 num_subsampled_frames = std::min<int32>(num_subsampled_frames_ -
120 start_subsampled_frame,
121 subsampled_frames_per_chunk),
122 last_subsampled_frame = start_subsampled_frame + num_subsampled_frames - 1;
125 int32 first_output_frame = start_subsampled_frame * subsampling_factor,
126 last_output_frame = last_subsampled_frame * subsampling_factor;
128 KALDI_ASSERT(opts_.extra_left_context >= 0 && opts_.extra_right_context >= 0);
129 int32 extra_left_context = opts_.extra_left_context,
130 extra_right_context = opts_.extra_right_context;
131 if (first_output_frame == 0 && opts_.extra_left_context_initial >= 0)
132 extra_left_context = opts_.extra_left_context_initial;
133 if (last_subsampled_frame == num_subsampled_frames_ - 1 &&
134 opts_.extra_right_context_final >= 0)
135 extra_right_context = opts_.extra_right_context_final;
136 int32 left_context = nnet_left_context_ + extra_left_context,
137 right_context = nnet_right_context_ + extra_right_context;
138 int32 first_input_frame = first_output_frame - left_context,
139 last_input_frame = last_output_frame + right_context,
140 num_input_frames = last_input_frame + 1 - first_input_frame;
142 GetCurrentIvector(first_output_frame,
143 last_output_frame - first_output_frame,
147 if (first_input_frame >= 0 &&
148 last_input_frame < feats_.NumRows()) {
151 DoNnetComputation(first_input_frame, input_feats, ivector,
152 first_output_frame, num_subsampled_frames);
156 for (
int32 i = 0;
i < num_input_frames;
i++) {
158 int32 t =
i + first_input_frame;
160 if (t >= tot_input_feats) t = tot_input_feats - 1;
164 DoNnetComputation(first_input_frame, feats_block, ivector,
165 first_output_frame, num_subsampled_frames);
173 if (subsampled_frame < current_log_post_subsampled_offset_ ||
174 subsampled_frame >= current_log_post_subsampled_offset_ +
175 current_log_post_.NumRows())
176 EnsureFrameIsComputed(subsampled_frame);
178 subsampled_frame - current_log_post_subsampled_offset_));
182 int32 num_output_frames,
184 if (ivector_ != NULL) {
185 *ivector = *ivector_;
187 }
else if (online_ivector_feats_ == NULL) {
196 int32 frame_to_search = output_t_start + num_output_frames / 2;
197 int32 ivector_frame = frame_to_search / online_ivector_period_;
199 if (ivector_frame >= online_ivector_feats_->NumRows()) {
200 int32 margin = ivector_frame - (online_ivector_feats_->NumRows() - 1);
201 if (margin * online_ivector_period_ > 50) {
203 KALDI_ERR <<
"Could not get iVector for frame " << frame_to_search
204 <<
", only available till frame " 205 << online_ivector_feats_->NumRows()
206 <<
" * ivector-period=" << online_ivector_period_
207 <<
" (mismatched --online-ivector-period?)";
209 ivector_frame = online_ivector_feats_->NumRows() - 1;
211 *ivector = online_ivector_feats_->Row(ivector_frame);
219 int32 output_t_start,
220 int32 num_subsampled_frames) {
225 bool shift_time =
true;
228 int32 time_offset = (shift_time ? -output_t_start : 0);
231 request.
inputs.reserve(2);
234 time_offset + input_t_start + input_feats.
NumRows()));
235 if (ivector.
Dim() != 0) {
236 std::vector<Index> indexes;
237 indexes.push_back(
Index(0, 0, 0));
241 output_spec.
name =
"output";
243 int32 subsample = opts_.frame_subsampling_factor;
244 output_spec.
indexes.resize(num_subsampled_frames);
246 for (
int32 i = 0;
i < num_subsampled_frames;
i++)
247 output_spec.
indexes[
i].t = time_offset + output_t_start +
i * subsample;
249 request.
outputs[0].Swap(&output_spec);
251 std::shared_ptr<const NnetComputation> computation =
compiler_.
Compile(request);
252 Nnet *nnet_to_update = NULL;
253 NnetComputer computer(opts_.compute_config, *computation,
254 nnet_, nnet_to_update);
259 if (ivector.
Dim() > 0) {
260 ivector_feats_cu.
Resize(1, ivector.
Dim());
261 ivector_feats_cu.
Row(0).CopyFromVec(ivector);
262 computer.
AcceptInput(
"ivector", &ivector_feats_cu);
268 if (log_priors_.Dim() != 0)
271 cu_output.
Scale(opts_.acoustic_scale);
272 current_log_post_.Resize(0, 0);
274 cu_output.
Swap(&current_log_post_);
275 current_log_post_subsampled_offset_ = output_t_start / subsample;
279 static bool warned_frames_per_chunk =
false;
280 int32 nnet_modulus = nnet_.Modulus();
281 if (opts_.frame_subsampling_factor < 1 ||
282 opts_.frames_per_chunk < 1)
283 KALDI_ERR <<
"--frame-subsampling-factor and --frames-per-chunk must be > 0";
285 int32 n =
Lcm(opts_.frame_subsampling_factor, nnet_modulus);
287 if (opts_.frames_per_chunk % n != 0) {
289 int32 frames_per_chunk = n * ((opts_.frames_per_chunk + n - 1) / n);
290 if (!warned_frames_per_chunk) {
291 warned_frames_per_chunk =
true;
292 if (nnet_modulus == 1) {
294 KALDI_LOG <<
"Increasing --frames-per-chunk from " 295 << opts_.frames_per_chunk <<
" to " 296 << frames_per_chunk <<
" to make it a multiple of " 297 <<
"--frame-subsampling-factor=" 298 << opts_.frame_subsampling_factor;
300 KALDI_LOG <<
"Increasing --frames-per-chunk from " 301 << opts_.frames_per_chunk <<
" to " 302 << frames_per_chunk <<
" due to " 303 <<
"--frame-subsampling-factor=" 304 << opts_.frame_subsampling_factor <<
" and " 305 <<
"nnet shift-invariance modulus = " << nnet_modulus;
308 opts_.frames_per_chunk = frames_per_chunk;
320 int32 online_ivector_period):
321 compiler_(am_nnet.GetNnet(), opts.optimize_config, opts.compiler_config),
325 online_ivectors_copy_(NULL),
331 if (online_ivectors != NULL)
337 online_ivector_period);
341 KALDI_ERR <<
"Error occurred in constructor (see above)";
359 int32 transition_id) {
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
DecodableNnetSimple(const NnetSimpleComputationOptions &opts, const Nnet &nnet, const VectorBase< BaseFloat > &priors, const MatrixBase< BaseFloat > &feats, CachingOptimizingCompiler *compiler, const VectorBase< BaseFloat > *ivector=NULL, const MatrixBase< BaseFloat > *online_ivectors=NULL, int32 online_ivector_period=1)
This constructor takes features as input, and you can either supply a single iVector input...
const TransitionModel & trans_model_
CachingOptimizingCompiler compiler_
bool store_component_stats
you should set need_component_stats to true if you need the average-activation and average-derivative...
const CuSubVector< Real > Row(MatrixIndexT i) const
bool need_model_derivative
if need_model_derivative is true, then we'll be doing either model training or model-derivative compu...
int32 TransitionIdToPdfFast(int32 trans_id) const
Base class which provides matrix operations not involving resizing or allocation. ...
DecodableNnetSimple * decodable_nnet_
This class enables you to do the compilation and optimization in one call, and also ensures that if t...
Matrix< BaseFloat > * online_ivectors_copy_
CachingOptimizingCompiler & compiler_
virtual BaseFloat LogLikelihood(int32 frame, int32 transition_id)
Returns the log likelihood, which will be negated in the decoder.
std::vector< IoSpecification > inputs
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
virtual BaseFloat LogLikelihood(int32 frame, int32 transition_id)
Returns the log likelihood, which will be negated in the decoder.
const Nnet & GetNnet() const
void CheckAndFixConfigs()
void GetOutputForFrame(int32 frame, VectorBase< BaseFloat > *output)
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
I Lcm(I m, I n)
Returns the least common multiple of two integers.
DecodableNnetSimple decodable_nnet_
This file contains some miscellaneous functions dealing with class Nnet.
const MatrixBase< BaseFloat > & feats_
void GetCurrentIvector(int32 output_t_start, int32 num_output_frames, Vector< BaseFloat > *ivector)
void AcceptInput(const std::string &node_name, CuMatrix< BaseFloat > *input)
e.g.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
DecodableAmNnetSimple(const NnetSimpleComputationOptions &opts, const TransitionModel &trans_model, const AmNnetSimple &am_nnet, const MatrixBase< BaseFloat > &feats, const VectorBase< BaseFloat > *ivector=NULL, const MatrixBase< BaseFloat > *online_ivectors=NULL, int32 online_ivector_period=1, CachingOptimizingCompiler *compiler=NULL)
This constructor takes features as input, and you can either supply a single iVector input...
int32 nnet_right_context_
CuVector< BaseFloat > log_priors_
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
void Swap(Matrix< Real > *mat)
void DoNnetComputation(int32 input_t_start, const MatrixBase< BaseFloat > &input_feats, const VectorBase< BaseFloat > &ivector, int32 output_t_start, int32 num_subsampled_frames)
const TransitionModel & trans_model_
MatrixIndexT Dim() const
Returns the dimension of the vector.
void GetSimpleNnetContext(int32 *nnet_left_context, int32 *nnet_right_context)
const VectorBase< BaseFloat > & Priors() const
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
NnetSimpleComputationOptions opts_
Vector< BaseFloat > * ivector_copy_
std::vector< Index > indexes
bool IsSimpleNnet(const Nnet &nnet)
This function returns true if the nnet has the following properties: It has an output called "output"...
A class representing a vector.
class NnetComputer is responsible for executing the computation described in the "computation" object...
void EnsureFrameIsComputed(int32 subsampled_frame)
#define KALDI_ASSERT(cond)
std::vector< IoSpecification > outputs
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
int32 GetIvectorDim() const
CachingOptimizingCompiler compiler_
int32 num_subsampled_frames_
void GetOutputDestructive(const std::string &output_name, CuMatrix< BaseFloat > *output)
Provides a vector abstraction class.
Matrix< BaseFloat > * feats_copy_
Sub-matrix representation.
BaseFloat GetOutput(int32 subsampled_frame, int32 pdf_id)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
int32 frame_subsampling_factor
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
DecodableAmNnetSimpleParallel(const NnetSimpleComputationOptions &opts, const TransitionModel &trans_model, const AmNnetSimple &am_nnet, const MatrixBase< BaseFloat > &feats, const VectorBase< BaseFloat > *ivector=NULL, const MatrixBase< BaseFloat > *online_ivectors=NULL, int32 online_ivector_period=1)
This decodable object is for use in multi-threaded decoding.
void Run()
This does either the forward or backward computation, depending when it is called (in a typical compu...