29 Nnet *nnet_to_update):
30 nnet_(nnet), nnet_to_update_(nnet_to_update) {
47 const std::vector<NnetExample> &data,
48 double *tot_accuracy) {
65 double *tot_accuracy) {
67 int32 num_chunks = data.size();
98 static int32 num_times_printed = 0;
101 for (
int32 c = 0; c < num_components; c++) {
110 bool need_last_output =
114 KALDI_VLOG(3) <<
"Stddev of data for component " << c
115 <<
" for this minibatch is " 120 if (!need_last_output)
126 const std::vector<NnetExample> &data,
128 double *tot_accuracy)
const {
129 BaseFloat tot_objf = 0.0, tot_weight = 0.0;
131 int32 num_chunks = data.size();
136 std::vector<MatrixElement<BaseFloat> > sv_labels;
137 sv_labels.reserve(num_chunks);
138 for (
int32 m = 0; m < num_chunks; m++) {
140 "Training code currently does not support multi-frame egs");
141 const std::vector<std::pair<int32,BaseFloat> > &labels = data[m].labels[0];
142 for (
size_t i = 0;
i < labels.size();
i++) {
144 "Possibly egs come from alignments from mismatching model");
146 sv_labels.push_back(elem);
150 if (tot_accuracy != NULL)
155 KALDI_VLOG(4) <<
"Objective function is " << (tot_objf/tot_weight) <<
" over " 156 << tot_weight <<
" samples (weighted).";
162 const std::vector<NnetExample> &data)
const {
168 std::vector<int32> best_pdf_cpu;
171 best_pdf.CopyToVec(&best_pdf_cpu);
175 "Training code currently does not support multi-frame egs");
176 const std::vector<std::pair<int32,BaseFloat> > &labels = data[
i].labels[0];
177 for (
size_t j = 0;
j < labels.size();
j++) {
178 int32 ref_pdf_id = labels[
j].first,
179 hyp_pdf_id = best_pdf_cpu[
i];
181 tot_accuracy += weight * (hyp_pdf_id == ref_pdf_id ? 1.0 : 0.0);
200 output_deriv, component_to_update,
202 input_deriv.Swap(deriv);
208 const std::vector<NnetExample> &data,
212 KALDI_ASSERT(data[0].input_frames.NumRows() >= num_splice);
214 int32 feat_dim = data[0].input_frames.NumCols(),
215 spk_dim = data[0].spk_info.Dim(),
216 tot_dim = feat_dim + spk_dim;
225 int32 num_chunks = data.size();
227 input_mat->
Resize(num_splice * num_chunks,
230 for (
int32 chunk = 0; chunk < num_chunks; chunk++) {
232 chunk * num_splice, num_splice,
241 chunk * num_splice, num_splice,
250 for (
size_t i = 0;
i < egs.size();
i++)
251 for (
size_t j = 0;
j < egs[
i].labels.size();
j++)
252 for (
size_t k = 0; k < egs[
i].labels[
j].size(); k++)
253 ans += egs[
i].labels[
j][k].second;
259 const std::vector<NnetExample> &examples,
260 double *tot_accuracy) {
266 const std::vector<NnetExample> &examples,
267 Nnet *nnet_to_update,
268 double *tot_accuracy) {
269 if (nnet_to_update == NULL)
275 KALDI_LOG <<
"Error doing backprop, nnet info is: " << nnet.
Info();
282 const std::vector<NnetExample> &examples,
284 Nnet *nnet_to_update,
285 double *tot_accuracy) {
286 if (nnet_to_update == NULL) {
287 KALDI_WARN <<
"Was not expecting to reach this code path " 288 <<
"(wastefully formatting data twice)";
296 KALDI_LOG <<
"Error doing backprop, nnet info is: " << nnet.
Info();
304 const std::vector<NnetExample> &validation_set,
307 bool treat_as_gradient =
true;
308 gradient->
SetZero(treat_as_gradient);
309 std::vector<NnetExample> batch;
310 batch.reserve(batch_size);
311 double tot_objf = 0.0;
312 for (
int32 start_pos = 0;
313 start_pos < static_cast<int32>(validation_set.size());
314 start_pos += batch_size) {
317 i < std::min(start_pos + batch_size,
318 static_cast<int32>(validation_set.size()));
320 batch.push_back(validation_set[
i]);
326 return tot_objf / validation_set.size();
331 const std::vector<NnetExample> &validation_set,
333 double *tot_accuracy) {
334 double tot_accuracy_tmp;
337 std::vector<NnetExample> batch;
338 batch.reserve(batch_size);
339 double tot_objf = 0.0;
340 for (
int32 start_pos = 0;
341 start_pos < static_cast<int32>(validation_set.size());
342 start_pos += batch_size) {
345 i < std::min(start_pos + batch_size,
346 static_cast<int32>(validation_set.size()));
348 batch.push_back(validation_set[
i]);
351 tot_accuracy != NULL ? &tot_accuracy_tmp : NULL);
353 *tot_accuracy += tot_accuracy_tmp;
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
const Component & GetComponent(int32 c) const
int32 LeftContext() const
Returns the left-context summed over all the Components...
void Backprop(CuMatrix< BaseFloat > *deriv) const
Backprop must be called after ComputeObjfAndDeriv.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
double ComputeNnetGradient(const Nnet &nnet, const std::vector< NnetExample > &validation_set, int32 batch_size, Nnet *gradient)
ComputeNnetGradient is mostly used to compute gradients on validation sets; it divides the example in...
Abstract class, the basic element of the network: a box with a defined input, output, and transformation-function interface.
virtual bool BackpropNeedsInput() const
int32 FirstUpdatableComponent() const
Returns the index of the lowest-numbered component which is updatable, or NumComponents() if none are...
int32 OutputDim() const
The output dimension of the network – typically the number of pdfs.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
double ComputeNnetObjf(const Nnet &nnet, const std::vector< NnetExample > &examples, double *tot_accuracy)
Computes objective function over a minibatch.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
void FormatInput(const std::vector< NnetExample > &data)
Formats the input as a single matrix and sets the size of forward_data_, and sets up chunk_info_out_...
double DoBackprop(const Nnet &nnet, const std::vector< NnetExample > &examples, Nnet *nnet_to_update, double *tot_accuracy)
This function computes the objective function and either updates the model or adds to parameter gradi...
void FormatNnetInput(const Nnet &nnet, const std::vector< NnetExample > &data, Matrix< BaseFloat > *input_mat)
Takes the input to the nnet for a minibatch of examples, and formats as a single matrix.
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
int32 NumComponents() const
Returns the number of components — think of this as similar to the number of layers, but e.g.
void CompObjfAndDeriv(const std::vector< MatrixElement< Real > > &elements, const CuMatrix< Real > &A, Real *tot_objf, Real *tot_weight)
Here, A is interpreted as a matrix of probabilities, and "elements" as a list of posteriors (possibly...
void SetZero(bool treat_as_gradient)
std::vector< CuMatrix< BaseFloat > > forward_data_
int32 RightContext() const
Returns the right-context summed over all the Components...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const =0
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
double ComputeTotAccuracy(const std::vector< NnetExample > &data) const
BaseFloat TotalNnetTrainingWeight(const std::vector< NnetExample > &egs)
Returns the total weight summed over all the examples...
void FindRowMaxId(CuArray< int32 > *id) const
Find the id of the maximal element for each row (resizes the 'id' array to the appropriate size)...
MatrixIndexT NumCols() const
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const =0
Perform forward pass propagation Input->Output.
#define KALDI_ASSERT(cond)
int32 g_kaldi_verbose_level
This is set by util/parse-options.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
This header provides functionality for sample-by-sample stochastic gradient descent and gradient comp...
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
std::vector< ChunkInfo > chunk_info_out_
NnetUpdater(const Nnet &nnet, Nnet *nnet_to_update)
MatrixIndexT NumRows() const
Dimensions.
double ComputeForMinibatch(const std::vector< NnetExample > &data, double *tot_accuracy)
Does the entire forward and backward computation for this minibatch.
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
void ComputeChunkInfo(int32 input_chunk_size, int32 num_chunks, std::vector< ChunkInfo > *chunk_info_out) const
Uses the output of the Context() functions of the network, to compute a vector of size NumComponents(...
double ComputeObjfAndDeriv(const std::vector< NnetExample > &data, CuMatrix< BaseFloat > *deriv, double *tot_accuracy=NULL) const
Computes objective function and derivative at output layer, but does not do the backprop [for that...
void GetOutput(CuMatrix< BaseFloat > *output)
Sub-matrix representation.
virtual bool BackpropNeedsOutput() const
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
int32 InputDim() const
Dimension of the input features, e.g.