nnet-convolutional-component.h
Go to the documentation of this file.
1 // nnet3/nnet-convolutional-component.h
2 
3 // Copyright 2017 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #ifndef KALDI_NNET3_NNET_CONVOLUTIONAL_COMPONENT_H_
21 #define KALDI_NNET3_NNET_CONVOLUTIONAL_COMPONENT_H_
22 
23 #include "nnet3/nnet-common.h"
26 #include "nnet3/convolution.h"
27 #include <iostream>
28 
29 namespace kaldi {
30 namespace nnet3 {
31 
37 
38 
212 class TimeHeightConvolutionComponent: public UpdatableComponent {
213  public:
214 
215  // The use of this constructor should only precede InitFromConfig()
217 
218  // Copy constructor
220 
221  virtual int32 InputDim() const;
222  virtual int32 OutputDim() const;
223 
224  virtual std::string Info() const;
225  virtual void InitFromConfig(ConfigLine *cfl);
226  virtual std::string Type() const { return "TimeHeightConvolutionComponent"; }
227  virtual int32 Properties() const {
230  }
231  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
232  const CuMatrixBase<BaseFloat> &in,
233  CuMatrixBase<BaseFloat> *out) const;
234  virtual void Backprop(const std::string &debug_info,
235  const ComponentPrecomputedIndexes *indexes,
236  const CuMatrixBase<BaseFloat> &in_value,
237  const CuMatrixBase<BaseFloat> &out_value,
238  const CuMatrixBase<BaseFloat> &out_deriv,
239  void *memo,
240  Component *to_update,
241  CuMatrixBase<BaseFloat> *in_deriv) const;
242 
243  virtual void Read(std::istream &is, bool binary);
244  virtual void Write(std::ostream &os, bool binary) const;
245  virtual Component* Copy() const {
246  return new TimeHeightConvolutionComponent(*this);
247  }
248 
249 
250  // Some functions that are only to be reimplemented for GeneralComponents.
251 
252  // This ReorderIndexes function may insert 'blank' indexes (indexes with
253  // t == kNoTime) as well as reordering the indexes. This is allowed
254  // behavior of ReorderIndexes functions.
255  virtual void ReorderIndexes(std::vector<Index> *input_indexes,
256  std::vector<Index> *output_indexes) const;
257 
258  virtual void GetInputIndexes(const MiscComputationInfo &misc_info,
259  const Index &output_index,
260  std::vector<Index> *desired_indexes) const;
261 
262  // This function returns true if at least one of the input indexes used to
263  // compute this output index is computable.
264  virtual bool IsComputable(const MiscComputationInfo &misc_info,
265  const Index &output_index,
266  const IndexSet &input_index_set,
267  std::vector<Index> *used_inputs) const;
268 
270  const MiscComputationInfo &misc_info,
271  const std::vector<Index> &input_indexes,
272  const std::vector<Index> &output_indexes,
273  bool need_backprop) const;
274 
275  // Some functions from base-class UpdatableComponent.
276  virtual void Scale(BaseFloat scale);
277  virtual void Add(BaseFloat alpha, const Component &other);
278  virtual void PerturbParams(BaseFloat stddev);
279  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
280  virtual int32 NumParameters() const;
281  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
282  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
283  virtual void FreezeNaturalGradient(bool freeze);
284 
285 
287  public:
290  computation(other.computation) { }
291  virtual PrecomputedIndexes *Copy() const;
292  virtual void Write(std::ostream &os, bool binary) const;
293  virtual void Read(std::istream &os, bool binary);
294  virtual std::string Type() const {
295  return "TimeHeightConvolutionComponentPrecomputedIndexes";
296  }
297  virtual ~PrecomputedIndexes() { }
298 
300  };
301 
302  void ScaleLinearParams(BaseFloat alpha) { linear_params_.Scale(alpha); }
303 
304  void ConsolidateMemory();
305  private:
306 
307  void Check() const;
308 
309  // computes derived parameters time_offset_required_ and all_time_offsets_.
310  void ComputeDerived();
311 
312  // Function that updates linear_params_ and bias_params_, which
313  // uses the natural gradient code.
315  const PrecomputedIndexes &indexes,
316  const CuMatrixBase<BaseFloat> &in_value,
317  const CuMatrixBase<BaseFloat> &out_deriv);
318 
319  // Function that updates linear_params_ and bias_params_, which
320  // does not use the natural gradient code.
321  void UpdateSimple(
322  const PrecomputedIndexes &indexes,
323  const CuMatrixBase<BaseFloat> &in_value,
324  const CuMatrixBase<BaseFloat> &out_deriv);
325 
326  // Function called to initialize linear_params_ if init-unit=true in the config
327  // line.
328  void InitUnit();
329 
331 
332  // all_time_offsets_ is a copy of the corresponding variable in
333  // model_, stored as a vector instead of as a set for efficiency.
334  std::vector<int32> all_time_offsets_;
335  // time_offset_required_ is a vector with the same dimension as
336  // 'all_time_offsets_', which is true if the corresponding time-offset
337  // is a member of model_.required_time_offsets_.
338  std::vector<bool> time_offset_required_;
339 
340  // the linear parameters of the convolution.
341  // dimension is model_.ParamRows() by model_.ParamCols(),
342  // which equals num-filters-out by
343  // (num-filters-in * patch-rows * patch-cols),
344  // a.k.a.
345  // (num-filters-in * num-time-offsets * num-height-offsets).
347  // the bias parameters of the convolution, dimension is
348  // model_.num_filters_out.
350 
351 
352  // Maximum amount of temporary memory in megabytes that is allowed to be used
353  // in the convolution computation. (this is per computation, but it's
354  // released immediately after it's used, so it doesn't matter how many there
355  // are).
357 
358  // Controls whether or not the natural-gradient is used.
359  // Note: even if this is true, if is_gradient_ (from the
360  // UpdatableComponent base class) is true, we'll do the 'simple'
361  // update that doesn't include natural gradient.
363 
364  // Preconditioner for the input space, of dimension linear_params_.NumCols() +
365  // 1 (the 1 is for the bias). As with other natural-gradient objects, it's
366  // not stored with the model on disk but is reinitialized each time we start
367  // up.
369 
370  // Preconditioner for the output space, of dimension
371  // linear_params_.NumRows().
373 };
374 
375 
376 
450 class TdnnComponent: public UpdatableComponent {
451  public:
452 
453  // The use of this constructor should only precede InitFromConfig()
454  TdnnComponent();
455 
456  // Copy constructor
457  TdnnComponent(const TdnnComponent &other);
458 
459  virtual int32 InputDim() const {
460  return linear_params_.NumCols() / static_cast<int32>(time_offsets_.size());
461  }
462  virtual int32 OutputDim() const { return linear_params_.NumRows(); }
463 
464  virtual std::string Info() const;
465  virtual void InitFromConfig(ConfigLine *cfl);
466  virtual std::string Type() const { return "TdnnComponent"; }
467  virtual int32 Properties() const {
469  (bias_params_.Dim() == 0 ? kPropagateAdds : 0)|
471  }
472  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
473  const CuMatrixBase<BaseFloat> &in,
474  CuMatrixBase<BaseFloat> *out) const;
475  virtual void Backprop(const std::string &debug_info,
476  const ComponentPrecomputedIndexes *indexes,
477  const CuMatrixBase<BaseFloat> &in_value,
478  const CuMatrixBase<BaseFloat> &out_value,
479  const CuMatrixBase<BaseFloat> &out_deriv,
480  void *memo,
481  Component *to_update,
482  CuMatrixBase<BaseFloat> *in_deriv) const;
483 
484  virtual void Read(std::istream &is, bool binary);
485  virtual void Write(std::ostream &os, bool binary) const;
486  virtual Component* Copy() const {
487  return new TdnnComponent(*this);
488  }
489 
490 
491  // Some functions that are only to be reimplemented for GeneralComponents.
492 
493  // This ReorderIndexes function may insert 'blank' indexes (indexes with
494  // t == kNoTime) as well as reordering the indexes. This is allowed
495  // behavior of ReorderIndexes functions.
496  virtual void ReorderIndexes(std::vector<Index> *input_indexes,
497  std::vector<Index> *output_indexes) const;
498 
499  virtual void GetInputIndexes(const MiscComputationInfo &misc_info,
500  const Index &output_index,
501  std::vector<Index> *desired_indexes) const;
502 
503  // This function returns true if at least one of the input indexes used to
504  // compute this output index is computable.
505  virtual bool IsComputable(const MiscComputationInfo &misc_info,
506  const Index &output_index,
507  const IndexSet &input_index_set,
508  std::vector<Index> *used_inputs) const;
509 
511  const MiscComputationInfo &misc_info,
512  const std::vector<Index> &input_indexes,
513  const std::vector<Index> &output_indexes,
514  bool need_backprop) const;
515 
516  // Some functions from base-class UpdatableComponent.
517  virtual void Scale(BaseFloat scale);
518  virtual void Add(BaseFloat alpha, const Component &other);
519  virtual void PerturbParams(BaseFloat stddev);
520  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
521  virtual int32 NumParameters() const;
522  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
523  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
524  virtual void FreezeNaturalGradient(bool freeze);
525 
526 
528  public:
531  row_stride(other.row_stride), row_offsets(other.row_offsets) { }
532  virtual PrecomputedIndexes *Copy() const;
533  virtual void Write(std::ostream &os, bool binary) const;
534  virtual void Read(std::istream &os, bool binary);
535  virtual std::string Type() const {
536  return "TdnnComponentPrecomputedIndexes";
537  }
538  virtual ~PrecomputedIndexes() { }
539 
540 
541  // row_stride is the stride (in number of rows) we have to take in the
542  // input matrix each time we form a sub-matrix that will be part of the
543  // input to the tdnn operation. Normally this will be 1, but it may be,
544  // for example, 3 in layers where we do subsampling.
545  int32 row_stride;
546 
547  // 'row_offsets' is of the same dimension as time_offsets_. Each element
548  // describes the row offset (in the input matrix) of a sub-matrix.
549  // We will append together these sub-matrices (row-wise) to be the input to
550  // the affine or linear transform.
551  std::vector<int32> row_offsets;
552  };
553 
555 
556  // This allows you to resize the vector in order to add a bias where
557  // there previously was none-- obviously this should be done carefully.
559 
560  BaseFloat OrthonormalConstraint() const { return orthonormal_constraint_; }
561 
562  void ConsolidateMemory();
563  private:
564 
565  // This static function is a utility function that extracts a CuSubMatrix
566  // representing a subset of rows of 'input_matrix'.
567  // The numpy syntax would be:
568  // return input_matrix[row_offset:row_stride:num_output_rows*row_stride,:]
569  static CuSubMatrix<BaseFloat> GetInputPart(
570  const CuMatrixBase<BaseFloat> &input_matrix,
571  int32 num_output_rows,
572  int32 row_stride,
573  int32 row_offset);
574 
575  // see the definition for more explanation.
576  static void ModifyComputationIo(time_height_convolution::ConvolutionComputationIo *io);
577 
578  void Check() const;
579 
580  // Function that updates linear_params_, and bias_params_ if present, which
581  // uses the natural gradient code.
583  const PrecomputedIndexes &indexes,
584  const CuMatrixBase<BaseFloat> &in_value,
585  const CuMatrixBase<BaseFloat> &out_deriv);
586 
587  // Function that updates linear_params_, and bias_params_ if present, which
588  // does not use the natural gradient code.
589  void UpdateSimple(
590  const PrecomputedIndexes &indexes,
591  const CuMatrixBase<BaseFloat> &in_value,
592  const CuMatrixBase<BaseFloat> &out_deriv);
593 
594 
595 
596 
597  // time_offsets_ is the list of time-offsets of the input that
598  // we append together; it will typically be (-1,0,1) or (-3,0,3).
599  std::vector<int32> time_offsets_;
600 
601  // the linear parameters of the network; its NumRows() is the output
602  // dim, and its NumCols() equals the input dim times time_offsets_.size().
604 
605  // the bias parameters if this is an affine transform, or the empty vector if
606  // this is a linear operation (i.e. use-bias == false in the config).
608 
609  // If nonzero, this controls how we apply an orthonormal constraint to the
610  // parameter matrix; see docs for ConstrainOrthonormal() in nnet-utils.h.
611  // This class just returns the value via the OrthonormalConstraint() function;
612  // it doesn't actually do anything with it directly.
613  BaseFloat orthonormal_constraint_;
614 
615  // Controls whether or not the natural-gradient is used. Note: even if this
616  // is true, if is_gradient_ (from the UpdatableComponent base class) is true,
617  // we'll do the 'simple' update that doesn't include natural gradient.
619 
620  // Preconditioner for the input space, of dimension linear_params_.NumCols() +
621  // 1 (the 1 is for the bias). As with other natural-gradient objects, it's
622  // not stored with the model on disk but is reinitialized each time we start
623  // up.
625 
626  // Preconditioner for the output space, of dimension
627  // linear_params_.NumRows().
629 };
630 
631 
632 
633 
634 
635 
636 
637 } // namespace nnet3
638 } // namespace kaldi
639 
640 
641 #endif
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
Abstract base-class for neural-net components.
An abstract representation of a set of Indexes.
TdnnComponent is a more memory-efficient alternative to manually splicing several frames of input and...
This comment explains the basic framework used for everything related to time-height convolution...
Definition: convolution.h:125
virtual Component * Copy() const
Copies component (deep copy).
kaldi::int32 int32
Keywords for search: natural gradient, naturalgradient, NG-SGD.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overridden by components that use Natu...
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
void UpdateNaturalGradient(const PrecomputedIndexes &indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
Definition: nnet-common.h:44
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
This file contains some fairly low-level utilities for implementing convolutional neural networks and...
CuMatrixBase< BaseFloat > & LinearParams()
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void ReorderIndexes(std::vector< Index > *input_indexes, std::vector< Index > *output_indexes) const
This function only does something interesting for non-simple Components.
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void UpdateSimple(const PrecomputedIndexes &indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual int32 OutputDim() const
Returns output-dimension of this component.
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
virtual Component * Copy() const
Copies component (deep copy).
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual int32 InputDim() const
Returns input-dimension of this component.
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
virtual int32 OutputDim() const
Returns output-dimension of this component.
This struct represents the structure of a convolution computation.
Definition: convolution.h:252
Matrix for CUDA computing.
Definition: matrix-common.h:69
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.