doc/nnet-attention-component_8h_source.html

 // nnet3/nnet-attention-component.h

 // Copyright      2017  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #ifndef KALDI_NNET3_NNET_ATTENTION_COMPONENT_H_
 #define KALDI_NNET3_NNET_ATTENTION_COMPONENT_H_

 #include "nnet3/nnet-common.h"
 #include "nnet3/nnet-component-itf.h"
 #include "nnet3/natural-gradient-online.h"
 #include "nnet3/attention.h"
 #include <iostream>

 namespace kaldi {
 namespace nnet3 {


 // RestrictedAttentionComponent implements an attention model with restricted
 // temporal context.  The input is a sequence of per-head blocks, each holding
 // (key, value, query); the output is a sequence of per-head blocks holding the
 // attention-weighted average of the values, optionally followed by the
 // softmax weights over the context positions.
 class RestrictedAttentionComponent: public Component {
  public:

   // Default constructor.  Members are left unset here; per the original
   // author's note it should only be used right before InitFromConfig().
   RestrictedAttentionComponent() { }

   // Copy constructor (defined in the .cc file).
   RestrictedAttentionComponent(const RestrictedAttentionComponent &other);

   // Input dimension: one block per head, each block laid out as
   // (key, value, query), where the query has dimension
   // key_dim_ + context_dim_.
   virtual int32 InputDim() const {
     const int32 query_dim = key_dim_ + context_dim_;
     const int32 dim_per_head = key_dim_ + value_dim_ + query_dim;
     return num_heads_ * dim_per_head;
   }

   // Output dimension: one block per head.  Each block is the
   // attention-weighted average of the input values; when output_context_ is
   // true, the softmax encoding of the chosen positions (context_dim_ values)
   // is appended to it.
   virtual int32 OutputDim() const {
     int32 dim_per_head = value_dim_;
     if (output_context_)
       dim_per_head = value_dim_ + context_dim_;
     return num_heads_ * dim_per_head;
   }

   // Text-form diagnostic information about this component.
   virtual std::string Info() const;

   // Initializes the component from a parsed config line.
   virtual void InitFromConfig(ConfigLine *cfl);

   // Name of this component type.
   virtual std::string Type() const {
     return "RestrictedAttentionComponent";
   }

   // Bitmask of the properties of this component.
   virtual int32 Properties() const {
     const int32 flags = kReordersIndexes | kBackpropNeedsInput |
         kPropagateAdds | kBackpropAdds | kStoresStats | kUsesMemo;
     return flags;
   }

   // Forward computation.  The returned pointer is the 'memo' (see struct
   // Memo below); it is freed through DeleteMemo().
   virtual void* Propagate(
       const ComponentPrecomputedIndexes *indexes,
       const CuMatrixBase<BaseFloat> &in,
       CuMatrixBase<BaseFloat> *out) const;

   // Accumulates statistics; 'memo' is the object returned by Propagate().
   virtual void StoreStats(
       const CuMatrixBase<BaseFloat> &in_value,
       const CuMatrixBase<BaseFloat> &out_value,
       void *memo);

   virtual void Scale(BaseFloat scale);
   virtual void Add(BaseFloat alpha, const Component &other);

   // Resets the statistics stored by StoreStats().
   virtual void ZeroStats();

   // Backward computation; 'memo' is the object returned by Propagate().
   virtual void Backprop(
       const std::string &debug_info,
       const ComponentPrecomputedIndexes *indexes,
       const CuMatrixBase<BaseFloat> &in_value,
       const CuMatrixBase<BaseFloat> &out_value,
       const CuMatrixBase<BaseFloat> &out_deriv,
       void *memo,
       Component *to_update,
       CuMatrixBase<BaseFloat> *in_deriv) const;

   // Serialization.
   virtual void Read(std::istream &is, bool binary);
   virtual void Write(std::ostream &os, bool binary) const;

   // Returns a newly allocated copy of this component, made with the copy
   // constructor.
   virtual Component* Copy() const {
     return new RestrictedAttentionComponent(*this);
   }

   // Frees a memo previously returned by Propagate().
   virtual void DeleteMemo(void *memo) const {
     Memo *typed_memo = static_cast<Memo*>(memo);
     delete typed_memo;
   }

   // The functions below only need to be reimplemented by GeneralComponents.

   // Besides reordering the indexes, this ReorderIndexes function may insert
   // 'blank' indexes (indexes with t == kNoTime).  ReorderIndexes functions
   // are allowed to do this.
   virtual void ReorderIndexes(
       std::vector<Index> *input_indexes,
       std::vector<Index> *output_indexes) const;

   virtual void GetInputIndexes(
       const MiscComputationInfo &misc_info,
       const Index &output_index,
       std::vector<Index> *desired_indexes) const;

   // Returns true if at least one of the input indexes used to compute this
   // output index is computable.
   virtual bool IsComputable(
       const MiscComputationInfo &misc_info,
       const Index &output_index,
       const IndexSet &input_index_set,
       std::vector<Index> *used_inputs) const;

   virtual ComponentPrecomputedIndexes* PrecomputeIndexes(
       const MiscComputationInfo &misc_info,
       const std::vector<Index> &input_indexes,
       const std::vector<Index> &output_indexes,
       bool need_backprop) const;

   // The precomputed-indexes object for this component; it just wraps a
   // ConvolutionComputationIo describing the input/output structure.
   class PrecomputedIndexes: public ComponentPrecomputedIndexes {
    public:
     PrecomputedIndexes() { }
     PrecomputedIndexes(const PrecomputedIndexes &other): io(other.io) { }
     virtual PrecomputedIndexes *Copy() const;
     virtual void Write(std::ostream &os, bool binary) const;
     virtual void Read(std::istream &os, bool binary);
     virtual std::string Type() const {
       return "RestrictedAttentionComponentPrecomputedIndexes";
     }
     virtual ~PrecomputedIndexes() { }

     time_height_convolution::ConvolutionComputationIo io;
   };

   // The 'memo' object that Propagate() returns.
   struct Memo {
     // 'c' has (num_heads_ * num-output-frames) rows and context_dim_ columns,
     // where num-output-frames is the number of output frames produced by the
     // corresponding Propagate call.  Each block of 'num-output-frames' rows
     // is the post-softmax matrix of weights.
     CuMatrix<BaseFloat> c;
   };

  private:

   // Propagation for a single head; the top-level Propagate function calls
   // this once per head.  (Later on we may figure out a way to avoid doing
   // this sequentially.)  'in' and 'out' are sub-matrices of the matrices
   // given to the top-level Propagate function, and 'c' is a sub-matrix of
   // the 'c' matrix of the memo being created.
   //
   // 'c' is assumed to have been zeroed already.
   void PropagateOneHead(
       const time_height_convolution::ConvolutionComputationIo &io,
       const CuMatrixBase<BaseFloat> &in,
       CuMatrixBase<BaseFloat> *c,
       CuMatrixBase<BaseFloat> *out) const;

   // Backprop for a single head; called by Backprop().
   void BackpropOneHead(
       const time_height_convolution::ConvolutionComputationIo &io,
       const CuMatrixBase<BaseFloat> &in_value,
       const CuMatrixBase<BaseFloat> &c,
       const CuMatrixBase<BaseFloat> &out_deriv,
       CuMatrixBase<BaseFloat> *in_deriv) const;

   // Used in ReorderIndexes() and PrecomputeIndexes(): works out the grid
   // structure over time that we will have at the input and at the output.
   // The grid may encompass more than what is listed in 'input_indexes' and
   // 'output_indexes'; that is OK.  ReorderIndexes() pads with placeholders
   // that have t == kNoTime; at the input of this component those
   // placeholders will be zero, and at the output they will be ignored.
   void GetComputationStructure(
       const std::vector<Index> &input_indexes,
       const std::vector<Index> &output_indexes,
       time_height_convolution::ConvolutionComputationIo *io) const;

   // Used in ReorderIndexes(): produces the indexes with the correct
   // structure and order (the structure is specified in the 'io' object).
   // Apart from reordering the provided indexes, this may pad them with
   // indexes whose time is kNoTime.
   //
   // The indexes output by this function form a grid in which 't' has the
   // larger stride and the (n, x) pairs the smaller one.  The number of
   // distinct (n, x) pairs should equal io.num_images.  Any 't' value that is
   // needed in the new indexes but was absent from the old indexes is
   // replaced with kNoTime.
   void GetIndexes(
       const std::vector<Index> &input_indexes,
       const std::vector<Index> &output_indexes,
       time_height_convolution::ConvolutionComputationIo &io,
       std::vector<Index> *new_input_indexes,
       std::vector<Index> *new_output_indexes) const;

   // Utility function used in GetIndexes().
   static void CreateIndexesVector(
       const std::vector<std::pair<int32, int32> > &n_x_pairs,
       int32 t_start, int32 t_step, int32 num_t_values,
       const std::unordered_set<Index, IndexHasher> &index_set,
       std::vector<Index> *output_indexes);

   void Check() const;

   int32 num_heads_;
   int32 key_dim_;
   int32 value_dim_;
   int32 num_left_inputs_;
   int32 num_right_inputs_;
   int32 time_stride_;
   // Derived parameter: equals 1 + num_left_inputs_ + num_right_inputs_.
   int32 context_dim_;
   int32 num_left_inputs_required_;
   int32 num_right_inputs_required_;
   bool output_context_;
   BaseFloat key_scale_;

   // Count of frames corresponding to the stats.
   double stats_count_;
   // Entropy stats, indexed by head (dimension is num_heads_).  Divide by
   // stats_count_ to normalize.
   Vector<double> entropy_stats_;
   // Stats of the posteriors of the different offsets; dimension is
   // num_heads_ * context_dim_ (num-heads has the larger stride).
   CuMatrix<double> posterior_stats_;
 };


 } // namespace nnet3
 } // namespace kaldi


 #endif
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet3::RestrictedAttentionComponent::GetIndexes
void GetIndexes(const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, time_height_convolution::ConvolutionComputationIo &io, std::vector< Index > *new_input_indexes, std::vector< Index > *new_output_indexes) const
Definition: nnet-attention-component.cc:597

kaldi::nnet3::RestrictedAttentionComponent::Add
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
Definition: nnet-attention-component.cc:261

kaldi::nnet3::RestrictedAttentionComponent::GetComputationStructure
void GetComputationStructure(const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, time_height_convolution::ConvolutionComputationIo *io) const
Definition: nnet-attention-component.cc:389

kaldi::nnet3::RestrictedAttentionComponent::InputDim
virtual int32 InputDim() const
Returns input-dimension of this component.
Definition: nnet-attention-component.h:115

kaldi::nnet3::RestrictedAttentionComponent::key_scale_
BaseFloat key_scale_
Definition: nnet-attention-component.h:292

kaldi::nnet3::RestrictedAttentionComponent::Check
void Check() const
Definition: nnet-attention-component.cc:277

kaldi::nnet3::RestrictedAttentionComponent::Read
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
Definition: nnet-attention-component.cc:473

kaldi::nnet3::RestrictedAttentionComponent::time_stride_
int32 time_stride_
Definition: nnet-attention-component.h:286

nnet-component-itf.h

nnet-common.h

kaldi::nnet3::Component
Abstract base-class for neural-net components.
Definition: nnet-component-itf.h:114

kaldi::nnet3::RestrictedAttentionComponent::Copy
virtual Component * Copy() const
Copies component (deep copy).
Definition: nnet-attention-component.h:155

kaldi::nnet3::kUsesMemo
Definition: nnet-component-itf.h:79

kaldi::nnet3::IndexSet
An abstract representation of a set of Indexes.
Definition: nnet-computation-graph.h:322

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes::PrecomputedIndexes
PrecomputedIndexes(const PrecomputedIndexes &other)
Definition: nnet-attention-component.h:188

kaldi::nnet3::RestrictedAttentionComponent::num_left_inputs_required_
int32 num_left_inputs_required_
Definition: nnet-attention-component.h:289

kaldi::nnet3::RestrictedAttentionComponent::BackpropOneHead
void BackpropOneHead(const time_height_convolution::ConvolutionComputationIo &io, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &c, const CuMatrixBase< BaseFloat > &out_deriv, CuMatrixBase< BaseFloat > *in_deriv) const
Definition: nnet-attention-component.cc:335

kaldi::nnet3::RestrictedAttentionComponent::output_context_
bool output_context_
Definition: nnet-attention-component.h:291

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::nnet3::RestrictedAttentionComponent::CreateIndexesVector
static void CreateIndexesVector(const std::vector< std::pair< int32, int32 > > &n_x_pairs, int32 t_start, int32 t_step, int32 num_t_values, const std::unordered_set< Index, IndexHasher > &index_set, std::vector< Index > *output_indexes)
Utility function used in GetIndexes().
Definition: nnet-attention-component.cc:575

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::nnet3::MiscComputationInfo
Definition: nnet-computation.h:55

kaldi::nnet3::RestrictedAttentionComponent::IsComputable
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
Definition: nnet-attention-component.cc:527

kaldi::nnet3::RestrictedAttentionComponent::RestrictedAttentionComponent
RestrictedAttentionComponent()
Definition: nnet-attention-component.h:110

kaldi::nnet3::RestrictedAttentionComponent::num_right_inputs_required_
int32 num_right_inputs_required_
Definition: nnet-attention-component.h:290

kaldi::nnet3::RestrictedAttentionComponent::posterior_stats_
CuMatrix< double > posterior_stats_
Definition: nnet-attention-component.h:298

kaldi::nnet3::RestrictedAttentionComponent::OutputDim
virtual int32 OutputDim() const
Returns output-dimension of this component.
Definition: nnet-attention-component.h:121

kaldi::nnet3::RestrictedAttentionComponent::Backprop
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
Definition: nnet-attention-component.cc:292

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes::Copy
virtual PrecomputedIndexes * Copy() const
Definition: nnet-attention-component.cc:649

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes::io
time_height_convolution::ConvolutionComputationIo io
Definition: nnet-attention-component.h:198

kaldi::nnet3::RestrictedAttentionComponent::DeleteMemo
virtual void DeleteMemo(void *memo) const
This virtual function only needs to be overwritten by Components that return a non-NULL memo from the...
Definition: nnet-attention-component.h:158

kaldi::nnet3::RestrictedAttentionComponent::num_right_inputs_
int32 num_right_inputs_
Definition: nnet-attention-component.h:285

kaldi::nnet3::Index
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
Definition: nnet-common.h:44

kaldi::nnet3::RestrictedAttentionComponent::Scale
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
Definition: nnet-attention-component.cc:255

kaldi::nnet3::RestrictedAttentionComponent::entropy_stats_
Vector< double > entropy_stats_
Definition: nnet-attention-component.h:295

kaldi::nnet3::RestrictedAttentionComponent::InitFromConfig
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
Definition: nnet-attention-component.cc:80

kaldi::nnet3::RestrictedAttentionComponent::key_dim_
int32 key_dim_
Definition: nnet-attention-component.h:282

float

kaldi::nnet3::RestrictedAttentionComponent::stats_count_
double stats_count_
Definition: nnet-attention-component.h:294

kaldi::nnet3::RestrictedAttentionComponent::Memo::c
CuMatrix< BaseFloat > c
Definition: nnet-attention-component.h:208

natural-gradient-online.h

kaldi::nnet3::RestrictedAttentionComponent::PrecomputeIndexes
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
Definition: nnet-attention-component.cc:622

kaldi::nnet3::RestrictedAttentionComponent::Type
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Definition: nnet-attention-component.h:130

kaldi::nnet3::kStoresStats
Definition: nnet-component-itf.h:71

kaldi::nnet3::RestrictedAttentionComponent::value_dim_
int32 value_dim_
Definition: nnet-attention-component.h:283

kaldi::nnet3::kBackpropNeedsInput
Definition: nnet-component-itf.h:65

kaldi::nnet3::RestrictedAttentionComponent::ZeroStats
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
Definition: nnet-attention-component.cc:249

kaldi::nnet3::RestrictedAttentionComponent::GetInputIndexes
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
Definition: nnet-attention-component.cc:507

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes
Definition: nnet-attention-component.h:185

kaldi::nnet3::kPropagateAdds
Definition: nnet-component-itf.h:53

kaldi::nnet3::RestrictedAttentionComponent::Info
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
Definition: nnet-attention-component.cc:32

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes::Type
virtual std::string Type() const
Definition: nnet-attention-component.h:193

kaldi::nnet3::RestrictedAttentionComponent::context_dim_
int32 context_dim_
Definition: nnet-attention-component.h:287

kaldi::nnet3::RestrictedAttentionComponent::Propagate
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
Definition: nnet-attention-component.cc:132

kaldi::nnet3::RestrictedAttentionComponent::Write
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
Definition: nnet-attention-component.cc:442

kaldi::nnet3::RestrictedAttentionComponent::Memo
Definition: nnet-attention-component.h:202

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes::Read
virtual void Read(std::istream &os, bool binary)
Definition: nnet-attention-component.cc:661

kaldi::nnet3::RestrictedAttentionComponent::ReorderIndexes
virtual void ReorderIndexes(std::vector< Index > *input_indexes, std::vector< Index > *output_indexes) const
This function only does something interesting for non-simple Components.
Definition: nnet-attention-component.cc:376

kaldi::nnet3::time_height_convolution::ConvolutionComputationIo
Definition: convolution.h:380

kaldi::nnet3::kReordersIndexes
Definition: nnet-component-itf.h:58

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

kaldi::nnet3::kBackpropAdds
Definition: nnet-component-itf.h:61

kaldi::nnet3::ComponentPrecomputedIndexes
Definition: nnet-component-itf.h:97

kaldi::Vector< double >

kaldi::ConfigLine
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205

kaldi::nnet3::RestrictedAttentionComponent
RestrictedAttentionComponent implements an attention model with restricted temporal context...
Definition: nnet-attention-component.h:106

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes::~PrecomputedIndexes
virtual ~PrecomputedIndexes()
Definition: nnet-attention-component.h:196

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes::Write
virtual void Write(std::ostream &os, bool binary) const
Definition: nnet-attention-component.cc:653

attention.h
This file contains the lower-level interface for self-attention.

kaldi::nnet3::RestrictedAttentionComponent::Properties
virtual int32 Properties() const
Return bitmask of the component's properties.
Definition: nnet-attention-component.h:131

kaldi::nnet3::RestrictedAttentionComponent::PropagateOneHead
void PropagateOneHead(const time_height_convolution::ConvolutionComputationIo &io, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *c, CuMatrixBase< BaseFloat > *out) const
Definition: nnet-attention-component.cc:160

kaldi::nnet3::RestrictedAttentionComponent::StoreStats
virtual void StoreStats(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, void *memo)
This function may store stats on average activation values, and for some component types...
Definition: nnet-attention-component.cc:200

kaldi::nnet3::RestrictedAttentionComponent::num_heads_
int32 num_heads_
Definition: nnet-attention-component.h:281

kaldi::nnet3::RestrictedAttentionComponent::PrecomputedIndexes::PrecomputedIndexes
PrecomputedIndexes()
Definition: nnet-attention-component.h:187

kaldi::nnet3::RestrictedAttentionComponent::num_left_inputs_
int32 num_left_inputs_
Definition: nnet-attention-component.h:284