nnet-computation.h
Go to the documentation of this file.
1 // nnet3/nnet-computation.h
2 
3 // Copyright 2012-2015 Johns Hopkins University (author: Daniel Povey)
4 // 2015 Xiaohui Zhang
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_NNET3_NNET_COMPUTATION_H_
22 #define KALDI_NNET3_NNET_COMPUTATION_H_
23 
24 #include "nnet3/nnet-common.h"
25 #include "nnet3/nnet-nnet.h"
26 
27 #include <iostream>
28 #include <sstream>
29 #include <vector>
30 #include <map>
31 
32 
33 namespace kaldi {
34 namespace nnet3 {
35 
36 
50 // MiscComputationInfo is a place we enter information about a requested
51 // computation that doesn't easily fit into the framework as given: things like
52 // the maximum unrolling we want to do, or how far ahead in time we want a
53 // particular adaptation method to be able to look. Elements of this are
54 // interpreted by individual components, for the most part.
56  // will add members here as needed.
57 
58  bool operator== (const MiscComputationInfo &other) const { return true; }
59  // This will print this in a human-readable way, for debugging.
60  void Print(std::ostream &os) const { };
61 };
62 
63 
64 // This defines one type of input that the network gets, or output that it will
65 // produce. For inputs, the name should correspond to an input or component
66 // node name in the nnet (components are allowed so context can be provided in
67 // recurrent setups); for outputs, the name should be an output node name in the
68 // Nnet.
69 // note: this structure is used to represent egs both before and after merging
70 // into minibatches; if this merging has been done, the indexes will vary in
71 // the 'n' dimension.
73  std::string name;
74  std::vector<Index> indexes;
75  bool has_deriv; // For output nodes, true if a derivative w.r.t. that output
76  // will be supplied. For input nodes, true if the derivative
77  // w.r.t. that input will be needed.
78  IoSpecification(): has_deriv(false) { }
79 
81  name(other.name), indexes(other.indexes), has_deriv(other.has_deriv) { }
82  IoSpecification(const std::string &name, const std::vector<Index> &indexes,
83  bool has_deriv = false):
84  name(name), indexes(indexes), has_deriv(has_deriv) { }
85  // This constructor sets n = 0, x = 0 and t from t_start to t_end-1; and
86  // has_deriv to false.
87  IoSpecification(const std::string &name, int32 t_start, int32 t_end);
88 
91  void Print(std::ostream &os) const;
92 
93  void Swap(IoSpecification *other);
94 
95  void Read(std::istream &istream, bool binary);
96 
97  void Write(std::ostream &ostream, bool binary) const;
98 
99  bool operator== (const IoSpecification &other) const;
100 };
101 
103  size_t operator () (const IoSpecification &io_spec) const noexcept;
104 };
105 
106 
107 // struct ComputationRequest is whatever we need in addition to the
108 // network itself in order to create the structure of a computation. The most
109 // important things it specifies are the indexes available at
110 // the input, the indexes requested at various output nodes, and whether or
111 // not we want to do backprop.
112 // The same input or output node cannot be listed twice in "inputs" or
113 // "outputs".
115  std::vector<IoSpecification> inputs;
116  std::vector<IoSpecification> outputs;
117 
122 
127 
131 
132  ComputationRequest(): need_model_derivative(false),
133  store_component_stats(false) { }
134 
137  bool NeedDerivatives() const;
138 
142  int32 IndexForInput(const std::string &node_name) const;
143 
147  int32 IndexForOutput(const std::string &node_name) const;
148 
151  void Print(std::ostream &os) const;
152 
153  void Read(std::istream &istream, bool binary);
154 
155  void Write(std::ostream &ostream, bool binary) const;
156 
157  bool operator== (const ComputationRequest &other) const;
158 };
159 
160 // Hash function for ComputationRequest. It converts
161 // ComputationRequest to hash code by looking at input
162 // and output IoSpecifications vectors.
164  size_t operator()(const ComputationRequest *cr) const noexcept;
165 };
166 
167 // Equality function for ComputationRequest pointer
169  public:
170  bool operator() (const ComputationRequest* cr1,
171  const ComputationRequest* cr2) const {
172  return (*cr1) == (*cr2);
173  }
174 };
175 
176 
297 
298 
299 
300 // struct NnetComputation defines the specific steps of a neural-net
301 // computation. View this as a compiled program; given the Nnet and the
302 // ComputationRequest, we compile to struct NnetComputation.
304  struct MatrixInfo {
309  MatrixInfo(int32 num_rows, int32 num_cols,
310  MatrixStrideType stride_type):
311  num_rows(num_rows), num_cols(num_cols), stride_type(stride_type) {}
312  void Read(std::istream &istream, bool binary);
313  void Write(std::ostream &ostream, bool binary) const;
314  };
316  bool is_deriv; // true if this represents a derivative, not a value.
317  std::vector<Cindex> cindexes;
318  MatrixDebugInfo(): is_deriv(false) { }
319  void Swap(MatrixDebugInfo *other); // Shallow swap
320  void Read(std::istream &istream, bool binary);
321  void Write(std::ostream &ostream, bool binary) const;
322  };
323  struct SubMatrixInfo {
324  int32 matrix_index; // index into "matrices": the underlying matrix.
330  SubMatrixInfo(int32 matrix_index, int32 row_offset, int32 num_rows,
331  int32 col_offset, int32 num_cols):
332  matrix_index(matrix_index), row_offset(row_offset), num_rows(num_rows),
333  col_offset(col_offset), num_cols(num_cols) {}
334  void Read(std::istream &istream, bool binary);
335  void Write(std::ostream &ostream, bool binary) const;
336  bool operator== (const SubMatrixInfo &other) const;
337  };
338  struct Command {
348  // Constructor where alpha is not specified;
349  // This constructor may become deprecated.
351  int32 arg1 = -1, int32 arg2 = -1, int32 arg3 = -1, int32 arg4 = -1,
352  int32 arg5 = -1, int32 arg6 = -1, int32 arg7 = -1):
353  command_type(command_type), alpha(1.0),
354  arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), arg6(arg6),
355  arg7(arg7) { }
356  // Constructor where you can specify alpha.
358  int32 arg1 = -1, int32 arg2 = -1, int32 arg3 = -1, int32 arg4 = -1,
359  int32 arg5 = -1, int32 arg6 = -1, int32 arg7 = -1):
360  command_type(command_type), alpha(alpha),
361  arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), arg6(arg6),
362  arg7(arg7) { }
363  void Read(std::istream &istream, bool binary);
364  void Write(std::ostream &ostream, bool binary) const;
365  };
367  // For each step of the computation for which we might possibly need to store
368  // a ComponentPrecomputedIndexes object (and note that this is only applicable
369  // for non-simple Components), this struct stores some information.
370  // The primary data is in 'data', it's an object of type inheriting from
371  // ComponentPrecomputedIndexes.
372  // The 'input_indexes' and 'output_indexes' are the vectors that were provided
373  // to the function Component::PrecomputeIndexes() when generating these
374  // PrecomputedIndexes objects. They are currently only stored in cases where
375  // the 'n' values in the computation are numbered only zero and one, because
376  // these types of computations are compiled in 'shortcut' compilation, and
377  // in that case we'll need these indexes later in order to generate the
378  // 'expanded' computation (see the function ExpandComputation()).
380  std::vector<Index> input_indexes;
381  std::vector<Index> output_indexes;
382  PrecomputedIndexesInfo(): data(NULL) { }
383  };
384 
385 
386  // "matrices" describes the sizes of the matrices that we use as variables in
387  // the computation [note: index zero is reserved for an empty matrix]. Note:
388  // we generally don't refer to matrices, even ones known to be whole matrices,
389  // using their matrix index directly, but via their submatrix indexes.
390  std::vector<MatrixInfo> matrices;
391 
392  // debug information for each of the matrices (indexed by matrix-index), only
393  // computed if requested in the compiler options.
394  std::vector<MatrixDebugInfo> matrix_debug_info;
395 
396 
397  // Because some parts of the computation may involve parts of a matrix, we
398  // declare sub-matrices. Some of these sub-matrices correspond to entire
399  // matrices (this is so that a sub-matrix index can be used to refer to either
400  // part of, or all of, a matrix). The first one (index 0) is an empty
401  // sub-matrix, which we use whenever an empty matrix is called for.
402  // Note: there is no rule against having identical submatrices. These
403  // will be removed by class ComputationRenumberer in nnet-optimize.cc.
404  std::vector<SubMatrixInfo> submatrices;
405 
406  // For Components that require precomputed indexes for their Propagate and
407  // Backprop operations. The index into this vector is referred to in
408  // kPropagate and kBackprop operations. Index 0 in the vector is reserved for
409  // the NULL pointer, which is used for "simple" components and others that do
410  // not require precomputed indexes.
411  // These are owned here.
412  std::vector<PrecomputedIndexesInfo> component_precomputed_indexes;
413 
414  // Used in commands kAddRows, kAddToRows, kCopyRows, which
415  // contain indexes into this data-member.
416  // Each vector<int32> is a vector of row-indexes (with -1 usually treated as
417  // a special case meaning "don't do anything for this row" for add
418  // commands, or "use zero" for copy commands).
419  std::vector<std::vector<int32> > indexes;
420 
421  // Used in commands kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti and
422  // kCopyToRowsMulti. Contains pairs (sub-matrix index, row index)- or the
423  // special pair (-1,-1) meaning "don't do anything for this row" for add
424  // commands, or "use zero" for copy commands.
425  std::vector<std::vector<std::pair<int32,int32> > > indexes_multi;
426 
427 
428  // Indexes used in kAddRowRanges commands, containing pairs (start-index,
429  // end-index)
430  std::vector<std::vector<std::pair<int32,int32> > > indexes_ranges;
431 
432 // // Information about where the values and derivatives of inputs and outputs of
433 // // the neural net live. Indexed by the node_index (the same index as used for
434 // // the nodes_ array in the Nnet), each pair is (value_matrix_index,
435 // // deriv_matrix_index), with 0 for derivatives that are not present.
436 // unordered_map<int32, std::pair<int32, int32> > input_output_info;
437 
438  // The sequence of commands.
439  std::vector<Command> commands;
440 
441  // This is a copy of "need_model_derivative" from the ComputationRequest.
443 
444  // computed from "indexes" by ComputeCudaIndexes().
445  std::vector<CuArray<int32> > indexes_cuda;
446 
447  // computed from "indexes_ranges" by ComputeCudaIndexes().
448  std::vector<CuArray<Int32Pair> > indexes_ranges_cuda;
449 
450 
455  int32 NewMatrix(int32 num_rows, int32 num_cols, MatrixStrideType stride_type);
456 
463  int32 NewSubMatrix(int32 base_submatrix,
464  int32 row_offset, int32 num_rows,
465  int32 col_offset, int32 num_cols);
466 
467  // returns true if this submatrix corresponds to the whole of a matrix.
468  // submatrix_index must be > 0.
469  bool IsWholeMatrix(int32 submatrix_index) const;
470 
471  // This must be called after setting up the computation but prior to actually
472  // using the Computation object in a computation, to compute CUDA versions of
473  // the indexes.
474  void ComputeCudaIndexes();
475 
476  // This function produces pretty-print output intended to allow a human to
477  // interpret the computation.
478  void Print(std::ostream &os, const Nnet &nnet) const;
479 
480  void Read(std::istream &istream, bool binary);
481  void Write(std::ostream &ostream, bool binary) const;
482 
483  // This function outputs a vector of strings, one for each submatrix,
484  // that explains the meaning of each one: something like "m1", "m2";
485  // and for parts of matrices, "m1(0:10, 20:40)".
486  void GetSubmatrixStrings(const Nnet &nnet,
487  std::vector<std::string> *submat_strings) const;
488 
489  // This function outputs a vector, indexed by matrix index, that gives you for
490  // each matrix, the index of a submatrix which refers to the whole of that
491  // matrix; it makes sure that each matrix has such a submatrix.
492  void GetWholeSubmatrices(std::vector<int32> *whole_submatrices) const;
493 
494 
495  // This function outputs information similar to Print(), but outputs the
496  // preamble as a string and a vector of strings, one per command (with no
497  // newlines on these). This is used in the debugging code in NnetComputer.
498  // either pointer argument may be NULL.
499  void GetCommandStrings(const Nnet &nnet,
500  std::string *preamble,
501  std::vector<std::string> *command_strings) const;
502 
503 
504  // destructor deletes pointers in component_precomputed_indexes.
505  ~NnetComputation();
506  // removes all information from this struct, making it like a newly constructed one.
507  void Clear() { *this = NnetComputation(); }
508 
509  // Copy constructor
510  NnetComputation(const NnetComputation &other);
511  // Assignment operator.
512  NnetComputation &operator = (const NnetComputation &other);
513  // Default constructor
514  NnetComputation(): need_model_derivative(false) { }
515 };
516 
517 // A helper class equipped with the stream insertion operator<< to print out
518 // the NnetComputation in a human-readable way, with NnetComputation::Print(),
519 // for debugging purposes, e.g.:
520 // KALDI_VLOG(3) << NnetComputationPrintInserter{mycomputation, mynet};
523  const Nnet& nnet;
524  void Print(std::ostream& os) const {
525  computation.Print(os, nnet);
526  }
527  friend inline std::ostream &operator <<(std::ostream &os,
529  xhis.Print(os);
530  return os;
531  }
532 };
533 
534 } // namespace nnet3
535 } // namespace kaldi
536 
537 #endif
CommandType
CommandType is an enum that describes the category of the command used in the NnetComputation.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
IoSpecification(const IoSpecification &other)
Command(CommandType command_type=kNoOperationMarker, int32 arg1=-1, int32 arg2=-1, int32 arg3=-1, int32 arg4=-1, int32 arg5=-1, int32 arg6=-1, int32 arg7=-1)
bool store_component_stats
you should set store_component_stats to true if you need the average-activation and average-derivative...
std::vector< MatrixDebugInfo > matrix_debug_info
bool need_model_derivative
if need_model_derivative is true, then we'll be doing either model training or model-derivative compu...
MiscComputationInfo misc_info
misc_info is for extensibility to things that don't easily fit into the framework.
void Print(std::ostream &os, const Nnet &nnet) const
kaldi::int32 int32
std::ostream & operator<<(std::ostream &ostream, const Index &index)
Definition: nnet-common.cc:424
std::vector< IoSpecification > inputs
std::vector< MatrixInfo > matrices
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
std::vector< Command > commands
Command(BaseFloat alpha, CommandType command_type=kNoOperationMarker, int32 arg1=-1, int32 arg2=-1, int32 arg3=-1, int32 arg4=-1, int32 arg5=-1, int32 arg6=-1, int32 arg7=-1)
std::vector< CuArray< int32 > > indexes_cuda
std::vector< std::vector< std::pair< int32, int32 > > > indexes_multi
std::vector< SubMatrixInfo > submatrices
MatrixStrideType
Definition: matrix-common.h:44
std::vector< CuArray< Int32Pair > > indexes_ranges_cuda
SubMatrixInfo(int32 matrix_index, int32 row_offset, int32 num_rows, int32 col_offset, int32 num_cols)
std::vector< PrecomputedIndexesInfo > component_precomputed_indexes
bool operator==(const MiscComputationInfo &other) const
std::vector< Index > indexes
std::vector< IoSpecification > outputs
IoSpecification(const std::string &name, const std::vector< Index > &indexes, bool has_deriv=false)
std::vector< std::vector< int32 > > indexes
void Print(std::ostream &os) const
MatrixInfo(int32 num_rows, int32 num_cols, MatrixStrideType stride_type)
std::vector< std::vector< std::pair< int32, int32 > > > indexes_ranges