nnet-optimize.cc
Go to the documentation of this file.
1 // nnet3/nnet-optimize.cc
2 
3 // Copyright 2015 Johns Hopkins University (author: Daniel Povey)
4 // 2015 Xiaohui Zhang
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include <iomanip>
22 #include "nnet3/nnet-optimize.h"
24 #include "nnet3/nnet-utils.h"
25 #include "base/timer.h"
26 
27 namespace kaldi {
28 namespace nnet3 {
29 
30 void NnetOptimizeOptions::Read(std::istream &is, bool binary) {
31  ExpectToken(is, binary, "<NnetOptimizeOptions>");
32  ExpectToken(is, binary, "<Optimize>");
33  ReadBasicType(is, binary, &optimize);
34  ExpectToken(is, binary, "<ConsolidateModelUpdate>");
36  ExpectToken(is, binary, "<PropagateInPlace>");
37  ReadBasicType(is, binary, &propagate_in_place);
38  ExpectToken(is, binary, "<BackpropInPlace>");
39  ReadBasicType(is, binary, &backprop_in_place);
40  if (PeekToken(is, binary) == 'O') {
41  ExpectToken(is, binary, "<OptimizeRowOps>");
42  ReadBasicType(is, binary, &optimize_row_ops);
43  }
44  if (PeekToken(is, binary) == 'S') {
45  ExpectToken(is, binary, "<SplitRowOps>");
46  ReadBasicType(is, binary, &split_row_ops);
47  }
48  if (PeekToken(is, binary) == 'E') {
49  ExpectToken(is, binary, "<ExtendMatrices>");
50  ReadBasicType(is, binary, &extend_matrices);
51  }
52  ExpectToken(is, binary, "<ConvertAddition>");
53  ReadBasicType(is, binary, &convert_addition);
54  ExpectToken(is, binary, "<RemoveAssignments>");
55  ReadBasicType(is, binary, &remove_assignments);
56  ExpectToken(is, binary, "<AllowLeftMerge>");
57  ReadBasicType(is, binary, &allow_left_merge);
58  ExpectToken(is, binary, "<AllowRightMerge>");
59  ReadBasicType(is, binary, &allow_right_merge);
60  ExpectToken(is, binary, "<InitializeUndefined>");
61  ReadBasicType(is, binary, &initialize_undefined);
62  ExpectToken(is, binary, "<MoveSizingCommands>");
63  ReadBasicType(is, binary, &move_sizing_commands);
64  ExpectToken(is, binary, "<AllocateFromOther>");
65  ReadBasicType(is, binary, &allocate_from_other);
66  ExpectToken(is, binary, "<MinDerivTime>");
67  ReadBasicType(is, binary, &min_deriv_time);
68  ExpectToken(is, binary, "<MaxDerivTime>");
69  ReadBasicType(is, binary, &max_deriv_time);
70  if (PeekToken(is, binary) == 'M') {
71  ExpectToken(is, binary, "<MaxDerivTimeRelative>");
73  }
74  if (PeekToken(is, binary) == 'S') {
75  ExpectToken(is, binary, "<SnipRowOps>");
76  ReadBasicType(is, binary, &snip_row_ops);
77  }
78  if (PeekToken(is, binary) == 'M') {
79  ExpectToken(is, binary, "<MemoryCompressionLevel>");
81  }
82  ExpectToken(is, binary, "</NnetOptimizeOptions>");
83 }
84 
85 void NnetOptimizeOptions::Write(std::ostream &os, bool binary) const {
86  WriteToken(os, binary, "<NnetOptimizeOptions>");
87  WriteToken(os, binary, "<Optimize>");
88  WriteBasicType(os, binary, optimize);
89  WriteToken(os, binary, "<ConsolidateModelUpdate>");
91  WriteToken(os, binary, "<PropagateInPlace>");
93  WriteToken(os, binary, "<BackpropInPlace>");
94  WriteBasicType(os, binary, backprop_in_place);
95  WriteToken(os, binary, "<OptimizeRowOps>");
96  WriteBasicType(os, binary, optimize_row_ops);
97  WriteToken(os, binary, "<SplitRowOps>");
98  WriteBasicType(os, binary, split_row_ops);
99  WriteToken(os, binary, "<ExtendMatrices>");
100  WriteBasicType(os, binary, extend_matrices);
101  WriteToken(os, binary, "<ConvertAddition>");
102  WriteBasicType(os, binary, convert_addition);
103  WriteToken(os, binary, "<RemoveAssignments>");
104  WriteBasicType(os, binary, remove_assignments);
105  WriteToken(os, binary, "<AllowLeftMerge>");
106  WriteBasicType(os, binary, allow_left_merge);
107  WriteToken(os, binary, "<AllowRightMerge>");
108  WriteBasicType(os, binary, allow_right_merge);
109  WriteToken(os, binary, "<InitializeUndefined>");
111  WriteToken(os, binary, "<MoveSizingCommands>");
113  WriteToken(os, binary, "<AllocateFromOther>");
114  WriteBasicType(os, binary, allocate_from_other);
115  WriteToken(os, binary, "<MinDerivTime>");
116  WriteBasicType(os, binary, min_deriv_time);
117  WriteToken(os, binary, "<MaxDerivTime>");
118  WriteBasicType(os, binary, max_deriv_time);
119  WriteToken(os, binary, "<MaxDerivTimeRelative>");
121  WriteToken(os, binary, "<SnipRowOps>");
122  WriteBasicType(os, binary, snip_row_ops);
123  WriteToken(os, binary, "<MemoryCompressionLevel>");
125  WriteToken(os, binary, "</NnetOptimizeOptions>");
126 }
127 
129  return (other.optimize == optimize &&
134  other.split_row_ops == split_row_ops &&
142  other.min_deriv_time == min_deriv_time &&
143  other.max_deriv_time == max_deriv_time &&
145  other.snip_row_ops == snip_row_ops &&
147 }
148 
149 // move commands that resize and zero matrices to as late/early as possible.
150 // (however, keep input and output commands where they were; it creates other
151 // headaches if we move those).
152 void MoveSizingCommands(const Nnet &nnet, NnetComputation *computation) {
153  ComputationVariables variables;
154  variables.Init(*computation);
155  std::vector<CommandAttributes> attributes;
156  ComputeCommandAttributes(nnet, *computation, variables, &attributes);
157  std::vector<std::vector<Access> > variable_accesses;
158  ComputeVariableAccesses(variables, attributes, &variable_accesses);
159  std::vector<MatrixAccesses> matrix_accesses;
160  ComputeMatrixAccesses(nnet, *computation, variables, attributes,
161  &matrix_accesses);
162 
163  // The way we will renumber the commands is, we will first set this vector up
164  // with pairs (command-index * 3, pointer-to-command), and we will then modify
165  // the command-indexes in this vector to the numbers that we want, and sort
166  // it. The reason for the * 3 is so that we can number commands "just-after"
167  // existing indexes (by adding 1) and "just-before" (by subtracting 1).
168  int32 num_commands = computation->commands.size(),
169  num_matrices = matrix_accesses.size();
170 
171  // Matrix allocation commands tend to be followed by a command that zeroes the
172  // matrix. We want to treat the two commands as a single unit for purposes of
173  // reordering. is_command_pair[c] will be true if command c is the first
174  // element of such a pair.
175  std::vector<bool> is_command_pair(num_commands, false);
176  for (int32 c = 0; c + 1 < num_commands; c++) {
177  if (computation->commands[c].command_type == kAllocMatrix &&
178  computation->commands[c+1].command_type == kSetConst &&
179  computation->commands[c].arg1 == computation->commands[c+1].arg1 &&
180  computation->commands[c+1].alpha == 0.0) {
181  is_command_pair[c] = true;
182  }
183  }
184 
185  // 'command_reordering' contains (new-number, old-number) of commands.
186  // the new-number is multiplied by 3 for reasons explained above.
187  std::vector<std::pair<int32,int32> >
188  command_reordering(num_commands);
189  // Note: for now we include the second-elements-of-pairs (i.e. the zeroing
190  // commands that follow allocation commands) here; we'll ignore them later.
191  for (int32 c = 0; c < num_commands; c++) {
192  command_reordering[c].first = c * 3;
193  command_reordering[c].second = c;
194  }
195  for (int32 m = 1; m < num_matrices; m++) {
196  const MatrixAccesses &ma = matrix_accesses[m];
197  // The following if-block relates to reordering of allocation (and,
198  // implicitly, zeroing) commands.
199  if (ma.allocate_command != -1 &&
200  computation->commands[ma.allocate_command].command_type == kAllocMatrix) {
201  // first_access_command will be index of first access, except for the
202  // zeroing command that immediately follows the initialization command.
203  int32 first_access_command = -1;
204  // this block sets 'first_access_command'.
205  if (!ma.accesses.empty()) {
206  first_access_command = ma.accesses[0].command_index;
207  if (first_access_command == ma.allocate_command + 1 &&
208  is_command_pair[ma.allocate_command]) {
209  if (ma.accesses.size() > 1)
210  first_access_command = ma.accesses[1].command_index;
211  else
212  first_access_command = -1;
213  }
214  }
215  if (first_access_command != -1) {
216  KALDI_ASSERT(first_access_command > ma.allocate_command);
217  // move the initialization command to just before the first access.
218  command_reordering[ma.allocate_command].first =
219  first_access_command * 3 - 1;
220  }
221  }
222  // The following if-block relates to reordering of deallocation
223  // commands.
224  if (ma.deallocate_command != -1 && !ma.accesses.empty() &&
225  computation->commands[ma.deallocate_command].command_type ==
226  kDeallocMatrix) {
227  int32 last_access_command = ma.accesses.back().command_index;
228  // move the deallocation command to just after the last access.
229  command_reordering[ma.deallocate_command].first =
230  last_access_command * 3 + 1;
231  }
232  }
233  std::sort(command_reordering.begin(), command_reordering.end());
234  std::vector<NnetComputation::Command> reordered_commands;
235  reordered_commands.reserve(num_commands);
236  for (int32 c = 0; c < num_commands; c++) {
237  int32 old_index = command_reordering[c].second;
238  NnetComputation::Command &old_command = computation->commands[old_index];
239  // the following assert is because this optimization is not allowed
240  // after looped optimization.
241  KALDI_ASSERT(old_command.command_type != kGotoLabel);
242  if (old_index > 0 && is_command_pair[old_index - 1]) {
243  // If the old command-index was a zeroing command that follows
244  // an allocation command, ignore it; it will be reordered to
245  // right after wherever the allocation command went, and we'll
246  // deal with it when we deal with the first element of the pair.
247  continue;
248  } else {
249  reordered_commands.push_back(computation->commands[old_index]);
250  if (is_command_pair[old_index]) {
251  // if this command is the first member of an (allocation, zeroing)
252  // pair then we need to deal with the zeroing command as well.
253  reordered_commands.push_back(computation->commands[old_index + 1]);
254  }
255  }
256  }
257  computation->commands = reordered_commands;
258 }
259 
260 // This function removes commands of type kSetConst (with alpha=0.0), where
261 // possible.
263  NnetComputation *computation) {
264  Analyzer a;
265  a.Init(nnet, *computation);
266 
267  // OK, now we'll work out which matrices have all their pieces (i.e. all the
268  // variables belonging to that matrix) written to as the first instruction
269  // apart from the initial zeroing. These matrices can have the initial
270  // zeroing replaced by a sizing operation that leaves the data undefined.
271  int32 num_matrices = a.matrix_accesses.size();
272  for (int32 matrix_index = 0; matrix_index < num_matrices; matrix_index++) {
273  const MatrixAccesses &accesses = a.matrix_accesses[matrix_index];
274  if (accesses.accesses.empty())
275  continue;
276  int32 zeroing_command_index = accesses.accesses[0].command_index;
277  NnetComputation::Command *command =
278  &(computation->commands[zeroing_command_index]);
279  if (!(command->command_type == kSetConst &&
280  command->alpha == 0.0)) {
281  continue; // First command is not a zeroing command
282  }
283  // OK, the first command that accesses this matrix is a zeroing command;
284  // we're going to figure out whether it was necessary.
285  std::vector<int32> variables_for_matrix;
286  a.variables.AppendVariablesForMatrix(matrix_index, &variables_for_matrix);
287  bool all_variables_ok = true; // if this stays true, it means we don't need
288  // the initial zeroing.
289  for (size_t i = 0; i < variables_for_matrix.size(); i++) {
290  int32 variable_index = variables_for_matrix[i];
291  const std::vector<Access> &v_accesses =
292  a.variable_accesses[variable_index];
293  if (v_accesses.size() > 1 &&
294  v_accesses[1].access_type != kWriteAccess) {
295  all_variables_ok = false; // first access after zeroing was not a write
296  break;
297  }
298  if (v_accesses.size() == 1 &&
299  accesses.is_output) {
300  // the only command that touches this variable is the allocation, and it
301  // is an output variable. (this is unusual, but can happen e.g. if it's
302  // a derivative, but due to min_deriv_time and max_deriv_time it ends up
303  // always being zero.
304  all_variables_ok = false;
305  break;
306  }
307  }
308  if (all_variables_ok) {
309  // Here is where the change actually happens.
310  // Remove the zeroing command.
311  command->command_type = kNoOperation;
312  }
313  }
314 }
315 
316 /*
317  This function is called from RemoveUnnecessaryAllocation. The input is two
318  sorted, unique lists, of (deallocation-commands, allocation-commands)
319  e.g. (d1, d2, d3.. ), (a1, a2, a3..); and to the output is *appended* a list
320  of pairs (d, a). Each output pair must satisfy the property that d < a, and
321  no member of the input lists may appear more than once in the output pairs
322  (although it's OK for input a and d values not to appear in any output pairs).
323 
324  The goal of the implementation is to output as many pairs as possible, and
325  secondarily for the pairs to be as close as possible to each other (to avoid
326  wasting too much memory). I'm not sure if this implementation achieves that.
327 */
329  const std::pair<std::vector<int32>, std::vector<int32> > &lists,
330  std::vector<std::pair<int32,int32> > *pairs) {
331  std::vector<int32> d_list = lists.first;
332 
333  std::set<int32> a_set;
334  CopyVectorToSet(lists.second, &a_set);
335 
336  std::vector<int32>::reverse_iterator iter = d_list.rbegin(),
337  end = d_list.rend();
338 
339  // from the latest to the earliest deallocation command...
340  for (; iter != end; ++iter) {
341  int32 d = *iter;
342  std::set<int32>::iterator a_iter = a_set.upper_bound(d);
343  // a_iter is an iterator to the first element a of the set 'a_set' such
344  // that a > d, or a_set.end() if no such element exists.
345  if (a_iter == a_set.end())
346  continue; // we will output no pair for this d.
347  int32 a = *a_iter;
348  KALDI_PARANOID_ASSERT(a > d); // or code error
349  a_set.erase(a_iter); // remove this a from 'a_set' so it doesn't get used
350  // twice
351  pairs->push_back(std::pair<int32,int32>(d, a));
352  }
353 }
354 
356  NnetComputation *computation) {
357  // For each size of matrix and stride-type, represented as a pair<int32,int32>
358  // (the num-rows, and the num-cols * (stride-type == kDefaultStride ? 1 : -1), we
359  // accumulate a list of indexes of deallocation commands that
360  // are for that size, and a list of indexes of allocation commands
361  // for that size.
362  // For each distinct matrix size, we then call ComputeCommandPairs on those
363  // two lists, to get pairs of (deallocation, allocation) command-indexes that
364  // we can optimize out to a single command.
365 
366  // The map is from a (num-rows,num-columns) to two lists, of
367  // (deallocation-commands, allocation-commands). The order may seem
368  // backwards, but that's the order of the pairs we are looking for.
369  typedef unordered_map<std::pair<int32,int32>,
370  std::pair<std::vector<int32>,std::vector<int32> >,
371  PairHasher<int32> > MapType;
372  MapType pair_map;
373  int32 num_commands = computation->commands.size();
374  for (int32 command_index = 0; command_index < num_commands; command_index++) {
375  NnetComputation::Command &command = computation->commands[command_index];
376  if (command.command_type == kAllocMatrix ||
377  command.command_type == kDeallocMatrix) {
378  int32 s = command.arg1, m = computation->submatrices[s].matrix_index,
379  num_rows = computation->matrices[m].num_rows,
380  num_cols = computation->matrices[m].num_cols,
381  num_cols_mod = num_cols * (
382  computation->matrices[m].stride_type == kDefaultStride ? 1 : -1);
383  std::pair<int32,int32> p(num_rows, num_cols_mod);
384  std::pair<std::vector<int32>,std::vector<int32> > &lists = pair_map[p];
385  if (command.command_type == kDeallocMatrix)
386  lists.first.push_back(command_index);
387  else
388  lists.second.push_back(command_index);
389  }
390  }
391 
392  MapType::const_iterator iter = pair_map.begin(), end = pair_map.end();
393  std::vector<std::pair<int32,int32> > command_pairs;
394  for (; iter != end; ++iter)
395  ComputeCommandPairs(iter->second, &command_pairs);
396 
397  for (size_t i = 0; i < command_pairs.size(); i++) {
398  int32 dealloc_index = command_pairs[i].first,
399  alloc_index = command_pairs[i].second;
401  &dealloc_command = computation->commands[dealloc_index],
402  &alloc_command = computation->commands[alloc_index];
403  KALDI_ASSERT(dealloc_command.command_type ==
405  KALDI_ASSERT(alloc_command.command_type ==
406  kAllocMatrix);
407  // remove the deallocation command.
408  dealloc_command.command_type = kNoOperation;
409  alloc_command.arg2 = dealloc_command.arg1;
410  alloc_command.command_type = kSwapMatrix;
411  }
412  RemoveNoOps(computation);
413  FixGotoLabel(computation);
414 }
415 
416 
418  const Nnet &nnet,
419  NnetComputation *computation) {
420  bool changed = true;
421  while (changed) {
422  changed = false;
423  VariableMergingOptimizer opt(config, nnet, computation);
424  if (opt.MergeVariables())
425  changed = true;
426  }
427 }
428 
429 
431  NnetComputation *computation) {
432  Analyzer analyzer;
433  analyzer.Init(nnet, *computation);
434  ComputationAnalysis analysis(*computation, analyzer);
435  int32 num_commands = computation->commands.size();
436  for (int32 command = 0; command < num_commands; command++) {
437  NnetComputation::Command &c = computation->commands[command];
438  switch (c.command_type) {
439  case kMatrixAdd: case kAddRows: case kAddRowsMulti:
440  case kAddToRowsMulti: {
441  const std::vector<int32> &submatrices_written =
442  analyzer.command_attributes[command].submatrices_written;
443  KALDI_ASSERT(!submatrices_written.empty());
444  std::vector<int32>::const_iterator iter = submatrices_written.begin(),
445  end = submatrices_written.end();
446  bool can_convert = true;
447  for (; iter != end; ++iter) {
448  int32 submatrix_written = *iter;
449  int32 first_access_command = analysis.FirstNontrivialAccess(
450  submatrix_written);
451  // first_access_command is first command other than zeroing and
452  // allocation that accesses this submatrix. It can be assumed to be a
453  // write command, since it makes no sense to read a variable before
454  // it's written to. If it's before this command then we need to add
455  // rather than copy; we can't do the conversion to a copy command.
456  if (first_access_command != command) {
457  can_convert = false;
458  break;
459  }
460  }
461  if (can_convert) { // convert to a copy command.
462  switch (c.command_type) {
464  break;
465  case kAddRows: c.command_type = kCopyRows;
466  break;
468  break;
469  // note: kCopyToRowsMulti does not currently support alpha != 1.0.
470  case kAddToRowsMulti: if (c.alpha == 1.0) c.command_type = kCopyToRowsMulti;
471  break;
472  default: KALDI_ERR << "Unexpected command type.";
473  }
474  }
475  break;
476  }
477  default:
478  break;
479  }
480  }
481 }
482 
483 
485  int32 ans = std::numeric_limits<int32>::min();
486  for (size_t i = 0; i < request.outputs.size(); i++) {
487  const std::vector<Index> &indexes (request.outputs[i].indexes);
488  std::vector<Index>::const_iterator iter = indexes.begin(),
489  end = indexes.end();
490  for (; iter != end; ++iter)
491  if (iter->t > ans)
492  ans = iter->t;
493  }
494  if (ans == std::numeric_limits<int32>::min()) {
495  KALDI_ERR << "Failed to find any output indexes in computation request.";
496  }
497  return ans;
498 }
499 
500 
501 void Optimize(const NnetOptimizeOptions &config,
502  const Nnet &nnet,
503  int32 max_output_time_in_request,
504  NnetComputation *computation) {
505  if (GetVerboseLevel() >= 3) {
506  CheckComputation(nnet, *computation, true);
507  KALDI_LOG << "Before optimization, max memory use (bytes) = "
508  << GetMaxMemoryUse(*computation);
509  }
510 
511  { // Call LimitDerivativeTimes(); it's important that this
512  // should come before other optimizations (search for "insist" in
513  // nnet-optimize-utils.cc for the reasons).
514  // this will do nothing unless --min-deriv-time or --max-deriv-time
515  // or --max-deriv-time-relative was set.
517  if (config.max_deriv_time_relative != std::numeric_limits<int32>::max())
518  max_deriv_time = config.max_deriv_time_relative +
519  max_output_time_in_request;
520  if (config.min_deriv_time != std::numeric_limits<int32>::min() ||
521  max_deriv_time != std::numeric_limits<int32>::max())
523  max_deriv_time, computation);
524  }
525 
526  if (GetVerboseLevel() >= 3)
527  CheckComputation(nnet, *computation, true);
528 
529  if (config.optimize && config.consolidate_model_update) {
530  ConsolidateModelUpdate(nnet, computation);
531 
532  if (GetVerboseLevel() >= 3)
533  CheckComputation(nnet, *computation, true);
534  }
535 
536  if (config.optimize && config.convert_addition) {
537  ConvertAdditionToAssignment(nnet, computation);
538  if (GetVerboseLevel() >= 3)
539  CheckComputation(nnet, *computation, true);
540  }
541 
542 
543  if (config.optimize && (config.snip_row_ops || config.optimize_row_ops ||
544  config.split_row_ops)) {
545  bool must_renumber = false;
546  if (config.snip_row_ops && SnipRowOps(computation))
547  must_renumber = true;
548  if (config.split_row_ops && SplitRowOps(computation))
549  must_renumber = true;
550  if (config.optimize_row_ops && ReplaceRowWithMatrixOps(computation))
551  must_renumber = true;
552 
553  if (must_renumber) {
554  RenumberComputation(computation);
555  if (GetVerboseLevel() >= 3)
556  CheckComputation(nnet, *computation, false);
557  }
558  }
559 
560  if (config.optimize && config.extend_matrices &&
561  !config.optimize_looped_computation) {
562  ExtendMatrices(computation);
563  if (GetVerboseLevel() >= 3)
564  CheckComputation(nnet, *computation, false);
565  }
566 
567 
568  if (config.optimize &&
569  (config.remove_assignments || config.backprop_in_place ||
570  config.propagate_in_place)) {
571  VariableMergingOptimization(config, nnet, computation);
572  if (GetVerboseLevel() >= 3)
573  CheckComputation(nnet, *computation, false);
574  }
575 
576  if (config.optimize && config.initialize_undefined) {
577  RemoveUnnecessaryZeroing(nnet, computation);
578  if (GetVerboseLevel() >= 3)
579  CheckComputation(nnet, *computation, false);
580  }
581 
582 
583  if ((config.optimize && config.move_sizing_commands) ||
585  MoveSizingCommands(nnet, computation);
586  if (GetVerboseLevel() >= 3)
587  CheckComputation(nnet, *computation, false);
588  }
589 
590  // the looped computation optimization has to go before
591  // 'RemoveUnnecessaryAllocation()'. We don't gate this by 'config.optimize'
592  // because it's necessary for looped computation to run.
593  if (config.optimize_looped_computation) {
594  OptimizeLoopedComputation(nnet, computation);
595  if (GetVerboseLevel() >= 3)
596  CheckComputation(nnet, *computation, false);
597  }
598 
599  if (config.optimize && config.allocate_from_other &&
600  !config.optimize_looped_computation) {
601  // Don't do this if it's an looped computation because we're not sure if it
602  // would be correct in that case, as written. In any case the performance
603  // benefit is tiny.
604  RemoveUnnecessaryAllocation(nnet, computation);
605  if (GetVerboseLevel() >= 3)
606  CheckComputation(nnet, *computation, false);
607  }
608 
609  // The following is not configurable because it is necessary for
610  // the computation to run correctly (we do it after compilation too,
611  // but the operations may have been put out of order by
612  // other optimizations.)
613  ConsolidateIoOperations(nnet, computation);
614 
615  if (config.optimize_looped_computation)
616  FixGotoLabel(computation);
617 
618 
619  if (config.memory_compression_level > 0 &&
620  !config.optimize_looped_computation) {
622  computation);
623  if (GetVerboseLevel() >= 3)
624  CheckComputation(nnet, *computation, false);
625  }
626 
627  if (GetVerboseLevel() >= 3) {
628  CheckComputation(nnet, *computation, false);
629  KALDI_LOG << "After optimization, max memory use (bytes) = "
630  << GetMaxMemoryUse(*computation);
631  }
632 }
633 
634 
636  const Nnet &nnet,
637  const CachingOptimizingCompilerOptions config):
638  nnet_(nnet), config_(config),
639  seconds_taken_total_(0.0), seconds_taken_compile_(0.0),
640  seconds_taken_optimize_(0.0), seconds_taken_expand_(0.0),
641  seconds_taken_check_(0.0), seconds_taken_indexes_(0.0),
642  seconds_taken_io_(0.0), cache_(config.cache_capacity),
643  nnet_left_context_(-1), nnet_right_context_(-1) { }
644 
646  const Nnet &nnet,
647  const NnetOptimizeOptions &opt_config,
648  const CachingOptimizingCompilerOptions config):
649  nnet_(nnet), config_(config), opt_config_(opt_config),
653  seconds_taken_io_(0.0), cache_(config.cache_capacity),
655 
657  int32 *nnet_left_context, int32 *nnet_right_context) {
658  if (nnet_left_context_ == -1) {
661  }
662  *nnet_left_context = nnet_left_context_;
663  *nnet_right_context = nnet_right_context_;
664 }
665 
666 void CachingOptimizingCompiler::ReadCache(std::istream &is, bool binary) {
667  {
668  Timer timer;
669  NnetOptimizeOptions opt_config_cached;
670  opt_config_cached.Read(is, binary);
671  // we won't read cached computations if any optimize option has been changed.
672  if (!(opt_config_ == opt_config_cached))
673  return;
674  cache_.Read(is, binary);
675  seconds_taken_io_ += timer.Elapsed();
676  }
677  if (GetVerboseLevel() >= 2) {
678  Timer timer;
679  cache_.Check(nnet_);
680  seconds_taken_check_ += timer.Elapsed();
681  // we consider the check time part of the total time... this is very
682  // arbitrary but it only affects printed times-taken.
683  seconds_taken_total_ += timer.Elapsed();
684  }
685 
686 }
687 
688 void CachingOptimizingCompiler::WriteCache(std::ostream &os, bool binary) {
689  Timer timer;
690  opt_config_.Write(os, binary);
691  cache_.Write(os, binary);
692  seconds_taken_io_ += timer.Elapsed();
693 }
694 
696  if (seconds_taken_total_ > 0.0 || seconds_taken_io_ > 0.0) {
697  std::ostringstream os;
698  double seconds_taken_misc = seconds_taken_total_ - seconds_taken_compile_
701  os << std::setprecision(3) << seconds_taken_total_
702  << " seconds taken in nnet3 compilation total (breakdown: "
703  << seconds_taken_compile_ << " compilation, "
704  << seconds_taken_optimize_ << " optimization, "
705  << seconds_taken_expand_ << " shortcut expansion, "
706  << seconds_taken_check_ << " checking, "
707  << seconds_taken_indexes_ << " computing indexes, "
708  << seconds_taken_misc << " misc.) + "
709  << seconds_taken_io_ << " I/O.";
710  KALDI_LOG << os.str();
711  // note: the leftover amount is misc things like hashing and == comparisons on
712  // computation-requests, and calling RequestIsDecomposable().
713  }
714 }
715 
716 std::shared_ptr<const NnetComputation> CachingOptimizingCompiler::Compile(
717  const ComputationRequest &in_request) {
718  Timer timer;
719  std::shared_ptr<const NnetComputation> ans = CompileInternal(in_request);
720  seconds_taken_total_ += timer.Elapsed();
721  return ans;
722 }
723 
724 std::shared_ptr<const NnetComputation> CachingOptimizingCompiler::CompileInternal(
725  const ComputationRequest &request) {
726  std::shared_ptr<const NnetComputation> ans = cache_.Find(request);
727  if (ans != NULL) {
728  return ans;
729  } else {
730  const NnetComputation *computation = NULL;
731  if (config_.use_shortcut)
732  computation = CompileViaShortcut(request);
733  if (computation == NULL)
734  computation = CompileNoShortcut(request);
735  KALDI_ASSERT(computation != NULL);
736  return cache_.Insert(request, computation);
737  }
738 }
739 
740 
742  const ComputationRequest &request) {
743 
744  Compiler compiler(request, nnet_);
745  // note: 'opts' only contains 'output_debug_info', which is true by default.
746  // There may be situations where we'd prefer not to keep it, for speed.
747  CompilerOptions opts;
748  NnetComputation *computation = new NnetComputation;
749 
750  {
751  Timer timer;
752  compiler.CreateComputation(opts, computation);
753  seconds_taken_compile_ += timer.Elapsed();
754  }
755 
756  int32 verbose_cutoff = 4;
757  if (GetVerboseLevel() >= verbose_cutoff) {
758  std::ostringstream os1;
759  request.Print(os1);
760  KALDI_LOG << "Computation request is " << os1.str();
761  std::ostringstream os2;
762  computation->Print(os2, nnet_);
763  KALDI_LOG << "Generated computation is: " << os2.str();
764  }
765 
766  { // some checking. Note: there may come a time when we might
767  // prefer to disable this checking.
768  Timer timer;
769  CheckComputationOptions check_config;
770  // we can do the rewrite check since it's before optimization.
771  check_config.check_rewrite = true;
772  ComputationChecker checker(check_config, nnet_, *computation);
773  checker.Check();
774  seconds_taken_check_ += timer.Elapsed();
775  }
776 
777  {
778  Timer timer;
780  MaxOutputTimeInRequest(request),
781  computation);
782  seconds_taken_optimize_ += timer.Elapsed();
783  }
784 
785  if (GetVerboseLevel() >= verbose_cutoff) {
786  std::ostringstream os;
787  computation->Print(os, nnet_);
788  KALDI_LOG << "Optimized computation is: " << os.str();
789  }
790 
791  { // check the computation again.
792  Timer timer;
793  CheckComputationOptions check_config;
794  ComputationChecker checker(check_config, nnet_, *computation);
795  checker.Check();
796  seconds_taken_check_ += timer.Elapsed();
797  }
798 
799  {
800  Timer timer;
801  computation->ComputeCudaIndexes();
802  seconds_taken_indexes_ += timer.Elapsed();
803  }
804  return computation;
805 }
806 
807 
809  const ComputationRequest &request) {
810  int32 num_n_values;
811  ComputationRequest mini_request;
812  if (!RequestIsDecomposable(request, &mini_request, &num_n_values))
813  return NULL;
814 
815  // By invoking CompileInternal() on the mini request, we go through the same
816  // caching process as for any externally requested computation.
817  std::shared_ptr<const NnetComputation> mini_computation =
818  CompileInternal(mini_request);
819 
820  // note: by default we always create debug_info, even in regular compilation.
821  // (e.g. it defaults to true in CompilerOptions). If it really seems to be a
822  // significant overhead, we can revisit this at some point in future.
823  bool need_debug_info = true;
824 
825 
826  NnetComputation *ans = new NnetComputation();
827 
828  {
829  Timer timer;
830  ExpandComputation(nnet_, request.misc_info, *mini_computation,
831  need_debug_info, num_n_values, ans);
832  seconds_taken_expand_ += timer.Elapsed();
833  }
834  if (GetVerboseLevel() >= 3) {
835  CheckComputation(nnet_, *ans, false);
836  }
837 
838  {
839  Timer timer;
840  ans->ComputeCudaIndexes();
841  seconds_taken_indexes_ += timer.Elapsed();
842  }
843  return ans;
844 }
845 
846 
847 
853  const NnetComputation &computation,
854  std::vector<std::pair<int32, int32> > *segments) {
855 
856  int32 num_commands = computation.commands.size();
857  segments->clear();
858  int32 cur_start = 0;
859  for (int32 c = 0; c < num_commands; c++) {
860  if (computation.commands[c].command_type == kNoOperationMarker) {
861  segments->push_back(std::pair<int32, int32>(cur_start, c));
862  cur_start = c + 1;
863  }
864  }
865  segments->push_back(std::pair<int32, int32>(cur_start, num_commands));
866 }
867 
868 
869 void ConsolidateIoOperations(const Nnet &nnet,
870  NnetComputation *computation) {
871  // These segments, represented as (start-index, end-index),
872  // are segments of the computation separated by kNoOperationMarker.
873  std::vector<std::pair<int32, int32> > segments;
874  SplitComputationIntoSegments(*computation, &segments);
875 
876  int32 num_commands = computation->commands.size();
877  std::vector<NnetComputation::Command> reordered_commands(num_commands);
878  // put kNoOperationMarker between all segments in the reordered commands.
879  for (size_t s = 0; s + 1 < segments.size(); s++)
880  reordered_commands[segments[s].second].command_type = kNoOperationMarker;
881 
882  // for each segment we'll divide the commands up into those that must appear
883  // at the left of the segment (kAcceptInput for inputs and output-derivs), those
884  // that must appear in the middle (most commands), those that must appear
885  // on the right (kProvideOutput for output nodes and input derivatives).
886  std::vector<int32> left_commands, middle_commands, right_commands;
887 
888  for (size_t s = 0; s < segments.size(); s++) {
889  int32 segment_start = segments[s].first,
890  segment_end = segments[s].second;
891  left_commands.clear();
892  middle_commands.clear();
893  right_commands.clear();
894  for (int32 c = segment_start; c < segment_end; c++) {
895  if (computation->commands[c].command_type == kProvideOutput) {
896  right_commands.push_back(c);
897  } else if (computation->commands[c].command_type == kAcceptInput) {
898  left_commands.push_back(c);
899  } else {
900  middle_commands.push_back(c);
901  }
902  }
903  std::vector<int32>::const_iterator iter = left_commands.begin(),
904  end = left_commands.end();
905  int32 c = segment_start;
906  for (; iter != end; ++iter, ++c)
907  reordered_commands[c] = computation->commands[*iter];
908  iter = middle_commands.begin();
909  end = middle_commands.end();
910  for (; iter != end; ++iter, ++c)
911  reordered_commands[c] = computation->commands[*iter];
912  iter = right_commands.begin();
913  end = right_commands.end();
914  for (; iter != end; ++iter, ++c)
915  reordered_commands[c] = computation->commands[*iter];
916  KALDI_ASSERT(c == segment_end);
917  }
918  computation->commands.swap(reordered_commands);
919 }
920 
921 
922 
923 
924 } // namespace nnet3
925 } // namespace kaldi
void Init(const NnetComputation &computation)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
This class is responsible for merging matrices, although you probably want to access it via the f...
void AppendVariablesForMatrix(int32 matrix_index, std::vector< int32 > *variable_indexes) const
Appends to variables_indexes the sorted list of variables corresponding to a matrix index...
int32 FirstNontrivialAccess(int32 s) const
Returns the first command (read or write) that accesses any part of 's' except for zeroing it (i...
void Read(std::istream &is, bool binary)
const NnetComputation * CompileNoShortcut(const ComputationRequest &request)
bool SplitRowOps(NnetComputation *computation)
This function detects cases where commands of type kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti use indexes that correspond to at most two submatrices, in two distinct ranges without gaps filled by -1's, and could be converted to at most two commands of type kMatrixAdd, kMatrixCopy, kAddRows or kCopyRows.
void OptimizeLoopedComputation(const Nnet &nnet, NnetComputation *computation)
This function tries to optimize computation 'computation' for a 'looped' computation.
const NnetComputation * CompileViaShortcut(const ComputationRequest &request)
void ConsolidateIoOperations(const Nnet &nnet, NnetComputation *computation)
This optimization puts the input operations (kAcceptInput) and output operations (kProvideOutput) at ...
MiscComputationInfo misc_info
misc_info is for extensibility to things that don't easily fit into the framework.
void Write(std::ostream &os, bool binary) const
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
static void ComputeCommandPairs(const std::pair< std::vector< int32 >, std::vector< int32 > > &lists, std::vector< std::pair< int32, int32 > > *pairs)
void RenumberComputation(NnetComputation *computation)
This function detects submatrices and matrices that are never used (e.g.
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
Definition: kaldi-error.h:60
bool is_output
true if this matrix is an output of the computation (i.e.
Definition: nnet-analyze.h:270
void Print(std::ostream &os, const Nnet &nnet) const
void VariableMergingOptimization(const NnetOptimizeOptions &config, const Nnet &nnet, NnetComputation *computation)
This wraps class VariableMergingOptimizer in a simplified interface.
bool RequestIsDecomposable(const ComputationRequest &request, ComputationRequest *mini_request, int32 *num_n_values)
This function, used in &#39;shortcut&#39; compilation where we first compile a smaller computation with the s...
void ConvertAdditionToAssignment(const Nnet &nnet, NnetComputation *computation)
This converts addition operations (things with Add in their names) to copy operations (things with Co...
kaldi::int32 int32
std::vector< MatrixInfo > matrices
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
void CopyVectorToSet(const std::vector< A > &v, std::set< A > *s)
Copies the contents of a vector to a set.
Definition: stl-utils.h:172
void ComputeCommandAttributes(const Nnet &nnet, const NnetComputation &computation, const ComputationVariables &vars, std::vector< CommandAttributes > *attributes)
void ExtendMatrices(NnetComputation *computation)
This is not really an optimization in itself but it can make things easier for class VariableMergingO...
std::vector< Command > commands
std::shared_ptr< const NnetComputation > Find(const ComputationRequest &request)
void LimitDerivativeTimes(const Nnet &nnet, int32 min_deriv_time, int32 max_deriv_time, NnetComputation *computation)
void Write(std::ostream &os, bool binary) const
std::vector< CommandAttributes > command_attributes
Definition: nnet-analyze.h:296
This file contains some miscellaneous functions dealing with class Nnet.
void OptimizeMemoryCompression(const Nnet &nnet, int32 memory_compression_level, NnetComputation *computation)
Performs optimization to reduce memory usage where possible, making use of the kCompressMatrix and kD...
std::vector< Access > accesses
Records the indexes of commands that access the matrix, and the type (read, read/write, write).
Definition: nnet-analyze.h:264
std::shared_ptr< const NnetComputation > Insert(const ComputationRequest &request, const NnetComputation *computation)
void MoveSizingCommands(const Nnet &nnet, NnetComputation *computation)
This optimization moves commands that allocate and zero matrices to as late as possible, and moves commands that deallocate matrices to as early as possible.
bool operator==(const NnetOptimizeOptions &other) const
int64 GetMaxMemoryUse(const NnetComputation &computation)
This class relates the matrices and sub-matrices in the computation to imaginary "variables", such that we can think of the operations as operating on sets of individual variables, and we can then do analysis that lets us do optimization.
Definition: nnet-analyze.h:121
void Init(const Nnet &nnet, const NnetComputation &computation)
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
void Read(std::istream &is, bool binary)
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet.
Definition: nnet-utils.cc:146
int32 MaxOutputTimeInRequest(const ComputationRequest &request)
void RemoveUnnecessaryAllocation(const Nnet &nnet, NnetComputation *computation)
This optimization detects cases where we deallocate a matrix, and then later allocate another matrix ...
void RemoveUnnecessaryZeroing(const Nnet &nnet, NnetComputation *computation)
This optimization function removes, where possible, commands of type type kSetConst.
void ComputeVariableAccesses(const ComputationVariables &variables, const std::vector< CommandAttributes > &command_attributes, std::vector< std::vector< Access > > *variable_accesses)
After the command-level attributes have been computed, this function organizes them per variable (see...
std::vector< SubMatrixInfo > submatrices
void Check(const Nnet &nnet) const
bool ReplaceRowWithMatrixOps(NnetComputation *computation)
This function detects cases where commands of type kCopyRows, kAddRows or kAddToRows can be converted...
#define KALDI_ERR
Definition: kaldi-error.h:147
CachingOptimizingCompiler(const Nnet &nnet, const CachingOptimizingCompilerOptions config=CachingOptimizingCompilerOptions())
#define KALDI_PARANOID_ASSERT(cond)
Definition: kaldi-error.h:206
std::vector< std::vector< Access > > variable_accesses
Definition: nnet-analyze.h:297
int32 deallocate_command
Index of the command that deallocates the matrix (which will be of type kDeallocMatrix or kSwapMatrix...
Definition: nnet-analyze.h:257
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
void GetSimpleNnetContext(int32 *nnet_left_context, int32 *nnet_right_context)
void FixGotoLabel(NnetComputation *computation)
This function ensures that the arg1 of a final command of type kGotoLabel is the same as the command ...
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
Definition: io-funcs.cc:170
void Optimize(const NnetOptimizeOptions &config, const Nnet &nnet, int32 max_output_time_in_request, NnetComputation *computation)
This is the top-level function for optimizing a computation.
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
void ExpandComputation(const Nnet &nnet, const MiscComputationInfo &misc_info, const NnetComputation &computation, bool need_debug_info, int32 num_n_values, NnetComputation *expanded_computation)
This function is used in &#39;shortcut&#39; compilation to expand a computation that has been compiled for ex...
void ComputeMatrixAccesses(const Nnet &nnet, const NnetComputation &computation, const ComputationVariables &variables, const std::vector< CommandAttributes > &command_attributes, std::vector< MatrixAccesses > *matrix_accesses)
This function organizes information in the CommandAttributes in a way that is convenient to access pe...
void ReadCache(std::istream &is, bool binary)
void ConsolidateModelUpdate(const Nnet &nnet, NnetComputation *computation)
This optimization consolidates the model-update part of backprop commands, for components in (e...
std::vector< MatrixAccesses > matrix_accesses
Definition: nnet-analyze.h:298
void WriteCache(std::ostream &os, bool binary)
void CheckComputation(const Nnet &nnet, const NnetComputation &computation, bool check_rewrite)
This is a convenience interface for class ComputationChecker.
void CreateComputation(const CompilerOptions &opts, NnetComputation *computation)
Definition: nnet-compile.cc:50
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::vector< IoSpecification > outputs
void RemoveNoOps(NnetComputation *computation)
Removes commands of type kNoOperation in the computation.
This class creates an initial version of the NnetComputation, without any optimization or sharing of ...
Definition: nnet-compile.h:44
int32 allocate_command
Index of the command that allocates the matrix (which will be of type kAllocMatrix or kSwapMatrix)...
Definition: nnet-analyze.h:253
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
ComputationVariables variables
Definition: nnet-analyze.h:295
bool SnipRowOps(NnetComputation *computation)
This function detects cases where commands of type kCopyRows, kAddRows, kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti or kAddRowRanges use indexes that start or end with -1's or equivalents, and replace them with similar commands that act on a sub-matrix of the matrices they are currently acting on.
void Print(std::ostream &os) const
This function is for printing info about the computation request in a human-readable way...
#define KALDI_LOG
Definition: kaldi-error.h:153
double Elapsed() const
Returns time in seconds.
Definition: timer.h:74
This struct exists to set up various pieces of analysis; it helps avoid the repetition of code where ...
Definition: nnet-analyze.h:294
CachingOptimizingCompilerOptions config_
std::shared_ptr< const NnetComputation > CompileInternal(const ComputationRequest &request)
static void SplitComputationIntoSegments(const NnetComputation &computation, std::vector< std::pair< int32, int32 > > *segments)
Split the computation up into segments bounded by kNoOperationMarker.
A hashing function-object for pairs of ints.
Definition: stl-utils.h:235
This class performs various kinds of specific analysis on top of what class Analyzer gives you immedi...
Definition: nnet-analyze.h:308