34   ExpectToken(is, binary, 
"<ConsolidateModelUpdate>");
    79     ExpectToken(is, binary, 
"<MemoryCompressionLevel>");
    86   WriteToken(os, binary, 
"<NnetOptimizeOptions>");
    89   WriteToken(os, binary, 
"<ConsolidateModelUpdate>");
   103   WriteToken(os, binary, 
"<RemoveAssignments>");
   109   WriteToken(os, binary, 
"<InitializeUndefined>");
   111   WriteToken(os, binary, 
"<MoveSizingCommands>");
   113   WriteToken(os, binary, 
"<AllocateFromOther>");
   119   WriteToken(os, binary, 
"<MaxDerivTimeRelative>");
   123   WriteToken(os, binary, 
"<MemoryCompressionLevel>");
   125   WriteToken(os, binary, 
"</NnetOptimizeOptions>");
   154   variables.
Init(*computation);
   155   std::vector<CommandAttributes> attributes;
   157   std::vector<std::vector<Access> > variable_accesses;
   159   std::vector<MatrixAccesses> matrix_accesses;
   169       num_matrices = matrix_accesses.size();
   175   std::vector<bool> is_command_pair(num_commands, 
false);
   176   for (
int32 c = 0; c + 1 < num_commands; c++) {
   180         computation->
commands[c+1].alpha == 0.0) {
   181       is_command_pair[c] = 
true;
   187   std::vector<std::pair<int32,int32> >
   188       command_reordering(num_commands);
   191   for (
int32 c = 0; c < num_commands; c++) {
   192     command_reordering[c].first = c * 3;
   193     command_reordering[c].second = c;
   195   for (
int32 m = 1; m < num_matrices; m++) {
   203       int32 first_access_command = -1;
   206         first_access_command = ma.
accesses[0].command_index;
   210             first_access_command = ma.
accesses[1].command_index;
   212             first_access_command = -1;
   215       if (first_access_command != -1) {
   219             first_access_command * 3 - 1;
   227       int32 last_access_command = ma.
accesses.back().command_index;
   230           last_access_command * 3 + 1;
   233   std::sort(command_reordering.begin(), command_reordering.end());
   234   std::vector<NnetComputation::Command> reordered_commands;
   235   reordered_commands.reserve(num_commands);
   236   for (
int32 c = 0; c < num_commands; c++) {
   237     int32 old_index = command_reordering[c].second;
   242     if (old_index > 0 && is_command_pair[old_index - 1]) {
   249       reordered_commands.push_back(computation->
commands[old_index]);
   250       if (is_command_pair[old_index]) {
   253         reordered_commands.push_back(computation->
commands[old_index + 1]);
   257   computation->
commands = reordered_commands;
   265   a.
Init(nnet, *computation);
   272   for (
int32 matrix_index = 0; matrix_index < num_matrices; matrix_index++) {
   276     int32 zeroing_command_index = accesses.
accesses[0].command_index;
   278         &(computation->
commands[zeroing_command_index]);
   280           command->
alpha == 0.0)) {
   285     std::vector<int32> variables_for_matrix;
   287     bool all_variables_ok = 
true;  
   289     for (
size_t i = 0; 
i < variables_for_matrix.size(); 
i++) {
   290       int32 variable_index = variables_for_matrix[
i];
   291       const std::vector<Access> &v_accesses =
   293       if (v_accesses.size() > 1 &&
   295         all_variables_ok = 
false;  
   298       if (v_accesses.size() == 1 &&
   304         all_variables_ok = 
false;
   308     if (all_variables_ok) {
   329     const std::pair<std::vector<int32>, std::vector<int32> > &lists,
   330     std::vector<std::pair<int32,int32> > *pairs) {
   331   std::vector<int32> d_list = lists.first;
   333   std::set<int32> a_set;
   336   std::vector<int32>::reverse_iterator iter = d_list.rbegin(),
   340   for (; iter != end; ++iter) {
   342     std::set<int32>::iterator a_iter = a_set.upper_bound(d);
   345     if (a_iter == a_set.end())
   351     pairs->push_back(std::pair<int32,int32>(d, a));
   369   typedef unordered_map<std::pair<int32,int32>,
   370       std::pair<std::vector<int32>,std::vector<int32> >,
   374   for (
int32 command_index = 0; command_index < num_commands; command_index++) {
   379           num_rows = computation->
matrices[m].num_rows,
   380           num_cols = computation->
matrices[m].num_cols,
   381           num_cols_mod = num_cols * (
   383       std::pair<int32,int32> p(num_rows, num_cols_mod);
   384       std::pair<std::vector<int32>,std::vector<int32> > &lists = pair_map[p];
   386         lists.first.push_back(command_index);
   388         lists.second.push_back(command_index);
   392   MapType::const_iterator iter = pair_map.begin(), end = pair_map.end();
   393   std::vector<std::pair<int32,int32> > command_pairs;
   394   for (; iter != end; ++iter)
   397   for (
size_t i = 0; 
i < command_pairs.size(); 
i++) {
   398     int32 dealloc_index = command_pairs[
i].first,
   399         alloc_index = command_pairs[
i].second;
   401         &dealloc_command = computation->
commands[dealloc_index],
   402         &alloc_command = computation->
commands[alloc_index];
   409     alloc_command.arg2 = dealloc_command.
arg1;
   433   analyzer.
Init(nnet, *computation);
   436   for (
int32 command = 0; command < num_commands; command++) {
   441         const std::vector<int32> &submatrices_written =
   444         std::vector<int32>::const_iterator iter = submatrices_written.begin(),
   445             end = submatrices_written.end();
   446         bool can_convert = 
true;
   447         for (; iter != end; ++iter) {
   448           int32 submatrix_written = *iter;
   456           if (first_access_command != command) {
   472             default: 
KALDI_ERR << 
"Unexpected command type.";
   485   int32 ans = std::numeric_limits<int32>::min();
   486   for (
size_t i = 0; 
i < request.
outputs.size(); 
i++) {
   487     const std::vector<Index> &indexes (request.
outputs[
i].indexes);
   488     std::vector<Index>::const_iterator iter = indexes.begin(),
   490     for (; iter != end; ++iter)
   494   if (ans == std::numeric_limits<int32>::min()) {
   495     KALDI_ERR << 
"Failed to find any output indexes in computation request.";
   503               int32 max_output_time_in_request,
   507     KALDI_LOG << 
"Before optimization, max memory use (bytes) = "   519           max_output_time_in_request;
   521         max_deriv_time != std::numeric_limits<int32>::max())
   523                            max_deriv_time, computation);
   545     bool must_renumber = 
false;
   547       must_renumber = 
true;
   549       must_renumber = 
true;
   551       must_renumber = 
true;
   629     KALDI_LOG << 
"After optimization, max memory use (bytes) = "   638     nnet_(nnet), config_(config),
   639     seconds_taken_total_(0.0), seconds_taken_compile_(0.0),
   640     seconds_taken_optimize_(0.0), seconds_taken_expand_(0.0),
   641     seconds_taken_check_(0.0), seconds_taken_indexes_(0.0),
   642     seconds_taken_io_(0.0), cache_(config.cache_capacity),
   643     nnet_left_context_(-1), nnet_right_context_(-1) { }
   657     int32 *nnet_left_context, 
int32 *nnet_right_context) {
   670     opt_config_cached.
Read(is, binary);
   697     std::ostringstream os;
   702        << 
" seconds taken in nnet3 compilation total (breakdown: "   707        << seconds_taken_indexes_ << 
" computing indexes, "   708        << seconds_taken_misc << 
" misc.) + "   719   std::shared_ptr<const NnetComputation>  ans = 
CompileInternal(in_request);
   726   std::shared_ptr<const NnetComputation> ans = 
cache_.
Find(request);
   733     if (computation == NULL)
   756   int32 verbose_cutoff = 4;
   758     std::ostringstream os1;
   760     KALDI_LOG << 
"Computation request is " << os1.str();
   761     std::ostringstream os2;
   763     KALDI_LOG << 
"Generated computation is: " << os2.str();
   786     std::ostringstream os;
   788     KALDI_LOG << 
"Optimized computation is: " << os.str();
   817   std::shared_ptr<const NnetComputation> mini_computation =
   823   bool need_debug_info = 
true;
   831                       need_debug_info, num_n_values, ans);
   854     std::vector<std::pair<int32, int32> > *segments) {
   859   for (
int32 c = 0; c < num_commands; c++) {
   861       segments->push_back(std::pair<int32, int32>(cur_start, c));
   865   segments->push_back(std::pair<int32, int32>(cur_start, num_commands));
   873   std::vector<std::pair<int32, int32> > segments;
   877   std::vector<NnetComputation::Command> reordered_commands(num_commands);
   879   for (
size_t s = 0; s + 1 < segments.size(); s++)
   886   std::vector<int32> left_commands, middle_commands, right_commands;
   888   for (
size_t s = 0; s < segments.size(); s++) {
   889     int32 segment_start = segments[s].first,
   890         segment_end = segments[s].second;
   891     left_commands.clear();
   892     middle_commands.clear();
   893     right_commands.clear();
   894     for (
int32 c = segment_start; c < segment_end; c++) {
   896         right_commands.push_back(c);
   898         left_commands.push_back(c);
   900         middle_commands.push_back(c);
   903     std::vector<int32>::const_iterator iter = left_commands.begin(),
   904         end = left_commands.end();
   905     int32 c = segment_start;
   906     for (; iter != end; ++iter, ++c)
   907       reordered_commands[c] = computation->
commands[*iter];
   908     iter = middle_commands.begin();
   909     end = middle_commands.end();
   910     for (; iter != end; ++iter, ++c)
   911       reordered_commands[c] = computation->
commands[*iter];
   912     iter = right_commands.begin();
   913     end = right_commands.end();
   914     for (; iter != end; ++iter, ++c)
   915       reordered_commands[c] = computation->
commands[*iter];
   918   computation->
commands.swap(reordered_commands);
 double seconds_taken_check_
 
void Init(const NnetComputation &computation)
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
This class is responsible for merging matrices, although you probably want to access it via the f...
 
void AppendVariablesForMatrix(int32 matrix_index, std::vector< int32 > *variable_indexes) const
Appends to variable_indexes the sorted list of variables corresponding to a matrix index...
 
int32 FirstNontrivialAccess(int32 s) const
Returns the first command (read or write) that accesses any part of 's' except for zeroing it (i...
 
void Read(std::istream &is, bool binary)
 
const NnetComputation * CompileNoShortcut(const ComputationRequest &request)
 
bool SplitRowOps(NnetComputation *computation)
This function detects cases where commands of type kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti use indexes that correspond to at most two submatrices, in two distinct ranges without gaps filled by -1's, and could be converted to at most two commands of type kMatrixAdd, kMatrixCopy, kAddRows or kCopyRows. 
 
void OptimizeLoopedComputation(const Nnet &nnet, NnetComputation *computation)
This function tries to optimize computation 'computation' for a 'looped' computation. 
 
const NnetComputation * CompileViaShortcut(const ComputationRequest &request)
 
int32 max_deriv_time_relative
 
void ConsolidateIoOperations(const Nnet &nnet, NnetComputation *computation)
This optimization puts the input operations (kAcceptInput) and output operations (kProvideOutput) at ...
 
void ComputeCudaIndexes()
 
MiscComputationInfo misc_info
misc_info is for extensibility to things that don't easily fit into the framework. 
 
void Write(std::ostream &os, bool binary) const
 
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
 
static void ComputeCommandPairs(const std::pair< std::vector< int32 >, std::vector< int32 > > &lists, std::vector< std::pair< int32, int32 > > *pairs)
 
void RenumberComputation(NnetComputation *computation)
This function detects submatrices and matrices that are never used (e.g. 
 
bool move_sizing_commands
 
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch. 
 
bool is_output
true if this matrix is an output of the computation (i.e. 
 
void Print(std::ostream &os, const Nnet &nnet) const
 
void VariableMergingOptimization(const NnetOptimizeOptions &config, const Nnet &nnet, NnetComputation *computation)
This wraps class VariableMergingOptimizer in a simplified interface. 
 
bool RequestIsDecomposable(const ComputationRequest &request, ComputationRequest *mini_request, int32 *num_n_values)
This function, used in 'shortcut' compilation where we first compile a smaller computation with the s...
 
void ConvertAdditionToAssignment(const Nnet &nnet, NnetComputation *computation)
This converts addition operations (things with Add in their names) to copy operations (things with Co...
 
~CachingOptimizingCompiler()
 
std::vector< MatrixInfo > matrices
 
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g. 
 
void CopyVectorToSet(const std::vector< A > &v, std::set< A > *s)
Copies the contents of a vector to a set. 
 
void ComputeCommandAttributes(const Nnet &nnet, const NnetComputation &computation, const ComputationVariables &vars, std::vector< CommandAttributes > *attributes)
 
void ExtendMatrices(NnetComputation *computation)
This is not really an optimization in itself but it can make things easier for class VariableMergingO...
 
std::vector< Command > commands
 
std::shared_ptr< const NnetComputation > Find(const ComputationRequest &request)
 
void LimitDerivativeTimes(const Nnet &nnet, int32 min_deriv_time, int32 max_deriv_time, NnetComputation *computation)
 
void Write(std::ostream &os, bool binary) const
 
std::vector< CommandAttributes > command_attributes
 
This file contains some miscellaneous functions dealing with class Nnet. 
 
void OptimizeMemoryCompression(const Nnet &nnet, int32 memory_compression_level, NnetComputation *computation)
Performs optimization to reduce memory usage where possible, making use of the kCompressMatrix and kD...
 
bool optimize_looped_computation
 
std::vector< Access > accesses
Records the indexes of commands that access the matrix, and the type (read, read/write, write). 
 
std::shared_ptr< const NnetComputation > Insert(const ComputationRequest &request, const NnetComputation *computation)
 
void MoveSizingCommands(const Nnet &nnet, NnetComputation *computation)
This optimization moves commands that allocate and zero matrices to as late as possible, and moves commands that deallocate matrices to as early as possible. 
 
double seconds_taken_expand_
 
bool consolidate_model_update
 
bool operator==(const NnetOptimizeOptions &other) const
 
int64 GetMaxMemoryUse(const NnetComputation &computation)
 
This class relates the matrices and sub-matrices in the computation to imaginary "variables", such that we can think of the operations as operating on sets of individual variables, and we can then do analysis that lets us do optimization. 
 
void Init(const Nnet &nnet, const NnetComputation &computation)
 
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
 
void Read(std::istream &is, bool binary)
 
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet. 
 
double seconds_taken_total_
 
int32 MaxOutputTimeInRequest(const ComputationRequest &request)
 
void RemoveUnnecessaryAllocation(const Nnet &nnet, NnetComputation *computation)
This optimization detects cases where we deallocate a matrix, and then later allocate another matrix ...
 
void RemoveUnnecessaryZeroing(const Nnet &nnet, NnetComputation *computation)
This optimization function removes, where possible, commands of type kSetConst. 
 
void ComputeVariableAccesses(const ComputationVariables &variables, const std::vector< CommandAttributes > &command_attributes, std::vector< std::vector< Access > > *variable_accesses)
After the command-level attributes have been computed, this function organizes them per variable (see...
 
std::vector< SubMatrixInfo > submatrices
 
void Check(const Nnet &nnet) const
 
bool ReplaceRowWithMatrixOps(NnetComputation *computation)
This function detects cases where commands of type kCopyRows, kAddRows or kAddToRows can be converted...
 
CachingOptimizingCompiler(const Nnet &nnet, const CachingOptimizingCompilerOptions config=CachingOptimizingCompilerOptions())
 
#define KALDI_PARANOID_ASSERT(cond)
 
int32 memory_compression_level
 
std::vector< std::vector< Access > > variable_accesses
 
double seconds_taken_compile_
 
int32 deallocate_command
Index of the command that deallocates the matrix (which will be of type kDeallocMatrix or kSwapMatrix...
 
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters. 
 
void GetSimpleNnetContext(int32 *nnet_left_context, int32 *nnet_right_context)
 
void FixGotoLabel(NnetComputation *computation)
This function ensures that the arg1 of a final command of type kGotoLabel is the same as the command ...
 
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file. 
 
double seconds_taken_optimize_
 
void Optimize(const NnetOptimizeOptions &config, const Nnet &nnet, int32 max_output_time_in_request, NnetComputation *computation)
This is the top-level function for optimizing a computation. 
 
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
 
void ExpandComputation(const Nnet &nnet, const MiscComputationInfo &misc_info, const NnetComputation &computation, bool need_debug_info, int32 num_n_values, NnetComputation *expanded_computation)
This function is used in 'shortcut' compilation to expand a computation that has been compiled for ex...
 
void ComputeMatrixAccesses(const Nnet &nnet, const NnetComputation &computation, const ComputationVariables &variables, const std::vector< CommandAttributes > &command_attributes, std::vector< MatrixAccesses > *matrix_accesses)
This function organizes information in the CommandAttributes in a way that is convenient to access pe...
 
void ReadCache(std::istream &is, bool binary)
 
void ConsolidateModelUpdate(const Nnet &nnet, NnetComputation *computation)
This optimization consolidates the model-update part of backprop commands, for components in (e...
 
std::vector< MatrixAccesses > matrix_accesses
 
void WriteCache(std::ostream &os, bool binary)
 
void CheckComputation(const Nnet &nnet, const NnetComputation &computation, bool check_rewrite)
This is a convenience interface for class ComputationChecker. 
 
void CreateComputation(const CompilerOptions &opts, NnetComputation *computation)
 
#define KALDI_ASSERT(cond)
 
int32 nnet_right_context_
 
std::vector< IoSpecification > outputs
 
void RemoveNoOps(NnetComputation *computation)
Removes commands of type kNoOperation in the computation. 
 
This class creates an initial version of the NnetComputation, without any optimization or sharing of ...
 
int32 allocate_command
Index of the command that allocates the matrix (which will be of type kAllocMatrix or kSwapMatrix)...
 
double seconds_taken_indexes_
 
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
 
ComputationVariables variables
 
NnetOptimizeOptions opt_config_
 
bool SnipRowOps(NnetComputation *computation)
This function detects cases where commands of type kCopyRows, kAddRows, kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti or kAddRowRanges use indexes that start or end with -1's or equivalents, and replace them with similar commands that act on a sub-matrix of the matrices they are currently acting on. 
 
void Print(std::ostream &os) const
This function is for printing info about the computation request in a human-readable way...
 
double Elapsed() const
Returns time in seconds. 
 
bool initialize_undefined
 
This struct exists to set up various pieces of analysis; it helps avoid the repetition of code where ...
 
CachingOptimizingCompilerOptions config_
 
std::shared_ptr< const NnetComputation > CompileInternal(const ComputationRequest &request)
 
static void SplitComputationIntoSegments(const NnetComputation &computation, std::vector< std::pair< int32, int32 > > *segments)
Split the computation up into segments bounded by kNoOperationMarker. 
 
A hashing function-object for pairs of ints. 
 
This class performs various kinds of specific analysis on top of what class Analyzer gives you immedi...