34 ExpectToken(is, binary,
"<ConsolidateModelUpdate>");
79 ExpectToken(is, binary,
"<MemoryCompressionLevel>");
86 WriteToken(os, binary,
"<NnetOptimizeOptions>");
89 WriteToken(os, binary,
"<ConsolidateModelUpdate>");
103 WriteToken(os, binary,
"<RemoveAssignments>");
109 WriteToken(os, binary,
"<InitializeUndefined>");
111 WriteToken(os, binary,
"<MoveSizingCommands>");
113 WriteToken(os, binary,
"<AllocateFromOther>");
119 WriteToken(os, binary,
"<MaxDerivTimeRelative>");
123 WriteToken(os, binary,
"<MemoryCompressionLevel>");
125 WriteToken(os, binary,
"</NnetOptimizeOptions>");
154 variables.
Init(*computation);
155 std::vector<CommandAttributes> attributes;
157 std::vector<std::vector<Access> > variable_accesses;
159 std::vector<MatrixAccesses> matrix_accesses;
169 num_matrices = matrix_accesses.size();
175 std::vector<bool> is_command_pair(num_commands,
false);
176 for (
int32 c = 0; c + 1 < num_commands; c++) {
180 computation->
commands[c+1].alpha == 0.0) {
181 is_command_pair[c] =
true;
187 std::vector<std::pair<int32,int32> >
188 command_reordering(num_commands);
191 for (
int32 c = 0; c < num_commands; c++) {
192 command_reordering[c].first = c * 3;
193 command_reordering[c].second = c;
195 for (
int32 m = 1; m < num_matrices; m++) {
203 int32 first_access_command = -1;
206 first_access_command = ma.
accesses[0].command_index;
210 first_access_command = ma.
accesses[1].command_index;
212 first_access_command = -1;
215 if (first_access_command != -1) {
219 first_access_command * 3 - 1;
227 int32 last_access_command = ma.
accesses.back().command_index;
230 last_access_command * 3 + 1;
233 std::sort(command_reordering.begin(), command_reordering.end());
234 std::vector<NnetComputation::Command> reordered_commands;
235 reordered_commands.reserve(num_commands);
236 for (
int32 c = 0; c < num_commands; c++) {
237 int32 old_index = command_reordering[c].second;
242 if (old_index > 0 && is_command_pair[old_index - 1]) {
249 reordered_commands.push_back(computation->
commands[old_index]);
250 if (is_command_pair[old_index]) {
253 reordered_commands.push_back(computation->
commands[old_index + 1]);
257 computation->
commands = reordered_commands;
265 a.
Init(nnet, *computation);
272 for (
int32 matrix_index = 0; matrix_index < num_matrices; matrix_index++) {
276 int32 zeroing_command_index = accesses.
accesses[0].command_index;
278 &(computation->
commands[zeroing_command_index]);
280 command->
alpha == 0.0)) {
285 std::vector<int32> variables_for_matrix;
287 bool all_variables_ok =
true;
289 for (
size_t i = 0;
i < variables_for_matrix.size();
i++) {
290 int32 variable_index = variables_for_matrix[
i];
291 const std::vector<Access> &v_accesses =
293 if (v_accesses.size() > 1 &&
295 all_variables_ok =
false;
298 if (v_accesses.size() == 1 &&
304 all_variables_ok =
false;
308 if (all_variables_ok) {
329 const std::pair<std::vector<int32>, std::vector<int32> > &lists,
330 std::vector<std::pair<int32,int32> > *pairs) {
331 std::vector<int32> d_list = lists.first;
333 std::set<int32> a_set;
336 std::vector<int32>::reverse_iterator iter = d_list.rbegin(),
340 for (; iter != end; ++iter) {
342 std::set<int32>::iterator a_iter = a_set.upper_bound(d);
345 if (a_iter == a_set.end())
351 pairs->push_back(std::pair<int32,int32>(d, a));
369 typedef unordered_map<std::pair<int32,int32>,
370 std::pair<std::vector<int32>,std::vector<int32> >,
374 for (
int32 command_index = 0; command_index < num_commands; command_index++) {
379 num_rows = computation->
matrices[m].num_rows,
380 num_cols = computation->
matrices[m].num_cols,
381 num_cols_mod = num_cols * (
383 std::pair<int32,int32> p(num_rows, num_cols_mod);
384 std::pair<std::vector<int32>,std::vector<int32> > &lists = pair_map[p];
386 lists.first.push_back(command_index);
388 lists.second.push_back(command_index);
392 MapType::const_iterator iter = pair_map.begin(), end = pair_map.end();
393 std::vector<std::pair<int32,int32> > command_pairs;
394 for (; iter != end; ++iter)
397 for (
size_t i = 0;
i < command_pairs.size();
i++) {
398 int32 dealloc_index = command_pairs[
i].first,
399 alloc_index = command_pairs[
i].second;
401 &dealloc_command = computation->
commands[dealloc_index],
402 &alloc_command = computation->
commands[alloc_index];
409 alloc_command.arg2 = dealloc_command.
arg1;
433 analyzer.
Init(nnet, *computation);
436 for (
int32 command = 0; command < num_commands; command++) {
441 const std::vector<int32> &submatrices_written =
444 std::vector<int32>::const_iterator iter = submatrices_written.begin(),
445 end = submatrices_written.end();
446 bool can_convert =
true;
447 for (; iter != end; ++iter) {
448 int32 submatrix_written = *iter;
456 if (first_access_command != command) {
472 default:
KALDI_ERR <<
"Unexpected command type.";
485 int32 ans = std::numeric_limits<int32>::min();
486 for (
size_t i = 0;
i < request.
outputs.size();
i++) {
487 const std::vector<Index> &indexes (request.
outputs[
i].indexes);
488 std::vector<Index>::const_iterator iter = indexes.begin(),
490 for (; iter != end; ++iter)
494 if (ans == std::numeric_limits<int32>::min()) {
495 KALDI_ERR <<
"Failed to find any output indexes in computation request.";
503 int32 max_output_time_in_request,
507 KALDI_LOG <<
"Before optimization, max memory use (bytes) = " 519 max_output_time_in_request;
521 max_deriv_time != std::numeric_limits<int32>::max())
523 max_deriv_time, computation);
545 bool must_renumber =
false;
547 must_renumber =
true;
549 must_renumber =
true;
551 must_renumber =
true;
629 KALDI_LOG <<
"After optimization, max memory use (bytes) = " 638 nnet_(nnet), config_(config),
639 seconds_taken_total_(0.0), seconds_taken_compile_(0.0),
640 seconds_taken_optimize_(0.0), seconds_taken_expand_(0.0),
641 seconds_taken_check_(0.0), seconds_taken_indexes_(0.0),
642 seconds_taken_io_(0.0), cache_(config.cache_capacity),
643 nnet_left_context_(-1), nnet_right_context_(-1) { }
657 int32 *nnet_left_context,
int32 *nnet_right_context) {
670 opt_config_cached.
Read(is, binary);
697 std::ostringstream os;
702 <<
" seconds taken in nnet3 compilation total (breakdown: " 707 << seconds_taken_indexes_ <<
" computing indexes, " 708 << seconds_taken_misc <<
" misc.) + " 719 std::shared_ptr<const NnetComputation> ans =
CompileInternal(in_request);
726 std::shared_ptr<const NnetComputation> ans =
cache_.
Find(request);
733 if (computation == NULL)
756 int32 verbose_cutoff = 4;
758 std::ostringstream os1;
760 KALDI_LOG <<
"Computation request is " << os1.str();
761 std::ostringstream os2;
763 KALDI_LOG <<
"Generated computation is: " << os2.str();
786 std::ostringstream os;
788 KALDI_LOG <<
"Optimized computation is: " << os.str();
817 std::shared_ptr<const NnetComputation> mini_computation =
823 bool need_debug_info =
true;
831 need_debug_info, num_n_values, ans);
854 std::vector<std::pair<int32, int32> > *segments) {
859 for (
int32 c = 0; c < num_commands; c++) {
861 segments->push_back(std::pair<int32, int32>(cur_start, c));
865 segments->push_back(std::pair<int32, int32>(cur_start, num_commands));
873 std::vector<std::pair<int32, int32> > segments;
877 std::vector<NnetComputation::Command> reordered_commands(num_commands);
879 for (
size_t s = 0; s + 1 < segments.size(); s++)
886 std::vector<int32> left_commands, middle_commands, right_commands;
888 for (
size_t s = 0; s < segments.size(); s++) {
889 int32 segment_start = segments[s].first,
890 segment_end = segments[s].second;
891 left_commands.clear();
892 middle_commands.clear();
893 right_commands.clear();
894 for (
int32 c = segment_start; c < segment_end; c++) {
896 right_commands.push_back(c);
898 left_commands.push_back(c);
900 middle_commands.push_back(c);
903 std::vector<int32>::const_iterator iter = left_commands.begin(),
904 end = left_commands.end();
905 int32 c = segment_start;
906 for (; iter != end; ++iter, ++c)
907 reordered_commands[c] = computation->
commands[*iter];
908 iter = middle_commands.begin();
909 end = middle_commands.end();
910 for (; iter != end; ++iter, ++c)
911 reordered_commands[c] = computation->
commands[*iter];
912 iter = right_commands.begin();
913 end = right_commands.end();
914 for (; iter != end; ++iter, ++c)
915 reordered_commands[c] = computation->
commands[*iter];
918 computation->
commands.swap(reordered_commands);
double seconds_taken_check_
void Init(const NnetComputation &computation)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
This class is responsible for merging matrices, although you probably want to access it via the f...
void AppendVariablesForMatrix(int32 matrix_index, std::vector< int32 > *variable_indexes) const
Appends to variable_indexes the sorted list of variables corresponding to a matrix index...
int32 FirstNontrivialAccess(int32 s) const
Returns the first command (read or write) that accesses any part of 's' except for zeroing it (i...
void Read(std::istream &is, bool binary)
const NnetComputation * CompileNoShortcut(const ComputationRequest &request)
bool SplitRowOps(NnetComputation *computation)
This function detects cases where commands of type kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti use indexes that correspond to at most two submatrices, in two distinct ranges without gaps filled by -1's, and could be converted to at most two commands of type kMatrixAdd, kMatrixCopy, kAddRows or kCopyRows.
void OptimizeLoopedComputation(const Nnet &nnet, NnetComputation *computation)
This function tries to optimize computation 'computation' for a 'looped' computation.
const NnetComputation * CompileViaShortcut(const ComputationRequest &request)
int32 max_deriv_time_relative
void ConsolidateIoOperations(const Nnet &nnet, NnetComputation *computation)
This optimization puts the input operations (kAcceptInput) and output operations (kProvideOutput) at ...
void ComputeCudaIndexes()
MiscComputationInfo misc_info
misc_info is for extensibility to things that don't easily fit into the framework.
void Write(std::ostream &os, bool binary) const
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
static void ComputeCommandPairs(const std::pair< std::vector< int32 >, std::vector< int32 > > &lists, std::vector< std::pair< int32, int32 > > *pairs)
void RenumberComputation(NnetComputation *computation)
This function detects submatrices and matrices that are never used (e.g.
bool move_sizing_commands
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
bool is_output
true if this matrix is an output of the computation (i.e.
void Print(std::ostream &os, const Nnet &nnet) const
void VariableMergingOptimization(const NnetOptimizeOptions &config, const Nnet &nnet, NnetComputation *computation)
This wraps class VariableMergingOptimizer in a simplified interface.
bool RequestIsDecomposable(const ComputationRequest &request, ComputationRequest *mini_request, int32 *num_n_values)
This function, used in 'shortcut' compilation where we first compile a smaller computation with the s...
void ConvertAdditionToAssignment(const Nnet &nnet, NnetComputation *computation)
This converts addition operations (things with Add in their names) to copy operations (things with Co...
~CachingOptimizingCompiler()
std::vector< MatrixInfo > matrices
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
void CopyVectorToSet(const std::vector< A > &v, std::set< A > *s)
Copies the contents of a vector to a set.
void ComputeCommandAttributes(const Nnet &nnet, const NnetComputation &computation, const ComputationVariables &vars, std::vector< CommandAttributes > *attributes)
void ExtendMatrices(NnetComputation *computation)
This is not really an optimization in itself but it can make things easier for class VariableMergingO...
std::vector< Command > commands
std::shared_ptr< const NnetComputation > Find(const ComputationRequest &request)
void LimitDerivativeTimes(const Nnet &nnet, int32 min_deriv_time, int32 max_deriv_time, NnetComputation *computation)
void Write(std::ostream &os, bool binary) const
std::vector< CommandAttributes > command_attributes
This file contains some miscellaneous functions dealing with class Nnet.
void OptimizeMemoryCompression(const Nnet &nnet, int32 memory_compression_level, NnetComputation *computation)
Performs optimization to reduce memory usage where possible, making use of the kCompressMatrix and kD...
bool optimize_looped_computation
std::vector< Access > accesses
Records the indexes of commands that access the matrix, and the type (read, read/write, write).
std::shared_ptr< const NnetComputation > Insert(const ComputationRequest &request, const NnetComputation *computation)
void MoveSizingCommands(const Nnet &nnet, NnetComputation *computation)
This optimization moves commands that allocate and zero matrices to as late as possible, and moves commands that deallocate matrices to as early as possible.
double seconds_taken_expand_
bool consolidate_model_update
bool operator==(const NnetOptimizeOptions &other) const
int64 GetMaxMemoryUse(const NnetComputation &computation)
This class relates the matrices and sub-matrices in the computation to imaginary "variables", such that we can think of the operations as operating on sets of individual variables, and we can then do analysis that lets us do optimization.
void Init(const Nnet &nnet, const NnetComputation &computation)
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
void Read(std::istream &is, bool binary)
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet.
double seconds_taken_total_
int32 MaxOutputTimeInRequest(const ComputationRequest &request)
void RemoveUnnecessaryAllocation(const Nnet &nnet, NnetComputation *computation)
This optimization detects cases where we deallocate a matrix, and then later allocate another matrix ...
void RemoveUnnecessaryZeroing(const Nnet &nnet, NnetComputation *computation)
This optimization function removes, where possible, commands of type kSetConst.
void ComputeVariableAccesses(const ComputationVariables &variables, const std::vector< CommandAttributes > &command_attributes, std::vector< std::vector< Access > > *variable_accesses)
After the command-level attributes have been computed, this function organizes them per variable (see...
std::vector< SubMatrixInfo > submatrices
void Check(const Nnet &nnet) const
bool ReplaceRowWithMatrixOps(NnetComputation *computation)
This function detects cases where commands of type kCopyRows, kAddRows or kAddToRows can be converted...
CachingOptimizingCompiler(const Nnet &nnet, const CachingOptimizingCompilerOptions config=CachingOptimizingCompilerOptions())
#define KALDI_PARANOID_ASSERT(cond)
int32 memory_compression_level
std::vector< std::vector< Access > > variable_accesses
double seconds_taken_compile_
int32 deallocate_command
Index of the command that deallocates the matrix (which will be of type kDeallocMatrix or kSwapMatrix...
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
void GetSimpleNnetContext(int32 *nnet_left_context, int32 *nnet_right_context)
void FixGotoLabel(NnetComputation *computation)
This function ensures that the arg1 of a final command of type kGotoLabel is the same as the command ...
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
double seconds_taken_optimize_
void Optimize(const NnetOptimizeOptions &config, const Nnet &nnet, int32 max_output_time_in_request, NnetComputation *computation)
This is the top-level function for optimizing a computation.
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
void ExpandComputation(const Nnet &nnet, const MiscComputationInfo &misc_info, const NnetComputation &computation, bool need_debug_info, int32 num_n_values, NnetComputation *expanded_computation)
This function is used in 'shortcut' compilation to expand a computation that has been compiled for ex...
void ComputeMatrixAccesses(const Nnet &nnet, const NnetComputation &computation, const ComputationVariables &variables, const std::vector< CommandAttributes > &command_attributes, std::vector< MatrixAccesses > *matrix_accesses)
This function organizes information in the CommandAttributes in a way that is convenient to access pe...
void ReadCache(std::istream &is, bool binary)
void ConsolidateModelUpdate(const Nnet &nnet, NnetComputation *computation)
This optimization consolidates the model-update part of backprop commands, for components in (e...
std::vector< MatrixAccesses > matrix_accesses
void WriteCache(std::ostream &os, bool binary)
void CheckComputation(const Nnet &nnet, const NnetComputation &computation, bool check_rewrite)
This is a convenience interface for class ComputationChecker.
void CreateComputation(const CompilerOptions &opts, NnetComputation *computation)
#define KALDI_ASSERT(cond)
int32 nnet_right_context_
std::vector< IoSpecification > outputs
void RemoveNoOps(NnetComputation *computation)
Removes commands of type kNoOperation in the computation.
This class creates an initial version of the NnetComputation, without any optimization or sharing of ...
int32 allocate_command
Index of the command that allocates the matrix (which will be of type kAllocMatrix or kSwapMatrix)...
double seconds_taken_indexes_
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
ComputationVariables variables
NnetOptimizeOptions opt_config_
bool SnipRowOps(NnetComputation *computation)
This function detects cases where commands of type kCopyRows, kAddRows, kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti or kAddRowRanges use indexes that start or end with -1's or equivalents, and replace them with similar commands that act on a sub-matrix of the matrices they are currently acting on.
void Print(std::ostream &os) const
This function is for printing info about the computation request in a human-readable way...
double Elapsed() const
Returns time in seconds.
bool initialize_undefined
This struct exists to set up various pieces of analysis; it helps avoid the repetition of code where ...
CachingOptimizingCompilerOptions config_
std::shared_ptr< const NnetComputation > CompileInternal(const ComputationRequest &request)
static void SplitComputationIntoSegments(const NnetComputation &computation, std::vector< std::pair< int32, int32 > > *segments)
Split the computation up into segments bounded by kNoOperationMarker.
A hashing function-object for pairs of ints.
This class performs various kinds of specific analysis on top of what class Analyzer gives you immedi...