29                            std::vector<int32*> *submatrix_args) {
    30   submatrix_args->clear();
    35       submatrix_args->push_back(&c->arg1);
    38       submatrix_args->push_back(&c->arg1);
    39       submatrix_args->push_back(&c->arg2);
    42       submatrix_args->push_back(&c->arg3);
    43       submatrix_args->push_back(&c->arg4);
    47       submatrix_args->push_back(&c->arg3);
    48       submatrix_args->push_back(&c->arg4);
    49       submatrix_args->push_back(&c->arg5);
    50       submatrix_args->push_back(&c->arg6);
    57       submatrix_args->push_back(&c->arg1);
    58       submatrix_args->push_back(&c->arg2);
    64       submatrix_args->push_back(&c->arg1);
    67       submatrix_args->push_back(&c->arg1);
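         // Below: the whole-computation variant appears to walk every command,
         // reuse the per-command helper above, and concatenate the collected
         // pointers into a single list.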
    81                            std::vector<int32*> *submatrix_args) {
    82   submatrix_args->clear();
    83   std::vector<NnetComputation::Command>::iterator iter = commands->begin(),
    84       end = commands->end();
    85   std::vector<int32*> this_submatrix_args;
    86   for (; iter != end; ++iter) {
    88     submatrix_args->insert(submatrix_args->end(),
    89                            this_submatrix_args.begin(),
    90                            this_submatrix_args.end());
    97                                      std::vector<int32*> *matrix_args) {
    99   matrix_args->reserve(computation->submatrices.size());
   100   for (int32 s = 1; s < num_submatrices; s++)
   101     matrix_args->push_back(&(computation->submatrices[s].matrix_index));
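         // Below: gathers pointers to the indexes_multi argument (arg2) of each
         // command that has one, so callers can renumber those indexes in place.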
   106                               std::vector<int32*> *indexes_multi_args) {
   107   indexes_multi_args->clear();
   108   std::vector<NnetComputation::Command>::iterator iter = commands->begin(),
   109       end = commands->end();
   110   for (; iter != end; ++iter) {
   116       indexes_multi_args->push_back(&(command.arg2));
   122                                std::vector<int32*> *indexes_ranges_args) {
   123   indexes_ranges_args->clear();
   124   std::vector<NnetComputation::Command>::iterator iter = commands->begin(),
   125       end = commands->end();
   126   for (; iter != end; ++iter) {
   129       indexes_ranges_args->push_back(&(command.arg3));
   134                          std::vector<int32*> *indexes_args) {
   135   indexes_args->clear();
   136   std::vector<NnetComputation::Command>::iterator iter = commands->begin(),
   137       end = commands->end();
   138   for (; iter != end; ++iter) {
   142       indexes_args->push_back(&(command.arg3));
   191       return submat.matrix_index +
   192           19553 * submat.row_offset +
   193           29297 * submat.num_rows +
   194           42209 * submat.col_offset +
   195           56527 * submat.num_cols;
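         // The comparator below orders vectors first by size and only then
         // lexicographically, which is cheaper than an element-wise compare
         // whenever the sizes already differ.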
   209                      const std::vector<T> *ptr2) const {
   210       size_t size1 = ptr1->size(), size2 = ptr2->size();
   211       if (size1 < size2) return true;
   212       else if (size1 > size2) return false;
   213       else return (*ptr1 < *ptr2);  
   222                                 const std::vector<int32> &to_remove,
   223                                 std::vector<int32> *renumbering);
   230                                  std::vector<int32> *renumbering);
   257     const std::vector<bool> &used,
   258     std::vector<int32> *renumbering) {
   259   renumbering->clear();
   260   renumbering->reserve(used.size());
   261   std::vector<bool>::const_iterator iter = used.begin(), end = used.end();
   263   for (; iter != end; ++iter) {
   264     if (*iter) renumbering->push_back(cur_index++);
   265     else renumbering->push_back(-1);
   272     int32 old_num_elements,
   273     const std::vector<int32> &to_remove,
   274     std::vector<int32> *renumbering) {
   276   renumbering->clear();
   277   renumbering->resize(old_num_elements, 0);
   278   int32 num_remove = to_remove.size();
   279   for (int32 r = 0; r < num_remove; r++) {
   280     int32 this_remove = to_remove[r];
   283     KALDI_ASSERT(this_remove > 0 && this_remove < old_num_elements);
   284     (*renumbering)[this_remove] = -1;
   286   int32 cur_number = 0;
   287   for (int32 i = 0; i < old_num_elements; i++) {
   288     if ((*renumbering)[i] != -1)
   289       (*renumbering)[i] = cur_number++;
   292                static_cast<int32>(to_remove.size()));
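         // Below: builds a map from each memo index to the (propagate, backprop)
         // command pair that creates and consumes it, then renumbers the memos
         // consecutively starting from 1.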
   300   std::vector<std::pair<int32, int32> > memo_to_commands;
   301   std::vector<int32> memo_indexes_used;
   302   std::pair<int32, int32> blank(-1, -1);
   304   for (int32 c = 0; c < num_commands; c++) {
   308       if (memo_index > 0) {
   309         if (memo_to_commands.size() <= static_cast<size_t>(memo_index))
   310           memo_to_commands.resize(memo_index + 1, blank);
   312         memo_to_commands[memo_index].first = c;
   313         memo_indexes_used.push_back(memo_index);
   317       if (memo_index > 0) {
   318         if (memo_to_commands.size() <= static_cast<size_t>(memo_index))
   319           memo_to_commands.resize(memo_index + 1, blank);
   321                      memo_to_commands[memo_index].second == -1);
   322         memo_to_commands[memo_index].second = c;
   326   int32 new_memo_index = 1;
   327   for (std::vector<int32>::iterator iter = memo_indexes_used.begin();
   328        iter != memo_indexes_used.end(); ++iter) {
   329     int32 memo_index = *iter;
   330     int32 propagate_command = memo_to_commands[memo_index].first,
   331         backprop_command = memo_to_commands[memo_index].second;
   333                  "Propagate generates memo but backprop doesn't use it.");
   341                                         std::vector<int32*> *submatrix_args) {
   344   size_t extra_size = 0;
   347   submatrix_args->reserve(submatrix_args->size() + extra_size);
   350     std::vector<std::pair<int32, int32> > &indexes_multi =
   352     std::vector<std::pair<int32, int32> >::iterator
   353         iter = indexes_multi.begin(), end = indexes_multi.end();
   354     for (; iter != end; ++iter)
   355       if (iter->first != -1)
   356         submatrix_args->push_back(&(iter->first));
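         // Below: appears to mark which submatrices are referenced by any
         // command; cur_submatrix_index merely skips consecutive repeats of the
         // same index as a small shortcut.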
   368   std::vector<int32*> submatrix_args;
   370   std::vector<int32*>::iterator iter = submatrix_args.begin(),
   371       end = submatrix_args.end();
   372   int32 cur_submatrix_index = -1;  
   375   for (; iter != end; ++iter) {
   376     int32 submatrix_index = **iter;
   377     if (submatrix_index > 0 && submatrix_index != cur_submatrix_index) {
   378       cur_submatrix_index = submatrix_index;
   392   for (int32 s = 1; s < num_submatrices; s++) {
   406   int32 cur_index = 1, num_submatrices_orig =
   413   for (int32 s = 1; s < num_submatrices_orig; s++) {
   415       const NnetComputation::SubMatrixInfo &info =
   417       if (submat_map.count(info) > 0) {  
   429   std::vector<int32*> submatrix_args;
   431   std::vector<int32*>::iterator iter = submatrix_args.begin(),
   432       end = submatrix_args.end();
   433   for (; iter != end; ++iter) {
   440       **iter = new_submatrix_index;
   443   std::vector<NnetComputation::SubMatrixInfo> new_submatrices;
   445   new_submatrices.reserve(num_submatrices_old);
   446   for (int32 s = 0; s < num_submatrices_old; s++)
   455   std::vector<int32*> matrix_args;
   457   for (int32 s = 1; s < num_submatrices; s++) {
   464     *matrix_index = new_matrix_index;
   467   std::vector<NnetComputation::MatrixInfo> new_matrices;
   469   new_matrices.reserve(num_matrices_old);
   470   for (int32 m = 0; m < num_matrices_old; m++)
   475   std::vector<NnetComputation::MatrixDebugInfo> new_debug_info;
   477   KALDI_ASSERT(debug_info_size == 0 || debug_info_size == num_matrices_old);
   478   new_debug_info.reserve(debug_info_size);
   479   for (int32 m = 0; m < debug_info_size; m++) {
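         // Below: removes unused indexes_multi vectors by marking which entries
         // are still referenced, building an old-to-new renumbering, and then
         // rewriting every stored index through that mapping.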
   504   if (num_indexes_multi == 0)
   506   std::vector<bool> indexes_multi_used(num_indexes_multi, false);
   507   std::vector<int32*> indexes_multi_args;
   509   std::vector<int32*>::iterator iter = indexes_multi_args.begin(),
   510       end = indexes_multi_args.end();
   511   for (; iter != end; ++iter) {
   512     int32 indexes_multi_index = **iter;
   514                  indexes_multi_index < num_indexes_multi);
   515     indexes_multi_used[indexes_multi_index] = 1;
   519   std::vector<int32> old_to_new(num_indexes_multi, -1);
   522   if (new_num_indexes_multi == num_indexes_multi)
   524   std::vector<std::vector<std::pair<int32, int32> > >
   525       new_indexes_multi(new_num_indexes_multi);
   526   for (int32 i = 0; i < num_indexes_multi; i++) {
   527     if (old_to_new[i] != -1)
   532   for (iter = indexes_multi_args.begin(); iter != end; ++iter)
   533     **iter = old_to_new[**iter];
   541   if (old_indexes_multi_size == 0)
   547   std::vector<int32> indexes_multi_old_to_new(old_indexes_multi_size);
   548   typedef std::vector<std::pair<int32,int32> > PairVectorType;
   549   typedef std::map<const PairVectorType*, int32,
   551   MapType indexes_multi_map;
   553     std::pair<MapType::iterator, bool> p =
   554         indexes_multi_map.insert(std::pair<const PairVectorType*, int32>(
   557       indexes_multi_old_to_new[i] = cur_index++;
   559       int32 index_from_map = p.first->second;
   560       indexes_multi_old_to_new[i] = index_from_map;
   563   if (cur_index == old_indexes_multi_size)
   565   std::vector<PairVectorType> new_indexes_multi(cur_index);
   566   for (int32 i = 0; i < old_indexes_multi_size; i++) {
   567     int32 new_index = indexes_multi_old_to_new[i];
   572   std::vector<int32*> indexes_multi_args;
   574   std::vector<int32*>::const_iterator iter = indexes_multi_args.begin(),
   575       end = indexes_multi_args.end();
   576   for (; iter != end; ++iter)
   577     **iter = indexes_multi_old_to_new[**iter];
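         // Below: de-duplicates the computation's 'indexes' vectors.  Unused
         // entries map to -1, the rest are uniqued via a map keyed on the vector
         // contents, and the command arguments are rewritten to the new numbers.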
   583   if (old_num_indexes == 0)
   585   std::vector<int32*> indexes_args;
   588   std::vector<bool> indexes_seen(old_num_indexes, false);
   589   std::vector<int32*>::const_iterator iter = indexes_args.begin(),
   590       end = indexes_args.end();
   591   for (; iter != end; ++iter)
   592     indexes_seen[**iter] = true;
   594   std::vector<int32> old_to_new_index(old_num_indexes);
   595   typedef std::map<const std::vector<int32>*, int32,
   600   for (int32 i = 0; i < old_num_indexes; i++) {
   601     if (!indexes_seen[i]) {
   602       old_to_new_index[i] = -1;
   604       std::pair<MapType::iterator, bool> p =
   605           indexes_map.insert(std::pair<const std::vector<int32>*, int32>(
   608         old_to_new_index[i] = cur_index++;
   610         int32 index_from_map = p.first->second;
   611         old_to_new_index[i] = index_from_map;
   615   if (cur_index == old_num_indexes)
   617   std::vector<std::vector<int32> > new_indexes(cur_index);
   618   for (int32 i = 0; i < old_num_indexes; i++) {
   619     int32 new_index = old_to_new_index[i];
   626   for (iter = indexes_args.begin(); iter != end; ++iter) {
   627     int32 old_index = **iter;
   628     KALDI_ASSERT(old_index >= 0 && old_index < old_num_indexes);
   629     int32 new_index = old_to_new_index[old_index];
   637   if (old_num_indexes_ranges == 0)
   639   std::vector<int32*> indexes_ranges_args;
   642   std::vector<bool> is_seen(old_num_indexes_ranges, false);
   643   std::vector<int32*>::const_iterator iter = indexes_ranges_args.begin(),
   644       end = indexes_ranges_args.end();
   645   for (; iter != end; ++iter)
   646     is_seen[**iter] = true;
   648   std::vector<int32> old_to_new_index(old_num_indexes_ranges);
   649   typedef std::map<const std::vector<std::pair<int32, int32> >*, int32,
   653   for (int32 i = 0; i < old_num_indexes_ranges; i++) {
   655       old_to_new_index[i] = -1;
   657       std::pair<MapType::iterator, bool> p =
   659               std::pair<const std::vector<std::pair<int32, int32> >*, int32>(
   662         old_to_new_index[i] = cur_index++;
   664         int32 index_from_map = p.first->second;
   665         old_to_new_index[i] = index_from_map;
   669   if (cur_index == old_num_indexes_ranges)
   671   std::vector<std::vector<std::pair<int32, int32> > > new_indexes_ranges(
   673   for (int32 i = 0; i < old_num_indexes_ranges; i++) {
   674     int32 new_index = old_to_new_index[i];
   681   for (iter = indexes_ranges_args.begin(); iter != end; ++iter) {
   682     int32 old_index = **iter;
   683     KALDI_ASSERT(old_index >= 0 && old_index < old_num_indexes_ranges);
   684     int32 new_index = old_to_new_index[old_index];
   705       std::remove_if(computation->commands.begin(),
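         // Below: the variable-merging pass scans the command list and, for each
         // candidate submatrix pair (s1, s2), asks MayBeMerged() which direction
         // of merge (if any) is legal before calling DoMerge().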
   715     config_(config), nnet_(nnet),
   717     already_called_merge_variables_(false) {
   730   for (int32 command_index = 0; command_index < num_commands;
   735     int32 s1 = -1, s2 = -1;
   763     if (s1 > 0 && s2 > 0) {
   764       std::pair<bool,bool> p = MayBeMerged(command_index, s1, s2);
   766         DoMerge(command_index, s1, s2);
   768       } else if (p.second) {
   769         DoMerge(command_index, s2, s1);
   796       computation.matrices[a.matrix_index];
   808   std::vector<int32> variable_indexes;
   810   std::vector<int32>::const_iterator iter = variable_indexes.begin(),
   811       end = variable_indexes.end();
   812   for (; iter != end; ++iter) {
   821                                        int32 s_to_discard) {
   829   KALDI_ASSERT(m_to_keep != m_to_discard && m_to_keep > 0 && m_to_discard > 0);
   833     std::vector<int32>::const_iterator iter =
   836     for (; iter != end; ++iter) {
   837       int32 submatrix_index = *iter;
   848   const std::vector<MatrixAccesses> &matrix_accesses =
   875   int32 dealloc_keep = matrix_accesses[m_to_keep].deallocate_command,
   876       dealloc_discard = matrix_accesses[m_to_discard].deallocate_command;
   877   if (dealloc_discard != -1) {
   900     int32 alloc_keep = matrix_accesses[m_to_keep].allocate_command,
   901         alloc_discard = matrix_accesses[m_to_discard].allocate_command;
   910     int32 matrix_whose_zeroing_to_discard;
   911     if (discard_alloc_command.command_type == 
kAcceptInput) {
   913       matrix_whose_zeroing_to_discard = m_to_keep;
   916       matrix_whose_zeroing_to_discard = m_to_discard;
   920     int32 zeroing_command_to_discard =
   921      matrix_accesses[matrix_whose_zeroing_to_discard].accesses[0].command_index;
   925         zeroing_command.alpha == 0.0) {
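         // Below: MayBeMerged() returns a pair of bools indicating whether a
         // merge that keeps s1 (first) or keeps s2 (second) is allowed, judging
         // from how DoMerge() is invoked above; both are false when, for
         // instance, the two submatrices share a matrix or their input/output
         // status makes merging unsafe.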
   947   KALDI_ASSERT(s1 > 0 && s2 > 0 && static_cast<size_t>(command_index) <
   950     return std::pair<bool,bool>(false, false);
   954   if (m1 == m2) return std::pair<bool,bool>(false, false);
   955   std::vector<int32> variable_indexes;
   958   std::vector<int32>::iterator iter = variable_indexes.begin(),
   959       end = variable_indexes.end();
   961   for (; iter != end; ++iter)
   963       return std::pair<bool,bool>(false, false);
   966       &m2_access = matrix_accesses[m2];
   968   if ((m1_access.is_input && m2_access.is_input) ||
   969       (m1_access.is_output && m2_access.is_output))
   970     return std::pair<bool,bool>(false, false);
   973        m2_access.is_input || m2_access.is_output) &&
   976     return std::pair<bool,bool>(false, false);
   992     return std::pair<bool,bool>(false, false);
  1002       return std::pair<bool,bool>(left, right);  
  1007       return std::pair<bool,bool>(left, right);  
  1011   return std::pair<bool,bool>(false, false);
  1032   bool CanBeExtended(int32 dest_submatrix_index,
  1033                      int32 src_submatrix_index);
  1038   void Extend(int32 *dest_submatrix_index, int32 *src_submatrix_index);
  1042   void FixComputation();
  1046   void FixDebugInfo();
  1068     min_proportion_(0.8),
  1075     for (int32 m = 1; m < num_matrices; m++)
  1080     std::vector<NnetComputation::Command>::iterator
  1083     for (; command_iter != command_end; ++command_iter) {
  1100                                    int32 src_submatrix_index) {
  1104   if (src_submatrix.matrix_index == dest_submatrix.matrix_index)
  1127           dest_submatrix.row_offset + dest_submatrix.num_rows ==
  1128           dest_matrix_orig_num_rows);
  1133                             int32 *src_submatrix_index) {
  1142   int32 new_dest_num_rows = dest_submatrix.row_offset + src_matrix.num_rows;
  1148   if (new_dest_num_rows > dest_matrix.num_rows) {
  1149     dest_matrix.num_rows = new_dest_num_rows;
  1152         SubMatrixInfo(dest_submatrix.matrix_index, 0, new_dest_num_rows,
  1153                       0, dest_matrix.num_cols));
  1160   dest_submatrix.num_rows = src_matrix.num_rows;
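         // Below: scans for matrix-copy commands with alpha == 1.0 and, when
         // CanBeExtended() approves, calls Extend() to grow the destination
         // matrix over the source rows.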
  1175   std::vector<NnetComputation::Command>::iterator
  1178   bool changed = false;
  1179   for (; command_iter != command_end; ++command_iter) {
  1182         command.alpha == 1.0) {
  1183       int32 dest_submatrix_index = command.arg1,
  1184           src_submatrix_index = command.arg2;
  1185       if (CanBeExtended(dest_submatrix_index, src_submatrix_index)) {
  1198   std::vector<NnetComputation::Command>::iterator
  1201   std::vector<int32> whole_submatrices;
  1203   for (; command_iter != command_end; ++command_iter) {
  1209           new_s = whole_submatrices[m];
  1214         command.arg1 = new_s;
  1220           new_s = whole_submatrices[m];
  1237         command.arg1 = new_s;
  1249   for (int32 m = 1; m < num_matrices; m++) {
  1253         old_num_rows = debug_info.cindexes.size();
  1254     if (new_num_rows != old_num_rows) {
  1255       debug_info.cindexes.resize(new_num_rows);
  1256       int32 num_extra_rows = new_num_rows - old_num_rows;
  1259       for (int32 r = old_num_rows; r < new_num_rows; r++) {
  1288   void ConsolidateUpdateForComponent(
  1290       const std::vector<int32> &backprop_commands);
  1296   void AddCommandsToComputation();
  1313   int32 ConsolidateSubmatrices(
  1314       const std::vector<int32> &commands,
  1315       const std::vector<int32> &submatrices);
  1324   void AppendDebugInfoForSubmatrix(
  1325       int32 submatrix_index,
  1348     int32 submatrix_index,
  1355   int32 matrix_index = submatrix_info.matrix_index;
  1356   KALDI_ASSERT(matrix_index > 0 && static_cast<size_t>(matrix_index) <
  1360   debug_info->is_deriv = src_info.is_deriv;
  1363   int32 row_begin = submatrix_info.row_offset,
  1364       row_end = row_begin + submatrix_info.num_rows;
  1366                              src_info.cindexes.begin() + row_begin,
  1367                              src_info.cindexes.begin() + row_end);
  1372     const std::vector<int32> &commands,
  1373     const std::vector<int32> &submatrices) {
  1374   int32 num_submatrices = submatrices.size();
  1375   KALDI_ASSERT(num_submatrices > 1 && commands.size() == submatrices.size());
  1376   int32 first_submatrix = submatrices[0];
  1381   for (int32 i = 0; i < num_submatrices; i++) {
  1382     int32 submatrix = submatrices[i];
  1386       AppendDebugInfoForSubmatrix(submatrix, &debug_info);
  1399   extra_commands_[0].push_back(
  1401   extra_commands_[0].push_back(
  1404   final_deallocate_commands_.push_back(
  1406   int32 new_matrix_index =
  1411   int32 row_offset = 0;
  1412   for (int32 i = 0; i < num_submatrices; i++) {
  1413     int32 submatrix_index = submatrices[i];
  1418                                                      row_offset, this_num_rows,
  1427     extra_commands_[commands[i]].push_back(c);
  1428     row_offset += this_num_rows;
  1431   return new_whole_submatrix;
  1437       new_num_commands = old_num_commands +
  1438       static_cast<int32>(final_commands_.size() +
  1439                          final_deallocate_commands_.size());
  1440   for (size_t i = 0; i < extra_commands_.size(); i++)
  1441     new_num_commands += static_cast<int32>(extra_commands_[i].size());
  1442   std::vector<NnetComputation::Command> new_commands;
  1443   new_commands.reserve(new_num_commands);
  1444   for (int32 c = 0; c < old_num_commands; c++) {
  1445     new_commands.insert(new_commands.end(),
  1446                         extra_commands_[c].begin(), extra_commands_[c].end());
  1449   new_commands.insert(new_commands.end(),
  1450                       final_commands_.begin(), final_commands_.end());
  1451   new_commands.insert(new_commands.end(),
  1452                       final_deallocate_commands_.begin(),
  1453                       final_deallocate_commands_.end());
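         // Below: consolidates the separate backprop commands for one component
         // into a single backprop over row-appended (consolidated) submatrices,
         // so that the model update happens just once for that component.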
  1461     int32 component_index,
  1462     const std::vector<int32> &backprop_commands) {
  1463   const Component *component = nnet_.GetComponent(component_index);
  1464   int32 num_backprop_commands = backprop_commands.size();
  1469   std::vector<int32>  input_submatrices(num_backprop_commands),
  1470       output_submatrices(num_backprop_commands),
  1471       output_deriv_submatrices(num_backprop_commands);
  1473   for (int32 i = 0; i < num_backprop_commands; i++) {
  1474     int32 command_index = backprop_commands[i];
  1481         output_submatrix = command.arg4,
  1482         output_deriv_submatrix = command.arg5;
  1484                  (output_submatrix != 0) == need_output);
  1485     input_submatrices[i] = input_submatrix;
  1486     output_submatrices[i] = output_submatrix;
  1487     output_deriv_submatrices[i] = output_deriv_submatrix;
  1491   int32 input_submatrix = (need_input ?
  1492                            ConsolidateSubmatrices(backprop_commands,
  1493                                                   input_submatrices) : 0),
  1494       output_submatrix = (need_output ?
  1495                          ConsolidateSubmatrices(backprop_commands,
  1496                                                 output_submatrices) : 0),
  1497       output_deriv_submatrix = ConsolidateSubmatrices(backprop_commands,
  1498                                                       output_deriv_submatrices);
  1499   int32 precomputed_indexes_index = 0,  
  1500       input_deriv_submatrix = 0,  
  1503                              input_submatrix, output_submatrix,
  1504                              output_deriv_submatrix, input_deriv_submatrix,
  1506   final_commands_.push_back(c);
  1513     extra_commands_(computation->commands.size()) { }
  1521   std::vector<std::vector<int32> > backprop_commands(num_components);
  1522   for (int32 command_index = 0;
  1523        command_index < num_commands; command_index++) {
  1532         backprop_commands[component_index].push_back(command_index);
  1535   bool consolidated = false;
  1536   for (int32 component = 0; component < num_components; component++) {
  1537     if (backprop_commands[component].size() > 1) {
  1539                                     backprop_commands[component]);
  1540       consolidated = true;
  1565                                            int32 new_submatrix,
  1567                                            int32 *right_prune) const {
  1568   KALDI_ASSERT(initial_submatrix > 0 && new_submatrix > 0);
  1573   *left_prune = new_info.row_offset - initial_info.row_offset;
  1574   if (right_prune != NULL) {
  1575     *right_prune = initial_info.num_rows - new_info.num_rows - *left_prune;
  1582     int32 row_index) const {
  1583   KALDI_ASSERT(submatrix > 0 && submatrix < computation_->submatrices.size());
  1587                row_index < computation_->submatrices[submatrix].num_rows);
  1588   int32 matrix_index = info.matrix_index;
  1596   int32 t = debug_info.cindexes[row_index + info.row_offset].second.t;
  1597   return (t >= min_deriv_time_ && t <= max_deriv_time_);
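         // Below: each command type gets its own mapping routine that rewrites
         // submatrix arguments to their row-pruned counterparts, or appears to
         // discard the command when nothing inside the allowed derivative-time
         // range survives.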
  1606   switch (command_type) {
  1618       if (submatrix_map_[command->arg4] == 0)
  1634           output_submatrix = command->arg4,
  1635           output_deriv_submatrix = command->arg5,
  1636           input_deriv_submatrix = command->arg6;
  1637       int32 mapped_input_submatrix = submatrix_map_[input_submatrix],
  1638            mapped_output_submatrix =  submatrix_map_[output_submatrix],
  1639      mapped_output_deriv_submatrix = submatrix_map_[output_deriv_submatrix],
  1640       mapped_input_deriv_submatrix = submatrix_map_[input_deriv_submatrix];
  1642       if (mapped_output_deriv_submatrix == 0) {
  1645                      mapped_input_submatrix == 0 &&
  1646                      mapped_output_submatrix == 0);
  1649         if (command->arg7 > 0)
  1650           memos_to_delete_.insert(command->arg7);
  1651       } else if (mapped_output_deriv_submatrix !=
  1652                  output_deriv_submatrix &&
  1657         command->arg3 = mapped_input_submatrix;
  1658         command->arg4 = mapped_output_submatrix;
  1659         command->arg5 = mapped_output_deriv_submatrix;
  1660         command->arg6 = mapped_input_deriv_submatrix;
  1665       MapSimpleMatrixCommand(command);
  1668       MapIndexesCommand(command);
  1672       MapIndexesMultiCommand(command);
  1675       MapAddRowRangesCommand(command);
  1682       KALDI_ERR << "Un-handled command type.";
  1688       submatrix2 = c->arg2;
  1689   int32 submatrix1_mapped = submatrix_map_if_deriv_[submatrix1],
  1690       submatrix2_mapped = submatrix_map_if_deriv_[submatrix2];
  1691   if (submatrix1_mapped == submatrix1 &&
  1692       submatrix2_mapped == submatrix2) {
  1696   if (submatrix1_mapped == 0 || submatrix2_mapped == 0) {
  1703       left_prune1, left_prune2, right_prune1, right_prune2;
  1704   GetPruneValues(submatrix1, submatrix1_mapped, &left_prune1, &right_prune1);
  1705   GetPruneValues(submatrix2, submatrix2_mapped, &left_prune2, &right_prune2);
  1706   if (left_prune1 == left_prune2 && right_prune1 == right_prune2) {
  1709     c->arg1 = submatrix1_mapped;
  1710     c->arg2 = submatrix2_mapped;
  1714     int32 left_prune = std::max(left_prune1, left_prune2),
  1715         right_prune = std::max(right_prune1, right_prune2);
  1716     if (left_prune + right_prune >= orig_num_rows) {
  1721       int32 num_rows = orig_num_rows - left_prune - right_prune;
  1725                                            left_prune, num_rows, 0, -1);
  1727                                            left_prune, num_rows, 0, -1);
  1737       input_submatrix = c->arg2;
  1738   int32 input_submatrix_mapped = submatrix_map_if_deriv_[input_submatrix],
  1739       output_submatrix_mapped = submatrix_map_if_deriv_[output_submatrix];
  1743   if (input_submatrix_mapped == 0 ||
  1744       output_submatrix_mapped == 0) {
  1759   int32 left_prune_input, left_prune_output;
  1760   GetPruneValues(input_submatrix, input_submatrix_mapped,
  1761                  &left_prune_input, NULL);
  1762   GetPruneValues(output_submatrix, output_submatrix_mapped,
  1763                  &left_prune_output, NULL);
  1764   int32 new_num_input_rows =
  1766       new_num_output_rows =
  1768   std::vector<int32> new_indexes(new_num_output_rows);
  1769   bool must_keep_command = false;
  1770   for (int32 i = 0; i < new_num_output_rows; i++) {
  1773     int32 orig_index = old_indexes[i + left_prune_output];
  1774     if (orig_index == -1 ||
  1775         !RowIsKept(input_submatrix, orig_index) ||
  1776         !RowIsKept(output_submatrix_mapped, i)) {
  1777       new_indexes[i] = -1;
  1779       int32 mapped_index = orig_index - left_prune_input;
  1782       KALDI_ASSERT(mapped_index >= 0 && mapped_index < new_num_input_rows);
  1783       new_indexes[i] = mapped_index;
  1784       must_keep_command = true;
  1787   if (!must_keep_command) {
  1793   c->arg1 = output_submatrix_mapped;
  1794   c->arg2 = input_submatrix_mapped;
  1795   c->arg3 = new_indexes_index;
  1800       indexes_multi_arg = c->arg2;
  1801   int32 dest_submatrix_mapped = submatrix_map_if_deriv_[dest_submatrix];
  1802   if (dest_submatrix_mapped == 0) {
  1808   GetPruneValues(dest_submatrix, dest_submatrix_mapped, &left_prune, NULL);
  1810   const std::vector<std::pair<int32, int32> > &old_indexes_multi(
  1812   std::vector<std::pair<int32, int32> > new_indexes_multi(new_num_rows);
  1813   bool must_keep_command = false;
  1814   for (int32 i = 0; i < new_num_rows; i++) {
  1815     std::pair<int32,int32> &this_pair = new_indexes_multi[i];
  1816     this_pair = old_indexes_multi[i + left_prune];
  1819     int32 this_submatrix = this_pair.first,
  1820         this_row = this_pair.second;
  1821     if (this_submatrix == -1)  
  1823     if (!RowIsKept(this_submatrix, this_row) ||
  1824         !RowIsKept(dest_submatrix_mapped, i)) {
  1825       this_pair.first = -1;
  1826       this_pair.second = -1;
  1829     int32 this_submatrix_mapped = submatrix_map_if_deriv_[this_submatrix];
  1836     int32 this_left_prune, this_num_rows =
  1838     GetPruneValues(this_submatrix, this_submatrix_mapped,
  1839                    &this_left_prune, NULL);
  1840     int32 this_row_mapped = this_row - this_left_prune;
  1843     KALDI_ASSERT(this_row_mapped >= 0 && this_row_mapped < this_num_rows);
  1844     this_pair.first = this_submatrix_mapped;
  1845     this_pair.second = this_row_mapped;
  1846     must_keep_command = true;
  1848   if (!must_keep_command) {
  1852   if (dest_submatrix_mapped == dest_submatrix &&
  1853       new_indexes_multi == old_indexes_multi)  
  1855   c->arg1 = dest_submatrix_mapped;
  1863       src_submatrix = c->arg2,
  1864       indexes_ranges_index = c->arg3;
  1865   int32 dest_submatrix_mapped = submatrix_map_if_deriv_[dest_submatrix],
  1866       src_submatrix_mapped = submatrix_map_if_deriv_[src_submatrix];
  1867   if (dest_submatrix_mapped == dest_submatrix &&
  1868       src_submatrix_mapped == src_submatrix)
  1870   if (dest_submatrix_mapped == 0 || src_submatrix_mapped == 0) {
  1876       src_left_prune, dest_left_prune;
  1877   GetPruneValues(dest_submatrix, dest_submatrix_mapped,
  1878                  &dest_left_prune, NULL);
  1879   GetPruneValues(src_submatrix, src_submatrix_mapped,
  1880                  &src_left_prune, NULL);
  1881   const std::vector<std::pair<int32,int32> > &old_indexes_ranges(
  1883   std::vector<std::pair<int32,int32> > new_indexes_ranges(dest_num_rows);
  1885   bool must_keep_command = false;
  1886   for (int32 i = 0; i < dest_num_rows; i++) {
  1887     std::pair<int32, int32> &this_pair = new_indexes_ranges[i];
  1888     this_pair = old_indexes_ranges[i + dest_left_prune];
  1890     int32 start = this_pair.first, end = this_pair.second;
  1891     if (!RowIsKept(dest_submatrix_mapped, i)) {
  1894     } else if (start >= 0) {
  1900       while (start < end && !RowIsKept(src_submatrix, start))
  1902       while (end > start && !RowIsKept(src_submatrix, end - 1))
  1908         start -= src_left_prune;
  1909         end -= src_left_prune;
  1910         must_keep_command = true;
  1913         KALDI_ASSERT(start >= 0 && end <= src_num_rows && start < end);
  1916     this_pair.first = start;
  1917     this_pair.second = end;
  1919   if (must_keep_command) {
  1920     c->arg1 = dest_submatrix_mapped;
  1921     c->arg2 = src_submatrix_mapped;
  1931                                              int32 min_deriv_time,
  1932                                              int32 max_deriv_time,
  1935     min_deriv_time_(min_deriv_time),
  1936     max_deriv_time_(max_deriv_time),
  1959       num_memos_removed = 0;
  1960   for (size_t command_index = 0; command_index < num_commands;
  1966       num_memos_removed++;
  1975                "Limiting derivative times requires debug info.");
  1981   for (int32 matrix_index = 1; matrix_index < num_matrices; matrix_index++) {
  1985     const std::vector<Cindex> &cindexes = debug_info.cindexes;
  1987     KALDI_ASSERT(num_rows == static_cast<int32>(cindexes.size()));
  1988     int32 first_row_within_range = num_rows,
  1989         last_row_within_range = -1;
  1990     for (int32 i = 0; i < num_rows; i++) {
  1991       int32 t = cindexes[i].second.t;
  1992       if (t >= min_deriv_time && t <= max_deriv_time) {
  1993         if (i < first_row_within_range) first_row_within_range = i;
  1994         if (i > last_row_within_range) last_row_within_range = i;
  1997     if (last_row_within_range == -1) {
  2000     } else if (last_row_within_range == num_rows - 1 &&
  2001                first_row_within_range == 0) {
  2007       prune_info.row_begin = first_row_within_range;
  2008       prune_info.row_end = last_row_within_range + 1;
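         // Below: for every submatrix, intersect its row range with the rows of
         // its matrix that are kept; if nothing remains the submatrix appears to
         // map to 0, otherwise a new, narrower submatrix is created.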
  2020   for (int32 s = 1; s < num_submatrices; s++) {
  2024         num_rows = submatrix_info.num_rows;
  2035           pruned_row_end = std::min(matrix_prune_info.row_end,
  2036                                     row_offset + num_rows);
  2037       if (pruned_row_end <= pruned_row_begin) {
  2043         int32 row_offset_within_submatrix =
  2044             pruned_row_begin - row_offset,
  2045             new_num_rows = pruned_row_end - pruned_row_begin;
  2048                                        new_num_rows, 0, -1);
  2058   std::vector<NnetComputation::Command>::iterator
  2061   for (; iter != end; ++iter)
  2078   std::vector<int32> whole_variables, mapped_variables;
  2083   KALDI_ASSERT(whole_variables.size() > mapped_variables.size());
  2084   std::vector<int32> excluded_variables(whole_variables.size() -
  2085                                         mapped_variables.size());
  2086   std::vector<int32>::iterator end_iter =
  2087       std::set_difference(whole_variables.begin(), whole_variables.end(),
  2088                           mapped_variables.begin(), mapped_variables.end(),
  2089                           excluded_variables.begin());
  2094   for (std::vector<int32>::iterator iter = excluded_variables.begin();
  2095        iter != end_iter; ++iter) {
  2096     int32 variable_index = *iter;
  2097     const std::vector<Access> &variable_accesses =
  2099     std::vector<Access>::const_iterator viter = variable_accesses.begin(),
  2100         vend = variable_accesses.end();
  2101     for (; viter != vend; ++viter) {
  2104       int32 command_index = viter->command_index;
  2108         KALDI_VLOG(3) << "Cannot prune matrix " << m;
  2120   for (int32 s = 1; s < num_submatrices; s++) {
  2123     if (will_limit[m]) {
  2128                    matrix_num_rows < computation_->matrices[m].num_rows);
  2131       if (new_row_begin >= 0 &&
  2132           submat_info.num_rows + new_row_begin <= matrix_num_rows) {
  2146           submat_info.num_rows = matrix_num_rows;
  2160   for (int32 m = 1; m < num_matrices; m++) {
  2161     if (will_limit[m]) {
  2167         std::vector<Cindex> &cindexes = debug_info.cindexes;
  2169         cindexes.erase(cindexes.begin() + prune_info.row_end, cindexes.end());
  2170         cindexes.erase(cindexes.begin(),
  2171                        cindexes.begin() + prune_info.row_begin);
  2184   std::vector<bool> will_limit(num_matrices, false);
  2185   bool will_limit_at_least_one = false;
  2186   for (int32 m = 1; m < num_matrices; m++) {
  2205         will_limit[m] = true;
  2206         will_limit_at_least_one = true;
  2210   if (will_limit_at_least_one)
  2216                           int32 min_deriv_time,
  2217                           int32 max_deriv_time,
  2253                                         int32 *first_nonnegative_pos,
  2254                                         int32 *first_nonnegative_value,
  2255                                         int32 *num_nonnegative_indexes) {
  2257   const int32 *indexes_ptr = &(indexes[0]);
  2258   size_t pos = 0, size = indexes.size();
  2261   for (; pos < size; ++pos)
  2262     if (indexes_ptr[pos] >= 0)
  2266   *first_nonnegative_pos = static_cast<int32>(pos);
  2267   int32 n = indexes_ptr[pos];
  2268   *first_nonnegative_value = n;
  2271   for (; pos < size; ++pos,++n)
  2272     if (indexes_ptr[pos] != n)
  2275   *num_nonnegative_indexes = n - *first_nonnegative_value;
  2279   for (; pos < size; ++pos)
  2280     if (indexes_ptr[pos] >= 0)
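         // Below: when the nonnegative entries of a row-indexes vector form one
         // contiguous, consecutive run (everything else being -1), the row
         // operation is equivalent to a plain submatrix copy or add over that
         // block, and the command can be rewritten accordingly.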
  2291       num_indexes = computation->indexes.size();
  2292   for (int32 command_index = 0; command_index < num_commands;
  2297     int32 first_nonnegative_pos,
  2298         first_nonnegative_value,
  2299         num_nonnegative_indexes;
  2304         const std::vector<int32> &indexes = computation->indexes[indexes_index];
  2306                                         &first_nonnegative_pos,
  2307                                         &first_nonnegative_value,
  2308                                         &num_nonnegative_indexes)) {
  2311                                              num_nonnegative_indexes,
  2314                                              num_nonnegative_indexes,
  2340                                                int32 *num_leading_negatives,
  2341                                                int32 *num_trailing_negatives) {
  2343   const int32 *begin = &(vec[0]), *ptr = begin, *end = ptr + vec.size();
  2344   while (ptr != end && *ptr < 0)
  2348   KALDI_ASSERT(ptr != end && "Vector consists entirely of -1's.");
  2349   *num_leading_negatives = ptr - begin;
  2350   const int32 *ptr2 = end - 1;
  2357   *num_trailing_negatives = end - 1 - ptr2;
  2365                             int32 command_index) {
  2368   const std::vector<int32> &indexes = computation->indexes[c.arg3];
  2369   int32 num_leading_negatives, num_trailing_negatives;
  2371                                     &num_leading_negatives,
  2372                                     &num_trailing_negatives);
  2373   if (num_leading_negatives == 0 && num_trailing_negatives == 0)
  2376   int32 new_num_rows = static_cast<int32>(indexes.size()) -
  2377       num_leading_negatives - num_trailing_negatives;
  2379   std::vector<int32> new_indexes(indexes.begin() + num_leading_negatives,
  2380                                  indexes.begin() + num_leading_negatives +
  2383   computation->indexes.push_back(std::vector<int32>());
  2384   computation->indexes.back().swap(new_indexes);
  2386                                      num_leading_negatives, new_num_rows,
  2407     const std::vector<std::pair<int32, int32> > &vec,
  2408     int32 *num_leading_negatives,
  2409     int32 *num_trailing_negatives) {
  2411   const std::pair<int32, int32> *begin = &(vec[0]), *ptr = begin,
  2412       *end = ptr + vec.size();
  2413   while (ptr != end && ptr->first < 0)
  2417   KALDI_ASSERT(ptr != end && "Vector consists entirely of -1's.");
  2418   *num_leading_negatives = ptr - begin;
  2419   const std::pair<int32, int32> *ptr2 = end - 1;
  2423   while (ptr2->first < 0)
  2426   *num_trailing_negatives = end - 1 - ptr2;
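         // Below: the same 'snip' idea applied to indexes_multi commands:
         // leading and trailing (-1,-1) entries are trimmed, a new indexes_multi
         // vector is stored, and the command is presumably pointed at a
         // correspondingly narrowed submatrix.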
  2435                            int32 command_index) {
  2438   const std::vector<std::pair<int32, int32> > &indexes_multi =
  2440   int32 num_leading_negatives, num_trailing_negatives;
  2442                                     &num_leading_negatives,
  2443                                     &num_trailing_negatives);
  2444   if (num_leading_negatives == 0 && num_trailing_negatives == 0)
  2447   int32 new_num_rows = static_cast<int32>(indexes_multi.size()) -
  2448       num_leading_negatives - num_trailing_negatives;
  2450   std::vector<std::pair<int32, int32> > new_indexes_multi(
  2451       indexes_multi.begin() + num_leading_negatives,
  2452       indexes_multi.begin() + num_leading_negatives + new_num_rows);
  2454   computation->indexes_multi.push_back(std::vector<std::pair<int32, int32> >());
  2457                                      num_leading_negatives, new_num_rows,
  2478     const std::vector<std::pair<int32, int32> > &vec,
  2479     int32 *num_leading_identicals,
  2480     int32 *num_trailing_identicals) {
  2482   const std::pair<int32, int32> *begin = &(vec[0]), *ptr = begin,
  2483       *end = ptr + vec.size();
  2484   while (ptr != end && ptr->first == ptr->second)
  2489   KALDI_ASSERT(ptr != end && "Vector consists entirely of -1's.");
  2490   *num_leading_identicals = ptr - begin;
  2491   const std::pair<int32, int32> *ptr2 = end - 1;
  2495   while (ptr2->first == ptr2->second)
  2498   *num_trailing_identicals = end - 1 - ptr2;
  2508                             int32 command_index) {
  2511   const std::vector<std::pair<int32, int32> > &indexes_ranges =
  2513   int32 num_leading_identicals, num_trailing_identicals;
  2515                                     &num_leading_identicals,
  2516                                     &num_trailing_identicals);
  2517   if (num_leading_identicals == 0 && num_trailing_identicals == 0)
  2520   int32 new_num_rows = static_cast<int32>(indexes_ranges.size()) -
  2521       num_leading_identicals - num_trailing_identicals;
  2523   std::vector<std::pair<int32, int32> > new_indexes_ranges(
  2524       indexes_ranges.begin() + num_leading_identicals,
  2525       indexes_ranges.begin() + num_leading_identicals + new_num_rows);
  2527   computation->indexes_ranges.push_back(std::vector<std::pair<int32, int32> >());
  2530                                      num_leading_identicals, new_num_rows,
  2540   for (int32 command_index = 0; command_index < num_commands;
  2585     return SplitIndexes() && SplitCommands();
  2594   bool SplitIndexes();
  2598   bool SplitCommands();
  2605   bool SplitCommand(int32 command_index);
  2668   bool GetSplitInfo(std::vector<std::pair<int32, int32> >::const_iterator begin,
  2669                     std::vector<std::pair<int32, int32> >::const_iterator end,
  2687     std::vector<std::pair<int32, int32> >::const_iterator begin,
  2688     std::vector<std::pair<int32, int32> >::const_iterator end,
  2693   const int32 max_size_ratio = 2;
  2695   int32 size = end - begin;
  2697   int32 first = begin->first;
  2702   int32 initial_second_value = begin->second,
  2703       min_second_value = initial_second_value,
  2704       max_second_value = initial_second_value;
  2706   bool is_consecutive = true;
  2708     int32 second = begin[i].second;
  2709     if (begin[i].first != first || second < 0) return false;
  2711     if (second != initial_second_value + i)
  2712       is_consecutive = false;
  2713     if (second < min_second_value) min_second_value = second;
  2714     if (second > max_second_value) max_second_value = second;
  2720   if (is_consecutive) {
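         // Below: SplitIndexes() tries to describe each indexes_multi vector as
         // at most two single-submatrix ranges ('splits'); if either half fails
         // the check above, the split info is cleared and the command is left
         // untouched.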
  2733   split_info_.resize(num_indexes_multi);
  2734   for (int32 i = 0; i < num_indexes_multi; i++) {
  2735     const std::vector<std::pair<int32,int32> > &multi_index =
  2739     int32 num_pairs = multi_index.size();
  2744     int32 split_point = -1, initial_first = multi_index[0].first;
  2745     for (int32 j = 1; j < num_pairs; j++) {
  2746       if (multi_index[j].first != initial_first) {
  2751     if (split_point == -1) {
  2752       split_info.splits.resize(1);
  2753       split_info.splits[0].offset = 0;
  2754       if (!GetSplitInfo(multi_index.begin(), multi_index.end(),
  2755                         &(split_info.splits[0]))) {
  2756         split_info.splits.clear();
  2761       split_info.splits.resize(2);
  2762       split_info.splits[0].offset = 0;
  2763       split_info.splits[1].offset = split_point;
  2765       std::vector<std::pair<int32,int32> >::const_iterator mid_iter =
  2766           multi_index.begin() + split_point;
  2767       if (!GetSplitInfo(multi_index.begin(), mid_iter,
  2768                         &(split_info.splits[0])) ||
  2769           !GetSplitInfo(mid_iter, multi_index.end(),
  2770                         &(split_info.splits[1]))) {
  2771         split_info.splits.clear();
  2785   switch (command_type) {
  2788     default: return false;
  2790   int32 indexes_multi_index = command.arg2;
  2792                static_cast<int32>(split_info_.size()));
  2794   if (split_info.splits.empty())
  2800   std::vector<NnetComputation::Command> split_commands(
  2801       split_info.splits.size());
  2802   for (size_t i = 0; i < split_info.splits.size(); i++) {
  2814       switch (command_type) {
  2836       switch (command_type) {
  2867           KALDI_ERR << "Code error: un-handled case.";
  2871   command = split_commands[0];
  2873   for (size_t i = 1; i < split_commands.size(); i++) {
  2874     new_commands_.resize(new_commands_.size() + 1);
  2877     new_commands_.back().first = c + 1;
  2878     new_commands_.back().second = split_commands[i];
  2886   for (int32 c = 0; c < num_commands; c++)
  2887     if (SplitCommand(c))
  2889   if (!new_commands_.empty())
  2896   return splitter.Split();
  2945   int32 size = indexes.size();
  2947   int32 N = indexes[size-1].n + 1,
  2953   Index index(indexes[0]);
  2954   if (index.n != 0 || size % N != 0) {
  2963   if (indexes[1] == index) {
  2965   } else if (indexes[size / N] == index) {
  2966     n_stride = size / N;
  2971     for (stride = 2; stride < size / N; stride++) {
  2972       if (size % stride == 0 && indexes[stride] == index) {
  2977     if (n_stride == -1) {
  2987   int32 block_size = n_stride * N;
  2989   std::vector<int32> indexes_to_check;
  2991     indexes_to_check.resize(size);
  2993       indexes_to_check[i] = i;
  2995     int32 num_to_check = std::min<int32>(5, size);
  2996     indexes_to_check.resize(num_to_check);
  2997     for (int32 j = 0; j < num_to_check; j++)
  2998       indexes_to_check[j] = RandInt(0, size - 1);
  3001   for (std::vector<int32>::iterator iter = indexes_to_check.begin();
  3002        iter != indexes_to_check.end(); ++iter) {
  3004     Index index = indexes[i];
  3008       if (i + n_stride >= size || indexes[i + n_stride] != index)
  3012       if (i / block_size != (i + n_stride * (N-1)) / block_size) {
  3022       if (i - n_stride < 0 || indexes[i - n_stride] != index)
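         // Below: the Cindex variant mirrors the Index version above, guessing
         // the n-stride from the first repeated Cindex and then checking (fully
         // or by random spot checks) that the stride holds throughout.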
  3035   int32 size = cindexes.size();
  3037   int32 N = cindexes[size-1].second.n + 1,
  3041   Cindex cindex(cindexes[0]);
  3042   if (cindex.second.n != 0 || size % N != 0)
  3044   cindex.second.n = 1;
  3045   if (cindexes[1] == cindex) {
  3047   } else if (cindexes[size / N] == cindex) {
  3048     n_stride = size / N;
  3051     for (stride = 2; stride < size / N; stride++) {
  3052       if (size % stride == 0 && cindexes[stride] == cindex) {
  3057     if (stride == size / N)
  3060   int32 block_size = n_stride * N;
  3061   std::vector<int32> indexes_to_check;
  3063     indexes_to_check.resize(size);
  3065       indexes_to_check[i] = i;
  3067     int32 num_to_check = std::min<int32>(5, size);
  3068     indexes_to_check.resize(num_to_check);
  3069     for (int32 j = 0; j < num_to_check; j++)
  3070       indexes_to_check[j] = RandInt(0, size - 1);
  3073   for (std::vector<int32>::iterator iter = indexes_to_check.begin();
  3074        iter != indexes_to_check.end(); ++iter) {
  3077     int32 n = cindex.second.n;
  3079       cindex.second.n = n + 1;
  3080       if (i + n_stride >= size || cindexes[i + n_stride] != cindex)
  3084       if (i / block_size != (i + n_stride * (N-1)) / block_size)
  3087       cindex.second.n = n - 1;
  3088       if (i - n_stride < 0 || cindexes[i - n_stride] != cindex)
  3107                               const std::vector<Index> &indexes_in,
  3108                               std::vector<Index> *indexes_out) {
  3109   int32 size_in = indexes_in.size();
  3110   KALDI_ASSERT(size_in > 0 && indexes_in[size_in - 1].n == old_N - 1);
  3111   int32 block_size_in = n_stride * old_N,
  3112       block_size_out = n_stride * new_N;
  3114   indexes_out->resize((size_in / old_N) * new_N);
  3115   for (int32 i_in = 0; i_in < size_in; i_in++) {
  3116     if (indexes_in[i_in].n != 0)
  3118     Index index(indexes_in[i_in]);
  3119     int32 block_index = i_in / block_size_in,
  3120         offset_within_block = i_in % block_size_in;
  3123     int32 i_out = block_index * block_size_out +
  3124         offset_within_block;
  3125     for (int32 n = 0; n < new_N; n++, i_out += n_stride) {
  3127       (*indexes_out)[i_out] = index;
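         // Below: the computation expander takes a computation compiled for two
         // 'n' values and rewrites its matrices, debug info, submatrices,
         // precomputed indexes and commands so that it covers num_n_values_
         // values instead.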
  3143                       bool need_debug_info,
  3146       nnet_(nnet), misc_info_(misc_info),
  3148       need_debug_info_(need_debug_info),
  3149       num_n_values_(num_n_values),
  3150       expanded_computation_(expanded_computation) {
  3161   void InitStrideInfo();
  3166   void ComputeMatrixInfo();
  3170   void ComputeDebugInfo();
  3175   void ComputeSubmatrixInfo();
  3203   void ComputePrecomputedIndexes();
  3209   void ComputeCommands();
  3217   void EnsureDebugInfoExists(int32 submatrix_index);
  3237   bool GetNewSubmatLocationInfo(int32 submat_index,
  3238                                 int32 old_row_index,
  3239                                 int32 *new_row_index,
  3240                                 int32 *n_stride) const;
  3258   int32 GetNewMatrixLocationInfo(int32 old_matrix_index,
  3259                                  int32 old_row_index) const;
  3265   void ExpandIndexes(const std::vector<Index> &indexes,
  3266                      std::vector<Index> *indexes_expanded) const;
  3312   c_out->arg3 = expanded_computation_->indexes.size();
  3314   expanded_computation_->indexes.push_back(std::vector<int32>());
  3315   std::vector<int32> &new_indexes = expanded_computation_->indexes.back();
  3318   int32 old_size = old_indexes.size(),
  3319       num_n_values = num_n_values_,
  3320       new_s1_size = expanded_computation_->submatrices[s1].num_rows,
  3321       new_s2_size = expanded_computation_->submatrices[s2].num_rows;
  3325   new_indexes.resize(new_s1_size, -1);
  3334   for (int32 i1 = 0; i1 < old_size; i1++) {
  3335     int32 new_i1_n0, n_stride1;
  3336     if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &n_stride1)) {
  3339       int32 i2 = old_indexes[i1];  
  3340       int32 new_i2_n0, n_stride2;
  3345         bool ans = GetNewSubmatLocationInfo(s2, i2, &new_i2_n0, &n_stride2);
  3350         int32 new_i1 = new_i1_n0, new_i2 = new_i2_n0;
  3351         for (int32 n = 0; n < num_n_values;
  3352              ++n, new_i1 += n_stride1, new_i2 += n_stride2) {
  3353           KALDI_ASSERT(new_i1 < new_s1_size && new_i2 < new_s2_size);
  3354           new_indexes[new_i1] = new_i2;
  3370       num_rows_new = expanded_computation_->submatrices[s1].num_rows;
  3373   int32 num_n_values = num_n_values_;
  3376   c_out->arg2 = expanded_computation_->indexes_multi.size();
  3377   expanded_computation_->indexes_multi.push_back(
  3378       std::vector<std::pair<int32, int32> >());
  3379   std::vector<std::pair<int32, int32> > &new_indexes_multi =
  3380       expanded_computation_->indexes_multi.back();
  3381   const std::vector<std::pair<int32, int32> > &old_indexes_multi =
  3388   KALDI_ASSERT(static_cast<int32>(old_indexes_multi.size()) == num_rows_old);
  3391   new_indexes_multi.resize(num_rows_new,
  3392                            std::pair<int32,int32>(-1, -1));
  3394   for (int32 i1 = 0; i1 < num_rows_old; i1++) {
  3395     int32 new_i1_n0, n_stride1;
  3396     if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &n_stride1)) {
  3399       int32 s2 = old_indexes_multi[i1].first,
  3400           i2 = old_indexes_multi[i1].second;
  3401       int32 new_i2_n0, n_stride2;
  3407         bool ans = GetNewSubmatLocationInfo(s2, i2, &new_i2_n0, &n_stride2);
  3412         int32 new_i1 = new_i1_n0, new_i2 = new_i2_n0;
  3414         for (int32 n = 0; n < num_n_values;
  3415              n++, new_i1 += n_stride1, new_i2 += n_stride2) {
  3416           new_indexes_multi[new_i1].first = s2;
  3417           new_indexes_multi[new_i1].second = new_i2;
  3435       num_rows_new = expanded_computation_->submatrices[s1].num_rows;
  3438   int32 num_n_values = num_n_values_;
  3441   c_out->arg3 = expanded_computation_->indexes_ranges.size();
  3442   expanded_computation_->indexes_ranges.push_back(
  3443       std::vector<std::pair<int32, int32> >());
  3444   std::vector<std::pair<int32, int32> > &new_indexes_ranges =
  3445       expanded_computation_->indexes_ranges.back();
  3446   const std::vector<std::pair<int32, int32> > &old_indexes_ranges =
  3454   KALDI_ASSERT(static_cast<int32>(old_indexes_ranges.size()) == num_rows_old);
  3456   new_indexes_ranges.resize(num_rows_new,
  3457                            std::pair<int32,int32>(-1, -1));
  3459   for (int32 i1 = 0; i1 < num_rows_old; i1++) {
  3460     int32 new_i1_n0, n_stride1;
  3461     if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &n_stride1)) {
  3464       int32 i2_begin = old_indexes_ranges[i1].first,
  3465           i2_end = old_indexes_ranges[i1].second;
  3466       if (i2_end == i2_begin)
  3471       int32 i2_last = i2_end - 1;
  3472       int32 new_i2_n0_begin, new_i2_n0_last,
  3476       bool ans1 = GetNewSubmatLocationInfo(s2, i2_begin, &new_i2_n0_begin,
  3478           ans2 = GetNewSubmatLocationInfo(s2, i2_last, &new_i2_n0_last,
  3480       KALDI_ASSERT(ans1 && ans2 && new_i2_n0_last >= new_i2_n0_begin &&
  3481                    new_i2_n0_begin >= 0 && n_stride1 > 0 && n_stride2 > 0);
  3486       int32 new_i1 = new_i1_n0,
  3487           new_i2_begin = new_i2_n0_begin,
  3488           new_i2_end = new_i2_n0_last + 1;
  3489       for (int32 n = 0; n < num_n_values;
  3490            n++, new_i1 += n_stride1, new_i2_begin += n_stride2,
  3491                new_i2_end += n_stride2) {
  3492         new_indexes_ranges[new_i1].first = new_i2_begin;
  3493         new_indexes_ranges[new_i1].second = new_i2_end;
  3503   expanded_computation_->commands.resize(num_commands);
  3504   for (int32 command_index = 0; command_index < num_commands;
  3508         expanded_computation_->commands[command_index];
  3525         ExpandRowsCommand(c, &c_out);
  3529         ExpandRowsMultiCommand(c, &c_out);
  3532         ExpandRowRangesCommand(c, &c_out);
  3551   n_stride_.resize(num_matrices);
  3557   for (int32 m = 1; m < num_matrices; m++) {
  3561     bool full_check = true;
  3563     if (n_stride == 0) {
  3564       KALDI_ERR << "Problem encountered in 'shortcut' compilation: the computation "
  3565                 << "does not have the expected structure.  Try compiling with "
  3566                 << "--use-shortcut=false.";
  3568     n_stride_[m] = n_stride;
  3575   ComputeMatrixInfo();
  3576   if (need_debug_info_)
  3579     expanded_computation_->matrix_debug_info.clear();
  3580   ComputeSubmatrixInfo();
  3581   ComputePrecomputedIndexes();
  3584   expanded_computation_->need_model_derivative =
  3590   expanded_computation_->matrices.resize(num_matrices);
  3593   int32 old_num_n_values = 2,
  3594       new_num_n_values = num_n_values_;
  3595   for (int32 m = 1; m < num_matrices; m++) {
  3597     expanded_computation_->matrices[m].num_rows =
  3606   expanded_computation_->matrix_debug_info.resize(num_matrices);
  3608   expanded_computation_->matrix_debug_info[0] =
  3610   int32 num_n_values = num_n_values_;
  3611   for (int32 m = 1; m < num_matrices; m++) {
  3615         expanded_computation_->matrix_debug_info[m];
  3618         num_rows_out = expanded_computation_->matrices[m].num_rows;
  3620     info_out.cindexes.resize(num_rows_out);
  3623     for (int32 r = 0; r < num_rows_in; r++) {
  3624       if (info_in.cindexes[r].second.n == 0) {
  3625         int32 new_r = GetNewMatrixLocationInfo(m, r),
  3626             n_stride = n_stride_[m];
  3627         for (int32 n = 0; n < num_n_values; n++) {
  3628           int32 r_out = new_r + n * n_stride;
  3629           cindexes_out[r_out] = cindexes_in[r];
  3630           cindexes_out[r_out].second.n = n;
  3639   expanded_computation_->submatrices.resize(num_submatrices);
  3642   for (int32 s = 1; s < num_submatrices; s++) {
  3650         last_row_in = first_row_in + info_in.num_rows - 1;
  3651     if (!(debug_info_in.cindexes[first_row_in].second.n == 0 &&
  3652           debug_info_in.cindexes[last_row_in].second.n == 1)) {
  3653       std::ostringstream computation_ss;
  3654       std::vector<std::string> submat_strings;
  3657       KALDI_ERR << "Submatrix s" << s << " = " << submat_strings[s]
  3658                 << " has strange dimensions.  Computation is: "
  3659                 << computation_ss.str();
  3662     int32 first_row_out = GetNewMatrixLocationInfo(m, first_row_in),
  3663         last_row_out = GetNewMatrixLocationInfo(m, last_row_in),
  3664         new_num_rows = (last_row_out + 1 - first_row_out);
  3667         expanded_computation_->submatrices[s];
  3686   std::vector<bool> need_backprop(num_precomputed_indexes, false);
  3688   std::vector<int32> component_index(num_precomputed_indexes, -1);
  3690   for (int32 command_index = 0; command_index < num_commands; command_index++) {
  3700       need_backprop[c.arg2] = true;
  3705        p < expanded_computation_->component_precomputed_indexes.size();
  3707     delete expanded_computation_->component_precomputed_indexes[p].data;
  3708   expanded_computation_->component_precomputed_indexes.clear();
  3709   expanded_computation_->component_precomputed_indexes.resize(
  3710       num_precomputed_indexes);
  3712   for (int32 p = 1; p < num_precomputed_indexes; ++p) {
  3716         expanded_computation_->component_precomputed_indexes[p];
  3719                  "Input/output indexes not present in precomputed info of "
  3720                  "computation to be expanded.");
  3726     std::vector<Index> input_indexes, output_indexes;
  3733                                      output_indexes, need_backprop[p]);
  3738     new_info.data = expanded_precomputed_indexes;
  3745     int32 *new_row_index, int32 *n_stride) const {
  3748    new_row_offset = expanded_computation_->submatrices[submat_index].row_offset;
  3752   if (debug_info_in.cindexes[old_row_index + old_row_offset].second.n != 0)
  3754   *new_row_index = (GetNewMatrixLocationInfo(matrix_index,
  3755                                              old_row_index + old_row_offset) -
  3757   *n_stride = n_stride_[matrix_index];
  3762     int32 matrix_index, int32 old_row_index) const {
  3764   int32 n_stride = n_stride_[matrix_index],
  3765       old_num_n_values = 2, new_num_n_values = num_n_values_,
  3766       old_block_size = old_num_n_values * n_stride,
  3767       new_block_size = new_num_n_values * n_stride,
  3768       block_index = old_row_index / old_block_size,
  3769       offset_within_block = old_row_index % old_block_size;
  3777   int32 old_n_value = offset_within_block / n_stride,
  3778       index_within_subblock = offset_within_block % n_stride;
  3779   const std::vector<Cindex> &cindexes =
  3781   KALDI_ASSERT(old_n_value == cindexes[old_row_index].second.n &&
  3782                (old_n_value == 0 || old_n_value == 1));
  3787   int32 new_n_value = (old_n_value == 0 ? 0 : new_num_n_values - 1);
  3789   return block_index * new_block_size + index_within_subblock +
  3790       new_n_value * n_stride;
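
// Illustrative worked example (not part of the Kaldi source) of the arithmetic
// above.  With n_stride == 3, old_num_n_values == 2 and new_num_n_values == 5,
// an old row index of 10 decomposes and remaps as:
//   old_block_size      = 2 * 3 = 6,    new_block_size        = 5 * 3 = 15
//   block_index         = 10 / 6 = 1,   offset_within_block   = 10 % 6 = 4
//   old_n_value         = 4 / 3  = 1,   index_within_subblock = 4 % 3  = 1
//   new_n_value         = (old_n_value == 0 ? 0 : 5 - 1) = 4
//   new row             = 1 * 15 + 1 + 4 * 3 = 28
// i.e. rows with n == 1 in the mini-computation stand in for the last n value
// of the expanded computation.
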
  3795     const std::vector<Index> &indexes,
  3796     std::vector<Index> *indexes_expanded) const {
  3797   bool full_check = false;
  3801                     indexes, indexes_expanded);
  3807                        bool need_debug_info,
  3811                                need_debug_info, num_n_values,
  3812                                expanded_computation);
  3824                                           int32 *num_n_values_out) {
  3827   const std::vector<Index> &indexes = io_spec.indexes;
  3828   KALDI_ASSERT(!indexes.empty() && "Empty Indexes in computation request");
  3830   bool full_check = true;
  3832   int32 num_n_values = indexes.back().n + 1;
  3833   if (num_n_values <= 2) {
  3839   *num_n_values_out = num_n_values;
  3847                     indexes, &(mini_io_spec->indexes));
  3854                            int32 *num_n_values) {
  3855   size_t num_inputs = request.inputs.size(),
  3856       num_outputs = request.outputs.size();
  3857   mini_request->inputs.resize(num_inputs);
  3858   mini_request->outputs.resize(num_outputs);
  3864   for (size_t i = 0; i < num_inputs; i++) {
  3865     int32 this_num_n_values = 0;
  3868                                        &this_num_n_values))
  3871       *num_n_values = this_num_n_values;
  3873       if (this_num_n_values != *num_n_values)
  3877   for (size_t i = 0; i < num_outputs; i++) {
  3878     int32 this_num_n_values = 0;
  3881                                        &this_num_n_values))
  3883     if (this_num_n_values != *num_n_values)
  3918                                 std::vector<std::pair<int32, int32> > *matrix_to_pair);
  3925   static inline int32 NormalizeCindexes(std::vector<Cindex> *cindexes);
  3931   static void GetPairToMatrixMap(
  3932       std::vector<std::pair<int32, int32> > &matrix_to_pair,
  3941   static void ConvertListsToPairLists(
  3942       const std::vector<std::vector<int32> > &active_matrices,
  3943       const std::vector<std::pair<int32, int32> > &matrix_to_pair,
  3944       std::vector<std::vector<std::pair<int32, int32> > > *active_pairs);
  3955   static bool ListsAreEqualExceptForPossibleShift(
  3956       const std::vector<std::pair<int32, int32> > &a,
  3957       const std::vector<std::pair<int32, int32> > &b,
  3977   static bool FindFirstRepeat(
  3978       const std::vector<std::vector<std::pair<int32, int32> > > &active_pairs,
  3979       int32 time_shift_per_segment,
  3989   static void GetIdentifiedMatrices(
  3990       const std::vector<std::pair<int32, int32> > &pair_list1,
  3991       const std::vector<std::pair<int32, int32> > &pair_list2,
  3993       std::vector<int32> *matrix_list1,
  3994       std::vector<int32> *matrix_list2);
  4002   static void CheckIdentifiedMatrices(
  4004       const std::vector<int32> &list1,
  4005       const std::vector<int32> &list2,
  4006       int32 time_difference);
  4014   static void FormInfiniteLoop(int32 command1, int32 command2,
  4025   static void AddMatrixSwapCommands(
  4026       const std::vector<int32> &matrices1,
  4027       const std::vector<int32> &matrices2,
  4038   static void GetMatrixSwapOrder(
  4039       const std::vector<int32> &matrices1,
  4040       const std::vector<int32> &matrices2,
  4041       std::vector<std::pair<int32, int32> > *swaps);
  4057                                  const std::vector<int32> &splice_point_commands,
  4058                                  std::vector<std::vector<int32> > *active_matrices);
  4072   std::vector<int32> segment_ends;
  4077   int32 second_segment_begin = segment_ends[0],
  4078       third_segment_begin = segment_ends[1],
  4079       fourth_segment_begin = segment_ends[2];
  4080   int32 first_output_command_seg2 = -1,
  4081       first_output_command_seg3 = -1;
  4082   for (int32 c = second_segment_begin; c < third_segment_begin; c++)
  4084         first_output_command_seg2 < 0)
  4085       first_output_command_seg2 = c;
  4086   for (int32 c = third_segment_begin; c < fourth_segment_begin; c++)
  4088         first_output_command_seg3 < 0)
  4089       first_output_command_seg3 = c;
  4090   if (first_output_command_seg2 < 0 ||
  4091       first_output_command_seg3 < 0)
  4092     KALDI_ERR << "Could not locate output commands for segments 2 and 3.";
  4094       &command2 = computation.commands[first_output_command_seg2],
  4095       &command3 = computation.commands[first_output_command_seg3];
  4096   int32 seg2_node = command2.arg2, seg3_node = command3.arg2;
  4099       seg3_submatrix = command3.arg1;
  4103       seg3_matrix = computation.submatrices[seg3_submatrix].matrix_index;
  4105                computation.matrices[seg3_matrix].num_rows);
  4110   int32 t_offset = debug_info3.cindexes[0].second.t -
  4111       debug_info2.cindexes[0].second.t;
  4112   int32 num_rows = debug_info2.cindexes.size();
  4113   for (int32 r = 0; r < num_rows; r++) {
  4115                  debug_info2.cindexes[r].second.t + t_offset);
  4122     std::vector<Cindex> *cindexes) {
  4123   std::vector<Cindex>::iterator iter = cindexes->begin(),
  4124       end = cindexes->end();
  4126   for (; iter != end; iter++) {
  4127     if (iter->second.t != kNoTime) {
  4128       ans = iter->second.t;
  4134     KALDI_ERR << "All t values are kNoTime in matrix.";
  4136   iter = cindexes->begin();
  4137   for (; iter != end; iter++)
  4138     if (iter->second.t != kNoTime)
  4139       iter->second.t -= ans;
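
// Illustrative sketch (not part of the Kaldi source): NormalizeCindexes shifts
// the t values of a matrix's cindexes so that the first "real" t becomes zero,
// and returns the shift it removed.  The same idea on plain ints:
#include <vector>

// Entries equal to 'kNoTime' are left untouched; returns the offset subtracted.
int NormalizeTimes(std::vector<int> *times, int kNoTime) {
  int offset = kNoTime;
  for (int t : *times)
    if (t != kNoTime) { offset = t; break; }
  for (int &t : *times)
    if (t != kNoTime) t -= offset;
  return offset;
}
// Example: {kNoTime, 7, 8, 9} becomes {kNoTime, 0, 1, 2} and 7 is returned, so
// two matrices whose cindexes differ only by a time shift normalize to the same
// key when CreateMatrixPairs() hashes them below.
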
  4146     std::vector<std::pair<int32, int32> > *matrix_to_pair) {
  4147   typedef unordered_map<std::vector<Cindex>, int32,
  4149   int32 cur_vector_id = 1;
  4154   int32 num_matrices = computation.matrices.size();
  4155   matrix_to_pair->resize(num_matrices);
  4157   for (int32 m = 1; m < num_matrices; m++) {
  4160     int32 t_offset = NormalizeCindexes(&cindexes);
  4161     MapType::const_iterator iter = cindex_map.find(cindexes);
  4163     if (iter != cindex_map.end()) {
  4164       vector_id = iter->second;
  4166       vector_id = cur_vector_id++;
  4167       cindex_map[cindexes] = vector_id;
  4170     int32 unique_id = 2 * vector_id + (is_deriv ? 1 : 0);
  4171     (*matrix_to_pair)[m].first = unique_id;
  4172     (*matrix_to_pair)[m].second = t_offset;
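
// Illustrative note (not part of the Kaldi source): after CreateMatrixPairs(),
// each matrix m is summarized as a pair (unique_id, t_offset).  unique_id
// encodes which normalized cindex-vector the matrix holds and whether it is a
// derivative (2 * vector_id + is_deriv), and t_offset is the time shift removed
// by NormalizeCindexes().  Two matrices from different segments of a looped
// computation that hold the same quantity at different times therefore share a
// unique_id and differ only in t_offset, which is what
// ListsAreEqualExceptForPossibleShift() relies on further down.
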
  4178       std::vector<std::pair<int32, int32> > &matrix_to_pair,
  4180   int32 num_matrices = matrix_to_pair.size();
  4182   pair_to_matrix->clear();
  4183   for (int32 m = 1; m < num_matrices; m++)
  4184     (*pair_to_matrix)[matrix_to_pair[m]] = m;
  4190       const std::vector<std::vector<int32> > &active_matrices,
  4191       const std::vector<std::pair<int32, int32> > &matrix_to_pair,
  4192       std::vector<std::vector<std::pair<int32, int32> > > *active_pairs) {
  4193   active_pairs->clear();
  4194   active_pairs->resize(active_matrices.size());
  4195   int32 num_matrices = matrix_to_pair.size();
  4196   for (size_t seg = 0; seg < active_matrices.size(); seg++) {
  4197     const std::vector<int32> &this_active_matrix_list = active_matrices[seg];
  4198     std::vector<std::pair<int32, int32> > &this_active_pair_list =
  4199         (*active_pairs)[seg];
  4200     this_active_pair_list.resize(this_active_matrix_list.size());
  4201     std::vector<int32>::const_iterator iter = this_active_matrix_list.begin(),
  4202         end = this_active_matrix_list.end();
  4203     std::vector<std::pair<int32, int32> >::iterator
  4204         out_iter = this_active_pair_list.begin();
  4205     for (; iter != end; ++iter, ++out_iter) {
  4207       *out_iter = matrix_to_pair[*iter];
  4214     const std::vector<std::pair<int32, int32> > &a,
  4215     const std::vector<std::pair<int32, int32> > &b,
  4217   size_t size = a.size();
  4218   if (b.size() != size)
  4220   for (size_t i = 0; i < size; i++) {
  4221     const std::pair<int32, int32> &p1 = a[i],
  4223     if (p1.first != p2.first)
  4225     if (p2.second != p1.second + shift && p2.second != p1.second)
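
// Illustrative note (not part of the Kaldi source): the comparison above treats
// two active-pair lists as equal when each position has the same unique_id and
// the t_offset either matches exactly or differs by exactly 'shift'.  For
// example, with shift == 40:
//   a = { {4, 0}, {7, -40} }   and   b = { {4, 0}, {7, 0} }
// are accepted (the second pair is shifted by 40), whereas
//   a = { {4, 0} }             and   b = { {6, 0} }
// are rejected because the unique_ids differ.
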
  4233     const std::vector<std::vector<std::pair<int32, int32> > > &active_pairs,
  4234     int32 time_shift_per_segment,
  4236   int32 num_segments = active_pairs.size();
  4243   for (int32 s = 0; s < num_segments; s++) {
  4244     for (int32 t = s + 1; t < num_segments; t++) {
  4245       if (ListsAreEqualExceptForPossibleShift(active_pairs[s],
  4247                                               (t - s) * time_shift_per_segment)) {
  4259     const std::vector<std::pair<int32, int32> > &pair_list1,
  4260     const std::vector<std::pair<int32, int32> > &pair_list2,
  4262     std::vector<int32> *matrix_list1,
  4263     std::vector<int32> *matrix_list2) {
  4264   size_t size = pair_list1.size();
  4266   matrix_list1->clear();
  4267   matrix_list2->clear();
  4268   matrix_list1->reserve(size);
  4269   matrix_list2->reserve(size);
  4270   std::vector<std::pair<int32, int32> >::const_iterator
  4271       iter1 = pair_list1.begin(), end1 = pair_list1.end(),
  4272       iter2 = pair_list2.begin();
  4273   for (; iter1 != end1; ++iter1, ++iter2) {
  4274     if (iter1->second == iter2->second)
  4278     unordered_map<std::pair<int32, int32>, int32,
  4280         map_iter1 = pair_to_matrix.find(*iter1),
  4281         map_iter2 = pair_to_matrix.find(*iter2);
  4282     if (map_iter1 == pair_to_matrix.end() ||
  4283         map_iter2 == pair_to_matrix.end())
  4284       KALDI_ERR << "Could not find pair in map (code error)";
  4285     matrix_list1->push_back(map_iter1->second);
  4286     matrix_list2->push_back(map_iter2->second);
  4296     const std::vector<int32> &splice_point_commands,
  4297     std::vector<std::vector<int32> > *active_matrices) {
  4299   int32 num_splice_points = splice_point_commands.size();
  4300   active_matrices->clear();
  4301   active_matrices->resize(num_splice_points);
  4309   std::vector<int32> whole_submatrices;
  4311   for (int32 m = 1; m < num_matrices; m++) {
  4314     int32 s = whole_submatrices[m],  
  4318     for (int32 i = 0; i < num_splice_points; i++) {
  4319       int32 splice_point = splice_point_commands[i];
  4320       if (first_access < splice_point && last_access > splice_point) {
  4324         (*active_matrices)[i].push_back(m);
  4333     const std::vector<int32> &list1,
  4334     const std::vector<int32> &list2,
  4335     int32 time_difference) {
  4339   for (size_t i = 0; i < list1.size(); i++) {
  4340     int32 m1 = list1[i], m2 = list2[i];
  4342         &matrix_info1 = computation.matrices[m1],
  4343         &matrix_info2 = computation.matrices[m2];
  4345                  matrix_info1.num_cols == matrix_info2.num_cols &&
  4346                  matrix_info1.stride_type == matrix_info2.stride_type);
  4352     std::vector<Cindex>::const_iterator iter1 = debug_info1.cindexes.begin(),
  4354         iter2 = debug_info2.cindexes.begin();
  4355     for (; iter1 != end1; iter1++,iter2++) {
  4357                    iter2->second.n == iter1->second.n &&
  4359                     iter2->second.t == iter1->second.t + time_difference) &&
  4360                    iter2->second.x == iter1->second.x);
  4368     const std::vector<int32> &matrices1,
  4369     const std::vector<int32> &matrices2,
  4370     std::vector<std::pair<int32, int32> > *swaps) {
  4373   int32 num_matrices = matrices1.size();
  4374   std::vector<bool> processed(num_matrices, false);
  4375   std::vector<int32> queue;
  4378   int32 num_loops = 0;
  4379   for (; static_cast<int32>(swaps->size()) < num_matrices; num_loops++) {
  4380     for (int32 i = 0; i < num_matrices; i++) {
  4383       int32 m1 = matrices1[i], m2 = matrices2[i];
  4384       std::vector<int32>::const_iterator iter =
  4385           std::lower_bound(matrices2.begin(), matrices2.end(), m1);
  4386       if (iter == matrices2.end() || *iter != m1) {
  4389         swaps->push_back(std::pair<int32,int32>(m1, m2));
  4390         processed[i] = true;
  4392         int32 m1_pos_in_matrices2 = iter - matrices2.begin();
  4393         if (processed[m1_pos_in_matrices2]) {
  4397           swaps->push_back(std::pair<int32,int32>(m1, m2));
  4398           processed[i] = true;
  4420     const std::vector<int32> &matrices1,
  4421     const std::vector<int32> &matrices2,
  4423   std::vector<std::pair<int32, int32> > swaps;
  4428   GetMatrixSwapOrder(matrices1, matrices2, &swaps);
  4437   std::vector<int32> whole_submatrices;
  4439   size_t num_matrices = whole_submatrices.size();
  4441   for (size_t i = 0; i < swaps.size(); i++) {
  4442     int32 m1 = swaps[i].first, m2 = swaps[i].second;
  4444                  static_cast<size_t>(m2) < num_matrices);
  4445     int32 s1 = whole_submatrices[m1], s2 = whole_submatrices[m2];
  4449   computation->commands.push_back(goto_label_command);
  4457                command2 + 1 && command1 < command2);
  4462   computation->commands.resize(command2 + 1);
  4464   computation->commands[command2].arg1 = command1;
  4476                "You must request matrix debug info when compiling "
  4477                "looped computations.");
  4486   std::vector<int32> splice_points;
  4492   std::vector<std::vector<int32> > active_matrices;
  4494   FindActiveMatrices(*computation_, analyzer_, splice_points,
  4501   std::vector<std::pair<int32, int32> > matrix_to_pair;
  4506   GetPairToMatrixMap(matrix_to_pair, &pair_to_matrix);
  4509   std::vector<std::vector<std::pair<int32, int32> > > pair_lists;
  4510   ConvertListsToPairLists(active_matrices, matrix_to_pair,
  4516   if (!FindFirstRepeat(pair_lists,
  4517                        time_shift_per_segment,
  4519     KALDI_VLOG(2) << "Could not find repeats of variables.";
  4523   std::vector<int32> seg1_matrices, seg2_matrices;
  4524   GetIdentifiedMatrices(pair_lists[seg1], pair_lists[seg2],
  4526                         &seg1_matrices, &seg2_matrices);
  4528   int32 time_difference = time_shift_per_segment * (seg2 - seg1);
  4529   CheckIdentifiedMatrices(*computation_, seg1_matrices, seg2_matrices,
  4532   FormInfiniteLoop(splice_points[seg1], splice_points[seg2], computation_);
  4534   AddMatrixSwapCommands(seg1_matrices, seg2_matrices, computation_);
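
// Summary (descriptive comment, not part of the Kaldi source) of the looped
// optimization driven above:
//   1. FindTimeShift()              -- time shift per segment of the computation.
//   2. splice points                -- candidate command indexes at segment ends.
//   3. FindActiveMatrices()         -- matrices alive across each splice point.
//   4. CreateMatrixPairs() /        -- summarize each matrix as (unique_id,
//      ConvertListsToPairLists()       t_offset) so segments can be compared.
//   5. FindFirstRepeat()            -- first two splice points whose active sets
//                                      match up to a time shift.
//   6. GetIdentifiedMatrices() /    -- pair up the corresponding matrices and
//      CheckIdentifiedMatrices()       sanity-check their dimensions and cindexes.
//   7. FormInfiniteLoop() +         -- close the computation into a loop and add
//      AddMatrixSwapCommands()         swaps so state carries across iterations.
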
  4554   if (num_commands == 0)
  4556   for (int32 c = num_commands - 1; c >= 0; c--) {
  4559       if (static_cast<size_t>(dest_command) < computation->commands.size() &&
  4562       for (int32 d = 0; d + 1 < num_commands; d++) {
  4588   for (size_t i = 0; i < accesses.accesses.size(); i++) {
  4591         computation.commands[command_index];
  4618   for (size_t i = 0; i < accesses.accesses.size(); i++) {
  4634   bool operator () (const std::pair<int32, NnetComputation::Command> &p1,
  4635                     const std::pair<int32, NnetComputation::Command> &p2) const {
  4636     return p1.first < p2.first;
  4641     std::vector<std::pair<int32, NnetComputation::Command> > *new_commands,
  4643   int32 num_new_commands = new_commands->size(),
  4644       num_old_commands = computation->commands.size();
  4645   if (num_new_commands == 0)
  4651   std::stable_sort(new_commands->begin(), new_commands->end(),
  4652                    comparison_operator);
  4655     for (int32 i = 0; i + 1 < num_new_commands; i++) {
  4656       KALDI_ASSERT((*new_commands)[i].first <= (*new_commands)[i+1].first &&
  4657                    (*new_commands)[i].first >= 0 &&
  4658                    (*new_commands)[i+1].first <= num_old_commands);
  4661   std::vector<NnetComputation::Command> merged_commands;
  4662   merged_commands.reserve(num_old_commands + num_new_commands);
  4664   std::vector<std::pair<int32, NnetComputation::Command> >::const_iterator
  4665       new_commands_iter = new_commands->begin(),
  4666       new_commands_end = new_commands->end();
  4668   for (int32 old_command_index = 0; old_command_index <= num_old_commands;
  4669        old_command_index++) {
  4670     while (new_commands_iter != new_commands_end &&
  4671            new_commands_iter->first <= old_command_index) {
  4672       merged_commands.push_back(new_commands_iter->second);
  4673       ++new_commands_iter;
  4675     if (old_command_index < num_old_commands)
  4676       merged_commands.push_back(computation->commands[old_command_index]);
  4678   KALDI_ASSERT(merged_commands.size() == num_old_commands +
  4681   computation->commands.swap(merged_commands);
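
// Illustrative sketch (not part of the Kaldi source): the merge above inserts
// each new command immediately before the old command whose index it carries
// (an index equal to the old size appends at the end).  The same pattern on
// plain ints:
#include <utility>
#include <vector>

std::vector<int> MergeByIndex(const std::vector<int> &old_items,
                              const std::vector<std::pair<int, int> > &new_items) {
  // new_items must be sorted by .first (the insertion position).
  std::vector<int> merged;
  merged.reserve(old_items.size() + new_items.size());
  size_t j = 0;
  for (size_t i = 0; i <= old_items.size(); i++) {
    while (j < new_items.size() &&
           new_items[j].first <= static_cast<int>(i))
      merged.push_back(new_items[j++].second);
    if (i < old_items.size())
      merged.push_back(old_items[i]);
  }
  return merged;
}
// Example: old = {10, 11, 12}, new = {{0, 99}, {2, 98}, {3, 97}}
//          gives {99, 10, 11, 98, 12, 97}.
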
  4704                              int32 memory_compression_level,
  4705                              int32 middle_command,
  4707       nnet_(nnet), memory_compression_level_(memory_compression_level),
  4708       middle_command_(middle_command), computation_(computation) { }
  4715   void ProcessMatrix(int32 m);
  4719   void ModifyComputation();
  4750                        int32 backward_command_index,
  4753         m(m), compression_command_index(forward_command_index),
  4754         uncompression_command_index(backward_command_index),
  4755         compression_type(compression_type), range(range),
  4756         truncate(truncate) { }
  4772   std::vector<int32> whole_submatrices;
  4778   std::vector<std::pair<int32, NnetComputation::Command> >
  4780   pairs_to_insert.reserve(compress_info_.size() * 2);
  4781   for (size_t i = 0; i < compress_info_.size(); i++) {
  4783     int32 s = whole_submatrices[info.m];
  4787     std::pair<int32, NnetComputation::Command> p1(
  4792     pairs_to_insert.push_back(p1);
  4793     std::pair<int32, NnetComputation::Command> p2(
  4796     pairs_to_insert.push_back(p2);
  4807   for (int32 m = 1; m < num_matrices; m++)
  4809   if (!compress_info_.empty())
  4810     ModifyComputation();
  4814   if (analyzer_.matrix_accesses[m].is_output) {
  4820   const std::vector<Access> &accesses = analyzer_.matrix_accesses[m].accesses;
  4825   std::vector<Access>::const_iterator iter = std::lower_bound(accesses.begin(),
  4832   if (iter == accesses.end()) {
  4836   if (iter == accesses.begin()) {
  4844   const Access &backward_access = iter[0],
  4845       &forward_access = iter[-1];
  4846   KALDI_ASSERT(forward_access.command_index < middle_command_ &&
  4853   bool backward_access_is_last_access = (accesses.end() == iter + 1);
  4856       forward_command_index = forward_access.command_index;
  4860   if (memory_compression_level_ >= 1 &&
  4861       backward_access_is_last_access &&
  4864     int32 component_index = backward_command.arg1;
  4868     if (component->Type() == "RectifiedLinearComponent") {
  4869       compress_info_.push_back(
  4871                              backward_command_index,
  4884   if (memory_compression_level_ >= 2) {
  4885     compress_info_.push_back(
  4887                            backward_command_index,
  4900                                int32 memory_compression_level,
  4902   if (memory_compression_level == 0 || computation->commands.empty())
  4912   int32 middle_command = -1;
  4913   for (size_t i = 0; i < computation->commands.size(); i++) {
  4915       if (middle_command < 0) {
  4916         middle_command = static_cast<int32>(i);
  4918         KALDI_WARN << "Found more than one command of type kNoOperationMarker "
  4919             "in non-looped computation.";
  4926   if (middle_command == -1) {
  4929   if (memory_compression_level >= 1) {
  4930     int64 bytes_used_initial, bytes_used_final;
  4936                                    middle_command, computation);
  4941       if (bytes_used_final != bytes_used_initial) {
  4942         KALDI_VLOG(2) << "Memory compression reduced memory use from "
  4943                       << bytes_used_initial << " to "
  4944                       << bytes_used_final << " bytes.";
  4953   std::lock_guard<std::mutex> lock(mutex_);
  4955   CacheType::iterator iter = computation_cache_.find(&in_request);
  4956   if (iter == computation_cache_.end()) {
  4959     std::shared_ptr<const NnetComputation> ans = iter->second.first;
  4962     access_queue_.splice(access_queue_.end(), access_queue_,
  4963                          iter->second.second);
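
// Illustrative sketch (not part of the Kaldi source): the Find()/Insert() code
// above appears to implement a small LRU cache -- a map from request to
// (computation, position in an access queue), where a hit splices the entry to
// the back of the queue so the front is always the least recently used entry.
// A minimal stand-alone version of the same idea (TinyLru and its members are
// hypothetical names, not Kaldi APIs):
#include <list>
#include <string>
#include <unordered_map>
#include <utility>

class TinyLru {
 public:
  explicit TinyLru(size_t capacity) : capacity_(capacity) {}

  // Returns a pointer to the cached value, or NULL; a hit marks the key as
  // most recently used by splicing it to the back of the queue.
  const std::string *Find(int key) {
    auto it = map_.find(key);
    if (it == map_.end()) return nullptr;
    queue_.splice(queue_.end(), queue_, it->second.second);  // move to back.
    return &it->second.first;
  }

  // Assumes each key is inserted at most once.
  void Insert(int key, const std::string &value) {
    if (capacity_ > 0 && map_.size() >= capacity_) {  // evict least recently used.
      map_.erase(queue_.front());
      queue_.pop_front();
    }
    queue_.push_back(key);
    map_[key] = std::make_pair(value, --queue_.end());
  }

 private:
  size_t capacity_;
  std::list<int> queue_;  // front = least recently used.
  std::unordered_map<int, std::pair<std::string, std::list<int>::iterator> > map_;
};
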
  4970     cache_capacity_(cache_capacity) {
  4978   std::lock_guard<std::mutex> lock(mutex_);
  4981     const CacheType::iterator iter =
  4997   std::shared_ptr<const NnetComputation> computation(computation_in);
  5002       std::make_pair(request, std::make_pair(computation, ait)));
  5021   int32 computation_cache_size;
  5022   ExpectToken(is, binary, "<ComputationCacheSize>");
  5028   for (size_t c = 0; c < computation_cache_size; c++) {
  5030     request.Read(is, binary);
  5032     computation->Read(is, binary);
  5033     Insert(request, computation);
  5043   for (; iter != end; ++iter) {
  5052   WriteToken(os, binary, "<ComputationCacheSize>");
  5054   WriteToken(os, binary, "<ComputationCache>");
  5057     iter->first->Write(os, binary);
  5058     iter->second.first->Write(os, binary);
  5068   for (; iter != end; ++iter)
 