29 std::vector<int32*> *submatrix_args) {
30 submatrix_args->clear();
35 submatrix_args->push_back(&c->
arg1);
38 submatrix_args->push_back(&c->
arg1);
39 submatrix_args->push_back(&c->
arg2);
42 submatrix_args->push_back(&c->
arg3);
43 submatrix_args->push_back(&c->
arg4);
47 submatrix_args->push_back(&c->
arg3);
48 submatrix_args->push_back(&c->
arg4);
49 submatrix_args->push_back(&c->
arg5);
50 submatrix_args->push_back(&c->
arg6);
57 submatrix_args->push_back(&c->
arg1);
58 submatrix_args->push_back(&c->
arg2);
64 submatrix_args->push_back(&c->
arg1);
67 submatrix_args->push_back(&c->
arg1);
81 std::vector<int32*> *submatrix_args) {
82 submatrix_args->clear();
83 std::vector<NnetComputation::Command>::iterator iter = commands->begin(),
84 end = commands->end();
85 std::vector<int32*> this_submatrix_args;
86 for (; iter != end; ++iter) {
88 submatrix_args->insert(submatrix_args->end(),
89 this_submatrix_args.begin(),
90 this_submatrix_args.end());
97 std::vector<int32*> *matrix_args) {
99 matrix_args->reserve(computation->
submatrices.size());
100 for (
int32 s = 1; s < num_submatrices; s++)
101 matrix_args->push_back(&(computation->
submatrices[s].matrix_index));
106 std::vector<int32*> *indexes_multi_args) {
107 indexes_multi_args->clear();
108 std::vector<NnetComputation::Command>::iterator iter = commands->begin(),
109 end = commands->end();
110 for (; iter != end; ++iter) {
116 indexes_multi_args->push_back(&(command.
arg2));
122 std::vector<int32*> *indexes_ranges_args) {
123 indexes_ranges_args->clear();
124 std::vector<NnetComputation::Command>::iterator iter = commands->begin(),
125 end = commands->end();
126 for (; iter != end; ++iter) {
129 indexes_ranges_args->push_back(&(command.
arg3));
134 std::vector<int32*> *indexes_args) {
135 indexes_args->clear();
136 std::vector<NnetComputation::Command>::iterator iter = commands->begin(),
137 end = commands->end();
138 for (; iter != end; ++iter) {
142 indexes_args->push_back(&(command.
arg3));
191 return submat.matrix_index +
192 19553 * submat.row_offset +
193 29297 * submat.num_rows +
194 42209 * submat.col_offset +
195 56527 * submat.num_cols;
209 const std::vector<T> *ptr2)
const {
210 size_t size1 = ptr1->size(), size2 = ptr2->size();
211 if (size1 < size2)
return true;
212 else if (size1 > size2)
return false;
213 else return (*ptr1 < *ptr2);
222 const std::vector<int32> &to_remove,
223 std::vector<int32> *renumbering);
230 std::vector<int32> *renumbering);
257 const std::vector<bool> &used,
258 std::vector<int32> *renumbering) {
259 renumbering->clear();
260 renumbering->reserve(used.size());
261 std::vector<bool>::const_iterator iter = used.begin(), end = used.end();
263 for (; iter != end; ++iter) {
264 if (*iter) renumbering->push_back(cur_index++);
265 else renumbering->push_back(-1);
272 int32 old_num_elements,
273 const std::vector<int32> &to_remove,
274 std::vector<int32> *renumbering) {
276 renumbering->clear();
277 renumbering->resize(old_num_elements, 0);
278 int32 num_remove = to_remove.size();
279 for (
int32 r = 0; r < num_remove; r++) {
280 int32 this_remove = to_remove[r];
283 KALDI_ASSERT(this_remove > 0 && this_remove < old_num_elements);
284 (*renumbering)[this_remove] = -1;
286 int32 cur_number = 0;
287 for (
int32 i = 0;
i < old_num_elements;
i++) {
288 if ((*renumbering)[
i] != -1)
289 (*renumbering)[
i] = cur_number++;
292 static_cast<int32>(to_remove.size()));
300 std::vector<std::pair<int32, int32> > memo_to_commands;
301 std::vector<int32> memo_indexes_used;
302 std::pair<int32, int32> blank(-1, -1);
304 for (
int32 c = 0; c < num_commands; c++) {
308 if (memo_index > 0) {
309 if (memo_to_commands.size() <=
static_cast<size_t>(memo_index))
310 memo_to_commands.resize(memo_index + 1, blank);
312 memo_to_commands[memo_index].first = c;
313 memo_indexes_used.push_back(memo_index);
317 if (memo_index > 0) {
318 if (memo_to_commands.size() <=
static_cast<size_t>(memo_index))
319 memo_to_commands.resize(memo_index + 1, blank);
321 memo_to_commands[memo_index].second == -1);
322 memo_to_commands[memo_index].second = c;
326 int32 new_memo_index = 1;
327 for (std::vector<int32>::iterator iter = memo_indexes_used.begin();
328 iter != memo_indexes_used.end(); ++iter) {
329 int32 memo_index = *iter;
330 int32 propagate_command = memo_to_commands[memo_index].first,
331 backprop_command = memo_to_commands[memo_index].second;
333 "Propagate generates memo but backprop doesn't use it.");
341 std::vector<int32*> *submatrix_args) {
344 size_t extra_size = 0;
347 submatrix_args->reserve(submatrix_args->size() + extra_size);
350 std::vector<std::pair<int32, int32> > &indexes_multi =
352 std::vector<std::pair<int32, int32> >::iterator
353 iter = indexes_multi.begin(), end = indexes_multi.end();
354 for (; iter != end; ++iter)
355 if (iter->first != -1)
356 submatrix_args->push_back(&(iter->first));
368 std::vector<int32*> submatrix_args;
370 std::vector<int32*>::iterator iter = submatrix_args.begin(),
371 end = submatrix_args.end();
372 int32 cur_submatrix_index = -1;
375 for (; iter != end; ++iter) {
376 int32 submatrix_index = **iter;
377 if (submatrix_index > 0 && submatrix_index != cur_submatrix_index) {
378 cur_submatrix_index = submatrix_index;
392 for (
int32 s = 1; s < num_submatrices; s++) {
406 int32 cur_index = 1, num_submatrices_orig =
413 for (int32 s = 1; s < num_submatrices_orig; s++) {
415 const NnetComputation::SubMatrixInfo &info =
417 if (submat_map.count(info) > 0) {
429 std::vector<int32*> submatrix_args;
431 std::vector<int32*>::iterator iter = submatrix_args.begin(),
432 end = submatrix_args.end();
433 for (; iter != end; ++iter) {
440 **iter = new_submatrix_index;
443 std::vector<NnetComputation::SubMatrixInfo> new_submatrices;
445 new_submatrices.reserve(num_submatrices_old);
446 for (
int32 s = 0; s < num_submatrices_old; s++)
455 std::vector<int32*> matrix_args;
457 for (
int32 s = 1; s < num_submatrices; s++) {
464 *matrix_index = new_matrix_index;
467 std::vector<NnetComputation::MatrixInfo> new_matrices;
469 new_matrices.reserve(num_matrices_old);
470 for (
int32 m = 0; m < num_matrices_old; m++)
475 std::vector<NnetComputation::MatrixDebugInfo> new_debug_info;
477 KALDI_ASSERT(debug_info_size == 0 || debug_info_size == num_matrices_old);
478 new_debug_info.reserve(debug_info_size);
479 for (
int32 m = 0; m < debug_info_size; m++) {
504 if (num_indexes_multi == 0)
506 std::vector<bool> indexes_multi_used(num_indexes_multi,
false);
507 std::vector<int32*> indexes_multi_args;
509 std::vector<int32*>::iterator iter = indexes_multi_args.begin(),
510 end = indexes_multi_args.end();
511 for (; iter != end; ++iter) {
512 int32 indexes_multi_index = **iter;
514 indexes_multi_index < num_indexes_multi);
515 indexes_multi_used[indexes_multi_index] = 1;
519 std::vector<int32> old_to_new(num_indexes_multi, -1);
522 if (new_num_indexes_multi == num_indexes_multi)
524 std::vector<std::vector<std::pair<int32, int32> > >
525 new_indexes_multi(new_num_indexes_multi);
526 for (
int32 i = 0;
i < num_indexes_multi;
i++) {
527 if (old_to_new[
i] != -1)
532 for (iter = indexes_multi_args.begin(); iter != end; ++iter)
533 **iter = old_to_new[**iter];
541 if (old_indexes_multi_size == 0)
547 std::vector<int32> indexes_multi_old_to_new(old_indexes_multi_size);
548 typedef std::vector<std::pair<int32,int32> > PairVectorType;
549 typedef std::map<
const PairVectorType*,
int32,
551 MapType indexes_multi_map;
553 std::pair<MapType::iterator, bool> p =
554 indexes_multi_map.insert(std::pair<const PairVectorType*, int32>(
557 indexes_multi_old_to_new[
i] = cur_index++;
559 int32 index_from_map = p.first->second;
560 indexes_multi_old_to_new[
i] = index_from_map;
563 if (cur_index == old_indexes_multi_size)
565 std::vector<PairVectorType> new_indexes_multi(cur_index);
566 for (int32
i = 0;
i < old_indexes_multi_size;
i++) {
567 int32 new_index = indexes_multi_old_to_new[
i];
572 std::vector<int32*> indexes_multi_args;
574 std::vector<int32*>::const_iterator iter = indexes_multi_args.begin(),
575 end = indexes_multi_args.end();
576 for (; iter != end; ++iter)
577 **iter = indexes_multi_old_to_new[**iter];
583 if (old_num_indexes == 0)
585 std::vector<int32*> indexes_args;
588 std::vector<bool> indexes_seen(old_num_indexes,
false);
589 std::vector<int32*>::const_iterator iter = indexes_args.begin(),
590 end = indexes_args.end();
591 for (; iter != end; ++iter)
592 indexes_seen[**iter] =
true;
594 std::vector<int32> old_to_new_index(old_num_indexes);
595 typedef std::map<const std::vector<int32>*,
int32,
600 for (int32
i = 0;
i < old_num_indexes;
i++) {
601 if (!indexes_seen[
i]) {
602 old_to_new_index[
i] = -1;
604 std::pair<MapType::iterator, bool> p =
605 indexes_map.insert(std::pair<
const std::vector<int32>*, int32>(
608 old_to_new_index[
i] = cur_index++;
610 int32 index_from_map = p.first->second;
611 old_to_new_index[
i] = index_from_map;
615 if (cur_index == old_num_indexes)
617 std::vector<std::vector<int32> > new_indexes(cur_index);
618 for (int32
i = 0;
i < old_num_indexes;
i++) {
619 int32 new_index = old_to_new_index[
i];
626 for (iter = indexes_args.begin(); iter != end; ++iter) {
627 int32 old_index = **iter;
628 KALDI_ASSERT(old_index >= 0 && old_index < old_num_indexes);
629 int32 new_index = old_to_new_index[old_index];
637 if (old_num_indexes_ranges == 0)
639 std::vector<int32*> indexes_ranges_args;
642 std::vector<bool> is_seen(old_num_indexes_ranges,
false);
643 std::vector<int32*>::const_iterator iter = indexes_ranges_args.begin(),
644 end = indexes_ranges_args.end();
645 for (; iter != end; ++iter)
646 is_seen[**iter] =
true;
648 std::vector<int32> old_to_new_index(old_num_indexes_ranges);
649 typedef std::map<const std::vector<std::pair<int32, int32> >*,
int32,
653 for (int32
i = 0;
i < old_num_indexes_ranges;
i++) {
655 old_to_new_index[
i] = -1;
657 std::pair<MapType::iterator, bool> p =
659 std::pair<
const std::vector<std::pair<int32, int32> >*, int32>(
662 old_to_new_index[
i] = cur_index++;
664 int32 index_from_map = p.first->second;
665 old_to_new_index[
i] = index_from_map;
669 if (cur_index == old_num_indexes_ranges)
671 std::vector<std::vector<std::pair<int32, int32> > > new_indexes_ranges(
673 for (int32
i = 0;
i < old_num_indexes_ranges;
i++) {
674 int32 new_index = old_to_new_index[
i];
681 for (iter = indexes_ranges_args.begin(); iter != end; ++iter) {
682 int32 old_index = **iter;
683 KALDI_ASSERT(old_index >= 0 && old_index < old_num_indexes_ranges);
684 int32 new_index = old_to_new_index[old_index];
705 std::remove_if(computation->
commands.begin(),
715 config_(config), nnet_(nnet),
717 already_called_merge_variables_(false) {
730 for (
int32 command_index = 0; command_index < num_commands;
735 int32 s1 = -1, s2 = -1;
763 if (s1 > 0 && s2 > 0) {
764 std::pair<bool,bool> p =
MayBeMerged(command_index, s1, s2);
766 DoMerge(command_index, s1, s2);
768 }
else if (p.second) {
769 DoMerge(command_index, s2, s1);
796 computation.
matrices[a.matrix_index];
808 std::vector<int32> variable_indexes;
810 std::vector<int32>::const_iterator iter = variable_indexes.begin(),
811 end = variable_indexes.end();
812 for (; iter != end; ++iter) {
821 int32 s_to_discard) {
829 KALDI_ASSERT(m_to_keep != m_to_discard && m_to_keep > 0 && m_to_discard > 0);
833 std::vector<int32>::const_iterator iter =
836 for (; iter != end; ++iter) {
837 int32 submatrix_index = *iter;
848 const std::vector<MatrixAccesses> &matrix_accesses =
875 int32 dealloc_keep = matrix_accesses[m_to_keep].deallocate_command,
876 dealloc_discard = matrix_accesses[m_to_discard].deallocate_command;
877 if (dealloc_discard != -1) {
900 int32 alloc_keep = matrix_accesses[m_to_keep].allocate_command,
901 alloc_discard = matrix_accesses[m_to_discard].allocate_command;
910 int32 matrix_whose_zeroing_to_discard;
911 if (discard_alloc_command.command_type ==
kAcceptInput) {
913 matrix_whose_zeroing_to_discard = m_to_keep;
916 matrix_whose_zeroing_to_discard = m_to_discard;
920 int32 zeroing_command_to_discard =
921 matrix_accesses[matrix_whose_zeroing_to_discard].accesses[0].command_index;
925 zeroing_command.
alpha == 0.0) {
947 KALDI_ASSERT(s1 > 0 && s2 > 0 && static_cast<size_t>(command_index) <
950 return std::pair<bool,bool>(
false,
false);
954 if (m1 == m2)
return std::pair<bool,bool>(
false,
false);
955 std::vector<int32> variable_indexes;
958 std::vector<int32>::iterator iter = variable_indexes.begin(),
959 end = variable_indexes.end();
961 for (; iter != end; ++iter)
963 return std::pair<bool,bool>(
false,
false);
966 &m2_access = matrix_accesses[m2];
968 if ((m1_access.
is_input && m2_access.is_input) ||
969 (m1_access.
is_output && m2_access.is_output))
970 return std::pair<bool,bool>(
false,
false);
973 m2_access.is_input || m2_access.is_output) &&
976 return std::pair<bool,bool>(
false,
false);
992 return std::pair<bool,bool>(
false,
false);
1002 return std::pair<bool,bool>(left, right);
1007 return std::pair<bool,bool>(left, right);
1011 return std::pair<bool,bool>(
false,
false);
1032 bool CanBeExtended(
int32 dest_submatrix_index,
1033 int32 src_submatrix_index);
1038 void Extend(
int32 *dest_submatrix_index,
int32 *src_submatrix_index);
1042 void FixComputation();
1046 void FixDebugInfo();
1068 min_proportion_(0.8),
1075 for (
int32 m = 1; m < num_matrices; m++)
1080 std::vector<NnetComputation::Command>::iterator
1083 for (; command_iter != command_end; ++command_iter) {
1100 int32 src_submatrix_index) {
1104 if (src_submatrix.
matrix_index == dest_submatrix.matrix_index)
1127 dest_submatrix.row_offset + dest_submatrix.num_rows ==
1128 dest_matrix_orig_num_rows);
1133 int32 *src_submatrix_index) {
1142 int32 new_dest_num_rows = dest_submatrix.row_offset + src_matrix.
num_rows;
1148 if (new_dest_num_rows > dest_matrix.num_rows) {
1149 dest_matrix.num_rows = new_dest_num_rows;
1152 SubMatrixInfo(dest_submatrix.matrix_index, 0, new_dest_num_rows,
1153 0, dest_matrix.num_cols));
1160 dest_submatrix.num_rows = src_matrix.
num_rows;
1175 std::vector<NnetComputation::Command>::iterator
1178 bool changed =
false;
1179 for (; command_iter != command_end; ++command_iter) {
1182 command.
alpha == 1.0) {
1183 int32 dest_submatrix_index = command.
arg1,
1184 src_submatrix_index = command.
arg2;
1185 if (
CanBeExtended(dest_submatrix_index, src_submatrix_index)) {
1198 std::vector<NnetComputation::Command>::iterator
1201 std::vector<int32> whole_submatrices;
1203 for (; command_iter != command_end; ++command_iter) {
1209 new_s = whole_submatrices[m];
1214 command.
arg1 = new_s;
1220 new_s = whole_submatrices[m];
1237 command.
arg1 = new_s;
1249 for (
int32 m = 1; m < num_matrices; m++) {
1253 old_num_rows = debug_info.
cindexes.size();
1254 if (new_num_rows != old_num_rows) {
1255 debug_info.
cindexes.resize(new_num_rows);
1256 int32 num_extra_rows = new_num_rows - old_num_rows;
1259 for (
int32 r = old_num_rows; r < new_num_rows; r++) {
1288 void ConsolidateUpdateForComponent(
1290 const std::vector<int32> &backprop_commands);
1296 void AddCommandsToComputation();
1313 int32 ConsolidateSubmatrices(
1314 const std::vector<int32> &commands,
1315 const std::vector<int32> &submatrices);
1324 void AppendDebugInfoForSubmatrix(
1325 int32 submatrix_index,
1348 int32 submatrix_index,
1355 int32 matrix_index = submatrix_info.matrix_index;
1356 KALDI_ASSERT(matrix_index > 0 && static_cast<size_t>(matrix_index) <
1360 debug_info->
is_deriv = src_info.is_deriv;
1363 int32 row_begin = submatrix_info.row_offset,
1364 row_end = row_begin + submatrix_info.num_rows;
1366 src_info.cindexes.begin() + row_begin,
1367 src_info.cindexes.begin() + row_end);
1372 const std::vector<int32> &commands,
1373 const std::vector<int32> &submatrices) {
1374 int32 num_submatrices = submatrices.size();
1375 KALDI_ASSERT(num_submatrices > 1 && commands.size() == submatrices.size());
1376 int32 first_submatrix = submatrices[0];
1381 for (
int32 i = 0;
i < num_submatrices;
i++) {
1382 int32 submatrix = submatrices[
i];
1386 AppendDebugInfoForSubmatrix(submatrix, &debug_info);
1399 extra_commands_[0].push_back(
1401 extra_commands_[0].push_back(
1404 final_deallocate_commands_.push_back(
1406 int32 new_matrix_index =
1411 int32 row_offset = 0;
1412 for (
int32 i = 0;
i < num_submatrices;
i++) {
1413 int32 submatrix_index = submatrices[
i];
1418 row_offset, this_num_rows,
1427 extra_commands_[commands[
i]].push_back(c);
1428 row_offset += this_num_rows;
1431 return new_whole_submatrix;
1437 new_num_commands = old_num_commands +
1438 static_cast<int32>(final_commands_.size() +
1439 final_deallocate_commands_.size());
1440 for (
size_t i = 0;
i < extra_commands_.size();
i++)
1441 new_num_commands += static_cast<int32>(extra_commands_[
i].size());
1442 std::vector<NnetComputation::Command> new_commands;
1443 new_commands.reserve(new_num_commands);
1444 for (
int32 c = 0; c < old_num_commands; c++) {
1445 new_commands.insert(new_commands.end(),
1446 extra_commands_[c].begin(), extra_commands_[c].end());
1449 new_commands.insert(new_commands.end(),
1450 final_commands_.begin(), final_commands_.end());
1451 new_commands.insert(new_commands.end(),
1452 final_deallocate_commands_.begin(),
1453 final_deallocate_commands_.end());
1461 int32 component_index,
1462 const std::vector<int32> &backprop_commands) {
1463 const Component *component = nnet_.GetComponent(component_index);
1464 int32 num_backprop_commands = backprop_commands.size();
1469 std::vector<int32> input_submatrices(num_backprop_commands),
1470 output_submatrices(num_backprop_commands),
1471 output_deriv_submatrices(num_backprop_commands);
1473 for (
int32 i = 0;
i < num_backprop_commands;
i++) {
1474 int32 command_index = backprop_commands[
i];
1481 output_submatrix = command.
arg4,
1482 output_deriv_submatrix = command.
arg5;
1484 (output_submatrix != 0) == need_output);
1485 input_submatrices[
i] = input_submatrix;
1486 output_submatrices[
i] = output_submatrix;
1487 output_deriv_submatrices[
i] = output_deriv_submatrix;
1491 int32 input_submatrix = (need_input ?
1492 ConsolidateSubmatrices(backprop_commands,
1493 input_submatrices) : 0),
1494 output_submatrix = (need_output ?
1495 ConsolidateSubmatrices(backprop_commands,
1496 output_submatrices) : 0),
1497 output_deriv_submatrix = ConsolidateSubmatrices(backprop_commands,
1498 output_deriv_submatrices);
1499 int32 precomputed_indexes_index = 0,
1500 input_deriv_submatrix = 0,
1503 input_submatrix, output_submatrix,
1504 output_deriv_submatrix, input_deriv_submatrix,
1506 final_commands_.push_back(c);
1513 extra_commands_(computation->commands.size()) { }
1521 std::vector<std::vector<int32> > backprop_commands(num_components);
1522 for (
int32 command_index = 0;
1523 command_index < num_commands; command_index++) {
1532 backprop_commands[component_index].push_back(command_index);
1535 bool consolidated =
false;
1536 for (
int32 component = 0; component < num_components; component++) {
1537 if (backprop_commands[component].size() > 1) {
1539 backprop_commands[component]);
1540 consolidated =
true;
1565 int32 new_submatrix,
1567 int32 *right_prune)
const {
1568 KALDI_ASSERT(initial_submatrix > 0 && new_submatrix > 0);
1573 *left_prune = new_info.row_offset - initial_info.
row_offset;
1574 if (right_prune != NULL) {
1575 *right_prune = initial_info.
num_rows - new_info.num_rows - *left_prune;
1582 int32 row_index)
const {
1583 KALDI_ASSERT(submatrix > 0 && submatrix < computation_->submatrices.size());
1587 row_index < computation_->submatrices[submatrix].num_rows);
1588 int32 matrix_index = info.matrix_index;
1596 int32 t = debug_info.
cindexes[row_index + info.row_offset].second.t;
1597 return (t >= min_deriv_time_ && t <= max_deriv_time_);
1606 switch (command_type) {
1618 if (submatrix_map_[command->
arg4] == 0)
1634 output_submatrix = command->
arg4,
1635 output_deriv_submatrix = command->
arg5,
1636 input_deriv_submatrix = command->
arg6;
1637 int32 mapped_input_submatrix = submatrix_map_[input_submatrix],
1638 mapped_output_submatrix = submatrix_map_[output_submatrix],
1639 mapped_output_deriv_submatrix = submatrix_map_[output_deriv_submatrix],
1640 mapped_input_deriv_submatrix = submatrix_map_[input_deriv_submatrix];
1642 if (mapped_output_deriv_submatrix == 0) {
1645 mapped_input_submatrix == 0 &&
1646 mapped_output_submatrix == 0);
1649 if (command->
arg7 > 0)
1650 memos_to_delete_.insert(command->
arg7);
1651 }
else if (mapped_output_deriv_submatrix !=
1652 output_deriv_submatrix &&
1657 command->
arg3 = mapped_input_submatrix;
1658 command->
arg4 = mapped_output_submatrix;
1659 command->
arg5 = mapped_output_deriv_submatrix;
1660 command->
arg6 = mapped_input_deriv_submatrix;
1665 MapSimpleMatrixCommand(command);
1668 MapIndexesCommand(command);
1672 MapIndexesMultiCommand(command);
1675 MapAddRowRangesCommand(command);
1682 KALDI_ERR <<
"Un-handled command type.";
1688 submatrix2 = c->
arg2;
1689 int32 submatrix1_mapped = submatrix_map_if_deriv_[submatrix1],
1690 submatrix2_mapped = submatrix_map_if_deriv_[submatrix2];
1691 if (submatrix1_mapped == submatrix1 &&
1692 submatrix2_mapped == submatrix2) {
1696 if (submatrix1_mapped == 0 || submatrix2_mapped == 0) {
1703 left_prune1, left_prune2, right_prune1, right_prune2;
1704 GetPruneValues(submatrix1, submatrix1_mapped, &left_prune1, &right_prune1);
1705 GetPruneValues(submatrix2, submatrix2_mapped, &left_prune2, &right_prune2);
1706 if (left_prune1 == left_prune2 && right_prune1 == right_prune2) {
1709 c->
arg1 = submatrix1_mapped;
1710 c->
arg2 = submatrix2_mapped;
1714 int32 left_prune = std::max(left_prune1, left_prune2),
1715 right_prune = std::max(right_prune1, right_prune2);
1716 if (left_prune + right_prune >= orig_num_rows) {
1721 int32 num_rows = orig_num_rows - left_prune - right_prune;
1725 left_prune, num_rows, 0, -1);
1727 left_prune, num_rows, 0, -1);
1737 input_submatrix = c->
arg2;
1738 int32 input_submatrix_mapped = submatrix_map_if_deriv_[input_submatrix],
1739 output_submatrix_mapped = submatrix_map_if_deriv_[output_submatrix];
1743 if (input_submatrix_mapped == 0 ||
1744 output_submatrix_mapped == 0) {
1759 int32 left_prune_input, left_prune_output;
1760 GetPruneValues(input_submatrix, input_submatrix_mapped,
1761 &left_prune_input, NULL);
1762 GetPruneValues(output_submatrix, output_submatrix_mapped,
1763 &left_prune_output, NULL);
1764 int32 new_num_input_rows =
1766 new_num_output_rows =
1768 std::vector<int32> new_indexes(new_num_output_rows);
1769 bool must_keep_command =
false;
1770 for (
int32 i = 0;
i < new_num_output_rows;
i++) {
1773 int32 orig_index = old_indexes[
i + left_prune_output];
1774 if (orig_index == -1 ||
1775 !RowIsKept(input_submatrix, orig_index) ||
1776 !RowIsKept(output_submatrix_mapped,
i)) {
1777 new_indexes[
i] = -1;
1779 int32 mapped_index = orig_index - left_prune_input;
1782 KALDI_ASSERT(mapped_index >= 0 && mapped_index < new_num_input_rows);
1783 new_indexes[
i] = mapped_index;
1784 must_keep_command =
true;
1787 if (!must_keep_command) {
1793 c->
arg1 = output_submatrix_mapped;
1794 c->
arg2 = input_submatrix_mapped;
1795 c->
arg3 = new_indexes_index;
1800 indexes_multi_arg = c->
arg2;
1801 int32 dest_submatrix_mapped = submatrix_map_if_deriv_[dest_submatrix];
1802 if (dest_submatrix_mapped == 0) {
1808 GetPruneValues(dest_submatrix, dest_submatrix_mapped, &left_prune, NULL);
1810 const std::vector<std::pair<int32, int32> > &old_indexes_multi(
1812 std::vector<std::pair<int32, int32> > new_indexes_multi(new_num_rows);
1813 bool must_keep_command =
false;
1814 for (
int32 i = 0;
i < new_num_rows;
i++) {
1815 std::pair<int32,int32> &this_pair = new_indexes_multi[
i];
1816 this_pair = old_indexes_multi[
i + left_prune];
1819 int32 this_submatrix = this_pair.first,
1820 this_row = this_pair.second;
1821 if (this_submatrix == -1)
1823 if (!RowIsKept(this_submatrix, this_row) ||
1824 !RowIsKept(dest_submatrix_mapped,
i)) {
1825 this_pair.first = -1;
1826 this_pair.second = -1;
1829 int32 this_submatrix_mapped = submatrix_map_if_deriv_[this_submatrix];
1836 int32 this_left_prune, this_num_rows =
1838 GetPruneValues(this_submatrix, this_submatrix_mapped,
1839 &this_left_prune, NULL);
1840 int32 this_row_mapped = this_row - this_left_prune;
1843 KALDI_ASSERT(this_row_mapped >= 0 && this_row_mapped < this_num_rows);
1844 this_pair.first = this_submatrix_mapped;
1845 this_pair.second = this_row_mapped;
1846 must_keep_command =
true;
1848 if (!must_keep_command) {
1852 if (dest_submatrix_mapped == dest_submatrix &&
1853 new_indexes_multi == old_indexes_multi)
1855 c->
arg1 = dest_submatrix_mapped;
1863 src_submatrix = c->
arg2,
1864 indexes_ranges_index = c->
arg3;
1865 int32 dest_submatrix_mapped = submatrix_map_if_deriv_[dest_submatrix],
1866 src_submatrix_mapped = submatrix_map_if_deriv_[src_submatrix];
1867 if (dest_submatrix_mapped == dest_submatrix &&
1868 src_submatrix_mapped == src_submatrix)
1870 if (dest_submatrix_mapped == 0 || src_submatrix_mapped == 0) {
1876 src_left_prune, dest_left_prune;
1877 GetPruneValues(dest_submatrix, dest_submatrix_mapped,
1878 &dest_left_prune, NULL);
1879 GetPruneValues(src_submatrix, src_submatrix_mapped,
1880 &src_left_prune, NULL);
1881 const std::vector<std::pair<int32,int32> > &old_indexes_ranges(
1883 std::vector<std::pair<int32,int32> > new_indexes_ranges(dest_num_rows);
1885 bool must_keep_command =
false;
1886 for (
int32 i = 0;
i < dest_num_rows;
i++) {
1887 std::pair<int32, int32> &this_pair = new_indexes_ranges[
i];
1888 this_pair = old_indexes_ranges[
i + dest_left_prune];
1890 int32 start = this_pair.first, end = this_pair.second;
1891 if (!RowIsKept(dest_submatrix_mapped,
i)) {
1894 }
else if (start >= 0) {
1900 while (start < end && !RowIsKept(src_submatrix, start))
1902 while (end > start && !RowIsKept(src_submatrix, end - 1))
1908 start -= src_left_prune;
1909 end -= src_left_prune;
1910 must_keep_command =
true;
1913 KALDI_ASSERT(start >= 0 && end <= src_num_rows && start < end);
1916 this_pair.first = start;
1917 this_pair.second = end;
1919 if (must_keep_command) {
1920 c->
arg1 = dest_submatrix_mapped;
1921 c->
arg2 = src_submatrix_mapped;
1931 int32 min_deriv_time,
1932 int32 max_deriv_time,
1935 min_deriv_time_(min_deriv_time),
1936 max_deriv_time_(max_deriv_time),
1959 num_memos_removed = 0;
1960 for (
size_t command_index = 0; command_index < num_commands;
1966 num_memos_removed++;
1975 "Limiting derivative times requires debug info.");
1981 for (
int32 matrix_index = 1; matrix_index < num_matrices; matrix_index++) {
1985 const std::vector<Cindex> &cindexes = debug_info.
cindexes;
1987 KALDI_ASSERT(num_rows == static_cast<int32>(cindexes.size()));
1988 int32 first_row_within_range = num_rows,
1989 last_row_within_range = -1;
1990 for (
int32 i = 0;
i < num_rows;
i++) {
1991 int32 t = cindexes[
i].second.t;
1992 if (t >= min_deriv_time && t <= max_deriv_time) {
1993 if (
i < first_row_within_range) first_row_within_range =
i;
1994 if (
i > last_row_within_range) last_row_within_range =
i;
1997 if (last_row_within_range == -1) {
2000 }
else if (last_row_within_range == num_rows - 1 &&
2001 first_row_within_range == 0) {
2007 prune_info.
row_begin = first_row_within_range;
2008 prune_info.
row_end = last_row_within_range + 1;
2020 for (
int32 s = 1; s < num_submatrices; s++) {
2024 num_rows = submatrix_info.
num_rows;
2035 pruned_row_end = std::min(matrix_prune_info.
row_end,
2036 row_offset + num_rows);
2037 if (pruned_row_end <= pruned_row_begin) {
2043 int32 row_offset_within_submatrix =
2044 pruned_row_begin - row_offset,
2045 new_num_rows = pruned_row_end - pruned_row_begin;
2048 new_num_rows, 0, -1);
2058 std::vector<NnetComputation::Command>::iterator
2061 for (; iter != end; ++iter)
2078 std::vector<int32> whole_variables, mapped_variables;
2083 KALDI_ASSERT(whole_variables.size() > mapped_variables.size());
2084 std::vector<int32> excluded_variables(whole_variables.size() -
2085 mapped_variables.size());
2086 std::vector<int32>::iterator end_iter =
2087 std::set_difference(whole_variables.begin(), whole_variables.end(),
2088 mapped_variables.begin(), mapped_variables.end(),
2089 excluded_variables.begin());
2094 for (std::vector<int32>::iterator iter = excluded_variables.begin();
2095 iter != end_iter; ++iter) {
2096 int32 variable_index = *iter;
2097 const std::vector<Access> &variable_accesses =
2099 std::vector<Access>::const_iterator viter = variable_accesses.begin(),
2100 vend = variable_accesses.end();
2101 for (; viter != vend; ++viter) {
2104 int32 command_index = viter->command_index;
2108 KALDI_VLOG(3) <<
"Cannot prune matrix " << m;
2120 for (
int32 s = 1; s < num_submatrices; s++) {
2123 if (will_limit[m]) {
2128 matrix_num_rows < computation_->matrices[m].num_rows);
2131 if (new_row_begin >= 0 &&
2132 submat_info.
num_rows + new_row_begin <= matrix_num_rows) {
2146 submat_info.
num_rows = matrix_num_rows;
2160 for (
int32 m = 1; m < num_matrices; m++) {
2161 if (will_limit[m]) {
2167 std::vector<Cindex> &cindexes = debug_info.
cindexes;
2169 cindexes.erase(cindexes.begin() + prune_info.
row_end, cindexes.end());
2170 cindexes.erase(cindexes.begin(),
2171 cindexes.begin() + prune_info.
row_begin);
2184 std::vector<bool> will_limit(num_matrices,
false);
2185 bool will_limit_at_least_one =
false;
2186 for (
int32 m = 1; m < num_matrices; m++) {
2205 will_limit[m] =
true;
2206 will_limit_at_least_one =
true;
2210 if (will_limit_at_least_one)
2216 int32 min_deriv_time,
2217 int32 max_deriv_time,
2253 int32 *first_nonnegative_pos,
2254 int32 *first_nonnegative_value,
2255 int32 *num_nonnegative_indexes) {
2257 const int32 *indexes_ptr = &(indexes[0]);
2258 size_t pos = 0, size = indexes.size();
2261 for (; pos < size; ++pos)
2262 if (indexes_ptr[pos] >= 0)
2266 *first_nonnegative_pos =
static_cast<int32>(pos);
2267 int32 n = indexes_ptr[pos];
2268 *first_nonnegative_value =
n;
2271 for (; pos < size; ++pos,++
n)
2272 if (indexes_ptr[pos] != n)
2275 *num_nonnegative_indexes = n - *first_nonnegative_value;
2279 for (; pos < size; ++pos)
2280 if (indexes_ptr[pos] >= 0)
2291 num_indexes = computation->
indexes.size();
2292 for (
int32 command_index = 0; command_index < num_commands;
2297 int32 first_nonnegative_pos,
2298 first_nonnegative_value,
2299 num_nonnegative_indexes;
2304 const std::vector<int32> &indexes = computation->
indexes[indexes_index];
2306 &first_nonnegative_pos,
2307 &first_nonnegative_value,
2308 &num_nonnegative_indexes)) {
2311 num_nonnegative_indexes,
2314 num_nonnegative_indexes,
2340 int32 *num_leading_negatives,
2341 int32 *num_trailing_negatives) {
2343 const int32 *begin = &(vec[0]), *ptr = begin, *end = ptr + vec.size();
2344 while (ptr != end && *ptr < 0)
2348 KALDI_ASSERT(ptr != end &&
"Vector consists entirely of -1's.");
2349 *num_leading_negatives = ptr - begin;
2350 const int32 *ptr2 = end - 1;
2357 *num_trailing_negatives = end - 1 - ptr2;
2365 int32 command_index) {
2368 const std::vector<int32> &indexes = computation->
indexes[c.
arg3];
2369 int32 num_leading_negatives, num_trailing_negatives;
2371 &num_leading_negatives,
2372 &num_trailing_negatives);
2373 if (num_leading_negatives == 0 && num_trailing_negatives == 0)
2376 int32 new_num_rows =
static_cast<int32>(indexes.size()) -
2377 num_leading_negatives - num_trailing_negatives;
2379 std::vector<int32> new_indexes(indexes.begin() + num_leading_negatives,
2380 indexes.begin() + num_leading_negatives +
2383 computation->
indexes.push_back(std::vector<int32>());
2384 computation->
indexes.back().swap(new_indexes);
2386 num_leading_negatives, new_num_rows,
2407 const std::vector<std::pair<int32, int32> > &vec,
2408 int32 *num_leading_negatives,
2409 int32 *num_trailing_negatives) {
2411 const std::pair<int32, int32> *begin = &(vec[0]), *ptr = begin,
2412 *end = ptr + vec.size();
2413 while (ptr != end && ptr->first < 0)
2417 KALDI_ASSERT(ptr != end &&
"Vector consists entirely of -1's.");
2418 *num_leading_negatives = ptr - begin;
2419 const std::pair<int32, int32> *ptr2 = end - 1;
2423 while (ptr2->first < 0)
2426 *num_trailing_negatives = end - 1 - ptr2;
2435 int32 command_index) {
2438 const std::vector<std::pair<int32, int32> > &indexes_multi =
2440 int32 num_leading_negatives, num_trailing_negatives;
2442 &num_leading_negatives,
2443 &num_trailing_negatives);
2444 if (num_leading_negatives == 0 && num_trailing_negatives == 0)
2447 int32 new_num_rows =
static_cast<int32>(indexes_multi.size()) -
2448 num_leading_negatives - num_trailing_negatives;
2450 std::vector<std::pair<int32, int32> > new_indexes_multi(
2451 indexes_multi.begin() + num_leading_negatives,
2452 indexes_multi.begin() + num_leading_negatives + new_num_rows);
2454 computation->
indexes_multi.push_back(std::vector<std::pair<int32, int32> >());
2457 num_leading_negatives, new_num_rows,
2478 const std::vector<std::pair<int32, int32> > &vec,
2479 int32 *num_leading_identicals,
2480 int32 *num_trailing_identicals) {
2482 const std::pair<int32, int32> *begin = &(vec[0]), *ptr = begin,
2483 *end = ptr + vec.size();
2484 while (ptr != end && ptr->first == ptr->second)
2489 KALDI_ASSERT(ptr != end &&
"Vector consists entirely of -1's.");
2490 *num_leading_identicals = ptr - begin;
2491 const std::pair<int32, int32> *ptr2 = end - 1;
2495 while (ptr2->first == ptr2->second)
2498 *num_trailing_identicals = end - 1 - ptr2;
2508 int32 command_index) {
2511 const std::vector<std::pair<int32, int32> > &indexes_ranges =
2513 int32 num_leading_identicals, num_trailing_identicals;
2515 &num_leading_identicals,
2516 &num_trailing_identicals);
2517 if (num_leading_identicals == 0 && num_trailing_identicals == 0)
2520 int32 new_num_rows =
static_cast<int32>(indexes_ranges.size()) -
2521 num_leading_identicals - num_trailing_identicals;
2523 std::vector<std::pair<int32, int32> > new_indexes_ranges(
2524 indexes_ranges.begin() + num_leading_identicals,
2525 indexes_ranges.begin() + num_leading_identicals + new_num_rows);
2527 computation->
indexes_ranges.push_back(std::vector<std::pair<int32, int32> >());
2530 num_leading_identicals, new_num_rows,
2540 for (
int32 command_index = 0; command_index < num_commands;
2585 return SplitIndexes() && SplitCommands();
2594 bool SplitIndexes();
2598 bool SplitCommands();
2605 bool SplitCommand(
int32 command_index);
2668 bool GetSplitInfo(std::vector<std::pair<int32, int32> >::const_iterator begin,
2669 std::vector<std::pair<int32, int32> >::const_iterator end,
2687 std::vector<std::pair<int32, int32> >::const_iterator begin,
2688 std::vector<std::pair<int32, int32> >::const_iterator end,
2693 const int32 max_size_ratio = 2;
2695 int32 size = end - begin;
2697 int32 first = begin->first;
2702 int32 initial_second_value = begin->second,
2703 min_second_value = initial_second_value,
2704 max_second_value = initial_second_value;
2706 bool is_consecutive =
true;
2708 int32 second = begin[
i].second;
2709 if (begin[
i].first != first || second < 0)
return false;
2711 if (second != initial_second_value +
i)
2712 is_consecutive =
false;
2713 if (second < min_second_value) min_second_value = second;
2714 if (second > max_second_value) max_second_value = second;
2720 if (is_consecutive) {
2733 split_info_.resize(num_indexes_multi);
2734 for (
int32 i = 0;
i < num_indexes_multi;
i++) {
2735 const std::vector<std::pair<int32,int32> > &multi_index =
2739 int32 num_pairs = multi_index.size();
2744 int32 split_point = -1, initial_first = multi_index[0].first;
2745 for (
int32 j = 1;
j < num_pairs;
j++) {
2746 if (multi_index[
j].first != initial_first) {
2751 if (split_point == -1) {
2752 split_info.
splits.resize(1);
2753 split_info.
splits[0].offset = 0;
2754 if (!GetSplitInfo(multi_index.begin(), multi_index.end(),
2755 &(split_info.
splits[0]))) {
2756 split_info.
splits.clear();
2761 split_info.
splits.resize(2);
2762 split_info.
splits[0].offset = 0;
2763 split_info.
splits[1].offset = split_point;
2765 std::vector<std::pair<int32,int32> >::const_iterator mid_iter =
2766 multi_index.begin() + split_point;
2767 if (!GetSplitInfo(multi_index.begin(), mid_iter,
2768 &(split_info.
splits[0])) ||
2769 !GetSplitInfo(mid_iter, multi_index.end(),
2770 &(split_info.
splits[1]))) {
2771 split_info.
splits.clear();
2785 switch (command_type) {
2788 default:
return false;
2790 int32 indexes_multi_index = command.
arg2;
2792 static_cast<int32>(split_info_.size()));
2794 if (split_info.
splits.empty())
2800 std::vector<NnetComputation::Command> split_commands(
2801 split_info.
splits.size());
2802 for (
size_t i = 0;
i < split_info.
splits.size();
i++) {
2814 switch (command_type) {
2836 switch (command_type) {
2867 KALDI_ERR <<
"Code error: un-handled case.";
2871 command = split_commands[0];
2873 for (
size_t i = 1;
i < split_commands.size();
i++) {
2874 new_commands_.resize(new_commands_.size() + 1);
2877 new_commands_.back().first = c + 1;
2878 new_commands_.back().second = split_commands[
i];
2886 for (
int32 c = 0; c < num_commands; c++)
2887 if (SplitCommand(c))
2889 if (!new_commands_.empty())
2896 return splitter.
Split();
2945 int32 size = indexes.size();
2947 int32 N = indexes[size-1].n + 1,
2953 Index index(indexes[0]);
2954 if (index.
n != 0 || size % N != 0) {
2963 if (indexes[1] == index) {
2965 }
else if (indexes[size / N] == index) {
2966 n_stride = size / N;
2971 for (stride = 2; stride < size / N; stride++) {
2972 if (size % stride == 0 && indexes[stride] == index) {
2977 if (n_stride == -1) {
2987 int32 block_size = n_stride * N;
2989 std::vector<int32> indexes_to_check;
2991 indexes_to_check.resize(size);
2993 indexes_to_check[
i] =
i;
2995 int32 num_to_check = std::min<int32>(5, size);
2996 indexes_to_check.resize(num_to_check);
2997 for (
int32 j = 0;
j < num_to_check;
j++)
2998 indexes_to_check[
j] =
RandInt(0, size - 1);
3001 for (std::vector<int32>::iterator iter = indexes_to_check.begin();
3002 iter != indexes_to_check.end(); ++iter) {
3004 Index index = indexes[
i];
3008 if (i + n_stride >= size || indexes[i + n_stride] != index)
3012 if (i / block_size != (i + n_stride * (N-1)) / block_size) {
3022 if (i - n_stride < 0 || indexes[i - n_stride] != index)
3035 int32 size = cindexes.size();
3037 int32 N = cindexes[size-1].second.n + 1,
3041 Cindex cindex(cindexes[0]);
3042 if (cindex.second.n != 0 || size % N != 0)
3044 cindex.second.n = 1;
3045 if (cindexes[1] == cindex) {
3047 }
else if (cindexes[size / N] == cindex) {
3048 n_stride = size / N;
3051 for (stride = 2; stride < size / N; stride++) {
3052 if (size % stride == 0 && cindexes[stride] == cindex) {
3057 if (stride == size / N)
3060 int32 block_size = n_stride * N;
3061 std::vector<int32> indexes_to_check;
3063 indexes_to_check.resize(size);
3065 indexes_to_check[
i] =
i;
3067 int32 num_to_check = std::min<int32>(5, size);
3068 indexes_to_check.resize(num_to_check);
3069 for (
int32 j = 0;
j < num_to_check;
j++)
3070 indexes_to_check[
j] =
RandInt(0, size - 1);
3073 for (std::vector<int32>::iterator iter = indexes_to_check.begin();
3074 iter != indexes_to_check.end(); ++iter) {
3077 int32 n = cindex.second.n;
3079 cindex.second.n = n + 1;
3080 if (i + n_stride >= size || cindexes[i + n_stride] != cindex)
3084 if (i / block_size != (i + n_stride * (N-1)) / block_size)
3087 cindex.second.n = n - 1;
3088 if (i - n_stride < 0 || cindexes[i - n_stride] != cindex)
3107 const std::vector<Index> &indexes_in,
3108 std::vector<Index> *indexes_out) {
3109 int32 size_in = indexes_in.size();
3110 KALDI_ASSERT(size_in > 0 && indexes_in[size_in - 1].
n == old_N - 1);
3111 int32 block_size_in = n_stride * old_N,
3112 block_size_out = n_stride * new_N;
3114 indexes_out->resize((size_in / old_N) * new_N);
3115 for (
int32 i_in = 0; i_in < size_in; i_in++) {
3116 if (indexes_in[i_in].
n != 0)
3118 Index index(indexes_in[i_in]);
3119 int32 block_index = i_in / block_size_in,
3120 offset_within_block = i_in % block_size_in;
3123 int32 i_out = block_index * block_size_out +
3124 offset_within_block;
3125 for (
int32 n = 0;
n < new_N;
n++, i_out += n_stride) {
3127 (*indexes_out)[i_out] = index;
3143 bool need_debug_info,
3146 nnet_(nnet), misc_info_(misc_info),
3148 need_debug_info_(need_debug_info),
3149 num_n_values_(num_n_values),
3150 expanded_computation_(expanded_computation) {
3161 void InitStrideInfo();
3166 void ComputeMatrixInfo();
3170 void ComputeDebugInfo();
3175 void ComputeSubmatrixInfo();
3203 void ComputePrecomputedIndexes();
3209 void ComputeCommands();
3217 void EnsureDebugInfoExists(
int32 submatrix_index);
3237 bool GetNewSubmatLocationInfo(
int32 submat_index,
3238 int32 old_row_index,
3239 int32 *new_row_index,
3240 int32 *n_stride)
const;
3258 int32 GetNewMatrixLocationInfo(
int32 old_matrix_index,
3259 int32 old_row_index)
const;
3265 void ExpandIndexes(
const std::vector<Index> &indexes,
3266 std::vector<Index> *indexes_expanded)
const;
3312 c_out->
arg3 = expanded_computation_->indexes.size();
3314 expanded_computation_->indexes.push_back(std::vector<int32>());
3315 std::vector<int32> &new_indexes = expanded_computation_->indexes.back();
3318 int32 old_size = old_indexes.size(),
3319 num_n_values = num_n_values_,
3320 new_s1_size = expanded_computation_->submatrices[s1].num_rows,
3321 new_s2_size = expanded_computation_->submatrices[s2].num_rows;
3325 new_indexes.resize(new_s1_size, -1);
3334 for (
int32 i1 = 0; i1 < old_size; i1++) {
3335 int32 new_i1_n0, n_stride1;
3336 if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &n_stride1)) {
3339 int32 i2 = old_indexes[i1];
3340 int32 new_i2_n0, n_stride2;
3345 bool ans = GetNewSubmatLocationInfo(s2, i2, &new_i2_n0, &n_stride2);
3350 int32 new_i1 = new_i1_n0, new_i2 = new_i2_n0;
3351 for (
int32 n = 0;
n < num_n_values;
3352 ++
n, new_i1 += n_stride1, new_i2 += n_stride2) {
3353 KALDI_ASSERT(new_i1 < new_s1_size && new_i2 < new_s2_size);
3354 new_indexes[new_i1] = new_i2;
3370 num_rows_new = expanded_computation_->submatrices[s1].num_rows;
3373 int32 num_n_values = num_n_values_;
3376 c_out->
arg2 = expanded_computation_->indexes_multi.size();
3377 expanded_computation_->indexes_multi.push_back(
3378 std::vector<std::pair<int32, int32> >());
3379 std::vector<std::pair<int32, int32> > &new_indexes_multi =
3380 expanded_computation_->indexes_multi.back();
3381 const std::vector<std::pair<int32, int32> > &old_indexes_multi =
3388 KALDI_ASSERT(static_cast<int32>(old_indexes_multi.size()) == num_rows_old);
3391 new_indexes_multi.resize(num_rows_new,
3392 std::pair<int32,int32>(-1, -1));
3394 for (
int32 i1 = 0; i1 < num_rows_old; i1++) {
3395 int32 new_i1_n0, n_stride1;
3396 if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &n_stride1)) {
3399 int32 s2 = old_indexes_multi[i1].first,
3400 i2 = old_indexes_multi[i1].second;
3401 int32 new_i2_n0, n_stride2;
3407 bool ans = GetNewSubmatLocationInfo(s2, i2, &new_i2_n0, &n_stride2);
3412 int32 new_i1 = new_i1_n0, new_i2 = new_i2_n0;
3414 for (
int32 n = 0;
n < num_n_values;
3415 n++, new_i1 += n_stride1, new_i2 += n_stride2) {
3416 new_indexes_multi[new_i1].first = s2;
3417 new_indexes_multi[new_i1].second = new_i2;
3435 num_rows_new = expanded_computation_->submatrices[s1].num_rows;
3438 int32 num_n_values = num_n_values_;
3441 c_out->
arg3 = expanded_computation_->indexes_ranges.size();
3442 expanded_computation_->indexes_ranges.push_back(
3443 std::vector<std::pair<int32, int32> >());
3444 std::vector<std::pair<int32, int32> > &new_indexes_ranges =
3445 expanded_computation_->indexes_ranges.back();
3446 const std::vector<std::pair<int32, int32> > &old_indexes_ranges =
3454 KALDI_ASSERT(static_cast<int32>(old_indexes_ranges.size()) == num_rows_old);
3456 new_indexes_ranges.resize(num_rows_new,
3457 std::pair<int32,int32>(-1, -1));
3459 for (
int32 i1 = 0; i1 < num_rows_old; i1++) {
3460 int32 new_i1_n0, n_stride1;
3461 if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &n_stride1)) {
3464 int32 i2_begin = old_indexes_ranges[i1].first,
3465 i2_end = old_indexes_ranges[i1].second;
3466 if (i2_end == i2_begin)
3471 int32 i2_last = i2_end - 1;
3472 int32 new_i2_n0_begin, new_i2_n0_last,
3476 bool ans1 = GetNewSubmatLocationInfo(s2, i2_begin, &new_i2_n0_begin,
3478 ans2 = GetNewSubmatLocationInfo(s2, i2_last, &new_i2_n0_last,
3480 KALDI_ASSERT(ans1 && ans2 && new_i2_n0_last >= new_i2_n0_begin &&
3481 new_i2_n0_begin >= 0 && n_stride1 > 0 && n_stride2 > 0);
3486 int32 new_i1 = new_i1_n0,
3487 new_i2_begin = new_i2_n0_begin,
3488 new_i2_end = new_i2_n0_last + 1;
3489 for (
int32 n = 0;
n < num_n_values;
3490 n++, new_i1 += n_stride1, new_i2_begin += n_stride2,
3491 new_i2_end += n_stride2) {
3492 new_indexes_ranges[new_i1].first = new_i2_begin;
3493 new_indexes_ranges[new_i1].second = new_i2_end;
3503 expanded_computation_->commands.resize(num_commands);
3504 for (
int32 command_index = 0; command_index < num_commands;
3508 expanded_computation_->commands[command_index];
3525 ExpandRowsCommand(c, &c_out);
3529 ExpandRowsMultiCommand(c, &c_out);
3532 ExpandRowRangesCommand(c, &c_out);
3551 n_stride_.resize(num_matrices);
3557 for (
int32 m = 1; m < num_matrices; m++) {
3561 bool full_check =
true;
3563 if (n_stride == 0) {
3564 KALDI_ERR <<
"Problem encountered in 'shortcut' compilation: the computation " 3565 <<
"does not have the expected structure. Try compiling with " 3566 <<
"--use-shortcut=false.";
3568 n_stride_[m] = n_stride;
3575 ComputeMatrixInfo();
3576 if (need_debug_info_)
3579 expanded_computation_->matrix_debug_info.clear();
3580 ComputeSubmatrixInfo();
3581 ComputePrecomputedIndexes();
3584 expanded_computation_->need_model_derivative =
3590 expanded_computation_->matrices.resize(num_matrices);
3593 int32 old_num_n_values = 2,
3594 new_num_n_values = num_n_values_;
3595 for (
int32 m = 1; m < num_matrices; m++) {
3597 expanded_computation_->matrices[m].num_rows =
3606 expanded_computation_->matrix_debug_info.resize(num_matrices);
3608 expanded_computation_->matrix_debug_info[0] =
3610 int32 num_n_values = num_n_values_;
3611 for (
int32 m = 1; m < num_matrices; m++) {
3615 expanded_computation_->matrix_debug_info[m];
3618 num_rows_out = expanded_computation_->matrices[m].num_rows;
3620 info_out.
cindexes.resize(num_rows_out);
3623 for (
int32 r = 0; r < num_rows_in; r++) {
3624 if (info_in.
cindexes[r].second.n == 0) {
3625 int32 new_r = GetNewMatrixLocationInfo(m, r),
3626 n_stride = n_stride_[m];
3627 for (
int32 n = 0;
n < num_n_values;
n++) {
3628 int32 r_out = new_r +
n * n_stride;
3629 cindexes_out[r_out] = cindexes_in[r];
3630 cindexes_out[r_out].second.n =
n;
3639 expanded_computation_->submatrices.resize(num_submatrices);
3642 for (
int32 s = 1; s < num_submatrices; s++) {
3650 last_row_in = first_row_in + info_in.
num_rows - 1;
3651 if (!(debug_info_in.
cindexes[first_row_in].second.n == 0 &&
3652 debug_info_in.
cindexes[last_row_in].second.n == 1)) {
3653 std::ostringstream computation_ss;
3654 std::vector<std::string> submat_strings;
3657 KALDI_ERR <<
"Submatrix s" << s <<
" = " << submat_strings[s]
3658 <<
" has strange dimensions. Computation is: " 3659 << computation_ss.str();
3662 int32 first_row_out = GetNewMatrixLocationInfo(m, first_row_in),
3663 last_row_out = GetNewMatrixLocationInfo(m, last_row_in),
3664 new_num_rows = (last_row_out + 1 - first_row_out);
3667 expanded_computation_->submatrices[s];
3686 std::vector<bool> need_backprop(num_precomputed_indexes,
false);
3688 std::vector<int32> component_index(num_precomputed_indexes, -1);
3690 for (
int32 command_index = 0; command_index < num_commands; command_index++) {
3700 need_backprop[c.
arg2] =
true;
3705 p < expanded_computation_->component_precomputed_indexes.size();
3707 delete expanded_computation_->component_precomputed_indexes[p].data;
3708 expanded_computation_->component_precomputed_indexes.clear();
3709 expanded_computation_->component_precomputed_indexes.resize(
3710 num_precomputed_indexes);
3712 for (
int32 p = 1; p < num_precomputed_indexes; ++p) {
3716 expanded_computation_->component_precomputed_indexes[p];
3719 "Input/output indexes not present in precomputed info of " 3720 "computation to be expanded.");
3726 std::vector<Index> input_indexes, output_indexes;
3733 output_indexes, need_backprop[p]);
3738 new_info.
data = expanded_precomputed_indexes;
3745 int32 *new_row_index,
int32 *n_stride)
const {
3748 new_row_offset = expanded_computation_->submatrices[submat_index].row_offset;
3752 if (debug_info_in.
cindexes[old_row_index + old_row_offset].second.n != 0)
3754 *new_row_index = (GetNewMatrixLocationInfo(matrix_index,
3755 old_row_index + old_row_offset) -
3757 *n_stride = n_stride_[matrix_index];
3762 int32 matrix_index,
int32 old_row_index)
const {
3764 int32 n_stride = n_stride_[matrix_index],
3765 old_num_n_values = 2, new_num_n_values = num_n_values_,
3766 old_block_size = old_num_n_values * n_stride,
3767 new_block_size = new_num_n_values * n_stride,
3768 block_index = old_row_index / old_block_size,
3769 offset_within_block = old_row_index % old_block_size;
3777 int32 old_n_value = offset_within_block / n_stride,
3778 index_within_subblock = offset_within_block % n_stride;
3779 const std::vector<Cindex> &cindexes =
3781 KALDI_ASSERT(old_n_value == cindexes[old_row_index].second.n &&
3782 (old_n_value == 0 || old_n_value == 1));
3787 int32 new_n_value = (old_n_value == 0 ? 0 : new_num_n_values - 1);
3789 return block_index * new_block_size + index_within_subblock +
3790 new_n_value * n_stride;
3795 const std::vector<Index> &indexes,
3796 std::vector<Index> *indexes_expanded)
const {
3797 bool full_check =
false;
3801 indexes, indexes_expanded);
3807 bool need_debug_info,
3811 need_debug_info, num_n_values,
3812 expanded_computation);
3824 int32 *num_n_values_out) {
3827 const std::vector<Index> &indexes = io_spec.
indexes;
3828 KALDI_ASSERT(!indexes.empty() &&
"Empty Indexes in computation request");
3830 bool full_check =
true;
3832 int32 num_n_values = indexes.back().n + 1;
3833 if (num_n_values <= 2) {
3839 *num_n_values_out = num_n_values;
3847 indexes, &(mini_io_spec->
indexes));
3854 int32 *num_n_values) {
3855 size_t num_inputs = request.
inputs.size(),
3856 num_outputs = request.
outputs.size();
3857 mini_request->
inputs.resize(num_inputs);
3858 mini_request->
outputs.resize(num_outputs);
3864 for (
size_t i = 0;
i < num_inputs;
i++) {
3865 int32 this_num_n_values = 0;
3868 &this_num_n_values))
3871 *num_n_values = this_num_n_values;
3873 if (this_num_n_values != *num_n_values)
3877 for (
size_t i = 0;
i < num_outputs;
i++) {
3878 int32 this_num_n_values = 0;
3881 &this_num_n_values))
3883 if (this_num_n_values != *num_n_values)
3918 std::vector<std::pair<int32, int32> > *matrix_to_pair);
3925 static inline int32 NormalizeCindexes(std::vector<Cindex> *cindexes);
3931 static void GetPairToMatrixMap(
3932 std::vector<std::pair<int32, int32> > &matrix_to_pair,
3941 static void ConvertListsToPairLists(
3942 const std::vector<std::vector<int32> > &active_matrices,
3943 const std::vector<std::pair<int32, int32> > &matrix_to_pair,
3944 std::vector<std::vector<std::pair<int32, int32> > > *active_pairs);
3955 static bool ListsAreEqualExceptForPossibleShift(
3956 const std::vector<std::pair<int32, int32> > &a,
3957 const std::vector<std::pair<int32, int32> > &b,
3977 static bool FindFirstRepeat(
3978 const std::vector<std::vector<std::pair<int32, int32> > > &active_pairs,
3979 int32 time_shift_per_segment,
3989 static void GetIdentifiedMatrices(
3990 const std::vector<std::pair<int32, int32> > &pair_list1,
3991 const std::vector<std::pair<int32, int32> > &pair_list2,
3993 std::vector<int32> *matrix_list1,
3994 std::vector<int32> *matrix_list2);
4002 static void CheckIdentifiedMatrices(
4004 const std::vector<int32> &list1,
4005 const std::vector<int32> &list2,
4006 int32 time_difference);
4014 static void FormInfiniteLoop(
int32 command1,
int32 command2,
4025 static void AddMatrixSwapCommands(
4026 const std::vector<int32> &matrices1,
4027 const std::vector<int32> &matrices2,
4038 static void GetMatrixSwapOrder(
4039 const std::vector<int32> &matrices1,
4040 const std::vector<int32> &matrices2,
4041 std::vector<std::pair<int32, int32> > *swaps);
4057 const std::vector<int32> &splice_point_commands,
4058 std::vector<std::vector<int32> > *active_matrices);
4072 std::vector<int32> segment_ends;
4077 int32 second_segment_begin = segment_ends[0],
4078 third_segment_begin = segment_ends[1],
4079 fourth_segment_begin = segment_ends[2];
4080 int32 first_output_command_seg2 = -1,
4081 first_output_command_seg3 = -1;
4082 for (
int32 c = second_segment_begin; c < third_segment_begin; c++)
4084 first_output_command_seg2 < 0)
4085 first_output_command_seg2 = c;
4086 for (
int32 c = third_segment_begin; c < fourth_segment_begin; c++)
4088 first_output_command_seg3 < 0)
4089 first_output_command_seg3 = c;
4090 if (first_output_command_seg2 < 0 ||
4091 first_output_command_seg3 < 0)
4092 KALDI_ERR <<
"Could not locate output commands for segments 2 and 3.";
4094 &command2 = computation.
commands[first_output_command_seg2],
4095 &command3 = computation.
commands[first_output_command_seg3];
4096 int32 seg2_node = command2.
arg2, seg3_node = command3.arg2;
4099 seg3_submatrix = command3.arg1;
4103 seg3_matrix = computation.
submatrices[seg3_submatrix].matrix_index;
4105 computation.
matrices[seg3_matrix].num_rows);
4110 int32 t_offset = debug_info3.cindexes[0].second.t -
4111 debug_info2.cindexes[0].second.t;
4112 int32 num_rows = debug_info2.cindexes.size();
4113 for (
int32 r = 0; r < num_rows; r++) {
4115 debug_info2.cindexes[r].second.t + t_offset);
4122 std::vector<Cindex> *cindexes) {
4123 std::vector<Cindex>::iterator iter = cindexes->begin(),
4124 end = cindexes->end();
4126 for (; iter != end; iter++) {
4127 if (iter->second.t !=
kNoTime) {
4128 ans = iter->second.t;
4134 KALDI_ERR <<
"All t values are kNoTime in matrix.";
4136 iter = cindexes->begin();
4137 for (; iter != end; iter++)
4138 if (iter->second.t !=
kNoTime)
4139 iter->second.t -= ans;
4146 std::vector<std::pair<int32, int32> > *matrix_to_pair) {
4147 typedef unordered_map<std::vector<Cindex>,
int32,
4149 int32 cur_vector_id = 1;
4154 int32 num_matrices = computation.
matrices.size();
4155 matrix_to_pair->resize(num_matrices);
4157 for (int32 m = 1; m < num_matrices; m++) {
4160 int32 t_offset = NormalizeCindexes(&cindexes);
4161 MapType::const_iterator iter = cindex_map.find(cindexes);
4163 if (iter != cindex_map.end()) {
4164 vector_id = iter->second;
4166 vector_id = cur_vector_id++;
4167 cindex_map[cindexes] = vector_id;
4170 int32 unique_id = 2 * vector_id + (is_deriv ? 1 : 0);
4171 (*matrix_to_pair)[m].first = unique_id;
4172 (*matrix_to_pair)[m].second = t_offset;
4178 std::vector<std::pair<int32, int32> > &matrix_to_pair,
4180 int32 num_matrices = matrix_to_pair.size();
4182 pair_to_matrix->clear();
4183 for (
int32 m = 1; m < num_matrices; m++)
4184 (*pair_to_matrix)[matrix_to_pair[m]] = m;
4190 const std::vector<std::vector<int32> > &active_matrices,
4191 const std::vector<std::pair<int32, int32> > &matrix_to_pair,
4192 std::vector<std::vector<std::pair<int32, int32> > > *active_pairs) {
4193 active_pairs->clear();
4194 active_pairs->resize(active_matrices.size());
4195 int32 num_matrices = matrix_to_pair.size();
4196 for (
size_t seg = 0; seg < active_matrices.size(); seg++) {
4197 const std::vector<int32> &this_active_matrix_list = active_matrices[seg];
4198 std::vector<std::pair<int32, int32> > &this_active_pair_list =
4199 (*active_pairs)[seg];
4200 this_active_pair_list.resize(this_active_matrix_list.size());
4201 std::vector<int32>::const_iterator iter = this_active_matrix_list.begin(),
4202 end = this_active_matrix_list.end();
4203 std::vector<std::pair<int32, int32> >::iterator
4204 out_iter = this_active_pair_list.begin();
4205 for (; iter != end; ++iter, ++out_iter) {
4207 *out_iter = matrix_to_pair[*iter];
4214 const std::vector<std::pair<int32, int32> > &a,
4215 const std::vector<std::pair<int32, int32> > &b,
4217 size_t size = a.size();
4218 if (b.size() != size)
4220 for (
size_t i = 0;
i < size;
i++) {
4221 const std::pair<int32, int32> &p1 = a[
i],
4223 if (p1.first != p2.first)
4225 if (p2.second != p1.second + shift && p2.second != p1.second)
4233 const std::vector<std::vector<std::pair<int32, int32> > > &active_pairs,
4234 int32 time_shift_per_segment,
4236 int32 num_segments = active_pairs.size();
4243 for (
int32 s = 0; s < num_segments; s++) {
4244 for (
int32 t = s + 1; t < num_segments; t++) {
4245 if (ListsAreEqualExceptForPossibleShift(active_pairs[s],
4247 (t - s) * time_shift_per_segment)) {
4259 const std::vector<std::pair<int32, int32> > &pair_list1,
4260 const std::vector<std::pair<int32, int32> > &pair_list2,
4262 std::vector<int32> *matrix_list1,
4263 std::vector<int32> *matrix_list2) {
4264 size_t size = pair_list1.size();
4266 matrix_list1->clear();
4267 matrix_list2->clear();
4268 matrix_list1->reserve(size);
4269 matrix_list2->reserve(size);
4270 std::vector<std::pair<int32, int32> >::const_iterator
4271 iter1 = pair_list1.begin(), end1 = pair_list1.end(),
4272 iter2 = pair_list2.begin();
4273 for (; iter1 != end1; ++iter1, ++iter2) {
4274 if (iter1->second == iter2->second)
4278 unordered_map<std::pair<int32, int32>,
int32,
4280 map_iter1 = pair_to_matrix.find(*iter1),
4281 map_iter2 = pair_to_matrix.find(*iter2);
4282 if (map_iter1 == pair_to_matrix.end() ||
4283 map_iter2 == pair_to_matrix.end())
4284 KALDI_ERR <<
"Could not find pair in map (code error)";
4285 matrix_list1->push_back(map_iter1->second);
4286 matrix_list2->push_back(map_iter2->second);
4296 const std::vector<int32> &splice_point_commands,
4297 std::vector<std::vector<int32> > *active_matrices) {
4299 int32 num_splice_points = splice_point_commands.size();
4300 active_matrices->clear();
4301 active_matrices->resize(num_splice_points);
4309 std::vector<int32> whole_submatrices;
4311 for (
int32 m = 1; m < num_matrices; m++) {
4314 int32 s = whole_submatrices[m],
4318 for (
int32 i = 0;
i < num_splice_points;
i++) {
4319 int32 splice_point = splice_point_commands[
i];
4320 if (first_access < splice_point && last_access > splice_point) {
4324 (*active_matrices)[
i].push_back(m);
4333 const std::vector<int32> &list1,
4334 const std::vector<int32> &list2,
4335 int32 time_difference) {
4339 for (
size_t i = 0;
i < list1.size();
i++) {
4340 int32 m1 = list1[
i], m2 = list2[
i];
4342 &matrix_info1 = computation.
matrices[m1],
4343 &matrix_info2 = computation.
matrices[m2];
4345 matrix_info1.
num_cols == matrix_info2.num_cols &&
4346 matrix_info1.
stride_type == matrix_info2.stride_type);
4352 std::vector<Cindex>::const_iterator iter1 = debug_info1.
cindexes.begin(),
4354 iter2 = debug_info2.cindexes.begin();
4355 for (; iter1 != end1; iter1++,iter2++) {
4357 iter2->second.n == iter1->second.n &&
4359 iter2->second.t == iter1->second.t + time_difference) &&
4360 iter2->second.x == iter1->second.x);
4368 const std::vector<int32> &matrices1,
4369 const std::vector<int32> &matrices2,
4370 std::vector<std::pair<int32, int32> > *swaps) {
4373 int32 num_matrices = matrices1.size();
4374 std::vector<bool> processed(num_matrices,
false);
4375 std::vector<int32> queue;
4378 int32 num_loops = 0;
4379 for (;
static_cast<int32>(swaps->size()) < num_matrices; num_loops++) {
4380 for (
int32 i = 0;
i < num_matrices;
i++) {
4383 int32 m1 = matrices1[
i], m2 = matrices2[
i];
4384 std::vector<int32>::const_iterator iter =
4385 std::lower_bound(matrices2.begin(), matrices2.end(), m1);
4386 if (iter == matrices2.end() || *iter != m1) {
4389 swaps->push_back(std::pair<int32,int32>(m1, m2));
4390 processed[
i] =
true;
4392 int32 m1_pos_in_matrices2 = iter - matrices2.begin();
4393 if (processed[m1_pos_in_matrices2]) {
4397 swaps->push_back(std::pair<int32,int32>(m1, m2));
4398 processed[
i] =
true;
4420 const std::vector<int32> &matrices1,
4421 const std::vector<int32> &matrices2,
4423 std::vector<std::pair<int32, int32> > swaps;
4428 GetMatrixSwapOrder(matrices1, matrices2, &swaps);
4437 std::vector<int32> whole_submatrices;
4439 size_t num_matrices = whole_submatrices.size();
4441 for (
size_t i = 0;
i < swaps.size();
i++) {
4442 int32 m1 = swaps[
i].first, m2 = swaps[
i].second;
4444 static_cast<size_t>(m2) < num_matrices);
4445 int32 s1 = whole_submatrices[m1], s2 = whole_submatrices[m2];
4449 computation->
commands.push_back(goto_label_command);
4457 command2 + 1 && command1 < command2);
4462 computation->
commands.resize(command2 + 1);
4464 computation->
commands[command2].arg1 = command1;
4476 "You must request matrix debug info when compiling " 4477 "looped computations.");
4486 std::vector<int32> splice_points;
4492 std::vector<std::vector<int32> > active_matrices;
4494 FindActiveMatrices(*
computation_, analyzer_, splice_points,
4501 std::vector<std::pair<int32, int32> > matrix_to_pair;
4506 GetPairToMatrixMap(matrix_to_pair, &pair_to_matrix);
4509 std::vector<std::vector<std::pair<int32, int32> > > pair_lists;
4510 ConvertListsToPairLists(active_matrices, matrix_to_pair,
4516 if (!FindFirstRepeat(pair_lists,
4517 time_shift_per_segment,
4519 KALDI_VLOG(2) <<
"Could not find repeats of variables.";
4523 std::vector<int32> seg1_matrices, seg2_matrices;
4524 GetIdentifiedMatrices(pair_lists[seg1], pair_lists[seg2],
4526 &seg1_matrices, &seg2_matrices);
4528 int32 time_difference = time_shift_per_segment * (seg2 - seg1);
4529 CheckIdentifiedMatrices(*
computation_, seg1_matrices, seg2_matrices,
4532 FormInfiniteLoop(splice_points[seg1], splice_points[seg2],
computation_);
4534 AddMatrixSwapCommands(seg1_matrices, seg2_matrices,
computation_);
4554 if (num_commands == 0)
4556 for (
int32 c = num_commands - 1; c >= 0; c--) {
4559 if (static_cast<size_t>(dest_command) < computation->
commands.size() &&
4562 for (
int32 d = 0;
d + 1 < num_commands;
d++) {
4588 for (
size_t i = 0;
i < accesses.
accesses.size();
i++) {
4591 computation.
commands[command_index];
4618 for (
size_t i = 0;
i < accesses.
accesses.size();
i++) {
4634 bool operator () (
const std::pair<int32, NnetComputation::Command> &p1,
4635 const std::pair<int32, NnetComputation::Command> &p2)
const {
4636 return p1.first < p2.first;
4641 std::vector<std::pair<int32, NnetComputation::Command> > *new_commands,
4643 int32 num_new_commands = new_commands->size(),
4644 num_old_commands = computation->
commands.size();
4645 if (num_new_commands == 0)
4651 std::stable_sort(new_commands->begin(), new_commands->end(),
4652 comparison_operator);
4655 for (
int32 i = 0;
i + 1 < num_new_commands;
i++) {
4656 KALDI_ASSERT((*new_commands)[
i].first <= (*new_commands)[
i+1].first &&
4657 (*new_commands)[
i].first >= 0 &&
4658 (*new_commands)[
i+1].first <= num_old_commands);
4661 std::vector<NnetComputation::Command> merged_commands;
4662 merged_commands.reserve(num_old_commands + num_new_commands);
4664 std::vector<std::pair<int32, NnetComputation::Command> >::const_iterator
4665 new_commands_iter = new_commands->begin(),
4666 new_commands_end = new_commands->end();
4668 for (
int32 old_command_index = 0; old_command_index <= num_old_commands;
4669 old_command_index++) {
4670 while (new_commands_iter != new_commands_end &&
4671 new_commands_iter->first <= old_command_index) {
4672 merged_commands.push_back(new_commands_iter->second);
4673 ++new_commands_iter;
4675 if (old_command_index < num_old_commands)
4676 merged_commands.push_back(computation->
commands[old_command_index]);
4678 KALDI_ASSERT(merged_commands.size() == num_old_commands +
4681 computation->
commands.swap(merged_commands);
4704 int32 memory_compression_level,
4705 int32 middle_command,
4707 nnet_(nnet), memory_compression_level_(memory_compression_level),
4708 middle_command_(middle_command),
computation_(computation) { }
4715 void ProcessMatrix(
int32 m);
4719 void ModifyComputation();
4750 int32 backward_command_index,
4753 m(m), compression_command_index(forward_command_index),
4754 uncompression_command_index(backward_command_index),
4755 compression_type(compression_type), range(range),
4756 truncate(truncate) { }
4772 std::vector<int32> whole_submatrices;
4778 std::vector<std::pair<int32, NnetComputation::Command> >
4780 pairs_to_insert.reserve(compress_info_.size() * 2);
4781 for (
size_t i = 0;
i < compress_info_.size();
i++) {
4783 int32 s = whole_submatrices[info.
m];
4787 std::pair<int32, NnetComputation::Command> p1(
4792 pairs_to_insert.push_back(p1);
4793 std::pair<int32, NnetComputation::Command> p2(
4796 pairs_to_insert.push_back(p2);
4807 for (
int32 m = 1; m < num_matrices; m++)
4809 if (!compress_info_.empty())
4810 ModifyComputation();
4814 if (analyzer_.matrix_accesses[m].is_output) {
4820 const std::vector<Access> &accesses = analyzer_.matrix_accesses[m].accesses;
4825 std::vector<Access>::const_iterator iter = std::lower_bound(accesses.begin(),
4832 if (iter == accesses.end()) {
4836 if (iter == accesses.begin()) {
4844 const Access &backward_access = iter[0],
4845 &forward_access = iter[-1];
4846 KALDI_ASSERT(forward_access.command_index < middle_command_ &&
4853 bool backward_access_is_last_access = (accesses.end() == iter + 1);
4856 forward_command_index = forward_access.command_index;
4860 if (memory_compression_level_ >= 1 &&
4861 backward_access_is_last_access &&
4864 int32 component_index = backward_command.
arg1;
4868 if (component->
Type() ==
"RectifiedLinearComponent") {
4869 compress_info_.push_back(
4871 backward_command_index,
4884 if (memory_compression_level_ >= 2) {
4885 compress_info_.push_back(
4887 backward_command_index,
4900 int32 memory_compression_level,
4902 if (memory_compression_level == 0 || computation->
commands.empty())
4912 int32 middle_command = -1;
4913 for (
size_t i = 0;
i < computation->
commands.size();
i++) {
4915 if (middle_command < 0) {
4916 middle_command =
static_cast<int32>(
i);
4918 KALDI_WARN <<
"Found more than one command of type kNoOperationMarker " 4919 "in non-looped computation.";
4926 if (middle_command == -1) {
4929 if (memory_compression_level >= 1) {
4930 int64 bytes_used_initial, bytes_used_final;
4936 middle_command, computation);
4941 if (bytes_used_final != bytes_used_initial) {
4942 KALDI_VLOG(2) <<
"Memory compression reduced memory use from " 4943 << bytes_used_initial <<
" to " 4944 << bytes_used_final <<
" bytes.";
4953 std::lock_guard<std::mutex> lock(mutex_);
4955 CacheType::iterator iter = computation_cache_.find(&in_request);
4956 if (iter == computation_cache_.end()) {
4959 std::shared_ptr<const NnetComputation> ans = iter->second.first;
4962 access_queue_.splice(access_queue_.end(), access_queue_,
4963 iter->second.second);
4970 cache_capacity_(cache_capacity) {
4978 std::lock_guard<std::mutex> lock(
mutex_);
4981 const CacheType::iterator iter =
4997 std::shared_ptr<const NnetComputation> computation(computation_in);
5002 std::make_pair(request, std::make_pair(computation, ait)));
5021 int32 computation_cache_size;
5022 ExpectToken(is, binary,
"<ComputationCacheSize>");
5028 for (
size_t c = 0; c < computation_cache_size; c++) {
5030 request.
Read(is, binary);
5032 computation->
Read(is, binary);
5033 Insert(request, computation);
5043 for (; iter != end; ++iter) {
5052 WriteToken(os, binary,
"<ComputationCacheSize>");
5054 WriteToken(os, binary,
"<ComputationCache>");
5057 iter->first->Write(os, binary);
5058 iter->second.first->Write(os, binary);
5068 for (; iter != end; ++iter)
bool MatrixIsUnused(const Analyzer &analyzer, const NnetComputation &computation, int32 m)
This function returns true if matrix 1 <= m < computation->matrices.size() is unused, defined as: it is not an input or an output, and is not accessed other than via commands of type kAllocMatrix, kDeallocMatrix, and kSetConst.
CommandType
CommandType is an enum that describes the category of the command used in the NnetComputation.
static void CreateRenumbering(int32 old_num_elements, const std::vector< int32 > &to_remove, std::vector< int32 > *renumbering)
creates a renumbering that removes the elements in "to_remove", e.g.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
std::vector< MatrixCompressInfo > compress_info_
std::vector< bool > matrix_is_used_
int32 FirstNontrivialAccess(int32 s) const
Returns the first command (read or write) that accesses any part of 's' except for zeroing it (i...
void IdentifyMatrixArgsInComputation(NnetComputation *computation, std::vector< int32 *> *matrix_args)
static void ConvertListsToPairLists(const std::vector< std::vector< int32 > > &active_matrices, const std::vector< std::pair< int32, int32 > > &matrix_to_pair, std::vector< std::vector< std::pair< int32, int32 > > > *active_pairs)
bool store_component_stats
you should set need_component_stats to true if you need the average-activation and average-derivative...
void GetCommandsOfType(const NnetComputation &computation, CommandType t, std::vector< int32 > *command_indexes)
This utility function works out from a computation, the command-indexes of the commands of the given ...
std::vector< MatrixDebugInfo > matrix_debug_info
bool SplitRowOps(NnetComputation *computation)
This function detects cases where commands of type kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti use indexes that correspond to at most two submatrices, in two distinct ranges without gaps filled by -1's, and could be converted to at most two commands of type kMatrixAdd, kMatrixCopy, kAddRows or kCopyRows.
std::vector< NnetComputation::Command > final_commands_
void OptimizeLoopedComputation(const Nnet &nnet, NnetComputation *computation)
This function tries to optimize computation 'computation' for an 'looped' computation.
std::vector< bool > submatrix_is_kept_
ComputationLoopedOptimizer(const Nnet &nnet, NnetComputation *computation)
std::vector< int32 > second_value_offsets
std::vector< Index > input_indexes
std::vector< int32 > orig_num_rows_
void RenumberSubmatrices()
std::vector< std::vector< NnetComputation::Command > > extra_commands_
bool need_model_derivative
if need_model_derivative is true, then we'll be doing either model training or model-derivative compu...
void ExpandRowsMultiCommand(const NnetComputation::Command &c_in, NnetComputation::Command *c_out)
std::vector< bool > variable_dirty_
void IdentifySubmatrixArgs(NnetComputation::Command *c, std::vector< int32 *> *submatrix_args)
This function outputs to "submatrix_args" the addresses of a subset of arguments arg1 through arg6 in...
void Extend(int32 *dest_submatrix_index, int32 *src_submatrix_index)
MiscComputationInfo misc_info
misc_info is for extensibility to things that don't easily fit into the framework.
static void ConvertNumNValues(int32 n_stride, int32 old_N, int32 new_N, const std::vector< Index > &indexes_in, std::vector< Index > *indexes_out)
Abstract base-class for neural-net components.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
void RenumberComputation(NnetComputation *computation)
This function detects submatrices and matrices that are never used (e.g.
void ComputeSubmatrixInfo()
void InsertCommands(std::vector< std::pair< int32, NnetComputation::Command > > *new_commands, NnetComputation *computation)
Inserts commands into the computation at the requested places.
static void AddMatrixSwapCommands(const std::vector< int32 > &matrices1, const std::vector< int32 > &matrices2, NnetComputation *computation)
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
bool is_output
true if this matrix is an output of the computation (i.e.
std::vector< int32 > submatrix_map_if_deriv_
NnetComputation * computation_
void ComputeSubmatrixMaps()
bool SplitCommand(int32 command_index)
const MiscComputationInfo & misc_info_
std::vector< int32 > whole_submatrices_
void ComputeSubmatrixIsUsed()
void IdentifySubmatrixArgsInComputation(NnetComputation *computation, std::vector< int32 *> *submatrix_args)
This function outputs to "submatrix_args" the addresses of integers in 'computation' that correspond ...
int32 NewMatrix(int32 num_rows, int32 num_cols, MatrixStrideType stride_type)
Convenience function used when adding new matrices.
void ExpandRowRangesCommand(const NnetComputation::Command &c_in, NnetComputation::Command *c_out)
void Print(std::ostream &os, const Nnet &nnet) const
static int32 FindNStride(const std::vector< Index > &indexes, bool full_check)
MatrixCompressInfo(int32 m, int32 forward_command_index, int32 backward_command_index, CuCompressedMatrixType compression_type, BaseFloat range, bool truncate)
static bool IsNoop(const NnetComputation::Command &command)
static void GetIdentifiedMatrices(const std::vector< std::pair< int32, int32 > > &pair_list1, const std::vector< std::pair< int32, int32 > > &pair_list2, const unordered_map< std::pair< int32, int32 >, int32, PairHasher< int32 > > &pair_to_matrix, std::vector< int32 > *matrix_list1, std::vector< int32 > *matrix_list2)
void RenumberIndexesRanges()
This class is responsible for consolidating the model-update part of backprop commands, for components in (e.g.) recurrent networks that need to have many separate backprop commands, into more efficient single commands operating on consolidated data in larger matrices.
bool RequestIsDecomposable(const ComputationRequest &request, ComputationRequest *mini_request, int32 *num_n_values)
This function, used in 'shortcut' compilation where we first compile a smaller computation with the s...
const NnetComputation & computation_
std::vector< std::pair< int32, NnetComputation::Command > > new_commands_
ComputationRenumberer(NnetComputation *computation)
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
std::vector< IoSpecification > inputs
std::vector< MatrixInfo > matrices
DerivativeTimeLimiter(const Nnet &nnet, int32 min_deriv_time, int32 max_deriv_time, NnetComputation *computation)
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
static void GetPairToMatrixMap(std::vector< std::pair< int32, int32 > > &matrix_to_pair, unordered_map< std::pair< int32, int32 >, int32, PairHasher< int32 > > *pair_to_matrix)
NnetComputation * computation_
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq's (removes duplicates) from a vector.
void ExtendMatrices(NnetComputation *computation)
This is not really an optimization in itself but it can make things easier for class VariableMergingO...
void MapAddRowRangesCommand(NnetComputation::Command *c)
std::vector< Command > commands
std::shared_ptr< const NnetComputation > Find(const ComputationRequest &request)
MatrixStrideType stride_type
void LimitMatrices(const std::vector< bool > &will_limit)
void LimitDerivativeTimes(const Nnet &nnet, int32 min_deriv_time, int32 max_deriv_time, NnetComputation *computation)
void Write(std::ostream &os, bool binary) const
std::vector< bool > is_input_or_output_
bool CanLimitMatrix(const Analyzer &analyzer, int32 matrix_index) const
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
RowOpsSplitter(NnetComputation *computation)
int32 GetNewMatrixLocationInfo(int32 old_matrix_index, int32 old_row_index) const
This function is used in mapping row-indexes into matrices, from the old to the new computation...
void OptimizeMemoryCompression(const Nnet &nnet, int32 memory_compression_level, NnetComputation *computation)
Performs optimization to reduce memory usage where possible, making use of the kCompressMatrix and kD...
void ExpandIndexes(const std::vector< Index > &indexes, std::vector< Index > *indexes_expanded) const
std::vector< Access > accesses
Records the indexes of commands that access the matrix, and the type (read, read/write, write).
int32 compression_command_index
std::shared_ptr< const NnetComputation > Insert(const ComputationRequest &request, const NnetComputation *computation)
ComponentPrecomputedIndexes * data
void AddCommandsToComputation()
This function, called at the end of ConsolidateModelUpdate(), takes the commands that we have put in ...
void GetWholeSubmatrices(std::vector< int32 > *whole_submatrices) const
std::pair< int32, Index > Cindex
void RemoveIndexesMultiDuplicates()
ComputationExpander(const Nnet &nnet, const MiscComputationInfo &misc_info, const NnetComputation &computation, bool need_debug_info, int32 num_n_values, NnetComputation *expanded_computation)
int32 NewSubMatrix(int32 base_submatrix, int32 row_offset, int32 num_rows, int32 col_offset, int32 num_cols)
Convenience function used when adding new sub-matrices.
void DoMerge(int32 command_index, int32 s_to_keep, int32 m_to_discard)
std::vector< bool > submatrix_is_used_
ModelUpdateConsolidator(const Nnet &nnet, NnetComputation *computation)
static void CheckIdentifiedMatrices(const NnetComputation &computation, const std::vector< int32 > &list1, const std::vector< int32 > &list2, int32 time_difference)
std::vector< std::pair< int32, int32 > > matrix_to_pair_
ComputationCache(int32 cache_capacity)
static bool ListsAreEqualExceptForPossibleShift(const std::vector< std::pair< int32, int32 > > &a, const std::vector< std::pair< int32, int32 > > &b, int32 shift)
int64 GetMaxMemoryUse(const NnetComputation &computation)
bool need_model_derivative
void Read(std::istream &istream, bool binary)
void Init(const Nnet &nnet, const NnetComputation &computation)
NnetComputation * expanded_computation_
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
void Read(std::istream &is, bool binary)
int32 NumVariables() const
std::vector< std::vector< std::pair< int32, int32 > > > indexes_multi
static void FindActiveMatrices(const NnetComputation &computation, const Analyzer &analyzer, const std::vector< int32 > &splice_point_commands, std::vector< std::vector< int32 > > *active_matrices)
Given a list of command indexes ('splice_point_commands') which are expected to be command indexes of...
bool CanBeExtended(int32 dest_submatrix_index, int32 src_submatrix_index)
virtual int32 Properties() const =0
Return bitmask of the component's properties.
This class is used in the function OptimizeMemoryCompression(), once we determine that there is some ...
static bool SnipRangesRowOp(NnetComputation *computation, int32 command_index)
std::vector< Cindex > cindexes
std::vector< MatrixPruneInfo > matrix_prune_info_
static NnetComputation::SubMatrixInfo GetSubMatrixOfSubMatrix(const NnetComputation &computation, int32 submat_a, int32 submat_b)
This static function returns a SubMatrixInfo corresponding to replacing the matrix-index in a's "matr...
const NnetOptimizeOptions & config_
std::vector< std::vector< int32 > > matrix_to_submatrix_
int32 FirstNontrivialMatrixAccess(int32 m) const
Returns the first command that is not a zeroing command (kSetConst with alpha=0.0), that accesses any part of 'm' [note: allocation and deallocation do not count as matrix accesses].
void IdentifyIndexesMultiArgs(std::vector< NnetComputation::Command > *commands, std::vector< int32 *> *indexes_multi_args)
Identifies in the vector of commands, arguments that correspond to indexes into the computation's ind...
static void FindNumLeadingAndTrailingNegatives(const std::vector< int32 > &vec, int32 *num_leading_negatives, int32 *num_trailing_negatives)
void MapIndexesMultiCommand(NnetComputation::Command *c)
std::vector< SubMatrixInfo > submatrices
std::unordered_set< int32 > memos_to_delete_
void ConsolidateUpdateForComponent(int32 component, const std::vector< int32 > &backprop_commands)
This function, called from ConsolidateModelUpdate, is passed a list of commands that are all backprop...
size_t operator()(const NnetComputation::SubMatrixInfo &submat) const noexcept
void Check(const Nnet &nnet) const
CuCompressedMatrixType compression_type
std::vector< int32 > n_stride_
MemoryCompressionOptimizer(const Nnet &nnet, int32 memory_compression_level, int32 middle_command, NnetComputation *computation)
NnetComputation::SubMatrixInfo SubMatrixInfo
NnetComputation * computation_
static void CreateMatrixPairs(const NnetComputation &computation, std::vector< std::pair< int32, int32 > > *matrix_to_pair)
void ProcessMatrix(int32 m)
bool ReplaceRowWithMatrixOps(NnetComputation *computation)
This function detects cases where commands of type kCopyRows, kAddRows or kAddToRows can be converted...
void Read(std::istream &istream, bool binary)
int32 ConsolidateSubmatrices(const std::vector< int32 > &commands, const std::vector< int32 > &submatrices)
You call this function when you want to consolidate the values of a list of submatrices taken just pr...
NnetComputation * computation_
int32 memory_compression_level_
static bool FindFirstRepeat(const std::vector< std::vector< std::pair< int32, int32 > > > &active_pairs, int32 time_shift_per_segment, int32 *seg1, int32 *seg2)
std::vector< std::vector< Access > > variable_accesses
int32 deallocate_command
Index of the command that deallocates the matrix (which will be of type kDeallocMatrix or kSwapMatrix...
int32 LastWriteAccess(int32 s) const
Returns the last command-index that accesses any part of submatrix 's' as a write operation...
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
int32 DataInvalidatedCommand(int32 c, int32 s) const
Returns (the first command-index after 'c' that any part of submatrix 's' is written to); or if there...
void ComputeMatrixIsUsed()
std::vector< int32 > old_to_new_matrix_
std::vector< Index > output_indexes
void FixGotoLabel(NnetComputation *computation)
This function ensures that the arg1 of a final command of type kGotoLabel is the same as the command ...
int32 LastAccess(int32 s) const
Returns the last non-deallocation command that accesses any part of submatrix 's'; if there is no suc...
bool GetSplitInfo(std::vector< std::pair< int32, int32 > >::const_iterator begin, std::vector< std::pair< int32, int32 > >::const_iterator end, SingleSplitInfo *info)
void Optimize(const NnetOptimizeOptions &config, const Nnet &nnet, int32 max_output_time_in_request, NnetComputation *computation)
This is the top-level function for optimizing a computation.
Component * GetComponent(int32 c)
Return component indexed c. Not a copy; not owned by caller.
void GetPruneValues(int32 initial_submatrix, int32 new_submatrix, int32 *left_prune, int32 *right_prune) const
std::vector< int32 > splice_point_commands_
std::vector< PrecomputedIndexesInfo > component_precomputed_indexes
NnetComputation * computation_
int32 uncompression_command_index
void ComputeMatrixPruneInfo()
virtual std::string Type() const =0
Returns a string such as "SigmoidComponent", describing the type of the object.
void MapIndexesCommand(NnetComputation::Command *c)
void ComputeMatrixToSubmatrix(const NnetComputation &computation, std::vector< std::vector< int32 > > *mat_to_submat)
This function computes a vector 'mat_to_submat', indexed by matrix index, such that (*mat_to_submat)[...
void ExpandComputation(const Nnet &nnet, const MiscComputationInfo &misc_info, const NnetComputation &computation, bool need_debug_info, int32 num_n_values, NnetComputation *expanded_computation)
This function is used in 'shortcut' compilation to expand a computation that has been compiled for ex...
static int32 NormalizeCindexes(std::vector< Cindex > *cindexes)
int32 num_submatrices_new_
std::vector< SingleSplitInfo > splits
void ConsolidateModelUpdate(const Nnet &nnet, NnetComputation *computation)
This optimization consolidates the model-update part of backprop commands, for components in (e...
std::vector< Index > indexes
void ModifyCommand(NnetComputation::Command *command)
CacheType computation_cache_
std::vector< MatrixAccesses > matrix_accesses
int32 NumComponents() const
void MarkAsDirty(int32 s)
Marks the variables underlying submatrix 's' as dirty.
static void FormInfiniteLoop(int32 command1, int32 command2, NnetComputation *computation)
VariableMergingOptimizer(const NnetOptimizeOptions &config, const Nnet &nnet, NnetComputation *computation)
#define KALDI_ASSERT(cond)
std::vector< IoSpecification > outputs
void RemoveNoOps(NnetComputation *computation)
Removes commands of type kNoOperation in the computation.
void RemoveUnusedIndexesMulti()
NnetComputation * computation_
void AppendVariablesForSubmatrix(int32 submatrix_index, std::vector< int32 > *variable_indexes) const
void ConsolidateModelUpdate()
NnetComputation * computation_
static bool IoSpecificationIsDecomposable(const IoSpecification &io_spec, IoSpecification *mini_io_spec, int32 *num_n_values_out)
void IdentifyIndexesRangesArgs(std::vector< NnetComputation::Command > *commands, std::vector< int32 *> *indexes_ranges_args)
Identifies in the vector of commands, arguments that correspond to indexes into the computation's 'in...
int32 allocate_command
Index of the command that allocates the matrix (which will be of type kAllocMatrix or kSwapMatrix)...
void RemoveCommandsForUnusedMatrix(const Analyzer &analyzer, int32 m, NnetComputation *computation)
This function removes from 'computation' the commands accessing matrix 'm', which is assumed to be un...
bool is_input
true if this matrix is an input to the computation (i.e.
static void FindNumLeadingAndTrailingIdenticals(const std::vector< std::pair< int32, int32 > > &vec, int32 *num_leading_identicals, int32 *num_trailing_identicals)
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
void AppendDebugInfoForSubmatrix(int32 submatrix_index, NnetComputation::MatrixDebugInfo *debug_info) const
This function, called from ConsolidateSubmatrices, will update 'debug_info' by appending the correspo...
void MapSimpleMatrixCommand(NnetComputation::Command *c)
std::vector< int32 > submatrix_map_
void ComputePrecomputedIndexes()
std::vector< std::vector< int32 > > indexes
ComputationVariables variables
bool already_called_merge_variables_
bool SnipRowOps(NnetComputation *computation)
This function detects cases where commands of type kCopyRows, kAddRows, kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti or kAddRowRanges use indexes that start or end with -1's or equivalents, and replace them with similar commands that act on a sub-matrix of the matrices they are currently acting on.
static bool IndexesHaveSpecialStructure(const std::vector< int32 > &indexes, int32 *first_nonnegative_pos, int32 *first_nonnegative_value, int32 *num_nonnegative_indexes)
void ExpandRowsCommand(const NnetComputation::Command &c_in, NnetComputation::Command *c_out)
MatrixExtender(NnetComputation *computation)
static bool SnipSingleRowOp(NnetComputation *computation, int32 command_index)
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
bool GetNewSubmatLocationInfo(int32 submat_index, int32 old_row_index, int32 *new_row_index, int32 *n_stride) const
NnetComputation::MatrixInfo MatrixInfo
bool RowIsKept(int32 submatrix, int32 row_index) const
void IdentifyIndexesArgs(std::vector< NnetComputation::Command > *commands, std::vector< int32 *> *indexes_args)
Identifies in the vector of commands, arguments that correspond to indexes into the computation's 'in...
static bool SnipMultiRowOp(NnetComputation *computation, int32 command_index)
std::vector< NnetComputation::Command > final_deallocate_commands_
bool IsWholeMatrix(int32 submatrix_index) const
This struct exists to set up various pieces of analysis; it helps avoid the repetition of code where ...
std::pair< bool, bool > MayBeMerged(int32 command, int32 s1, int32 s2) const
This function returns a pair of bools saying whether we can do a (left and/or right) merge respective...
static void GetMatrixSwapOrder(const std::vector< int32 > &matrices1, const std::vector< int32 > &matrices2, std::vector< std::pair< int32, int32 > > *swaps)
static int32 FindTimeShift(const NnetComputation &computation)
std::vector< int32 > old_to_new_submatrix_
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
NnetComputation * computation_
A hashing function-object for pairs of ints.
void GetSubmatrixStrings(const Nnet &nnet, std::vector< std::string > *submat_strings) const
BaseFloat min_proportion_
std::vector< MultiIndexSplitInfo > split_info_
This class performs various kinds of specific analysis on top of what class Analyzer gives you immedi...
std::vector< std::vector< std::pair< int32, int32 > > > indexes_ranges