// nnet3/nnet-compile.cc

// Copyright 2015    Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <iterator>
#include <sstream>
#include "nnet3/nnet-compile.h"
#include "nnet3/nnet-compile-utils.h"
#include "nnet3/nnet-optimize.h"  // just for ConsolidateIoOperations().

25 
26 namespace kaldi {
27 namespace nnet3 {
28 
30  const ComputationRequest &request,
31  const Nnet &nnet): nnet_(nnet) {
32  requests_.push_back(&request);
33 }
34 
36  const std::vector<const ComputationRequest*> &requests,
37  const Nnet &nnet): requests_(requests), nnet_(nnet) {
38  KALDI_ASSERT(requests_.size() >= 1);
39  // We are currently not supporting getting model derivatives for multi-segment
40  // (online) computations.
41  if (requests_.size() != 1) {
42  for (size_t i = 0; i < requests_.size(); i++) {
43  KALDI_ASSERT(!requests_[i]->need_model_derivative);
44  KALDI_ASSERT(requests_[i]->store_component_stats ==
45  requests_[0]->store_component_stats);
46  }
47  }
48 }

void Compiler::CreateComputation(const CompilerOptions &opts,
                                 NnetComputation *computation) {
  computation->Clear();
  ComputationGraphBuilder builder(nnet_, &graph_);
  // note: there are only >1 segments in a 'looped' computation.
  for (size_t segment = 0; segment < requests_.size(); segment++) {
    builder.Compute(*(requests_[segment]));
    if (!builder.AllOutputsAreComputable()) {
      builder.ExplainWhyAllOutputsNotComputable();  // prints logging info
      KALDI_ERR << "Not all outputs were computable, cannot create computation.";
    }
    builder.Prune();
  }
  // see the function declaration's comment for more on the meaning of "phases"
  // (a phase will later be decomposed into one or more steps).  for each
  // segment s, phases_per_segment[s] is a list of phases; each phase is a list
  // of cindex_ids.
  std::vector<std::vector<std::vector<int32> > > phases_per_segment;
  ComputeComputationPhases(nnet_, graph_, &phases_per_segment);
  std::vector<std::vector<int32> > steps;
  steps.reserve(1000);

  // maps each step to the segment in which it appears.  in the normal case
  // (a non-looped computation), this is a vector of all zeros.
  std::vector<int32> step_to_segment;

  {
    // note: this class will output to 'steps' and to 'cindex_id_to_location_'.
    // it may incidentally change 'graph_' by adding a few cindexes.
    ComputationStepsComputer steps_computer(nnet_, &graph_, &steps,
                                            &cindex_id_to_location_);

    for (size_t segment = 0; segment < requests_.size(); segment++) {
      steps_computer.ComputeForSegment(*(requests_[segment]),
                                       phases_per_segment[segment]);
      while (step_to_segment.size() < steps.size())
        step_to_segment.push_back(segment);

      // the following two lines just exist to save memory, by deleting the
      // phases we just consumed.
      std::vector<std::vector<int32> > temp;
      phases_per_segment[segment].swap(temp);
    }
    steps_computer.Check();
  }
  std::vector<bool> deriv_needed;
  ComputeDerivNeeded(steps, step_to_segment, &deriv_needed);
  CreateStepInfo(deriv_needed, step_to_segment, &steps, computation);
  AddCommands(deriv_needed, step_to_segment, computation);
  // the following call reorders commands so that kAcceptInput and
  // kProvideOutput commands appear in the desired places.
  ConsolidateIoOperations(nnet_, computation);
  if (opts.output_debug_info)
    OutputDebugInfo(computation);
}
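
// A note on the flow of CreateComputation() above (a summary of the calls it
// makes, not extra functionality): the ComputationGraphBuilder works out which
// cindexes need to be computed and prunes away unused ones; the cindexes are
// then grouped into ordered "phases" and the phases decomposed into "steps";
// and AddCommands() turns the steps into the actual command sequence of the
// NnetComputation, which ConsolidateIoOperations() then reorders slightly.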

void Compiler::AddCommands(const std::vector<bool> &deriv_needed,
                           const std::vector<int32> &step_to_segment,
                           NnetComputation *computation) {
  computation->need_model_derivative = requests_[0]->need_model_derivative;
  int32 arbitrary_factor = 8;
  computation->commands.reserve(computation->matrices.size()
                                * arbitrary_factor);

  std::vector<int32> whole_submatrices;
  computation->GetWholeSubmatrices(&whole_submatrices);
  AllocateMatrices(whole_submatrices, computation);
  SetUpPrecomputedIndexes(step_to_segment, computation);
  int32 num_steps = steps_.size();
  for (int32 step = 0; step < num_steps; step++) {
    CompileForward(step, computation);
    if (step + 1 < static_cast<int32>(step_to_segment.size()) &&
        step_to_segment[step + 1] != step_to_segment[step]) {
      // insert a marker that separates segments of the computation.
      computation->commands.push_back(
          NnetComputation::Command(kNoOperationMarker));
    }
  }

  // mark the end of the forward phase.
  computation->commands.push_back(
      NnetComputation::Command(kNoOperationMarker));

  for (int32 step = num_steps - 1; step >= 0; step--)
    if (deriv_needed[step])
      CompileBackward(step, computation);

  DeallocateMatrices(whole_submatrices, step_to_segment, computation);
}
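
// The command sequence produced by AddCommands() above is laid out as:
// matrix allocation and zeroing commands; the per-step forward commands, with
// kNoOperationMarker separating segments and marking the end of the forward
// phase; the backward commands, in reverse step order, for the steps that
// need derivatives; and finally the matrix deallocation commands.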


void Compiler::ComputeStepDependencies(
    const std::vector<int32> &this_step,
    int32 step_index,
    unordered_set<int32> *dep_steps) {
  dep_steps->clear();
  if (this_step.empty())
    return;
  // steps always have a single node index, so we can pick the first.
  int32 node_index = graph_.cindexes[this_step[0]].first;
  if (nnet_.IsComponentNode(node_index)) {
    // there is only one step that a component step depends on, and it's the
    // immediately preceding step (the component-input step).
    KALDI_ASSERT(step_index > 0);
    dep_steps->insert(step_index - 1);
    return;
  }
  std::vector<int32>::const_iterator step_iter = this_step.begin(),
      step_end = this_step.end();
  int32 prev_input_step = -1;  // this is an optimization for speed.
  for (; step_iter != step_end; ++step_iter) {
    int32 cindex_id = *step_iter;
    const std::vector<int32> &dep = graph_.dependencies[cindex_id];
    std::vector<int32>::const_iterator iter = dep.begin(), end = dep.end();
    for (; iter != end; ++iter) {
      int32 dep_cindex_id = *iter,
          input_step = cindex_id_to_location_[dep_cindex_id].first;
      if (input_step != prev_input_step) {  // optimization.
        prev_input_step = input_step;
        dep_steps->insert(input_step);
      }
    }
  }
}

void Compiler::ComputeDerivNeeded(
    const std::vector<std::vector<int32> > &steps,
    const std::vector<int32> &step_to_segment,
    std::vector<bool> *deriv_needed) {
  KALDI_ASSERT(steps.size() == step_to_segment.size() &&
               step_to_segment[0] == 0 &&
               step_to_segment.back() + 1 == requests_.size());
  deriv_needed->clear();
  int32 num_steps = steps.size();
  deriv_needed->resize(num_steps, false);

  for (int32 step = 0; step < num_steps; step++) {
    const std::vector<int32> &this_step = steps[step];
    if (this_step.empty())  // empty steps are theoretically possible, e.g.
      continue;             // if a non-simple Component requires no input.
    int32 cindex_id = this_step[0];
    int32 node_index = graph_.cindexes[cindex_id].first;
    bool is_input = graph_.is_input[cindex_id];

    std::string node_name = nnet_.GetNodeNames()[node_index];
    unordered_set<int32> input_steps;
    ComputeStepDependencies(this_step, step, &input_steps);

    unordered_set<int32>::iterator iter = input_steps.begin(),
        end = input_steps.end();
    // if some step that we depend on needs a derivative, we need the
    // derivative.
    for (; iter != end; ++iter) {
      int32 dep_step = *iter;
      KALDI_ASSERT(dep_step < step);
      if ((*deriv_needed)[dep_step])
        (*deriv_needed)[step] = true;
    }
    // if this step is an input and the user requested the derivative w.r.t.
    // that input, we need the derivative.
    const ComputationRequest &request = *(requests_[step_to_segment[step]]);

    if (is_input) {
      int32 input_index = request.IndexForInput(node_name);
      KALDI_ASSERT(input_index != -1);
      if (request.inputs[input_index].has_deriv)
        (*deriv_needed)[step] = true;
    }
    // if this step is an output and the user is providing the derivative
    // w.r.t. that output, we need a place to store the derivative, so we set
    // (*deriv_needed)[step] to true.
    if (nnet_.IsOutputNode(node_index)) {
      int32 output_index = request.IndexForOutput(node_name);
      KALDI_ASSERT(output_index != -1);
      if (request.outputs[output_index].has_deriv)
        (*deriv_needed)[step] = true;
    }

    // If this is an updatable Component node with a nonzero learning rate and
    // the user requested model derivatives (e.g. during training), we need
    // this step's derivative.
    if (nnet_.IsComponentNode(node_index) && request.need_model_derivative) {
      const NetworkNode &node = nnet_.GetNode(node_index);
      const Component *c = nnet_.GetComponent(node.u.component_index);
      if (c->Properties() & kUpdatableComponent) {
        const UpdatableComponent *u = dynamic_cast<const UpdatableComponent*>(c);
        KALDI_ASSERT(u != NULL);
        if (u->LearningRate() != 0)
          (*deriv_needed)[step] = true;
      }
    }
  }
  if (GetVerboseLevel() >= 5) {
    std::ostringstream os;
    os << "deriv_needed = ";
    for (size_t i = 0; i < deriv_needed->size(); i++)
      os << ((*deriv_needed)[i] ? "t" : "f");
    os << "\n";
    KALDI_VLOG(5) << os.str();
  }
}
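
// For example (hypothetical): with five steps, the verbose log above might
// read "deriv_needed = ffttf", meaning that only steps 2 and 3 (zero-based)
// will have derivative matrices allocated.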

MatrixStrideType Compiler::GetStrideType(int32 node_index) const {
  int32 component_node_index;
  bool is_input;
  if (nnet_.IsComponentInputNode(node_index)) {
    // this node is for the input to a component.
    component_node_index = node_index + 1;
    is_input = true;
  } else if (nnet_.IsComponentNode(node_index)) {
    component_node_index = node_index;
    is_input = false;
  } else {
    return kDefaultStride;
  }
  const NetworkNode &node = nnet_.GetNode(component_node_index);
  const Component *c = nnet_.GetComponent(node.u.component_index);
  if (is_input) {
    return (c->Properties() & kInputContiguous) ?
        kStrideEqualNumCols : kDefaultStride;
  } else {
    return (c->Properties() & kOutputContiguous) ?
        kStrideEqualNumCols : kDefaultStride;
  }
}
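
// Note on the stride types returned above: kStrideEqualNumCols requests a
// matrix laid out with no gap between rows (row stride == num-cols), which is
// what components that declare the kInputContiguous or kOutputContiguous
// properties require; kDefaultStride lets the allocator choose the stride.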


// Note: "by_step" is an input but is passed as a pointer because this
// function destroys it.
void Compiler::CreateStepInfo(
    const std::vector<bool> &deriv_needed,
    const std::vector<int32> &step_to_segment,
    std::vector<std::vector<int32> > *by_step,
    NnetComputation *computation) {
  KALDI_ASSERT(!by_step->empty());
  int32 num_steps = by_step->size();
  steps_.resize(num_steps);
  for (int32 step = 0; step < num_steps; step++) {
    StepInfo &this_info = steps_[step];
    this_info.output_cindex_ids.swap((*by_step)[step]);
    this_info.segment = step_to_segment[step];
    int32 num_ids = this_info.output_cindex_ids.size();
    this_info.output_indexes.resize(num_ids);
    for (int32 row_index = 0; row_index < num_ids; row_index++)
      this_info.output_indexes[row_index] =
          graph_.cindexes[this_info.output_cindex_ids[row_index]].second;
    if (num_ids > 0) {
      // the node-ids of all Cindexes in a step are the same, so just use the
      // first one.
      this_info.node_index =
          graph_.cindexes[this_info.output_cindex_ids.front()].first;
    } else {
      // it's possible to have an empty step if it's the component-input step
      // of a GeneralComponent that does not always have dependencies, such as
      // the ConstantFunctionComponent.  This is just a kind of placeholder; it
      // will generate no commands.  The next statement works because the next
      // step will be the propagate for that Component, whose node-index is one
      // more than the component-input node.
      KALDI_ASSERT((step+1) < by_step->size() && !(*by_step)[step+1].empty());
      this_info.node_index =
          graph_.cindexes[(*by_step)[step+1][0]].first - 1;
      KALDI_ASSERT(this_info.node_index >= 0);
      continue;  // we don't need to do anything else for this step.
    }
    const NetworkNode &node = nnet_.GetNode(this_info.node_index);
    int32 num_rows = num_ids, num_cols = node.Dim(nnet_);

    if (node.node_type != kDimRange) {
      MatrixStrideType stride_type = GetStrideType(this_info.node_index);
      this_info.value = computation->NewMatrix(num_rows, num_cols,
                                               stride_type);
      if (deriv_needed[step])
        this_info.deriv = computation->NewMatrix(num_rows, num_cols,
                                                 stride_type);
    } else {
      // kDimRange.  Will just be a sub-matrix of a Component or Input node.
      std::vector<int32>::const_iterator
          iter = this_info.output_cindex_ids.begin(),
          end = this_info.output_cindex_ids.end();
      int32 source_cindex_id = -1;
      for (; iter != end; ++iter) {
        int32 cindex_id = *iter;
        if (!graph_.dependencies[cindex_id].empty()) {
          KALDI_ASSERT(graph_.dependencies[cindex_id].size() == 1);
          source_cindex_id = graph_.dependencies[cindex_id][0];
          break;
        }
      }
      KALDI_ASSERT(source_cindex_id >= 0);
      int32 input_step = cindex_id_to_location_[source_cindex_id].first;
      KALDI_ASSERT(this_info.output_cindex_ids.size() ==
                   steps_[input_step].output_cindex_ids.size());
      KALDI_ASSERT(input_step >= 0 && input_step < step);
      KALDI_ASSERT(this_info.output_indexes ==
                   steps_[input_step].output_indexes);
      this_info.value = computation->NewSubMatrix(steps_[input_step].value,
                                                  0, -1,
                                                  node.dim_offset, node.dim);
      if (deriv_needed[step])
        this_info.deriv = computation->NewSubMatrix(steps_[input_step].deriv,
                                                    0, -1,
                                                    node.dim_offset, node.dim);
    }
    if (node.node_type == kDescriptor) {
      // we have a couple of things to do: set up input_locations_list, which
      // says where we copy the data from, and also set up value_parts and
      // possibly deriv_parts.
      const Descriptor &desc = node.descriptor;
      int32 num_parts = desc.NumParts();
      KALDI_ASSERT(num_parts > 0);
      // set up input_locations_list.
      this_info.input_locations_list.resize(num_parts);
      for (int32 part = 0; part < num_parts; part++)
        ComputeInputLocationsList(step, part,
                                  &(this_info.input_locations_list[part]));
      // set up value_parts and deriv_parts.
      if (num_parts == 1) {
        this_info.value_parts.push_back(this_info.value);
        if (deriv_needed[step])
          this_info.deriv_parts.push_back(this_info.deriv);
      } else {  // num_parts > 1.
        int32 cur_dim_offset = 0;
        // We have multiple parts, so we need to set up sub-matrices.
        this_info.value_parts.resize(num_parts);
        if (deriv_needed[step])
          this_info.deriv_parts.resize(num_parts);
        for (int32 p = 0; p < num_parts; p++) {
          const SumDescriptor &this_part = desc.Part(p);
          int32 this_dim = this_part.Dim(nnet_);
          this_info.value_parts[p] =
              computation->NewSubMatrix(this_info.value,
                                        0, -1,
                                        cur_dim_offset, this_dim);
          if (deriv_needed[step])
            this_info.deriv_parts[p] =
                computation->NewSubMatrix(this_info.deriv,
                                          0, -1,
                                          cur_dim_offset, this_dim);
          cur_dim_offset += this_dim;
        }
        KALDI_ASSERT(cur_dim_offset == desc.Dim(nnet_));
      }
    }
    KALDI_ASSERT(static_cast<int32>(this_info.output_cindex_ids.size()) ==
                 computation->submatrices[this_info.value].num_rows);
  }
}

bool Compiler::IsInputStep(int32 step) const {
  KALDI_ASSERT(step >= 0);
  if (step >= static_cast<int32>(steps_.size()))
    return false;
  const StepInfo &step_info = steps_[step];
  const NetworkNode &node = nnet_.GetNode(step_info.node_index);
  return (node.node_type == kInput);
}

void Compiler::CompileForward(int32 step,
                              NnetComputation *computation) const {
  KALDI_ASSERT(step < static_cast<int32>(steps_.size()));
  const StepInfo &step_info = steps_[step];
  const NetworkNode &node = nnet_.GetNode(step_info.node_index);
  switch (node.node_type) {
    case kInput:  // Note: input nodes appear before other node types.
      AddForwardStepInput(step, computation);
      if (!IsInputStep(step + 1))  // Make sure forward computation is nonempty.
        computation->commands.push_back(
            NnetComputation::Command(kNoOperationMarker));
      break;
    case kDimRange:
      break;  // Nothing to do.
    case kComponent:
      AddForwardStepComponent(step, computation);
      break;
    case kDescriptor:
      CompileForwardDescriptor(step, computation);
      break;
    default:
      KALDI_ERR << "Invalid node type";
  }
}


void Compiler::CompileForwardDescriptor(
    int32 step, NnetComputation *computation) const {
  int32 num_parts = steps_[step].value_parts.size();
  for (int32 part = 0; part < num_parts; part++)
    CompileForwardSumDescriptor(step, part, computation);
  const StepInfo &step_info = steps_[step];
  if (nnet_.IsOutputNode(step_info.node_index)) {
    // If the node is an output then we need to add commands to provide the
    // output to the user, and possibly to get derivatives w.r.t. the output
    // from the user.
    int32 node_index = step_info.node_index,
        submatrix_index = step_info.value;
    KALDI_ASSERT(computation->IsWholeMatrix(submatrix_index));
    NnetComputation::Command c(kProvideOutput, submatrix_index, node_index);
    computation->commands.push_back(c);
  }
}


// The output vector "locations" is indexed first by output row-index i
// (i.e. the index of output_indexes or output_cindex_ids), and then is a list
// of input locations for that row-index, sorted in the natural order of
// Cindexes (but not necessarily unique).  The semantics is that the i'th row
// of the output becomes a sum over the rows in the i'th list (or zero if that
// list is empty).  These locations will be pairs [step-index, row-index].
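// For instance (hypothetical example): locations[2] == { (1, 5), (3, 0) }
// would mean that row 2 of this step's output is the sum of row 5 of step 1's
// output and row 0 of step 3's output.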
void Compiler::ComputeInputLocationsList(
    int32 step, int32 part_index,
    std::vector<std::vector<std::pair<int32, int32> > > *submat_locations_list)
    const {

  KALDI_ASSERT(static_cast<size_t>(step) < steps_.size());
  const StepInfo &step_info = steps_[step];
  const std::vector<Index> &output_indexes = step_info.output_indexes;
  const NetworkNode &node = nnet_.GetNode(step_info.node_index);
  const SumDescriptor &descriptor = node.descriptor.Part(part_index);
  int32 num_indexes = output_indexes.size();
  submat_locations_list->clear();
  submat_locations_list->resize(num_indexes);

  for (int32 i = 0; i < num_indexes; i++) {
    const Index &index = output_indexes[i];
    std::vector<std::pair<int32, int32> > &this_locations_list =
        (*submat_locations_list)[i];
    if (index.t != kNoTime) {
      // a real Index, not a 'blank' one.
      // ('blank' indexes are inserted by some non-simple Components to
      // satisfy internal constraints.)
      std::vector<int32> input_cindex_ids;
      std::vector<Cindex> input_cindexes;
      CindexSet cindex_set(graph_);
      bool ans = descriptor.IsComputable(index, cindex_set, &input_cindexes);
      // earlier compilation stages should have checked that it is computable,
      // and the graph should still contain required inputs.
      KALDI_ASSERT(ans);
      std::sort(input_cindexes.begin(), input_cindexes.end());
      int32 size = input_cindexes.size();
      input_cindex_ids.resize(size);
      for (int32 j = 0; j < size; j++) {
        int32 c = graph_.GetCindexId(input_cindexes[j]);
        KALDI_ASSERT(c != -1);
        input_cindex_ids[j] = c;
      }
      this_locations_list.resize(size);
      for (int32 j = 0; j < size; j++)
        this_locations_list[j] = cindex_id_to_location_[input_cindex_ids[j]];
    } else {
      this_locations_list.clear();
    }
  }
}

void Compiler::ComputeValueSubmatLocationsList(
    const std::vector<std::vector<std::pair<int32, int32> > >
    &input_locations_list,
    std::vector<std::vector<std::pair<int32, int32> > >
    *submat_locations_list) const {
  submat_locations_list->clear();
  submat_locations_list->resize(input_locations_list.size());
  int32 size = submat_locations_list->size();
  for (int32 i = 0; i < size; i++) {
    const std::vector<std::pair<int32, int32> > &this_list =
        input_locations_list[i];
    std::vector<std::pair<int32, int32> > &this_submat_list =
        (*submat_locations_list)[i];
    this_submat_list.resize(this_list.size());
    std::vector<std::pair<int32, int32> >::const_iterator
        input_iter = this_list.begin(), input_end = this_list.end();
    std::vector<std::pair<int32, int32> >::iterator iter =
        this_submat_list.begin();
    for (; input_iter != input_end; ++input_iter, ++iter) {
      int32 step = input_iter->first,
          value_submat_index = steps_[step].value,
          row = input_iter->second;
      iter->first = value_submat_index;
      iter->second = row;
    }
  }
}


void Compiler::ComputeDerivSubmatLocationsList(
    const std::vector<std::vector<std::pair<int32, int32> > >
    &input_locations_list,
    std::vector<std::vector<std::pair<int32, int32> > >
    *submat_locations_list) const {
  submat_locations_list->clear();
  submat_locations_list->resize(input_locations_list.size());
  int32 size = submat_locations_list->size();
  for (int32 i = 0; i < size; i++) {
    const std::vector<std::pair<int32, int32> > &this_list =
        input_locations_list[i];
    std::vector<std::pair<int32, int32> > &this_submat_list =
        (*submat_locations_list)[i];
    this_submat_list.reserve(this_list.size());
    std::vector<std::pair<int32, int32> >::const_iterator
        input_iter = this_list.begin(), input_end = this_list.end();
    for (; input_iter != input_end; ++input_iter) {
      int32 step = input_iter->first,
          deriv_submat_index = steps_[step].deriv,
          row = input_iter->second;
      if (deriv_submat_index > 0)
        this_submat_list.push_back(std::pair<int32, int32>(deriv_submat_index,
                                                           row));
    }
  }
}



BaseFloat Compiler::SplitByScale(
    const SumDescriptor &descriptor,
    const std::vector<std::vector<std::pair<int32, int32> > > &input_locations_list,
    std::vector<std::pair<BaseFloat,
                          std::vector<std::vector<std::pair<int32, int32> > > > >
    *split_locations_lists) const {
  split_locations_lists->clear();
  // alpha_to_nodes maps from the scale alpha to the list of nodes which are
  // given that scale.
  std::map<BaseFloat, std::vector<int32> > alpha_to_nodes;
  {  // This block computes `alpha_to_nodes`.
    std::vector<int32> nodes;
    descriptor.GetNodeDependencies(&nodes);
    SortAndUniq(&nodes);
    // Now `nodes` is a list of the graph node indexes that are referred to
    // in the descriptor.  E.g. if the Descriptor represents
    // 'Sum(tdnn1, Offset(tdnn2, -2))' then `nodes` would contain the
    // integer node indexes for graph-nodes 'tdnn1' and 'tdnn2'.
    for (size_t i = 0; i < nodes.size(); i++) {
      int32 node = nodes[i];
      BaseFloat alpha = descriptor.GetScaleForNode(node);
      KALDI_ASSERT(alpha - alpha == 0.0);  // check alpha is finite
                                           // (not infinity or NaN).
      alpha_to_nodes[alpha].push_back(node);
    }
  }
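  // For example (hypothetical): if the descriptor were
  // 'Sum(Scale(2.0, tdnn1), tdnn2)', alpha_to_nodes would map 2.0 to
  // {node-index of tdnn1} and 1.0 to {node-index of tdnn2}, so we would have
  // to split; whereas for 'Sum(tdnn1, tdnn2)' both nodes get scale 1.0 and we
  // take the early-return branch below.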

  if (alpha_to_nodes.size() == 1) {
    // If all the alpha values are the same we treat it as a special case
    // for efficiency, to avoid a redundant copy of the contents of
    // 'input_locations_list'.
    return alpha_to_nodes.begin()->first;
  }

  // `steps_used` will be a list of all step indexes that appear as `.first`
  // elements in `input_locations_list`.
  unordered_set<int32> steps_used;
  {  // This block computes `steps_used`.
    int32 cur_step = -1000;
    std::vector<std::vector<std::pair<int32, int32> > >::const_iterator
        iter = input_locations_list.begin(),
        end = input_locations_list.end();
    for (; iter != end; ++iter) {
      std::vector<std::pair<int32, int32> >::const_iterator
          pair_iter = iter->begin(),
          pair_end = iter->end();
      for (; pair_iter != pair_end; ++pair_iter) {
        if (pair_iter->first != cur_step) {
          cur_step = pair_iter->first;
          steps_used.insert(cur_step);
        }
      }
    }
  }

  // `node_to_steps` will be a map from graph node index to the list of steps
  // which are present in `steps_used` and which are associated with that graph
  // node.
  std::map<int32, std::vector<int32> > node_to_steps;
  {  // This block computes `node_to_steps`.
    unordered_set<int32>::const_iterator
        step_iter = steps_used.begin(), step_end = steps_used.end();
    for (; step_iter != step_end; ++step_iter) {
      int32 step_index = *step_iter;
      KALDI_ASSERT(static_cast<size_t>(step_index) < steps_.size());
      int32 node_index = steps_[step_index].node_index;
      node_to_steps[node_index].push_back(step_index);
    }
  }

  int32 num_rows = input_locations_list.size();
  split_locations_lists->resize(alpha_to_nodes.size());
  // `step_to_locations_index` will map from the step-index to the index into
  // `split_locations_lists`; each index is associated with a different value
  // of the scale `alpha`.
  std::vector<int32> step_to_locations_index(steps_.size(), -1);
  {  // This block computes `step_to_locations_index` and also sets the `alpha`
     // values which are present as (*split_locations_lists)[*].first.
    std::map<BaseFloat, std::vector<int32> >::const_iterator
        iter = alpha_to_nodes.begin(), end = alpha_to_nodes.end();
    int32 split_locations_index = 0;
    for (; iter != end; ++iter, ++split_locations_index) {
      BaseFloat alpha = iter->first;
      const std::vector<int32> &nodes = iter->second;
      (*split_locations_lists)[split_locations_index].first = alpha;
      (*split_locations_lists)[split_locations_index].second.resize(num_rows);
      for (size_t i = 0; i < nodes.size(); i++) {
        int32 node_index = nodes[i];
        KALDI_ASSERT(node_to_steps.count(node_index) != 0);
        const std::vector<int32> &steps = node_to_steps[node_index];
        for (size_t j = 0; j < steps.size(); j++) {
          int32 step_index = steps[j];
          KALDI_ASSERT(step_index >= 0 &&
                       step_to_locations_index[step_index] == -1);
          step_to_locations_index[step_index] = split_locations_index;
        }
      }
    }
  }

  {  // This block populates '(*split_locations_lists)[*].second' with the
     // split-by-alpha version of 'input_locations_list'.
    for (int32 r = 0; r < num_rows; r++) {
      const std::vector<std::pair<int32, int32> > &this_list =
          input_locations_list[r];
      std::vector<std::pair<int32, int32> >::const_iterator
          pair_iter = this_list.begin(),
          pair_end = this_list.end();
      for (; pair_iter != pair_end; ++pair_iter) {
        int32 step = pair_iter->first,
            split_locations_index = step_to_locations_index[step];
        (*split_locations_lists)[split_locations_index].second[r].push_back(
            *pair_iter);
      }
    }
  }
  return std::numeric_limits<BaseFloat>::infinity();
}
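
// Note on the return value of SplitByScale() above: a finite return value
// means all terms share that single scale and 'split_locations_lists' was
// left empty; +infinity means the split was performed.  Callers distinguish
// the two cases with the expression 'shared_alpha - shared_alpha == 0.0',
// which is true for any finite value but false for infinity (infinity minus
// infinity is NaN, and NaN compares unequal to everything).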


void Compiler::CompileForwardSumDescriptor(
    int32 step, int32 part_index, NnetComputation *computation) const {
  const StepInfo &step_info = steps_[step];
  int32 value_submatrix_index = step_info.value_parts[part_index];
  const SumDescriptor &descriptor =
      nnet_.GetNode(step_info.node_index).descriptor.Part(part_index);

  BaseFloat offset_term = descriptor.GetScaleForNode(-1);
  if (offset_term != 0.0) {
    computation->commands.push_back(
        NnetComputation::Command(offset_term, kSetConst,
                                 value_submatrix_index));
    // if offset_term == 0.0 there's no need to do this, because
    // we zeroed the matrix when we allocated it; search in this
    // file for kSetConst to see the code.  If we are redundantly
    // setting the value, this will later be optimized out (in the
    // common cases).
  }

  // `input_locations_list` is a vector indexed by row-index, with each element
  // being a list of pairs (step, row_index) representing terms in a weighted
  // sum.
  const std::vector<std::vector<std::pair<int32, int32> > >
      &input_locations_list = step_info.input_locations_list[part_index];

  // `split_locations_lists` is a vector of pairs `(alpha, locations_list)`,
  // where alpha is the scale with which these items appear in the summation
  // and `locations_list` has the same format as `input_locations_list`.
  std::vector<std::pair<BaseFloat,
      std::vector<std::vector<std::pair<int32, int32> > > > > split_locations_lists;
  BaseFloat shared_alpha = SplitByScale(descriptor, input_locations_list,
                                        &split_locations_lists);
  if (shared_alpha - shared_alpha == 0.0) {
    // If the returned value 'shared_alpha' is finite, this indicates that
    // there was no need to split up 'input_locations_list' because all the
    // alpha values (scales) were the same.  We treat this case specially for
    // efficiency reasons; this branch will be the most common branch.
    std::vector<std::vector<std::pair<int32, int32> > > submat_locations_list;
    ComputeValueSubmatLocationsList(input_locations_list,
                                    &submat_locations_list);
    CompileForwardFromSubmatLocationsList(value_submatrix_index,
                                          shared_alpha,
                                          submat_locations_list,
                                          computation);
  } else {
    for (size_t i = 0; i < split_locations_lists.size(); i++) {
      BaseFloat this_alpha = split_locations_lists[i].first;
      KALDI_ASSERT(this_alpha - this_alpha == 0.0);  // check it's finite.
      std::vector<std::vector<std::pair<int32, int32> > > submat_locations_list;
      ComputeValueSubmatLocationsList(split_locations_lists[i].second,
                                      &submat_locations_list);
      CompileForwardFromSubmatLocationsList(value_submatrix_index,
                                            this_alpha,
                                            submat_locations_list,
                                            computation);
    }
  }
}

void Compiler::CompileForwardFromIndexes(
    int32 value_submatrix_index,
    int32 input_submatrix_index,
    BaseFloat alpha,
    const std::vector<int32> &indexes,
    NnetComputation *computation) const {

  int32 input_num_rows =
      computation->submatrices[input_submatrix_index].num_rows,
      num_rows = indexes.size();
  if (input_num_rows == num_rows) {
    int32 i;
    for (i = 0; i < num_rows; i++)
      if (indexes[i] != i)
        break;
    if (i == num_rows) {  // Simplest case: just matrix addition.
      computation->commands.push_back(
          NnetComputation::Command(alpha, kMatrixAdd,
                                   value_submatrix_index,
                                   input_submatrix_index));
      return;
    }
  }
  // if we got to here, it's not just a case of matrix-copy or matrix-add,
  // but it's still from a single source matrix.
  int32 indexes_index = computation->indexes.size();
  computation->indexes.push_back(indexes);
  computation->commands.push_back(
      NnetComputation::Command(alpha, kAddRows, value_submatrix_index,
                               input_submatrix_index, indexes_index));
  return;
}
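
// Reminder of the kAddRows semantics used above (see the definition of
// kAddRows in nnet-computation.h): for each row i of the destination,
// dest.Row(i) += alpha * src.Row(indexes[i]), with indexes[i] == -1 meaning
// "leave that row untouched".  E.g. (hypothetical) indexes = {2, 0, -1} adds
// alpha times src row 2 to dest row 0 and alpha times src row 0 to dest
// row 1, and leaves dest row 2 unchanged.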

void Compiler::CompileForwardFromSubmatLocations(
    int32 value_submatrix_index,
    BaseFloat alpha,
    const std::vector<std::pair<int32, int32> > &submat_locations,
    NnetComputation *computation) const {

  int32 input_submatrix_index = -1;
  std::vector<int32> indexes;
  if (ConvertToIndexes(submat_locations, &input_submatrix_index, &indexes)) {
    CompileForwardFromIndexes(value_submatrix_index,
                              input_submatrix_index,
                              alpha,
                              indexes,
                              computation);
    return;
  } else {
    // There are multiple source matrices.
    int32 indexes_multi_index = computation->indexes_multi.size();
    computation->indexes_multi.push_back(submat_locations);
    computation->commands.push_back(
        NnetComputation::Command(alpha, kAddRowsMulti,
                                 value_submatrix_index,
                                 indexes_multi_index));
    return;
  }
}
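
// Note: ConvertToIndexes() (declared in nnet-compile-utils.h) succeeds when
// all the .first elements of 'submat_locations' are a single submatrix index
// (or -1); in that case it outputs that submatrix index plus the list of
// .second row indexes, which lets us take the cheaper single-source kAddRows
// path above.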

void Compiler::CompileForwardFromSubmatLocationsList(
    int32 value_submatrix_index,
    BaseFloat alpha,
    const std::vector<std::vector<std::pair<int32, int32> > > &submat_lists,
    NnetComputation *computation) const {
  std::vector<std::vector<std::pair<int32, int32> > > split_lists;
  SplitLocations(submat_lists, &split_lists);
  int32 size = split_lists.size();
  // note: `size` may be zero in unusual cases, so don't assert that it's
  // nonzero.
  for (int32 i = 0; i < size; i++)
    CompileForwardFromSubmatLocations(
        value_submatrix_index,
        alpha,
        split_lists[i],
        computation);
}


void Compiler::CompileBackwardFromSubmatLocationsList(
    int32 deriv_submatrix_index,
    BaseFloat alpha,
    const std::vector<std::vector<std::pair<int32, int32> > > &submat_lists,
    NnetComputation *computation) const {
  std::vector<std::vector<std::pair<int32, int32> > > split_lists;
  SplitLocationsBackward(submat_lists, &split_lists);
  int32 size = split_lists.size();  // size may be zero e.g. for unused outputs.
  for (int32 i = 0; i < size; i++)
    CompileBackwardFromSubmatLocations(
        deriv_submatrix_index,
        alpha,
        split_lists[i],
        computation);
}


void Compiler::CompileBackwardSumDescriptor(
    int32 step, int32 part_index, NnetComputation *computation) const {
  const StepInfo &step_info = steps_[step];
  int32 deriv_submatrix_index = step_info.deriv_parts[part_index];
  KALDI_ASSERT(deriv_submatrix_index > 0);  // or we should not have been called.
  const SumDescriptor &descriptor =
      nnet_.GetNode(step_info.node_index).descriptor.Part(part_index);
  // Note: the `offset_term` that appeared in the forward computation here does
  // not come into the backward computation.

  // `input_locations_list` is a vector indexed by row-index, with each element
  // being a list of pairs (step, row_index) representing terms in a weighted
  // sum.
  const std::vector<std::vector<std::pair<int32, int32> > >
      &input_locations_list = step_info.input_locations_list[part_index];

  // `split_locations_lists` is a vector of pairs `(alpha, locations_list)`,
  // where alpha is the scale with which these items appear in the summation
  // and `locations_list` has the same format as `input_locations_list`.
  std::vector<std::pair<BaseFloat,
      std::vector<std::vector<std::pair<int32, int32> > > > > split_locations_lists;
  BaseFloat shared_alpha = SplitByScale(descriptor, input_locations_list,
                                        &split_locations_lists);
  if (shared_alpha - shared_alpha == 0.0) {
    // If the returned value 'shared_alpha' is finite, this indicates that
    // there was no need to split up 'input_locations_list' because all the
    // alpha values (scales) were the same.  We treat this case specially for
    // efficiency reasons; this branch will be the most common branch.
    std::vector<std::vector<std::pair<int32, int32> > > submat_locations_list;
    ComputeDerivSubmatLocationsList(input_locations_list,
                                    &submat_locations_list);
    CompileBackwardFromSubmatLocationsList(deriv_submatrix_index,
                                           shared_alpha,
                                           submat_locations_list,
                                           computation);
  } else {
    for (size_t i = 0; i < split_locations_lists.size(); i++) {
      BaseFloat this_alpha = split_locations_lists[i].first;
      KALDI_ASSERT(this_alpha - this_alpha == 0.0);  // check it's finite.
      std::vector<std::vector<std::pair<int32, int32> > > submat_locations_list;
      ComputeDerivSubmatLocationsList(split_locations_lists[i].second,
                                      &submat_locations_list);
      CompileBackwardFromSubmatLocationsList(deriv_submatrix_index,
                                             this_alpha,
                                             submat_locations_list,
                                             computation);
    }
  }
}



void Compiler::CompileBackwardFromSubmatLocations(
    int32 deriv_submatrix_index,
    BaseFloat alpha,
    const std::vector<std::pair<int32, int32> > &submat_locations,
    NnetComputation *computation) const {
  // This function creates a command to handle an individual piece of the
  // Descriptor, for backprop.  Note: because the backprop case is a little
  // trickier to implement efficiently on the GPU, there may be cases
  // which we will refuse to implement backprop for if we get here.

  int32 first_value;
  std::vector<int32> second_values;
  if (ConvertToIndexes(submat_locations, &first_value,
                       &second_values)) {
    int32 input_deriv_submatrix_index = first_value;
    CompileBackwardFromIndexes(deriv_submatrix_index,
                               input_deriv_submatrix_index,
                               alpha,
                               second_values,
                               computation);
    return;
  } else {
    // There are multiple source matrices.
    std::vector<std::pair<int32, int32> > submat_locations_sorted(
        submat_locations);
    std::sort(submat_locations_sorted.begin(), submat_locations_sorted.end());
    if (IsSortedAndUniq(submat_locations_sorted)) {
      // There are no repeats in any of the submat locations.  This means that
      // we can just use kAddToRowsMulti (i.e. AddToRows with pointer
      // destinations).  If there were repeats, the CUDA kernel would require
      // special synchronization, so we don't allow it.
      int32 indexes_multi_index = computation->indexes_multi.size();
      computation->indexes_multi.push_back(submat_locations);
      computation->commands.push_back(
          NnetComputation::Command(alpha,
                                   kAddToRowsMulti,
                                   deriv_submatrix_index,
                                   indexes_multi_index));
      return;
    }
    // If you reach this point, there is a case that wasn't handled.  Our
    // intended strategy to handle it, if it's ever needed, is to create a
    // temporary matrix consisting of all the unique submat_locations in the
    // input.  We would first recurse to CompileBackwardFromIndexes, and
    // let it write to this temporary matrix; and then do the kAddToRowsMulti
    // command as above to go from the temporary matrix to the multiple
    // matrices.
    KALDI_ERR << "This case not handled.";
  }
}
938 
940  int32 deriv_submatrix_index,
941  int32 input_deriv_submatrix_index,
942  BaseFloat alpha,
943  const std::vector<int32> &indexes,
944  NnetComputation *computation) const {
945 
946  int32 num_rows = computation->submatrices[deriv_submatrix_index].num_rows,
947  input_num_rows =
948  computation->submatrices[input_deriv_submatrix_index].num_rows;
949  KALDI_ASSERT(indexes.size() == num_rows);
950  if (input_num_rows == num_rows) {
951  int32 i;
952  for (i = 0; i < num_rows; i++)
953  if (indexes[i] != i)
954  break;
955  if (i == num_rows) { // Simplest case: just matrix addition.
956  computation->commands.push_back(
958  kMatrixAdd,
959  input_deriv_submatrix_index,
960  deriv_submatrix_index));
961 
962  return;
963  }
964  }
965  if (input_num_rows >= num_rows) {
966  // If there are no repeated elements in the "indexes" array, we can reverse
967  // the mapping and make it an operation of type kAddRows. TODO: change this
968  // to use kAddToRows, kCopyToRows, when implemented (will be more
969  // efficient).
970  std::vector<int32> reverse_indexes(input_num_rows, -1);
971  int32 i;
972  for (i = 0; i < num_rows; i++) {
973  int32 index_i = indexes[i];
974  KALDI_ASSERT(index_i >= -1 && index_i < input_num_rows);
975  if (index_i >= 0) {
976  if (reverse_indexes[index_i] == -1)
977  reverse_indexes[index_i] = i;
978  else
979  break;
980  } // note: there may be -1's in 'indexes', meaning just use zero.
981  }
982  if (i == num_rows) {
983  // There were no repeated elements, and this strategy will work.
984  int32 indexes_index = computation->indexes.size();
985  computation->indexes.push_back(reverse_indexes);
986  computation->commands.push_back(
988  kAddRows,
989  input_deriv_submatrix_index,
990  deriv_submatrix_index,
991  indexes_index));
992  return;
993  }
994  }
995  std::vector<std::pair<int32, int32> > ranges;
996  if (HasContiguousProperty(indexes, &ranges)) {
997  // the operation can be set up as AddRowRanges.
998  if (static_cast<int32>(ranges.size()) != input_num_rows) {
999  KALDI_ASSERT(static_cast<int32>(ranges.size()) < input_num_rows);
1000  // extend with (-1, -1) pairs.
1001  ranges.resize(input_num_rows, std::pair<int32,int32>(-1, -1));
1002  }
1003  int32 indexes_ranges_index = computation->indexes_ranges.size();
1004  computation->indexes_ranges.push_back(ranges);
1005  computation->commands.push_back(
1007  kAddRowRanges,
1008  input_deriv_submatrix_index,
1009  deriv_submatrix_index,
1010  indexes_ranges_index));
1011  // TODO: if any of these ranges are quite long (summing over many rows), the
1012  // resulting code could be inefficient because the AddRowRanges kernels
1013  // takes time linear in the length of the range. Using a temporary matrix
1014  // with an intermediate size would make this more efficient in that case, so
1015  // the one command would be two commands (plus commands to set up and
1016  // destroy the temporary matrix).
1017  return;
1018  }
1019 
1020  // If you ever reach here, it means someone has used a type of network that we
1021  // don't yet support in the backprop. Basically this case can be handled by
1022  // creating a temporary matrix to reorder the matrix at deriv_submatrix_index,
1023  // (using CopyRows), and doing AddRowRanges from that.
1024  // It wouldn't be too much work.
1025  KALDI_ERR << "This case not implemented yet.";
1026 }
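
// Note on HasContiguousProperty() (from nnet-compile-utils.h), used in the
// kAddRowRanges branch above: it returns true if, for each value i != -1 in
// 'indexes', the positions j with indexes[j] == i are consecutive; 'ranges'
// then gives, for each input-deriv row i, the half-open interval
// [begin, end) of output-deriv rows whose derivatives get summed into it.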


void Compiler::CompileBackwardDescriptor(
    int32 step, NnetComputation *computation) {
  StepInfo &step_info = steps_[step];
  if (nnet_.IsOutputNode(step_info.node_index) &&
      step_info.deriv > 0) {
    int32 deriv_submatrix_index = step_info.deriv;
    KALDI_ASSERT(computation->IsWholeMatrix(deriv_submatrix_index));
    NnetComputation::Command c(kAcceptInput, deriv_submatrix_index,
                               step_info.node_index);
    computation->commands.push_back(c);
  }

  // the top-level descriptor has a bunch of parts that we concatenate features
  // over.
  int32 num_parts = step_info.value_parts.size();
  for (int32 part = 0; part < num_parts; part++)
    CompileBackwardSumDescriptor(step, part,
                                 computation);
}


void Compiler::CompileBackward(int32 step,
                               NnetComputation *computation) {
  KALDI_ASSERT(step < static_cast<int32>(steps_.size()));
  const StepInfo &step_info = steps_[step];
  int32 node_index = step_info.node_index;
  const NetworkNode &node = nnet_.GetNode(node_index);

  switch (node.node_type) {
    case kInput:
      AddBackwardStepInput(step, computation);
      if (!IsInputStep(step + 1))  // Make sure backward computation is nonempty.
        computation->commands.push_back(
            NnetComputation::Command(kNoOperationMarker));
      break;
    case kDimRange:
      break;  // Nothing to do.
    case kComponent:
      AddBackwardStepComponent(step, computation);
      break;
    case kDescriptor:
      CompileBackwardDescriptor(step, computation);
      break;
    default:
      KALDI_ERR << "Invalid node type";
  }
}

// This just adds a command of type kAcceptInput that directs the computer to
// expect input from the user.  Because inputs are always listed first in
// 'steps', these will precede the actual commands.
void Compiler::AddForwardStepInput(int32 step,
                                   NnetComputation *computation) const {
  KALDI_ASSERT(static_cast<size_t>(step) < steps_.size());
  const StepInfo &step_info = steps_[step];
  int32 node_index = step_info.node_index,
      submatrix_index = step_info.value;
  KALDI_ASSERT(computation->IsWholeMatrix(submatrix_index));

  const NetworkNode &node = nnet_.GetNode(node_index);
  // actually, currently the node type would always be kInput.
  KALDI_ASSERT(node.node_type == kInput || node.node_type == kComponent);

  NnetComputation::Command c(kAcceptInput, submatrix_index, node_index);
  computation->commands.push_back(c);
}


void Compiler::AddForwardStepComponent(int32 step,
                                       NnetComputation *computation) const {
  KALDI_ASSERT(static_cast<size_t>(step) < steps_.size());
  const StepInfo &step_info = steps_[step];
  int32 input_step = step - 1;
  const StepInfo &input_step_info = steps_[input_step];
  int32 node_index = step_info.node_index;
  const NetworkNode &node = nnet_.GetNode(node_index);
  KALDI_ASSERT(node.node_type == kComponent);
  int32 component_index = node.u.component_index;
  const Component *component = nnet_.GetComponent(component_index);

  // note RE memo_index: we'll renumber the memo indexes during optimization to
  // get rid of gaps.  The use of 'step' as the memo index is OK because
  // step > 0 (whenever we're doing forward propagation, there must be
  // preceding steps for inputs or for component-input nodes).
  int32 properties = component->Properties(),
      input_submatrix_index = input_step_info.value,
      output_submatrix_index = step_info.value,
      memo_index = (step_info.deriv > 0 && (properties & kUsesMemo) ? step : 0),
      store_stats = (requests_[0]->store_component_stats &&
                     (properties & kStoresStats) ? 1 : 0);

  NnetComputation::Command c(kPropagate,
                             component_index,
                             step_info.precomputed_indexes_index,
                             input_submatrix_index,
                             output_submatrix_index,
                             memo_index,
                             store_stats);
  computation->commands.push_back(c);
}


void Compiler::AddBackwardStepInput(int32 step,
                                    NnetComputation *computation) const {
  KALDI_ASSERT(static_cast<size_t>(step) < steps_.size());
  const StepInfo &step_info = steps_[step];
  int32 node_index = step_info.node_index,
      deriv_submatrix_index = step_info.deriv;
  if (deriv_submatrix_index == 0)
    return;  // Nothing to do.
  KALDI_ASSERT(computation->IsWholeMatrix(deriv_submatrix_index));
  const NetworkNode &node = nnet_.GetNode(node_index);
  // actually, currently the node type would always be kInput.
  KALDI_ASSERT(node.node_type == kInput || node.node_type == kComponent);

  NnetComputation::Command c(kProvideOutput, deriv_submatrix_index, node_index);
  computation->commands.push_back(c);
}


void Compiler::AddBackwardStepComponent(int32 step,
                                        NnetComputation *computation) const {
  KALDI_ASSERT(static_cast<size_t>(step) < steps_.size());
  const StepInfo &step_info = steps_[step];
  int32 input_step = step - 1;
  const StepInfo &input_step_info = steps_[input_step];
  int32 node_index = step_info.node_index;
  const NetworkNode &node = nnet_.GetNode(node_index);
  KALDI_ASSERT(node.node_type == kComponent);
  int32 component_index = node.u.component_index;
  const Component *component = nnet_.GetComponent(component_index);
  int32 properties = component->Properties();

  int32 input_submatrix_index = input_step_info.value,
      output_submatrix_index = step_info.value,
      input_deriv_submatrix_index = input_step_info.deriv,
      output_deriv_submatrix_index = step_info.deriv,
      memo_index = (properties & kUsesMemo ? step : 0);
  KALDI_ASSERT(output_deriv_submatrix_index > 0 &&
               (input_deriv_submatrix_index > 0 ||
                properties & kUpdatableComponent));

  if (! (properties & kBackpropNeedsInput))
    input_submatrix_index = 0;
  if (! (properties & kBackpropNeedsOutput))
    output_submatrix_index = 0;

  NnetComputation::Command c(kBackprop,
                             component_index,
                             step_info.precomputed_indexes_index,
                             input_submatrix_index,
                             output_submatrix_index,
                             output_deriv_submatrix_index,
                             input_deriv_submatrix_index,
                             memo_index);
  computation->commands.push_back(c);
}



void Compiler::AllocateMatrices(const std::vector<int32> &whole_submatrices,
                                NnetComputation *computation) const {
  KALDI_ASSERT(computation->commands.empty());
  // Work out which matrices are inputs to the computation (or output-derivs,
  // which are also supplied as inputs to the computation); we won't be setting
  // these up.
  unordered_set<int32> input_and_oderiv_matrices;
  int32 num_steps = steps_.size();
  for (int32 step = 0; step < num_steps; step++) {
    const StepInfo &this_info = steps_[step];
    if (this_info.output_cindex_ids.empty())
      continue;
    int32 first_cindex_id = this_info.output_cindex_ids.front(),
        node_index = this_info.node_index;
    bool is_input = graph_.is_input[first_cindex_id],
        is_output = nnet_.IsOutputNode(node_index);
    if (is_input) {
      int32 value_submatrix_index = this_info.value,
          value_matrix_index =
          computation->submatrices[value_submatrix_index].matrix_index;
      input_and_oderiv_matrices.insert(value_matrix_index);
    }
    if (is_output && this_info.deriv != 0) {
      int32 deriv_submatrix_index = this_info.deriv,
          deriv_matrix_index =
          computation->submatrices[deriv_submatrix_index].matrix_index;
      input_and_oderiv_matrices.insert(deriv_matrix_index);
    }
  }

  int32 num_matrices = computation->matrices.size();
  for (int32 m = 1; m < num_matrices; m++) {
    // We don't set up the matrices that are inputs to the computation;
    // this happens when the user provides the input.
    if (input_and_oderiv_matrices.count(m) == 0) {
      // get a submatrix index that refers to the entire matrix.
      int32 submatrix_index = whole_submatrices[m];

      computation->commands.push_back(
          NnetComputation::Command(kAllocMatrix, submatrix_index));
      // Later, in the optimization phase, if it turns out that zeroing is not
      // necessary for some matrices, we'll remove the redundant kSetConst
      // commands.
      computation->commands.push_back(
          NnetComputation::Command(0.0, kSetConst, submatrix_index));
    }
  }
}


void Compiler::SetUpPrecomputedIndexes(
    const std::vector<int32> &step_to_segment,
    NnetComputation *computation) {
  int32 num_steps = steps_.size();
  KALDI_ASSERT(computation->component_precomputed_indexes.empty());
  // the zeroth element is special, and contains a NULL pointer.
  computation->component_precomputed_indexes.resize(1);
  for (int32 step = 0; step < num_steps; step++) {
    StepInfo &step_info = steps_[step];
    int32 node_index = step_info.node_index;
    const NetworkNode &node = nnet_.GetNode(node_index);
    // There is only something to do for nodes of type Component.
    if (node.node_type != kComponent)
      continue;
    const StepInfo &input_step_info = steps_[step - 1];
    int32 component_index = node.u.component_index;
    int32 input_node_index = input_step_info.node_index;
    KALDI_ASSERT(input_node_index == node_index - 1);
    const std::vector<Index> &input_indexes = input_step_info.output_indexes;
    const std::vector<Index> &output_indexes = step_info.output_indexes;

    const Component *component = nnet_.GetComponent(component_index);

    const ComputationRequest &request = *(requests_[step_to_segment[step]]);
    bool need_derivs = request.NeedDerivatives();
    ComponentPrecomputedIndexes *precomputed_indexes =
        component->PrecomputeIndexes(request.misc_info,
                                     input_indexes, output_indexes,
                                     need_derivs);
    if (precomputed_indexes == NULL) {
      // e.g. simple Components, and some other Components, will return NULL
      // for precomputed_indexes.
      step_info.precomputed_indexes_index = 0;
    } else {
      step_info.precomputed_indexes_index =
          computation->component_precomputed_indexes.size();

      NnetComputation::PrecomputedIndexesInfo info;
      info.data = precomputed_indexes;

      if (!input_indexes.empty() && input_indexes.back().n == 1 &&
          !output_indexes.empty() && output_indexes.back().n == 1) {
        // If these conditions are true, it's *possible* that we are doing
        // 'shortcut' compilation.  So just in case that's what's going on, we
        // store 'input_indexes' and 'output_indexes', which are needed by
        // the ExpandComputation() function that is used in that process.
        info.input_indexes = input_indexes;
        info.output_indexes = output_indexes;
      }
      computation->component_precomputed_indexes.push_back(info);
    }
  }
}

void Compiler::DeallocateMatrices(const std::vector<int32> &whole_submatrices,
                                  const std::vector<int32> &step_to_segment,
                                  NnetComputation *computation) {
  // This adds the commands to destroy all the matrices, except the
  // ones that might be needed as outputs of the computation.  The ones that
  // are spared from destruction are those corresponding to outputs of the
  // computation, and those corresponding to input derivatives that were
  // requested by the user.
  int32 num_matrices = computation->matrices.size();
  std::vector<bool> will_destroy(num_matrices, true);

  int32 num_steps = steps_.size();
  for (int32 step = 0; step < num_steps; step++) {
    const StepInfo &step_info = steps_[step];
    const ComputationRequest &request = *(requests_[step_to_segment[step]]);
    if (nnet_.IsOutputNode(step_info.node_index)) {
      // steps corresponding to output nodes need to have their "value" kept.
      int32 value_matrix_index =
          computation->submatrices[step_info.value].matrix_index;
      will_destroy[value_matrix_index] = false;
    } else if (nnet_.IsInputNode(step_info.node_index)) {
      // steps corresponding to input nodes need to have their "deriv" kept,
      // but only if the corresponding input derivative was requested.  (we
      // don't need to worry about whether outputs were requested, because if
      // they were not requested we would not be computing them in the first
      // place).
      std::string input_name = nnet_.GetNodeNames()[step_info.node_index];
      int32 i = 0, num_inputs = request.inputs.size();
      bool has_deriv = false;
      for (; i < num_inputs; i++) {
        if (input_name == request.inputs[i].name) {
          has_deriv = request.inputs[i].has_deriv;
          break;
        }
      }
      KALDI_ASSERT(i != num_inputs);  // assert that we found an input-request
                                      // with this name.
      if (has_deriv) {
        int32 deriv_matrix_index =
            computation->submatrices[step_info.deriv].matrix_index;
        will_destroy[deriv_matrix_index] = false;
      }
    }
  }
  // note: matrix-index 0 is the empty matrix.
  for (int32 m = 1; m < num_matrices; m++) {
    if (will_destroy[m]) {
      int32 submatrix_index = whole_submatrices[m];
      computation->commands.push_back(
          NnetComputation::Command(kDeallocMatrix, submatrix_index));
    }
  }
}

void Compiler::OutputDebugInfo(NnetComputation *computation) const {
  int32 num_matrices = computation->matrices.size(),
      num_steps = steps_.size();
  computation->matrix_debug_info.resize(num_matrices);
  for (int32 step = 0; step < num_steps; step++) {
    const StepInfo &step_info = steps_[step];
    if (step_info.value == 0)
      continue;  // e.g. input step for ConstantComponent.
    if (!computation->IsWholeMatrix(step_info.value))
      continue;
    int32 value_matrix = computation->submatrices[step_info.value].matrix_index;
    int32 deriv_matrix = 0;
    if (step_info.deriv != 0 && computation->IsWholeMatrix(step_info.deriv))
      deriv_matrix = computation->submatrices[step_info.deriv].matrix_index;

    NnetComputation::MatrixDebugInfo &debug_info =
        computation->matrix_debug_info[value_matrix];
    debug_info.is_deriv = false;
    if (!debug_info.cindexes.empty()) {
      // This can happen if we created an alias for a node using a
      // dim-range-node that covers all the dimensions (it would satisfy
      // IsWholeMatrix() above while not being a unique matrix).  We sometimes
      // do that to work around compiler constraints when creating expressions
      // that have the same quantity with more than one scaling value within
      // the same expression (like for computing deltas).
      continue;
    }
    AppendCindexes(step_info.node_index, step_info.output_indexes,
                   &debug_info.cindexes);
    if (deriv_matrix != 0) {
      NnetComputation::MatrixDebugInfo &deriv_debug_info =
          computation->matrix_debug_info[deriv_matrix];
      deriv_debug_info.is_deriv = true;
      deriv_debug_info.cindexes = debug_info.cindexes;
    }
  }
}

void AppendCindexes(int32 node, const std::vector<Index> &indexes,
                    std::vector<Cindex> *out) {
  size_t indexes_size = indexes.size();
  if (indexes_size > out->size())
    out->reserve(out->size() + indexes_size);
  for (size_t i = 0; i < indexes_size; i++)
    out->push_back(Cindex(node, indexes[i]));
}


}  // namespace nnet3
}  // namespace kaldi
ComputationGraph graph_
Definition: nnet-compile.h:64
void ComputeDerivNeeded(const std::vector< std::vector< int32 > > &steps, const std::vector< int32 > &step_to_segment, std::vector< bool > *deriv_needed)
int32 NewSubMatrix(int32 base_submatrix, int32 row_offset, int32 num_rows, int32 col_offset, int32 num_cols)
Convenience function used when adding new sub-matrices.
This is an abstract base-class.
std::vector< Cindex > cindexes
The mapping of cindex_id to Cindex.
const NetworkNode & GetNode(int32 node) const
returns const reference to a particular numbered network node.
Definition: nnet-nnet.h:146
float BaseFloat
Definition: kaldi-types.h:29
void CompileBackwardSumDescriptor(int32 step, int32 part_index, NnetComputation *computation) const
std::vector< std::vector< int32 > > dependencies
dependencies[cindex_id] gives you the list of other cindex_ids that this particular cindex_id directl...
int32 GetCindexId(const Cindex &cindex, bool is_input, bool *is_new)
Maps a Cindex to an integer cindex_id.
bool IsOutputNode(int32 node) const
Returns true if this is an output node, meaning that it is of type kDescriptor and is not directly fo...
Definition: nnet-nnet.cc:112
void Check() const
This is only to be called after you have called ComputeForSegment for all the segments.
int32 Dim(const Nnet &nnet) const
Definition: nnet-nnet.cc:33
std::vector< std::vector< std::pair< int32, int32 > > > indexes_multi
void CompileForwardDescriptor(int32 step, NnetComputation *computation) const
void ComputeComputationPhases(const Nnet &nnet, const ComputationGraph &graph, std::vector< std::vector< std::vector< int32 > > > *phases_per_segment)
This function divides a computation into &#39;phases&#39;, where a &#39;phase&#39; is a collection of cindexes which ...
virtual int32 Properties() const =0
Return bitmask of the component&#39;s properties.
void ComputeInputLocationsList(int32 step, int32 part_index, std::vector< std::vector< std::pair< int32, int32 > > > *input_locations) const
std::vector< bool > is_input
For each Cindex this tells us whether it was provided as an input to the network. ...
std::vector< SubMatrixInfo > submatrices
const SumDescriptor & Part(int32 n) const
returns the n&#39;th part.
MatrixStrideType
Definition: matrix-common.h:44
void CompileBackwardFromSubmatLocations(int32 deriv_submatrix_index, BaseFloat alpha, const std::vector< std::pair< int32, int32 > > &submat_locations, NnetComputation *computation) const
Compiler(const ComputationRequest &request, const Nnet &nnet)
Definition: nnet-compile.cc:29
void ComputeValueSubmatLocationsList(const std::vector< std::vector< std::pair< int32, int32 > > > &input_locations_list, std::vector< std::vector< std::pair< int32, int32 > > > *submat_locations_list) const
void ComputeDerivSubmatLocationsList(const std::vector< std::vector< std::pair< int32, int32 > > > &input_locations_list, std::vector< std::vector< std::pair< int32, int32 > > > *submat_locations_list) const
std::vector< int32 > value_parts
Definition: nnet-compile.h:97
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_PARANOID_ASSERT(cond)
Definition: kaldi-error.h:206
void Compute(const ComputationRequest &request)
void AppendCindexes(int32 node, const std::vector< Index > &indexes, std::vector< Cindex > *out)
Appends to &#39;out&#39; the pairs (node, indexes[0]), (node, indexes[1]), ...
std::vector< int32 > output_cindex_ids
Definition: nnet-compile.h:89
void CompileForward(int32 step, NnetComputation *computation) const
std::vector< int32 > deriv_parts
Definition: nnet-compile.h:100
int32 IndexForInput(const std::string &node_name) const
Returns the index into "inputs" corresponding to the node with name "node_name", or -1 if there is no...
NetworkNode is used to represent, three types of thing: either an input of the network (which pretty ...
Definition: nnet-nnet.h:81
void CompileBackwardDescriptor(int32 step, NnetComputation *computation)
Component * GetComponent(int32 c)
Return component indexed c. Not a copy; not owned by caller.
Definition: nnet-nnet.cc:150
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
void SetUpPrecomputedIndexes(const std::vector< int32 > &step_to_segment, NnetComputation *computation)
virtual bool IsComputable(const Index &ind, const CindexSet &cindex_set, std::vector< Cindex > *used_inputs) const =0
This function exists to enable us to manage optional dependencies, i.e.
std::vector< PrecomputedIndexesInfo > component_precomputed_indexes
std::vector< StepInfo > steps_
Definition: nnet-compile.h:153
void ComputeForSegment(const ComputationRequest &request, const std::vector< std::vector< int32 > > &phases)
You call this once for each segment, in order (note: for normal, non-online computations, there is only one segment).
void SplitLocationsBackward(const std::vector< std::vector< std::pair< int32, int32 > > > &submat_lists, std::vector< std::vector< std::pair< int32, int32 > > > *split_lists)
This function has the same interface as SplitLocations(); however, it ensures certain additional prop...
MatrixStrideType GetStrideType(int32 node_index) const
void CompileForwardSumDescriptor(int32 step, int32 part_index, NnetComputation *computation) const
std::vector< const ComputationRequest * > requests_
Definition: nnet-compile.h:62
void CreateComputation(const CompilerOptions &opts, NnetComputation *computation)
Definition: nnet-compile.cc:50
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::vector< IoSpecification > outputs
This class arranges the cindex_ids of the computation into a sequence of lists called "steps"...
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
void DeallocateMatrices(const std::vector< int32 > &whole_submatrices, const std::vector< int32 > &step_to_segment, NnetComputation *computation)
int32 IndexForOutput(const std::string &node_name) const
Returns the index into "inputs" corresponding to the node with name "node_name", or -1 if there is no...
void OutputDebugInfo(NnetComputation *computation) const
std::vector< std::vector< int32 > > indexes
union kaldi::nnet3::NetworkNode::@15 u
int32 NumParts() const
Returns the number of parts that are concatenated over.
std::vector< std::pair< int32, int32 > > cindex_id_to_location_
This maps each cindex_id to its location.
Definition: nnet-compile.h:161
void AddBackwardStepInput(int32 step, NnetComputation *computation) const
void ComputeStepDependencies(const std::vector< int32 > &this_step, int32 step_index, unordered_set< int32 > *dep_steps)
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
bool IsInputStep(int32 step) const
BaseFloat LearningRate() const
Gets the learning rate to be used in gradient descent.
void CompileForwardFromIndexes(int32 value_submatrix_index, int32 input_submatrix_index, BaseFloat alpha, const std::vector< int32 > &indexes, NnetComputation *computation) const
Adds to `computation` a command that adds to the submatrix in `value_submatrix_index` a quantity cons...
void CompileBackwardFromIndexes(int32 deriv_submatrix_index, int32 input_deriv_submatrix_index, BaseFloat alpha, const std::vector< int32 > &indexes, NnetComputation *computation) const
virtual int32 Dim(const Nnet &nnet) const =0
bool IsWholeMatrix(int32 submatrix_index) const
void AddCommands(const std::vector< bool > &deriv_needed, const std::vector< int32 > &step_to_segment, NnetComputation *computation)
const std::vector< std::string > & GetNodeNames() const
returns vector of node names (needed by some parsing code, for instance).
Definition: nnet-nnet.cc:63
int32 Dim(const Nnet &nnet) const
const int kNoTime
Definition: nnet-common.cc:573
An abstract representation of a set of Cindexes.
void AddForwardStepComponent(int32 step, NnetComputation *computation) const
bool IsComponentInputNode(int32 node) const
Returns true if this is component-input node, i.e.
Definition: nnet-nnet.cc:172
std::vector< std::vector< std::pair< int32, int32 > > > indexes_ranges