// nnet3/nnet-batch-compute.h

// Copyright 2012-2018  Johns Hopkins University (author: Daniel Povey)
//           2018       Hang Lyu

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_NNET3_NNET_BATCH_COMPUTE_H_
#define KALDI_NNET3_NNET_BATCH_COMPUTE_H_

#include <vector>
#include <string>
#include <list>
#include <utility>
#include <condition_variable>
#include "base/kaldi-common.h"
#include "gmm/am-diag-gmm.h"
#include "hmm/transition-model.h"
#include "itf/decodable-itf.h"
#include "nnet3/nnet-optimize.h"
#include "nnet3/nnet-compute.h"
#include "nnet3/am-nnet-simple.h"
#include "nnet3/nnet-am-decodable-simple.h"
#include "decoder/lattice-faster-decoder.h"
#include "util/stl-utils.h"


namespace kaldi {
namespace nnet3 {


/**
   class NnetInferenceTask represents a chunk of an utterance that is
   requested to be computed.  This will be given to NnetBatchComputer, which
   will aggregate the tasks and complete them.
 */
struct NnetInferenceTask {
  // The copy constructor is required to exist because of std::vector's
  // resize() function, but in practice it should never be used.
  NnetInferenceTask(const NnetInferenceTask &other) {
    KALDI_ERR << "NnetInferenceTask was not designed to be copied.";
  }
  NnetInferenceTask() { }


  // The input frames, which are treated as being numbered t=0, t=1, etc.  (If
  // the lowest t value was originally nonzero in the 'natural' numbering, this
  // just means we conceptually shift the 't' values; the only real constraint
  // is that the 't' values are contiguous.)
  CuMatrix<BaseFloat> input;

  // The index of the first input frame (in the shifted numbering where the
  // first output frame is numbered zero).  This will typically be less than
  // one, because most network topologies require left context.  If this was an
  // 'interior' chunk of a recurrent topology like LSTMs, first_input_t may be
  // substantially less than zero, due to 'extra_left_context'.
  int32 first_input_t;

  // The stride of output 't' values: e.g., this will be 1 for
  // normal-frame-rate models, and 3 for low-frame-rate models such as chain
  // models.
  int32 output_t_stride;

  // The number of output 't' values (they will start from zero and be
  // separated by output_t_stride).  This will be the num-rows of 'output'.
  int32 num_output_frames;

  // 'num_initial_unused_output_frames', which will normally be zero, is the
  // number of rows of the output matrix ('output' or 'output_cpu') which won't
  // actually be needed by the user, usually because they overlap with a
  // previous chunk.  This can happen because the number of outputs isn't a
  // multiple of the number of chunks.
  int32 num_initial_unused_output_frames;

  // 0 < num_used_output_frames <=
  //     num_output_frames - num_initial_unused_output_frames
  // is the number of output frames which are actually going to be used by the
  // user.  (Due to edge effects, not all are necessarily used.)
  int32 num_used_output_frames;

  // first_used_output_frame_index is provided for the convenience of the user
  // so that they can know how this chunk relates to the utterance which it is
  // a part of.  It represents an output frame index in the original
  // utterance-- after subsampling; so not a 't' value but a 't' value divided
  // by frame-subsampling-factor.  Specifically, it tells you the row index in
  // the full utterance's output which corresponds to the first 'used' frame
  // index at the output of this chunk, specifically: the row numbered
  // 'num_initial_unused_output_frames' of the 'output' or 'output_cpu' data
  // member.
  int32 first_used_output_frame_index;

  // True if this chunk is an 'edge' (the beginning or end of an utterance) AND
  // is structurally different somehow from a non-edge chunk, e.g. requires
  // less context.  This is present only so that NnetBatchComputer will know
  // the appropriate minibatch size to use.
  bool is_edge;

  // True if this task represents an irregular-sized chunk.  These can happen
  // only for utterances that are shorter than the requested minibatch size,
  // and they should be quite rare.  We use a minibatch size of 1 in this case.
  bool is_irregular;

  // The i-vector for this chunk, if this network accepts i-vector inputs.
  CuVector<BaseFloat> ivector;

  // A priority (higher is more urgent); it may have either sign.  May be
  // updated after this object is provided to class NnetBatchComputer.
  double priority;

  // This semaphore will be incremented by class NnetBatchComputer when this
  // chunk is done.  After this semaphore is incremented, class
  // NnetBatchComputer will no longer hold any pointers to this class.
  Semaphore semaphore;

  // Will be set to true by the caller if they want the output of the neural
  // net to be copied to the CPU (to 'output_cpu').  If false, the output will
  // stay on the GPU (if one is used), in 'output'.
  bool output_to_cpu;

  // The neural net output, of dimension num_output_frames by the output-dim of
  // the neural net, will be written to 'output_cpu' if 'output_to_cpu' is
  // true.  This is expected to be empty when this task is provided to class
  // NnetBatchComputer, and will be nonempty (if output_to_cpu == true) when
  // the task is completed and the semaphore is signaled.
  Matrix<BaseFloat> output_cpu;

  // The output goes here, instead of 'output_cpu', if 'output_to_cpu' is
  // false.
  CuMatrix<BaseFloat> output;
};
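
// To illustrate how the indexing members above fit together: a minimal,
// hedged sketch (the names 'task' and 'utt_output' are hypothetical) of how
// a completed task's used rows would be copied back into a matrix covering
// the whole utterance:
//
//   int32 src_row = task.num_initial_unused_output_frames,
//       dst_row = task.first_used_output_frame_index;
//   for (int32 i = 0; i < task.num_used_output_frames; i++)
//     utt_output.Row(dst_row + i).CopyFromVec(
//         task.output_cpu.Row(src_row + i));
//
// (MergeTaskOutput(), declared below, does this consolidation for you.)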


struct NnetBatchComputerOptions: public NnetSimpleComputationOptions {
  int32 minibatch_size;
  int32 edge_minibatch_size;
  bool ensure_exact_final_context;
  BaseFloat partial_minibatch_factor;

  NnetBatchComputerOptions(): minibatch_size(128),
                              edge_minibatch_size(32),
                              ensure_exact_final_context(false),
                              partial_minibatch_factor(0.5) {
  }

  void Register(OptionsItf *po) {
    NnetSimpleComputationOptions::Register(po);
    po->Register("minibatch-size", &minibatch_size, "Number of chunks per "
                 "minibatch (see also edge-minibatch-size)");
    po->Register("edge-minibatch-size", &edge_minibatch_size, "Number of "
                 "chunks per minibatch: this applies to chunks at the "
                 "beginnings and ends of utterances, in cases (such as "
                 "recurrent models) when the computation would be different "
                 "from the usual one.");
    po->Register("ensure-exact-final-context", &ensure_exact_final_context,
                 "If true, for utterances shorter than --frames-per-chunk, "
                 "use exact-length, special computations.  If false, "
                 "pad with repeats of the last frame.  Would only affect "
                 "the output for backwards-recurrent models, but would "
                 "negatively impact speed in all cases.");
    po->Register("partial-minibatch-factor", &partial_minibatch_factor,
                 "Factor that controls how small partial minibatches will be "
                 "when they become necessary.  We will potentially do the "
                 "computation for sizes: int(partial_minibatch_factor^n * "
                 "minibatch_size), for n = 0, 1, 2....  Set it to 0.0 if you "
                 "want to use only the specified minibatch sizes.");
  }
};
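
// For orientation, a minimal sketch of how these options would typically be
// hooked up to a command line (the usage string and variable names are
// hypothetical; ParseOptions is from util/parse-options.h):
//
//   const char *usage = "...";
//   ParseOptions po(usage);
//   NnetBatchComputerOptions opts;
//   opts.Register(&po);
//   po.Read(argc, argv);
//   // opts.minibatch_size etc. now reflect --minibatch-size etc.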


/**
   Merges together the 'output_cpu' (if the 'output_to_cpu' members are true)
   or the 'output' members of 'tasks' into a single output matrix (the first
   version outputs to a CPU matrix, the second to a GPU matrix).  The tasks
   should all come from the same utterance, and should already have been
   computed.
 */
void MergeTaskOutput(
    const std::vector<NnetInferenceTask> &tasks,
    Matrix<BaseFloat> *output);
void MergeTaskOutput(
    const std::vector<NnetInferenceTask> &tasks,
    CuMatrix<BaseFloat> *output);

/**
   This class does neural net inference in a way that is optimized for GPU
   use: it combines chunks of multiple utterances into minibatches for more
   efficient computation.
 */
class NnetBatchComputer {
 public:
  /// Constructor.  It stores references to all the arguments, so don't delete
  /// them until this object goes out of scope.  'priors' is either the empty
  /// vector, or a vector of prior probabilities which we take the log of and
  /// subtract from the neural net outputs (e.g. used in acoustic modeling).
  NnetBatchComputer(const NnetBatchComputerOptions &opts,
                    const Nnet &nnet,
                    const VectorBase<BaseFloat> &priors);


  /// Accepts a task, meaning the task will be queued.  (Note: the pointer is
  /// still owned by the caller.)  If 'max_minibatches_full' >= 0, the calling
  /// thread will block until no more than that many full minibatches are
  /// pending; this is a mechanism to stop too much data from piling up.
  void AcceptTask(NnetInferenceTask *task,
                  int32 max_minibatches_full = -1);

  /// Returns the number of full minibatches waiting to be computed.
  int32 NumFullPendingMinibatches() const { return num_full_minibatches_; }


  /// Does some kind of computation, choosing the highest-priority thing to
  /// compute.  Returns true if it did some computation, and false if there
  /// was nothing to compute (or, if allow_partial_minibatch == false, if no
  /// full minibatch was ready).
  bool Compute(bool allow_partial_minibatch);


  /// Splits a single utterance into a list of separate tasks which can then
  /// be given to this class by AcceptTask().  The 'ivector' and
  /// 'online_ivectors' arguments are alternatives; at most one of them should
  /// be non-NULL.  The two overloads are equivalent apart from whether the
  /// inputs live in CPU or GPU memory.
  void SplitUtteranceIntoTasks(
      bool output_to_cpu,
      const Matrix<BaseFloat> &input,
      const Vector<BaseFloat> *ivector,
      const Matrix<BaseFloat> *online_ivectors,
      int32 online_ivector_period,
      std::vector<NnetInferenceTask> *tasks);
  void SplitUtteranceIntoTasks(
      bool output_to_cpu,
      const CuMatrix<BaseFloat> &input,
      const CuVector<BaseFloat> *ivector,
      const CuMatrix<BaseFloat> *online_ivectors,
      int32 online_ivector_period,
      std::vector<NnetInferenceTask> *tasks);

  const NnetBatchComputerOptions &GetOptions() { return opts_; }

  ~NnetBatchComputer();

 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(NnetBatchComputer);

  // Information about a specific minibatch size for a group of tasks sharing
  // a specific structure (in terms of left and right context, etc.).
  struct MinibatchSizeInfo {
    // the computation for this minibatch size.
    std::shared_ptr<const NnetComputation> computation;
    int32 num_done;  // The number of minibatches computed: for diagnostics.
    int64 tot_num_tasks;  // The total number of tasks in those minibatches,
                          // also for diagnostics... can be used to compute
                          // how 'full', on average, these minibatches were.
    double seconds_taken;  // The total time elapsed in computation for this
                           // minibatch type.
    MinibatchSizeInfo(): computation(NULL), num_done(0),
                         tot_num_tasks(0), seconds_taken(0.0) { }
  };


  // A computation group is a group of tasks that have the same structure
  // (number of input and output frames, left and right context).
  struct ComputationGroupInfo {
    // The tasks to be completed.  This array is added-to by AcceptTask(),
    // and removed-from by GetHighestPriorityComputation(), which is called
    // from Compute().
    std::vector<NnetInferenceTask*> tasks;

    // Map from minibatch-size to information specific to this minibatch-size,
    // including the NnetComputation.  This is set up by
    // GetHighestPriorityComputation(), which is called from Compute().
    std::map<int32, MinibatchSizeInfo> minibatch_info;
  };

  // This struct allows us to arrange the tasks into groups that can be
  // computed in the same minibatch.
  struct ComputationGroupKey {
    ComputationGroupKey(const NnetInferenceTask &task):
        num_input_frames(task.input.NumRows()),
        first_input_t(task.first_input_t),
        num_output_frames(task.num_output_frames) { }

    bool operator == (const ComputationGroupKey &other) const {
      return num_input_frames == other.num_input_frames &&
          first_input_t == other.first_input_t &&
          num_output_frames == other.num_output_frames;
    }
    int32 num_input_frames;
    int32 first_input_t;
    int32 num_output_frames;
  };

  struct ComputationGroupKeyHasher {
    int32 operator () (const ComputationGroupKey &key) const {
      return key.num_input_frames + 18043 * key.first_input_t +
          6413 * key.num_output_frames;
    }
  };


  typedef unordered_map<ComputationGroupKey, ComputationGroupInfo,
                        ComputationGroupKeyHasher> MapType;

  // Gets the priority for a group (higher means higher priority).  (A group
  // is a list of tasks that may be computed in the same minibatch.)  What this
  // function does is a kind of heuristic.
  // If allow_partial_minibatch == false, it will set the priority for
  // any minibatches that are not full to negative infinity.
  inline double GetPriority(bool allow_partial_minibatch,
                            const ComputationGroupInfo &info) const;

  // Returns the minibatch size for this group of tasks, i.e. the size of a
  // full minibatch for this type of task, which is what we'd ideally like to
  // compute.  Note: the is_edge and is_irregular options should be the same
  // for all tasks in the group.
  //  - If 'tasks' is empty, or info.is_edge and info.is_irregular are both
  //    false, then return opts_.minibatch_size.
  //  - If 'tasks' is nonempty and tasks[0].is_irregular is true, then
  //    return 1.
  //  - If 'tasks' is nonempty, tasks[0].is_irregular is false and
  //    tasks[0].is_edge is true, then return opts_.edge_minibatch_size.
  inline int32 GetMinibatchSize(const ComputationGroupInfo &info) const;


  // This function compiles, and returns, a computation for tasks of the
  // structure present in info.tasks[0], and the specified minibatch size.
  std::shared_ptr<const NnetComputation> GetComputation(
      const ComputationGroupInfo &info,
      int32 minibatch_size);


  // Returns the actual minibatch size we'll use for this computation.  In
  // most cases it will be opts_.minibatch_size (or opts_.edge_minibatch_size
  // if appropriate); but if the number of available tasks is much less than
  // the appropriate minibatch size, it may be less.  The minibatch size may
  // be greater than info.tasks.size(); in that case, the remaining 'n' values
  // in the minibatch are not used.  (It may also be less than
  // info.tasks.size(), in which case we only do some of the tasks.)
  int32 GetActualMinibatchSize(const ComputationGroupInfo &info) const;


  // This function gets the highest-priority 'num_tasks' tasks from 'info',
  // removes them from the array info->tasks, and puts them into the array
  // 'tasks' (which is assumed to be initially empty).
  // This function also updates the num_full_minibatches_ variable if
  // necessary, and takes care of notifying any related condition variables.
  void GetHighestPriorityTasks(
      int32 num_tasks,
      ComputationGroupInfo *info,
      std::vector<NnetInferenceTask*> *tasks);

  // This function finds the highest-priority group of tasks, removes the
  // tasks that will be computed from the queue (outputting them to 'tasks'),
  // outputs the minibatch size to use to 'minibatch_size', and returns the
  // MinibatchSizeInfo object for that group and minibatch size.  It returns
  // NULL if there is nothing to compute.
  MinibatchSizeInfo *GetHighestPriorityComputation(
      bool allow_partial_minibatch,
      int32 *minibatch_size,
      std::vector<NnetInferenceTask*> *tasks);

  // Formats the inputs to the computation and transfers them to the GPU (if
  // one is used): copies the 'input' and 'ivector' members of the provided
  // tasks into the minibatch-level matrices 'input' and 'ivector' that the
  // compiled computation consumes.
  void FormatInputs(int32 minibatch_size,
                    const std::vector<NnetInferenceTask*> &tasks,
                    CuMatrix<BaseFloat> *input,
                    CuMatrix<BaseFloat> *ivector);


  // Copies 'output', piece by piece, to the 'output_cpu' or 'output'
  // members of 'tasks', depending on their 'output_to_cpu' value.
  void FormatOutputs(const CuMatrix<BaseFloat> &output,
                     const std::vector<NnetInferenceTask*> &tasks);


  // Changes opts_.frames_per_chunk to be a multiple of
  // opts_.frame_subsampling_factor, if needed.
  void CheckAndFixConfigs();

  // This function creates and returns the computation request which is to be
  // compiled.
  static void GetComputationRequest(const NnetInferenceTask &task,
                                    int32 minibatch_size,
                                    ComputationRequest *request);

  // Prints some logging information about what we computed, with breakdown by
  // minibatch type.
  void PrintMinibatchStats();

  NnetBatchComputerOptions opts_;
  const Nnet &nnet_;
  CachingOptimizingCompiler compiler_;
  CuVector<BaseFloat> log_priors_;

  // Mutex that guards this object.  It is only held for fairly quick
  // operations (not while the actual computation is being done).
  std::mutex mutex_;

  // tasks_ contains all the queued tasks.
  // Each key contains a vector of NnetInferenceTask* pointers, of the same
  // structure (i.e., IsCompatible() returns true).
  MapType tasks_;

  // num_full_minibatches_ is a function of the data in tasks_ (and the
  // minibatch sizes specified in opts_).  It is the number of full minibatches
  // of tasks that are pending, meaning: for each group of tasks, the number of
  // pending tasks divided by the minibatch-size for that group in integer
  // arithmetic.  This is kept updated for thread synchronization reasons,
  // because it is the shared variable that threads blocked in AcceptTask()
  // are waiting on.
  int32 num_full_minibatches_;

  // a map from 'n' to a condition variable corresponding to the condition:
  // num_full_minibatches_ <= n.  Any time the number of full minibatches drops
  // below n, the corresponding condition variable is notified (if it exists).
  std::unordered_map<int32, std::condition_variable*> no_more_than_n_minibatches_full_;

  // some static information about the neural net, computed at the start.
  int32 nnet_left_context_;
  int32 nnet_right_context_;
  int32 input_dim_;
  int32 ivector_dim_;
  int32 output_dim_;
};
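
// A hedged usage sketch of NnetBatchComputer for a single utterance, doing
// the computation in the calling thread ('opts', 'nnet', 'priors' and
// 'feats' are assumed to be set up by the caller):
//
//   NnetBatchComputer computer(opts, nnet, priors);
//   std::vector<NnetInferenceTask> tasks;
//   computer.SplitUtteranceIntoTasks(true /* output_to_cpu */, feats,
//                                    NULL, NULL, 0, &tasks);
//   for (size_t i = 0; i < tasks.size(); i++)
//     computer.AcceptTask(&(tasks[i]));
//   while (computer.Compute(true /* allow_partial_minibatch */));
//   for (size_t i = 0; i < tasks.size(); i++)
//     tasks[i].semaphore.Wait();  // wait until each chunk is done.
//   Matrix<BaseFloat> output;
//   MergeTaskOutput(tasks, &output);  // consolidate the chunks' outputs.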


/**
   This class implements a simplified interface to class NnetBatchComputer,
   which is suitable for programs that want fast GPU-based inference on a
   sequence of utterances, getting the outputs back in the same order the
   inputs were provided.
 */
class NnetBatchInference {
 public:

  NnetBatchInference(
      const NnetBatchComputerOptions &opts,
      const Nnet &nnet,
      const VectorBase<BaseFloat> &priors);

  /**
     The user should call this one by one for each utterance that needs to be
     computed (optionally interspersed with calls to GetOutput()).  The
     i-vector arguments are optional alternatives, as in
     NnetBatchComputer::SplitUtteranceIntoTasks().
  */
  void AcceptInput(const std::string &utterance_id,
                   const Matrix<BaseFloat> &input,
                   const Vector<BaseFloat> *ivector,
                   const Matrix<BaseFloat> *online_ivectors,
                   int32 online_ivector_period);

  /**
     The user should call this after the last input has been provided via
     AcceptInput(); it forces the last utterances to be flushed out so they
     can be retrieved by GetOutput().
  */
  void Finished();

  /**
     The user should call this to obtain output.  The output is given in the
     same order as the input was provided.  It returns true if an utterance's
     output was ready, and false otherwise (e.g. if it is still being
     computed, or if there is no more output).
  */
  bool GetOutput(std::string *utterance_id,
                 Matrix<BaseFloat> *output);

  ~NnetBatchInference();

 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(NnetBatchInference);

  // This is the computation thread, which is run in the background.  It will
  // exit once the user calls Finished() and all computation is completed.
  void Compute();
  // static wrapper for Compute().
  static void ComputeFunc(NnetBatchInference *object) { object->Compute(); }


  // This object implements the internals of what this class does.  It is
  // accessed both by the main thread (from where AcceptInput(), Finished() and
  // GetOutput() are called), and from the background thread in which Compute()
  // is called.
  NnetBatchComputer computer_;

  // This is set to true when the user calls Finished(); the computation
  // thread sees it and knows to flush out the remaining tasks even if the
  // minibatches are not full.
  bool is_finished_;

  // This semaphore is signaled by the main thread (the thread in which
  // AcceptInput() is called) every time a new utterance is added, and waited
  // on in the background thread in which Compute() is called.
  Semaphore tasks_ready_semaphore_;

  struct UtteranceInfo {
    std::string utterance_id;
    // The tasks into which we split this utterance.
    std::vector<NnetInferenceTask> tasks;
    // 'num_tasks_finished' is the number of tasks which are known to be
    // finished, meaning we successfully waited for those tasks' 'semaphore'
    // member.  When this reaches tasks.size(), we are ready to consolidate
    // the output into a single matrix and return it to the user.
    size_t num_tasks_finished;
  };

  // This list is only accessed directly by the main thread, by AcceptInput()
  // and GetOutput().  It is a list of utterances, with more recently added
  // ones at the back.  When utterances are given to the user by GetOutput(),
  // they are removed from the front of the list.
  std::list<UtteranceInfo*> utts_;

  int32 utterance_counter_;  // counter that increases on every utterance.

  // The thread running the Compute() process.
  std::thread compute_thread_;
};
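
// A hedged usage sketch of NnetBatchInference (the reader/writer objects are
// hypothetical, in the style of Kaldi's table readers and writers):
//
//   NnetBatchInference inference(opts, nnet, priors);
//   std::string utt;
//   Matrix<BaseFloat> output;
//   for (; !feature_reader.Done(); feature_reader.Next()) {
//     inference.AcceptInput(feature_reader.Key(), feature_reader.Value(),
//                           NULL, NULL, 0);
//     while (inference.GetOutput(&utt, &output))  // drain whatever is ready.
//       matrix_writer.Write(utt, output);
//   }
//   inference.Finished();
//   while (inference.GetOutput(&utt, &output))
//     matrix_writer.Write(utt, output);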


/**
   Decoder object that uses multiple CPU threads for the graph search, plus a
   GPU for the neural net inference (that's done by a separate
   NnetBatchComputer object).  The interface of this object should be
   accessed from only one thread, though-- presumably the main thread of the
   program.
 */
class NnetBatchDecoder {
 public:
  /**
     Constructor.
       @param [in] fst  FST that we are decoding with; it will be shared
                between all decoder threads.
       @param [in] decoder_config  Configuration object for the decoders.
       @param [in] trans_model  The transition model.
       @param [in] word_syms  Symbol table used for printing the decoded words
                to stderr; may be NULL.
       @param [in] allow_partial  If true, in cases where no final-state was
                reached on the last frame of the decoding, we still output a
                lattice (it may contain a partial sentence).
       @param [in] num_threads  The number of decoder threads to use.
       @param [in] computer  The NnetBatchComputer object used to do the
                neural net computation.
  */
  NnetBatchDecoder(const fst::Fst<fst::StdArc> &fst,
                   const LatticeFasterDecoderConfig &decoder_config,
                   const TransitionModel &trans_model,
                   const fst::SymbolTable *word_syms,
                   bool allow_partial,
                   int32 num_threads,
                   NnetBatchComputer *computer);

  /**
     The user should call this one by one for each utterance to be decoded.
     It may block while too many utterances are still being decoded, as a way
     of limiting memory use.  The i-vector arguments are optional
     alternatives, as in NnetBatchComputer::SplitUtteranceIntoTasks().
  */
  void AcceptInput(const std::string &utterance_id,
                   const Matrix<BaseFloat> &input,
                   const Vector<BaseFloat> *ivector,
                   const Matrix<BaseFloat> *online_ivectors,
                   int32 online_ivector_period);

  /*
    The user should call this function each time there was a problem with an
    utterance prior to being able to call AcceptInput()-- e.g. missing
    i-vectors.  This will update the num-failed-utterances stats which are
    stored in this class.
  */
  void UtteranceFailed();

  /*
    The user should call this when all input has been provided, i.e. when
    AcceptInput() will not be called any more.  It will block until all
    threads have terminated; after that, you can call GetOutput() until it
    returns false, which will guarantee that nothing remains to compute.
    It returns the number of utterances that have been successfully decoded.
  */
  int32 Finished();

  /**
     The user should call this to obtain output; this version is for the case
     where config.determinize_lattice == true (w.r.t. the config provided to
     the constructor).  The output is in the same order as the input was
     provided, but it may be delayed, and some outputs may be missing (e.g.
     because of decoding failures).  If a non-NULL symbol table was provided
     to the constructor, '*sentence' is set to the decoded word sequence as a
     string.  It returns true if an utterance's output was obtained, and
     false otherwise.
  */
  bool GetOutput(std::string *utterance_id,
                 CompactLattice *clat,
                 std::string *sentence);

  // This version of GetOutput is for where config.determinize_lattice == false
  // (w.r.t. the config provided to the constructor).  It is the same as the
  // other version except it outputs to a normal Lattice, not a CompactLattice.
  bool GetOutput(std::string *utterance_id,
                 Lattice *lat,
                 std::string *sentence);

  ~NnetBatchDecoder();

 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(NnetBatchDecoder);

  struct UtteranceInput {
    std::string utterance_id;
    const Matrix<BaseFloat> *input;
    const Vector<BaseFloat> *ivector;
    const Matrix<BaseFloat> *online_ivectors;
    int32 online_ivector_period;
  };

  // This object is created when a thread finishes an utterance.  For
  // utterances where decoding failed somehow, the relevant lattice
  // (compact_lat, if opts_.determinize == true, or lat otherwise) will be
  // empty (have no states).
  struct UtteranceOutput {
    std::string utterance_id;
    bool finished;
    CompactLattice compact_lat;
    Lattice lat;
    std::string sentence;  // 'sentence' is only nonempty if a non-NULL symbol
                           // table was provided to the constructor of class
                           // NnetBatchDecoder; it's the sentence as a string
                           // (a sequence of words separated by spaces).  It's
                           // used for printing the sentence to stderr, which
                           // we do in the main thread to keep the order
                           // consistent.
  };

  // This is the decoding thread, several copies of which are run in the
  // background.  It will exit once the user calls Finished() and all
  // computation is completed.
  void Decode();
  // static wrapper for Decode().
  static void DecodeFunc(NnetBatchDecoder *object) { object->Decode(); }

  // This is the computation thread; it handles the neural net inference.
  void Compute();
  // static wrapper for Compute().
  static void ComputeFunc(NnetBatchDecoder *object) { object->Compute(); }


  // Sets the priorities of the tasks in a newly provided utterance.
  void SetPriorities(std::vector<NnetInferenceTask> *tasks);

  // In the single-thread case, this sets priority_offset_ to 'priority'.
  // In the multi-threaded case it causes priority_offset_ to approach
  // 'priority' at a rate that depends on the number of threads.
  void UpdatePriorityOffset(double priority);

  // This function does the determinization (if needed) and finds the best
  // path through the lattice to update the stats.  It is expected that when
  // it is called, 'output' will have its 'lat' member set up.
  void ProcessOutputUtterance(UtteranceOutput *output);

  const fst::Fst<fst::StdArc> &fst_;
  const LatticeFasterDecoderConfig &decoder_opts_;
  const TransitionModel &trans_model_;
  const fst::SymbolTable *word_syms_;  // May be NULL.  Owned here.
  bool allow_partial_;
  NnetBatchComputer *computer_;
  std::vector<std::thread*> decode_threads_;
  std::thread compute_thread_;  // Thread that calls computer_->Compute().


  // 'input_utterance_', together with input_ready_semaphore_ and
  // input_consumed_semaphore_, are used to 'hand off' information about a
  // newly provided utterance from AcceptInput() to a decoder thread that is
  // ready to process a new utterance.
  UtteranceInput input_utterance_;
  Semaphore input_ready_semaphore_;  // Is signaled by the main thread when
                                     // AcceptInput() is called and a new
                                     // utterance is being provided (or when
                                     // the input is finished), and waited on
                                     // in the decoder threads.
  Semaphore input_consumed_semaphore_;  // Is signaled in a decoder thread when
                                        // it has finished consuming the input,
                                        // so the main thread can know when it
                                        // should continue (to avoid letting
                                        // 'input' go out of scope while it's
                                        // still needed).

  Semaphore tasks_ready_semaphore_;  // Is signaled when new tasks are added
                                     // to the computer_ object (or when we're
                                     // finished).

  bool is_finished_;  // True if the input is finished.  If this is true, a
                      // signal to input_ready_semaphore_ indicates to the
                      // decoder thread that it should terminate.

  bool tasks_finished_;  // True if we know that no more tasks will be given
                         // to the computer_ object.


  // pending_utts_ is a list of utterances that have been provided via
  // AcceptInput(), but whose decoding has not yet finished.  AcceptInput()
  // will push_back to it, and GetOutput() will pop_front().  When a decoding
  // thread has finished an utterance it will set its 'finished' member to
  // true.  There is no need to synchronize or use mutexes here.
  std::list<UtteranceOutput*> pending_utts_;

  // priority_offset_ is used in determining the priorities of nnet
  // computation tasks.  It starts off at zero and becomes more negative with
  // time, with the aim being that the priority of the first task (i.e. the
  // leftmost chunk) of a new utterance should be at about the same priority as
  // whatever chunks we are just now getting around to decoding.
  double priority_offset_;

  // Some statistics accumulated by this class, for logging and timing
  // purposes.
  double tot_like_;  // Total likelihood (of best path) over all lattices that
                     // we output.
  int64 frame_count_;  // Frame count over all lattices that we output.
  int32 num_success_;  // Number of successfully decoded files.
  int32 num_fail_;  // Number of files where decoding failed.
  int32 num_partial_;  // Number of files that were successfully decoded but
                       // reached no final-state (can only be nonzero if
                       // allow_partial_ is true).
  std::mutex stats_mutex_;  // Mutex that guards the statistics from tot_like_
                            // through num_partial_.
  Timer timer_;  // Timer used to print real-time info.
};
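
// A hedged usage sketch of NnetBatchDecoder ('decode_fst', 'computer', etc.
// are assumed to be set up by the caller, and the reader/writer objects are
// hypothetical, in the style of Kaldi's table readers and writers):
//
//   NnetBatchDecoder decoder(*decode_fst, decoder_config, trans_model,
//                            word_syms, true /* allow_partial */,
//                            num_threads, &computer);
//   for (; !feature_reader.Done(); feature_reader.Next())
//     decoder.AcceptInput(feature_reader.Key(), feature_reader.Value(),
//                         NULL, NULL, 0);
//   int32 num_done = decoder.Finished();
//   std::string utt, sentence;
//   CompactLattice clat;
//   while (decoder.GetOutput(&utt, &clat, &sentence))
//     clat_writer.Write(utt, clat);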


}  // namespace nnet3
}  // namespace kaldi

#endif  // KALDI_NNET3_NNET_BATCH_COMPUTE_H_