#ifndef KALDI_NNET3_NNET_BATCH_COMPUTE_H_
#define KALDI_NNET3_NNET_BATCH_COMPUTE_H_

#include <condition_variable>

// ...

namespace kaldi {
namespace nnet3 {

// class NnetInferenceTask represents a chunk of an utterance that is
// requested to be computed.
struct NnetInferenceTask {
  // ...
  NnetInferenceTask(const NnetInferenceTask &other) {
    KALDI_ERR << "NnetInferenceTask was not designed to be copied.";
  }
  // ...
};
// ...

struct NnetBatchComputerOptions: public NnetSimpleComputationOptions {
  int32 minibatch_size;
  int32 edge_minibatch_size;
  bool ensure_exact_final_context;
  BaseFloat partial_minibatch_factor;

  NnetBatchComputerOptions():
      minibatch_size(128),
      edge_minibatch_size(32),
      ensure_exact_final_context(false),
      partial_minibatch_factor(0.5) { }
  void Register(OptionsItf *po) {
    NnetSimpleComputationOptions::Register(po);
    po->Register("minibatch-size", &minibatch_size, "Number of chunks per "
                 "minibatch (see also edge-minibatch-size)");
    po->Register("edge-minibatch-size", &edge_minibatch_size, "Number of "
                 "chunks per minibatch: this applies to chunks at the "
                 "beginnings and ends of utterances, in cases (such as "
                 "recurrent models) when the computation would be different "
                 "from the usual one.");
    po->Register("ensure-exact-final-context", &ensure_exact_final_context,
                 "If true, for utterances shorter than --frames-per-chunk, "
                 "use exact-length, special computations.  If false, "
                 "pad with repeats of the last frame.  Would only affect "
                 "the output for backwards-recurrent models, but would "
                 "negatively impact speed in all cases.");
    po->Register("partial-minibatch-factor", &partial_minibatch_factor,
                 "Factor that controls how small partial minibatches will be "
                 "when they become necessary.  We will potentially do the "
                 "computation for sizes: int(partial_minibatch_factor^n * "
                 "minibatch_size), for n = 0, 1, 2....  Set it to 0.0 if you "
                 "want to use only the specified minibatch sizes.");
  }
};
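// Illustrative sketch, not part of the original header: how these options
// would typically be registered and read in a command-line program (the
// usage string and program context are assumed here for the example):
//
//   NnetBatchComputerOptions opts;
//   ParseOptions po("Usage: some-batch-program [options] <args>");
//   opts.Register(&po);
//   po.Read(argc, argv);
//
// With the defaults above (minibatch_size = 128,
// partial_minibatch_factor = 0.5), the candidate partial-minibatch sizes
// would be int(0.5^n * 128) = 128, 64, 32, ... for n = 0, 1, 2, ...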
// Merges the 'output_cpu' members of 'tasks' (if the 'output_to_cpu' members
// are true) or otherwise the 'output' members, into a single matrix 'output':
void MergeTaskOutput(
    const std::vector<NnetInferenceTask> &tasks,
    Matrix<BaseFloat> *output);
void MergeTaskOutput(
    const std::vector<NnetInferenceTask> &tasks,
    CuMatrix<BaseFloat> *output);
// This class does neural net inference in a way that is optimized for GPU
// use: it combines chunks of multiple utterances into minibatches for more
// efficient computation.
class NnetBatchComputer {
 public:
  // ...

  // Accepts a task, which will be queued for computation.  If
  // max_minibatches_full >= 0, this call may block until the number of full
  // pending minibatches has dropped below that value.
  void AcceptTask(NnetInferenceTask *task,
                  int32 max_minibatches_full = -1);

  // Returns the number of full minibatches waiting to be computed.
  int32 NumFullPendingMinibatches() const { return num_full_minibatches_; }

  // Does some computation, choosing the highest-priority thing to compute;
  // returns true if it did any computation, and false otherwise.
  bool Compute(bool allow_partial_minibatch);
  // Splits one utterance's input into a list of tasks which can then be
  // given to this class via AcceptTask().  There are CPU-based and
  // GPU-based overloads:
  void SplitUtteranceIntoTasks(
      bool output_to_cpu,
      const Matrix<BaseFloat> &input,
      const Vector<BaseFloat> *ivector,
      const Matrix<BaseFloat> *online_ivectors,
      int32 online_ivector_period,
      std::vector<NnetInferenceTask> *tasks);
  void SplitUtteranceIntoTasks(
      bool output_to_cpu,
      const CuMatrix<BaseFloat> &input,
      const CuVector<BaseFloat> *ivector,
      const CuMatrix<BaseFloat> *online_ivectors,
      int32 online_ivector_period,
      std::vector<NnetInferenceTask> *tasks);
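  // Illustrative sketch, not part of the original header: the typical
  // single-threaded calling pattern for this class ('computer', 'feats' and
  // 'ivector' are assumed to have been set up by the caller):
  //
  //   std::vector<NnetInferenceTask> tasks;
  //   computer.SplitUtteranceIntoTasks(true /* output_to_cpu */, feats,
  //                                    &ivector, NULL /* online_ivectors */,
  //                                    0, &tasks);
  //   for (size_t i = 0; i < tasks.size(); i++)
  //     computer.AcceptTask(&tasks[i]);
  //   while (computer.Compute(true /* allow_partial_minibatch */)) { }
  //   Matrix<BaseFloat> output;
  //   MergeTaskOutput(tasks, &output);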
  const NnetBatchComputerOptions &GetOptions() { return opts_; }

  ~NnetBatchComputer();

 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(NnetBatchComputer);

  // Information about the computation for one specific minibatch size.
  struct MinibatchSizeInfo {
    // the compiled computation for this minibatch size (NULL if it has not
    // been compiled yet).
    std::shared_ptr<const NnetComputation> computation;
    // ...
    MinibatchSizeInfo(): computation(NULL), num_done(0),
                         tot_num_tasks(0), seconds_taken(0.0) { }
  };

  // A computation group is a set of tasks that can be computed in the same
  // minibatch.
  struct ComputationGroupInfo {
    std::vector<NnetInferenceTask*> tasks;
    std::map<int32, MinibatchSizeInfo> minibatch_info;
  };

  // The key by which tasks are grouped into computation groups.
  struct ComputationGroupKey {
    ComputationGroupKey(const NnetInferenceTask &task):
        num_input_frames(task.input.NumRows()),
        first_input_t(task.first_input_t),
        num_output_frames(task.num_output_frames) { }
    // ...
  };

  // ...
  typedef unordered_map<ComputationGroupKey, ComputationGroupInfo,
                        ComputationGroupKeyHasher> MapType;
  // Returns the priority that this group of tasks should have in the queue
  // of things to compute.
  inline double GetPriority(bool allow_partial_minibatch,
                            const ComputationGroupInfo &info) const;

  // Returns the configured minibatch size for this group of tasks.
  inline int32 GetMinibatchSize(const ComputationGroupInfo &info) const;

  // Returns the computation for this group of tasks and this minibatch
  // size, compiling and caching it if it was not already cached.
  std::shared_ptr<const NnetComputation> GetComputation(
      const ComputationGroupInfo &info,
      int32 minibatch_size);

  // Returns the minibatch size that would actually be used for this group.
  int32 GetActualMinibatchSize(const ComputationGroupInfo &info) const;
  // Takes the highest-priority tasks out of 'info' and puts them into
  // 'tasks'.
  void GetHighestPriorityTasks(
      int32 num_tasks_needed,
      ComputationGroupInfo *info,
      std::vector<NnetInferenceTask*> *tasks);
  // Chooses the highest-priority group of tasks to compute next, and the
  // minibatch size to use; returns NULL if there is nothing to compute.
  MinibatchSizeInfo *GetHighestPriorityComputation(
      bool allow_partial_minibatch,
      int32 *minibatch_size,
      std::vector<NnetInferenceTask*> *tasks);
  // Formats the inputs of one minibatch's tasks into 'input' (and 'ivector',
  // if i-vectors are being used), transferring them to the GPU if one is in
  // use.
  void FormatInputs(int32 minibatch_size,
                    const std::vector<NnetInferenceTask*> &tasks,
                    CuMatrix<BaseFloat> *input,
                    CuMatrix<BaseFloat> *ivector);

  // Distributes the computed output back to the individual tasks.
  void FormatOutputs(const CuMatrix<BaseFloat> &output,
                     const std::vector<NnetInferenceTask*> &tasks);
  void CheckAndFixConfigs();

  // Creates the computation request that is to be compiled, for one task at
  // a given minibatch size.
  static void GetComputationRequest(const NnetInferenceTask &task,
                                    int32 minibatch_size,
                                    ComputationRequest *request);

  void PrintMinibatchStats();

  // ...
};
// This class implements a simplified interface to class NnetBatchComputer,
// which is suitable for programs that process utterances one at a time and
// read back the output as whole matrices.
class NnetBatchInference {
 public:
  // ...

  // The user should call this, one utterance at a time, for the utterances
  // to be computed; internally the input is split into tasks.
  void AcceptInput(const std::string &utterance_id,
                   const Matrix<BaseFloat> &input,
                   const Vector<BaseFloat> *ivector,
                   const Matrix<BaseFloat> *online_ivectors,
                   int32 online_ivector_period);

  // Gets the output for one utterance, in the order in which the input was
  // provided; returns false if there is no more output to produce.
  bool GetOutput(std::string *utterance_id,
                 Matrix<BaseFloat> *output);
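  // Illustrative sketch, not part of the original header: intended usage,
  // assuming a 'feature_reader' yielding (utterance-id, feature-matrix)
  // pairs and a Finished()-style end-of-input call:
  //
  //   NnetBatchInference inference(opts, nnet, priors);
  //   for (; !feature_reader.Done(); feature_reader.Next())
  //     inference.AcceptInput(feature_reader.Key(), feature_reader.Value(),
  //                           NULL, NULL, 0);
  //   inference.Finished();
  //   std::string utt;
  //   Matrix<BaseFloat> output;
  //   while (inference.GetOutput(&utt, &output)) {
  //     // ... write 'output' for utterance 'utt' ...
  //   }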
 private:
  // ...

  // Information about one utterance that is in progress.
  struct UtteranceInfo {
    // ...
    std::vector<NnetInferenceTask> tasks;
    size_t num_tasks_finished;
  };

  // ...
};
// Decoder object that uses multiple CPU threads for the graph search, plus a
// GPU for the neural net inference.
class NnetBatchDecoder {
 public:
  // Constructor.  The objects passed by reference or pointer must remain
  // valid for the lifetime of this object.
  NnetBatchDecoder(const fst::Fst<fst::StdArc> &fst,
                   const LatticeFasterDecoderConfig &decoder_opts,
                   const TransitionModel &trans_model,
                   const fst::SymbolTable *word_syms,
                   bool allow_partial,
                   int32 num_threads,
                   NnetBatchComputer *computer);
  // The user should call this, one utterance at a time, for each utterance
  // to be decoded.
  void AcceptInput(const std::string &utterance_id,
                   const Matrix<BaseFloat> &input,
                   const Vector<BaseFloat> *ivector,
                   const Matrix<BaseFloat> *online_ivectors,
                   int32 online_ivector_period);

  // The user should call this when an utterance could not be processed
  // prior to calling AcceptInput() (e.g. failure to compute its features),
  // so that the bookkeeping of utterances stays in sync.
  void UtteranceFailed();
  // Gets the output for the next finished utterance: the compact lattice,
  // plus the word sequence as a string ('sentence').  Returns false if
  // there is nothing more to output.
  bool GetOutput(std::string *utterance_id,
                 CompactLattice *clat,
                 std::string *sentence);

  // As above, but for the case where the lattice is not determinized; the
  // output is a state-level Lattice.
  bool GetOutput(std::string *utterance_id,
                 Lattice *lat,
                 std::string *sentence);
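  // Illustrative sketch, not part of the original header: intended usage,
  // assuming a 'feature_reader', a 'clat_writer', and an end-of-input call
  // (named Finished() here) that flushes the remaining utterances:
  //
  //   NnetBatchDecoder decoder(fst, decoder_opts, trans_model, word_syms,
  //                            true /* allow_partial */,
  //                            4 /* num_threads */, &computer);
  //   for (; !feature_reader.Done(); feature_reader.Next())
  //     decoder.AcceptInput(feature_reader.Key(), feature_reader.Value(),
  //                         NULL, NULL, 0);
  //   decoder.Finished();
  //   std::string utt, sentence;
  //   CompactLattice clat;
  //   while (decoder.GetOutput(&utt, &clat, &sentence))
  //     clat_writer.Write(utt, clat);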
 private:
  // ...
  void SetPriorities(std::vector<NnetInferenceTask> *tasks);
  void UpdatePriorityOffset(double priority);

  // ...
  const fst::Fst<fst::StdArc> &fst_;
  // ...
};
// ...

}  // namespace nnet3
}  // namespace kaldi

#endif  // KALDI_NNET3_NNET_BATCH_COMPUTE_H_