36 const fst::SymbolTable *word_syms,
37 const std::string &utt,
50 decoder_(decoder), decodable_(decodable), trans_model_(&trans_model),
51 word_syms_(word_syms), utt_(utt), acoustic_scale_(acoustic_scale),
52 determinize_(determinize), allow_partial_(allow_partial),
53 alignments_writer_(alignments_writer),
54 words_writer_(words_writer),
55 compact_lattice_writer_(compact_lattice_writer),
56 lattice_writer_(lattice_writer),
57 like_sum_(like_sum), frame_sum_(frame_sum),
58 num_done_(num_done), num_err_(num_err),
59 num_partial_(num_partial),
60 computed_(false), success_(false), partial_(false),
61 clat_(NULL), lat_(NULL) { }
77 <<
" since no final-state reached\n";
81 <<
" since no final-state reached and " 82 <<
"--allow-partial=false.\n";
91 if (
lat_->NumStates() == 0)
92 KALDI_ERR <<
"Unexpected problem getting lattice for utterance " <<
utt_;
102 KALDI_WARN <<
"Determinization finished earlier than the beam for " 103 <<
"utterance " <<
utt_;
118 KALDI_ERR <<
"Destructor called without operator (), error in calling code.";
121 if (
num_err_ != NULL) (*num_err_)++;
131 fst::VectorFst<LatticeArc> decoded;
133 if (decoded.NumStates() == 0) {
135 KALDI_ERR <<
"Failed to get traceback for utterance " <<
utt_;
137 std::vector<int32> alignment;
138 std::vector<int32>
words;
140 num_frames = alignment.size();
146 std::cerr <<
utt_ <<
' ';
147 for (
size_t i = 0;
i < words.size();
i++) {
150 KALDI_ERR <<
"Word-id " << words[
i] <<
" not in symbol table.";
151 std::cerr << s <<
' ';
161 if (
clat_->NumStates() == 0) {
170 if (
lat_->NumStates() == 0) {
180 KALDI_LOG <<
"Log-like per frame for utterance " <<
utt_ <<
" is " 181 << (likelihood / num_frames) <<
" over " 182 << num_frames <<
" frames.";
198 template <
typename FST>
203 const fst::SymbolTable *word_syms,
205 double acoustic_scale,
213 using fst::VectorFst;
214 if (!decoder.
Decode(&decodable)) {
215 KALDI_WARN <<
"Failed to decode utterance with id " << utt;
220 KALDI_WARN <<
"Outputting partial output for utterance " << utt
221 <<
" since no final-state reached\n";
223 KALDI_WARN <<
"Not producing output for utterance " << utt
224 <<
" since no final-state reached and " 225 <<
"--allow-partial=false.\n";
232 if (clat.NumStates() == 0)
233 KALDI_ERR <<
"Unexpected problem getting lattice for utterance " << utt;
244 if (decoded.Start() == fst::kNoStateId)
246 KALDI_ERR <<
"Failed to get traceback for utterance " << utt;
248 std::vector<int32> alignment;
249 std::vector<int32>
words;
251 num_frames = alignment.size();
253 if (words_writer->
IsOpen())
254 words_writer->
Write(utt, words);
255 if (alignment_writer->
IsOpen())
256 alignment_writer->
Write(utt, alignment);
257 if (word_syms != NULL) {
258 std::cerr << utt <<
' ';
259 for (
size_t i = 0;
i < words.size();
i++) {
260 std::string s = word_syms->Find(words[
i]);
262 KALDI_ERR <<
"Word-id " << words[
i] <<
" not in symbol table.";
263 std::cerr << s <<
' ';
271 if (acoustic_scale != 0.0)
274 compact_lattice_writer->
Write(utt, clat);
275 KALDI_LOG <<
"Log-like per frame for utterance " << utt <<
" is " 276 << (likelihood / num_frames) <<
" over " 277 << num_frames <<
" frames.";
278 KALDI_VLOG(2) <<
"Cost for utterance " << utt <<
" is " 280 *like_ptr = likelihood;
286 template <
typename FST>
291 const fst::SymbolTable *word_syms,
293 double acoustic_scale,
301 using fst::VectorFst;
303 if (!decoder.
Decode(&decodable)) {
304 KALDI_WARN <<
"Failed to decode utterance with id " << utt;
309 KALDI_WARN <<
"Outputting partial output for utterance " << utt
310 <<
" since no final-state reached\n";
312 KALDI_WARN <<
"Not producing output for utterance " << utt
313 <<
" since no final-state reached and " 314 <<
"--allow-partial=false.\n";
323 VectorFst<LatticeArc> decoded;
326 KALDI_ERR <<
"Failed to get traceback for utterance " << utt;
328 std::vector<int32> alignment;
329 std::vector<int32>
words;
331 num_frames = alignment.size();
332 if (words_writer->
IsOpen())
333 words_writer->
Write(utt, words);
334 if (alignment_writer->
IsOpen())
335 alignment_writer->
Write(utt, alignment);
336 if (word_syms != NULL) {
337 std::cerr << utt <<
' ';
338 for (
size_t i = 0;
i < words.size();
i++) {
339 std::string s = word_syms->Find(words[
i]);
341 KALDI_ERR <<
"Word-id " << words[
i] <<
" not in symbol table.";
342 std::cerr << s <<
' ';
352 if (lat.NumStates() == 0)
353 KALDI_ERR <<
"Unexpected problem getting lattice for utterance " << utt;
363 KALDI_WARN <<
"Determinization finished earlier than the beam for " 364 <<
"utterance " << utt;
366 if (acoustic_scale != 0.0)
368 compact_lattice_writer->
Write(utt, clat);
371 if (acoustic_scale != 0.0)
373 lattice_writer->
Write(utt, lat);
375 KALDI_LOG <<
"Log-like per frame for utterance " << utt <<
" is " 376 << (likelihood / num_frames) <<
" over " 377 << num_frames <<
" frames.";
378 KALDI_VLOG(2) <<
"Cost for utterance " << utt <<
" is " 380 *like_ptr = likelihood;
389 const fst::SymbolTable *word_syms,
391 double acoustic_scale,
404 const fst::SymbolTable *word_syms,
406 double acoustic_scale,
420 const fst::SymbolTable *word_syms,
422 double acoustic_scale,
435 const fst::SymbolTable *word_syms,
437 double acoustic_scale,
452 const fst::SymbolTable *word_syms,
454 double acoustic_scale,
462 using fst::VectorFst;
464 if (!decoder.
Decode(&decodable)) {
465 KALDI_WARN <<
"Failed to decode utterance with id " << utt;
470 KALDI_WARN <<
"Outputting partial output for utterance " << utt
471 <<
" since no final-state reached\n";
473 KALDI_WARN <<
"Not producing output for utterance " << utt
474 <<
" since no final-state reached and " 475 <<
"--allow-partial=false.\n";
484 VectorFst<LatticeArc> decoded;
487 KALDI_ERR <<
"Failed to get traceback for utterance " << utt;
489 std::vector<int32> alignment;
490 std::vector<int32>
words;
492 num_frames = alignment.size();
493 if (words_writer->
IsOpen())
494 words_writer->
Write(utt, words);
495 if (alignment_writer->
IsOpen())
496 alignment_writer->
Write(utt, alignment);
497 if (word_syms != NULL) {
498 std::cerr << utt <<
' ';
499 for (
size_t i = 0;
i < words.size();
i++) {
500 std::string s = word_syms->Find(words[
i]);
502 KALDI_ERR <<
"Word-id " << words[
i] <<
" not in symbol table.";
503 std::cerr << s <<
' ';
513 KALDI_ERR <<
"Unexpected problem getting lattice for utterance " << utt;
523 KALDI_WARN <<
"Determinization finished earlier than the beam for " 524 <<
"utterance " << utt;
526 if (acoustic_scale != 0.0)
528 compact_lattice_writer->
Write(utt, clat);
531 if (acoustic_scale != 0.0)
533 lattice_writer->
Write(utt, lat);
535 KALDI_LOG <<
"Log-like per frame for utterance " << utt <<
" is " 536 << (likelihood / num_frames) <<
" over " 537 << num_frames <<
" frames.";
538 KALDI_VLOG(2) <<
"Cost for utterance " << utt <<
" is " 540 *like_ptr = likelihood;
547 fst::VectorFst<fst::StdArc> *
fst) {
552 StateId num_states = fst->NumStates();
553 if (num_states == 0) {
557 Weight zero = Weight::Zero();
559 fst::VectorFst<fst::StdArc> fst_rhs(*fst);
561 for (StateId state = 0; state < num_states; state++)
562 fst_rhs.SetFinal(state, zero);
563 StateId pre_initial = fst_rhs.AddState();
564 Arc to_initial(0, 0, Weight::One(), fst_rhs.Start());
565 fst_rhs.AddArc(pre_initial, to_initial);
566 fst_rhs.SetStart(pre_initial);
570 fst_rhs.SetFinal(pre_initial, Weight::One());
571 fst::VectorFst<fst::StdArc> fst_concat;
572 fst::Concat(fst, fst_rhs);
578 const std::string &utt,
581 fst::VectorFst<fst::StdArc> *
fst,
594 config.
beam <= 0.0) {
599 if (fst->Start() == fst::kNoStateId) {
600 KALDI_WARN <<
"Empty decoding graph for " << utt;
601 if (num_error != NULL) (*num_error)++;
612 decoder.
Decode(decodable);
617 if (num_retried != NULL) (*num_retried)++;
618 KALDI_WARN <<
"Retrying utterance " << utt <<
" with beam " 622 decoder.
Decode(decodable);
627 KALDI_WARN <<
"Did not successfully decode file " << utt <<
", len = " 629 if (num_error != NULL) (*num_error)++;
633 fst::VectorFst<LatticeArc> decoded;
635 if (decoded.NumStates() == 0) {
636 KALDI_WARN <<
"Error getting best path from decoder (likely a bug)";
637 if (num_error != NULL) (*num_error)++;
641 std::vector<int32> alignment;
642 std::vector<int32>
words;
648 if (num_done != NULL) (*num_done)++;
649 if (tot_like != NULL) (*tot_like) += like;
650 if (frame_count != NULL) (*frame_count) += decodable->
NumFramesReady();
652 if (alignment_writer != NULL && alignment_writer->
IsOpen())
653 alignment_writer->
Write(utt, alignment);
655 if (scores_writer != NULL && scores_writer->
IsOpen())
659 if (per_frame_acwt_writer != NULL && per_frame_acwt_writer->
IsOpen()) {
661 per_frame_loglikes.
Scale(-1 / acoustic_scale);
662 per_frame_acwt_writer->
Write(utt, per_frame_loglikes);
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool DecodeUtteranceLatticeIncremental(LatticeIncrementalDecoderTpl< FST > &decoder, DecodableInterface &decodable, const TransitionModel &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, bool determinize, bool allow_partial, Int32VectorWriter *alignment_writer, Int32VectorWriter *words_writer, CompactLatticeWriter *compact_lattice_writer, LatticeWriter *lattice_writer, double *like_ptr)
TODO.
LatticeWriter * lattice_writer_
DecodeUtteranceLatticeFasterClass(LatticeFasterDecoder *decoder, DecodableInterface *decodable, const TransitionModel &trans_model, const fst::SymbolTable *word_syms, const std::string &utt, BaseFloat acoustic_scale, bool determinize, bool allow_partial, Int32VectorWriter *alignments_writer, Int32VectorWriter *words_writer, CompactLatticeWriter *compact_lattice_writer, LatticeWriter *lattice_writer, double *like_sum, int64 *frame_sum, int32 *num_done, int32 *num_err, int32 *num_partial)
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
const LatticeFasterDecoderConfig & GetOptions() const
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
bool GetRawLattice(Lattice *ofst, bool use_final_probs=true) const
Outputs an FST corresponding to the raw, state-level tracebacks.
const TransitionModel * trans_model_
void SetOptions(const FasterDecoderOptions &config)
CompactLatticeWriter * compact_lattice_writer_
This is an extension to the "normal" lattice-generating decoder.
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
bool ReachedFinal() const
says whether a final-state was active on the last frame.
bool Decode(DecodableInterface *decodable)
void Decode(DecodableInterface *decodable)
Int32VectorWriter * alignments_writer_
A templated class for writing objects to an archive or script file; see The Table concept...
bool DecodeUtteranceLatticeFaster(LatticeFasterDecoderTpl< FST > &decoder, DecodableInterface &decodable, const TransitionModel &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, bool determinize, bool allow_partial, Int32VectorWriter *alignment_writer, Int32VectorWriter *words_writer, CompactLatticeWriter *compact_lattice_writer, LatticeWriter *lattice_writer, double *like_ptr)
This function DecodeUtteranceLatticeFaster is used in several decoders, and we have moved it here...
void GetPerFrameAcousticCosts(const Lattice &nbest, Vector< BaseFloat > *per_frame_loglikes)
This function extracts the per-frame log likelihoods from a linear lattice (which we refer to as an '...
const LatticeSimpleDecoderConfig & GetOptions() const
Int32VectorWriter * words_writer_
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
void Write(const std::string &key, const T &value) const
bool GetBestPath(fst::MutableFst< LatticeArc > *fst_out, bool use_final_probs=true)
GetBestPath gets the decoding traceback.
void CompactLatticeShortestPath(const CompactLattice &clat, CompactLattice *shortest_path)
A form of the shortest-path/best-path algorithm that's specially coded for CompactLattice.
~DecodeUtteranceLatticeFasterClass()
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
bool GetRawLattice(Lattice *lat, bool use_final_probs=true) const
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
BaseFloat acoustic_scale_
static const LatticeWeightTpl Zero()
fst::VectorFst< LatticeArc > Lattice
void Scale(Real alpha)
Multiplies all elements by this constant.
Simplest possible decoder, included largely for didactic purposes and as a means to debug more highly...
bool DecodeUtteranceLatticeSimple(LatticeSimpleDecoder &decoder, DecodableInterface &decodable, const TransitionModel &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, bool determinize, bool allow_partial, Int32VectorWriter *alignment_writer, Int32VectorWriter *words_writer, CompactLatticeWriter *compact_lattice_writer, LatticeWriter *lattice_writer, double *like_ptr)
bool GetBestPath(Lattice *ofst, bool use_final_probs=true) const
Outputs an FST corresponding to the single best path through the lattice.
fst::VectorFst< CompactLatticeArc > CompactLattice
fst::StdArc::Weight Weight
void ModifyGraphForCarefulAlignment(fst::VectorFst< fst::StdArc > *fst)
This function modifies the decoding graph for what we call "careful alignment".
fst::DeterminizeLatticePhonePrunedOptions det_opts
This is the "normal" lattice-generating decoder.
bool Decode(DecodableInterface *decodable)
Decodes until there are no more frames left in the "decodable" object.
const fst::SymbolTable * word_syms_
DecodableInterface * decodable_
A class representing a vector.
int32 NumFramesDecoded() const
Returns the number of frames decoded so far.
#define KALDI_ASSERT(cond)
bool Decode(DecodableInterface *decodable)
CAUTION: it's unlikely that you will ever want to call this function.
bool ReachedFinal() const
says whether a final-state was active on the last frame.
bool GetBestPath(Lattice *lat, bool use_final_probs=true) const
fst::DeterminizeLatticePhonePrunedOptions det_opts
const CompactLattice & GetLattice(int32 num_frames_to_include, bool use_final_probs=false)
This decoder has no GetBestPath() function.
void AlignUtteranceWrapper(const AlignConfig &config, const std::string &utt, BaseFloat acoustic_scale, fst::VectorFst< fst::StdArc > *fst, DecodableInterface *decodable, Int32VectorWriter *alignment_writer, BaseFloatWriter *scores_writer, int32 *num_done, int32 *num_error, int32 *num_retried, double *tot_like, int64 *frame_count, BaseFloatVectorWriter *per_frame_acwt_writer)
AlignUtteranceWrapper is a wrapper for alignment code used in training, that is called from many diffe...
bool ReachedFinal() const
Returns true if a final state was active on the last frame.
bool DeterminizeLatticePhonePrunedWrapper(const kaldi::TransitionModel &trans_model, MutableFst< kaldi::LatticeArc > *ifst, double beam, MutableFst< kaldi::CompactLatticeArc > *ofst, DeterminizeLatticePhonePrunedOptions opts)
This function is a wrapper of DeterminizeLatticePhonePruned() that works for Lattice type FSTs...
LatticeFasterDecoder * decoder_
bool ReachedFinal() const
says whether a final-state was active on the last frame.