doc/online-gmm-decoding_8cc_source.html

 // online2/online-gmm-decoding.cc

 // Copyright    2013-2014  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include "online2/online-gmm-decoding.h"
 #include "lat/lattice-functions.h"
 #include "lat/determinize-lattice-pruned.h"

 namespace kaldi {

 // Reads the adaptation state from in_stream.  The expected layout is the
 // opening <ONLINEGMMADAPTATIONSTATE> token, then the <TRANSFORM>, <CMVNSTATS>
 // and <SPKSTATS> sections in that order, then the closing token.  ExpectToken()
 // throws if a token does not match.
 void OnlineGmmAdaptationState::Read(std::istream &in_stream, bool binary) {
   ExpectToken(in_stream, binary, "<ONLINEGMMADAPTATIONSTATE>");

   // Transform matrix.
   ExpectToken(in_stream, binary, "<TRANSFORM>");
   transform.Read(in_stream, binary);

   // CMVN state.
   ExpectToken(in_stream, binary, "<CMVNSTATS>");
   cmvn_state.Read(in_stream, binary);

   // Speaker statistics.  The third argument of the stats' Read() is its
   // "add" flag; it is passed as false here.
   const bool add = false;
   ExpectToken(in_stream, binary, "<SPKSTATS>");
   spk_stats.Read(in_stream, binary, add);

   ExpectToken(in_stream, binary, "</ONLINEGMMADAPTATIONSTATE>");
 }

 // Writes the adaptation state to out_stream: the opening
 // <ONLINEGMMADAPTATIONSTATE> token, then the transform, the CMVN state and the
 // speaker statistics, each preceded by its own token, then the closing token.
 // `binary` is forwarded unchanged to every WriteToken()/Write() call.
 void OnlineGmmAdaptationState::Write(std::ostream &out_stream, bool binary) const {
   WriteToken(out_stream, binary, "<ONLINEGMMADAPTATIONSTATE>");
   WriteToken(out_stream, binary, "<TRANSFORM>");
   transform.Write(out_stream, binary);
   WriteToken(out_stream, binary, "<CMVNSTATS>");
   cmvn_state.Write(out_stream, binary);
   WriteToken(out_stream, binary, "<SPKSTATS>");
   spk_stats.Write(out_stream, binary);
   WriteToken(out_stream, binary, "</ONLINEGMMADAPTATIONSTATE>");
 }

 // Sets up a decoder for one utterance.
 //   config             decoding configuration (copied into config_).
 //   models             the models to decode with.
 //   feature_prototype  its New() method supplies this object's own feature
 //                      pipeline, which the destructor deletes.
 //   fst                decoding graph handed to the underlying decoder.
 //   adaptation_state   adaptation state at the start of the utterance; its
 //                      transform is installed in the feature pipeline.
 // Raises an error via KALDI_ERR if config.silence_phones cannot be parsed as
 // a colon-separated list of integers.
 SingleUtteranceGmmDecoder::SingleUtteranceGmmDecoder(
     const OnlineGmmDecodingConfig &config,
     const OnlineGmmDecodingModels &models,
     const OnlineFeaturePipeline &feature_prototype,
     const fst::Fst<fst::StdArc> &fst,
     const OnlineGmmAdaptationState &adaptation_state)
     : config_(config),
       models_(models),
       feature_pipeline_(feature_prototype.New()),
       orig_adaptation_state_(adaptation_state),
       adaptation_state_(adaptation_state),
       decoder_(fst, config.faster_decoder_opts) {
   // Parse the silence-phone list; empty fields are not omitted (the third
   // argument is false).
   const bool parsed_ok = SplitStringToIntegers(config_.silence_phones, ":",
                                                false, &silence_phones_);
   if (!parsed_ok) {
     KALDI_ERR << "Bad --silence-phones option '"
               << config_.silence_phones << "'";
   }
   SortAndUniq(&silence_phones_);

   // Start from the transform carried in by the adaptation state.
   feature_pipeline_->SetTransform(adaptation_state_.transform);
   decoder_.InitDecoding();
 }

 // Advance the decoding as far as we can, and possibly estimate fMLLR.
 void SingleUtteranceGmmDecoder::AdvanceDecoding() {

   const AmDiagGmm &am_gmm = (HaveTransform() ? models_.GetModel() :
                              models_.GetOnlineAlignmentModel());

   // The decodable object is lightweight, we lose nothing
   // from constructing it each time we want to decode more of the
   // input.
   DecodableDiagGmmScaledOnline decodable(am_gmm,
                                          models_.GetTransitionModel(),
                                          config_.acoustic_scale,
                                          feature_pipeline_);

   int32 old_frames = decoder_.NumFramesDecoded();

   // This will decode as many frames as are currently available.
   decoder_.AdvanceDecoding(&decodable);


   {  // possibly estimate fMLLR.
     int32 new_frames = decoder_.NumFramesDecoded();
     BaseFloat frame_shift = feature_pipeline_->FrameShiftInSeconds();
     // if the original adaptation state (at utterance-start) had no transform,
     // then this means it's the first utt of the speaker... even if not, if we
     // don't have a transform it probably makes sense to treat it as the 1st utt
     // of the speaker, i.e. to do fMLLR adaptation sooner.
     bool is_first_utterance_of_speaker =
         (orig_adaptation_state_.transform.NumRows() == 0);
     bool end_of_utterance = false;
     if (config_.adaptation_policy_opts.DoAdapt(old_frames * frame_shift,
                                                new_frames * frame_shift,
                                                is_first_utterance_of_speaker))
       this->EstimateFmllr(end_of_utterance);
   }
 }

 // Forwards to the underlying decoder's FinalizeDecoding().
 void SingleUtteranceGmmDecoder::FinalizeDecoding() {
   decoder_.FinalizeDecoding();
 }

 // Gets Gaussian-level posteriors for the purpose of fMLLR estimation.
 // Posteriors belonging to the silence phones are re-weighted by
 // config_.silence_weight before the Gaussian-level posteriors are computed.
 //
 //   end_of_utterance  passed to the decoder as the use_final_probs argument
 //                     of GetRawLatticePruned().
 //   gpost             output; resized to one entry per frame.  Each entry is
 //                     a list of (pdf-id, per-Gaussian posterior vector) pairs,
 //                     the vector already scaled by that pdf's posterior.
 //
 // Returns false (after logging a warning) if no frames have been decoded or
 // if the determinized lattice is empty; *gpost is not modified in that case.
 // Returns true otherwise.
 bool SingleUtteranceGmmDecoder::GetGaussianPosteriors(bool end_of_utterance,
                                                       GaussPost *gpost) {
   // Gets the Gaussian-level posteriors for this utterance, using whatever
   // features and model we are currently decoding with.  We'll use these
   // to estimate basis-fMLLR with.
   if (decoder_.NumFramesDecoded() == 0) {
     KALDI_WARN << "You have decoded no data so cannot estimate fMLLR.";
     return false;
   }

   KALDI_ASSERT(config_.fmllr_lattice_beam > 0.0);

   // Note: we'll just use whatever acoustic scaling factor we were decoding
   // with.  This is in the lattice that we get from
   // decoder_.GetRawLatticePruned().
   Lattice raw_lat;
   decoder_.GetRawLatticePruned(&raw_lat, end_of_utterance,
                                config_.fmllr_lattice_beam);

   // At this point we could rescore the lattice if we wanted, and
   // this might improve the accuracy on long utterances that were
   // the first utterance of that speaker, if we had already
   // estimated the fMLLR by the time we reach this code (e.g. this
   // was the second call).  We don't do this right now.

   // Prune again with the same beam that was given to GetRawLatticePruned().
   PruneLattice(config_.fmllr_lattice_beam, &raw_lat);

 #if 1 // Do determinization.
   Lattice det_lat; // lattice-determinized lattice-- represent this as Lattice
                    // not CompactLattice, as LatticeForwardBackward() does not
                    // accept CompactLattice.


   fst::Invert(&raw_lat); // want to determinize on words.
   fst::ILabelCompare<kaldi::LatticeArc> ilabel_comp;
   fst::ArcSort(&raw_lat, ilabel_comp); // improves efficiency of determinization

   // NOTE(review): the return value of DeterminizeLatticePruned() is not
   // checked; only the empty-lattice case is detected below.
   fst::DeterminizeLatticePruned(raw_lat,
                                 double(config_.fmllr_lattice_beam),
                                 &det_lat);

   fst::Invert(&det_lat); // invert back.

   if (det_lat.NumStates() == 0) {
     // Do nothing if the lattice is empty.  This should not happen.
     KALDI_WARN << "Got empty lattice.  Not estimating fMLLR.";
     return false;
   }
 #else
   Lattice &det_lat = raw_lat; // Don't determinize.
 #endif
   TopSortLatticeIfNeeded(&det_lat);

   // Note: the acoustic scale we use here is whatever we decoded with.
   // `post` holds posteriors over transition-ids, one entry per frame.
   Posterior post;
   BaseFloat tot_fb_like = LatticeForwardBackward(det_lat, &post);

   // NOTE(review): divides by post.size() with no guard against an empty
   // posterior; the result is used only for this log message.
   KALDI_VLOG(3) << "Lattice forward-backward likelihood was "
                 << (tot_fb_like / post.size()) << " per frame over " << post.size()
                 << " frames.";

   // Re-weight the posteriors of silence phones by config_.silence_weight.
   ConstIntegerSet<int32> silence_set(silence_phones_);  // faster lookup
   const TransitionModel &trans_model = models_.GetTransitionModel();
   WeightSilencePost(trans_model, silence_set,
                     config_.silence_weight, &post);

   // Same model choice as in AdvanceDecoding(): the main model if we already
   // have a transform, otherwise the online alignment model.
   const AmDiagGmm &am_gmm = (HaveTransform() ? models_.GetModel() :
                              models_.GetOnlineAlignmentModel());


   // Convert from transition-id posteriors to pdf-id posteriors.
   Posterior pdf_post;
   ConvertPosteriorToPdfs(trans_model, post, &pdf_post);

   Vector<BaseFloat> feat(feature_pipeline_->Dim());

   // For every frame and every pdf with nonzero posterior on that frame,
   // compute the per-Gaussian posteriors within the pdf and scale them by the
   // pdf's posterior.  tot_like/tot_weight are accumulated for logging only.
   double tot_like = 0.0, tot_weight = 0.0;
   gpost->resize(pdf_post.size());
   for (size_t i = 0; i < pdf_post.size(); i++) {
     feature_pipeline_->GetFrame(i, &feat);
     for (size_t j = 0; j < pdf_post[i].size(); j++) {
       int32 pdf_id = pdf_post[i][j].first;
       BaseFloat weight = pdf_post[i][j].second;
       const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
       Vector<BaseFloat> this_post_vec;
       BaseFloat like = gmm.ComponentPosteriors(feat, &this_post_vec);
       this_post_vec.Scale(weight);
       tot_like += like * weight;
       tot_weight += weight;
       (*gpost)[i].push_back(std::make_pair(pdf_id, this_post_vec));
     }
   }
   // NOTE(review): tot_weight can be zero if every posterior was weighted
   // away; the quotient is used only for this log message.
   KALDI_VLOG(3) << "Average likelihood weighted by posterior was "
                 << (tot_like / tot_weight) << " over " << tot_weight
                 << " frames (after downweighting silence).";
   return true;
 }


 // Estimates the [basis-]fMLLR transform from the data decoded so far and
 // installs it in the feature pipeline.
 //
 //   end_of_utterance  forwarded to GetGaussianPosteriors().
 //
 // If no frames have been decoded, or if GetGaussianPosteriors() reports that
 // it could not produce posteriors, the function returns without modifying the
 // speaker statistics, the CMVN state or the current transform.  Raises an
 // error via KALDI_ERR if the fMLLR basis is empty.
 void SingleUtteranceGmmDecoder::EstimateFmllr(bool end_of_utterance) {
   if (decoder_.NumFramesDecoded() == 0) {
     KALDI_WARN << "You have decoded no data so cannot estimate fMLLR.";
     // Nothing to estimate from: leave the adaptation state and the feature
     // pipeline exactly as they are.
     return;
   }

   if (GetVerboseLevel() >= 2) {
     Matrix<BaseFloat> feats;
     feature_pipeline_->GetAsMatrix(&feats);
     KALDI_VLOG(2) << "Features are " << feats;
   }

   GaussPost gpost;
   if (!GetGaussianPosteriors(end_of_utterance, &gpost)) {
     // GetGaussianPosteriors() has already logged the reason.  Without
     // posteriors there are no new statistics, so do not clear the transform,
     // freeze the CMVN or recompute the transform.
     return;
   }

   FmllrDiagGmmAccs &spk_stats = adaptation_state_.spk_stats;

   if (spk_stats.beta_ !=
       orig_adaptation_state_.spk_stats.beta_) {
     // This could happen if the user called EstimateFmllr() twice on the
     // same utterance... we don't want to count any stats twice so we
     // have to reset the stats to what they were before this utterance
     // (possibly empty).
     spk_stats = orig_adaptation_state_.spk_stats;
   }

   int32 dim = feature_pipeline_->Dim();
   if (spk_stats.Dim() == 0)
     spk_stats.Init(dim);

   // Temporarily install an empty transform so that GetFrame() below returns
   // features without the current fMLLR transform applied; the new transform
   // is installed at the end of this function.
   Matrix<BaseFloat> empty_transform;
   feature_pipeline_->SetTransform(empty_transform);
   Vector<BaseFloat> feat(dim);

   if (adaptation_state_.transform.NumRows() == 0) {
     // If this is the first time we're estimating fMLLR, freeze the CMVN to its
     // current value.  It doesn't matter too much what value this is, since we
     // have already computed the Gaussian-level alignments (it may have a small
     // effect if the basis is very small and doesn't include an offset as part
     // of the transform).
     feature_pipeline_->FreezeCmvn();
   }

   // GetModel() returns the model to be used for estimating
   // transforms.
   const AmDiagGmm &am_gmm = models_.GetModel();

   // Accumulate fMLLR statistics over every frame and every pdf that has
   // Gaussian posteriors on that frame.
   for (size_t i = 0; i < gpost.size(); i++) {
     feature_pipeline_->GetFrame(i, &feat);
     for (size_t j = 0; j < gpost[i].size(); j++) {
       int32 pdf_id = gpost[i][j].first; // caution: this gpost has pdf-id
                                         // instead of transition-id, which is
                                         // unusual.
       const Vector<BaseFloat> &posterior(gpost[i][j].second);
       spk_stats.AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
                                          feat, posterior);
     }
   }

   const BasisFmllrEstimate &basis = models_.GetFmllrBasis();
   if (basis.Dim() == 0)
     KALDI_ERR << "In order to estimate fMLLR, you need to supply the "
               << "--fmllr-basis option.";
   Vector<BaseFloat> basis_coeffs;
   BaseFloat impr = basis.ComputeTransform(spk_stats,
                                           &adaptation_state_.transform,
                                           &basis_coeffs, config_.basis_opts);
   KALDI_VLOG(3) << "Objective function improvement from basis-fMLLR is "
                 << (impr / spk_stats.beta_) << " per frame, over "
                 << spk_stats.beta_ << " frames, #params estimated is "
                 << basis_coeffs.Dim();
   // Make the newly estimated transform take effect on subsequent features.
   feature_pipeline_->SetTransform(adaptation_state_.transform);
 }


 // Returns whether the feature pipeline reports having an fMLLR transform.
 bool SingleUtteranceGmmDecoder::HaveTransform() const {
   return (feature_pipeline_->HaveFmllrTransform());
 }

 // Copies the current adaptation state into *adaptation_state, then overwrites
 // its cmvn_state member with the CMVN state obtained from the feature
 // pipeline.
 void SingleUtteranceGmmDecoder::GetAdaptationState(
     OnlineGmmAdaptationState *adaptation_state) const {
   *adaptation_state = adaptation_state_;
   feature_pipeline_->GetCmvnState(&adaptation_state->cmvn_state);
 }

 bool SingleUtteranceGmmDecoder::RescoringIsNeeded() const {
   if (orig_adaptation_state_.transform.NumRows() !=
       adaptation_state_.transform.NumRows()) return true;  // fMLLR was estimated
   if (!orig_adaptation_state_.transform.ApproxEqual(
           adaptation_state_.transform)) return true;  // fMLLR was re-estimated
   if (adaptation_state_.transform.NumRows() != 0 &&
       &models_.GetModel() != &models_.GetFinalModel())
     return true; // we have an fMLLR transform, and a discriminatively estimated
                  // model which differs from the one used to estimate fMLLR.
   return false;
 }

 // Deletes the feature pipeline, which the constructor obtained from
 // feature_prototype.New().
 SingleUtteranceGmmDecoder::~SingleUtteranceGmmDecoder() {
   delete feature_pipeline_;
 }


 bool SingleUtteranceGmmDecoder::EndpointDetected(
     const OnlineEndpointConfig &config) {
   const TransitionModel &tmodel = models_.GetTransitionModel();
   return kaldi::EndpointDetected(config, tmodel,
                                  feature_pipeline_->FrameShiftInSeconds(),
                                  decoder_);
 }

 void SingleUtteranceGmmDecoder::GetLattice(bool rescore_if_needed,
                                            bool end_of_utterance,
                                            CompactLattice *clat) const {
   Lattice lat;
   double lat_beam = config_.faster_decoder_opts.lattice_beam;
   decoder_.GetRawLattice(&lat, end_of_utterance);
   if (rescore_if_needed && RescoringIsNeeded()) {
     DecodableDiagGmmScaledOnline decodable(models_.GetFinalModel(),
                                            models_.GetTransitionModel(),
                                            config_.acoustic_scale,
                                            feature_pipeline_);

     if (!kaldi::RescoreLattice(&decodable, &lat))
       KALDI_WARN << "Error rescoring lattice";
   }
   PruneLattice(lat_beam, &lat);

   DeterminizeLatticePhonePrunedWrapper(models_.GetTransitionModel(),
                                        &lat, lat_beam, clat,
                                        config_.faster_decoder_opts.det_opts);

 }

 // Forwards to the underlying decoder's GetBestPath(), writing the result to
 // *best_path; end_of_utterance is passed as the decoder's second argument.
 void SingleUtteranceGmmDecoder::GetBestPath(bool end_of_utterance,
                                             Lattice *best_path) const {
   decoder_.GetBestPath(best_path, end_of_utterance);
 }

 // Loads the models named in `config`.
 //   - model_rxfilename is mandatory; the transition model and the main GMM are
 //     read from it.
 //   - online_alimdl_rxfilename and rescore_model_rxfilename are optional; each
 //     file starts with a transition model which must be Compatible() with the
 //     main one, followed by the GMM.
 //   - fmllr_basis_rxfilename is optional; the fMLLR basis is read from it.
 // Raises an error via KALDI_ERR if an optional model's transition model is
 // incompatible with the main one.
 OnlineGmmDecodingModels::OnlineGmmDecodingModels(
     const OnlineGmmDecodingConfig &config) {
   KALDI_ASSERT(!config.model_rxfilename.empty() &&
                "You must supply the --model option");

   {  // Main model: transition model followed by the GMM, same stream.
     bool binary;
     Input model_input(config.model_rxfilename, &binary);
     tmodel_.Read(model_input.Stream(), binary);
     model_.Read(model_input.Stream(), binary);
   }

   const bool have_alignment_model = !config.online_alimdl_rxfilename.empty();
   if (have_alignment_model) {
     bool binary;
     Input alimdl_input(config.online_alimdl_rxfilename, &binary);
     // The file's own transition model is read only for the compatibility
     // check and then discarded.
     TransitionModel alimdl_tmodel;
     alimdl_tmodel.Read(alimdl_input.Stream(), binary);
     if (!alimdl_tmodel.Compatible(tmodel_)) {
       KALDI_ERR << "Incompatible models given to the --model and "
                 << "--online-alignment-model options";
     }
     online_alignment_model_.Read(alimdl_input.Stream(), binary);
   }

   const bool have_rescore_model = !config.rescore_model_rxfilename.empty();
   if (have_rescore_model) {
     bool binary;
     Input rescore_input(config.rescore_model_rxfilename, &binary);
     TransitionModel rescore_tmodel;
     rescore_tmodel.Read(rescore_input.Stream(), binary);
     if (!rescore_tmodel.Compatible(tmodel_)) {
       // NOTE(review): the message says "--final-model" while the config field
       // is rescore_model_rxfilename -- confirm the registered option name.
       KALDI_ERR << "Incompatible models given to the --model and "
                 << "--final-model options";
     }
     rescore_model_.Read(rescore_input.Stream(), binary);
   }

   const bool have_fmllr_basis = !config.fmllr_basis_rxfilename.empty();
   if (have_fmllr_basis) {
     // We could just as easily use ReadKaldiObject() here.
     bool binary;
     Input basis_input(config.fmllr_basis_rxfilename, &binary);
     fmllr_basis_.Read(basis_input.Stream(), binary);
   }
 }


 // Returns the transition model that was read from the main model file.
 const TransitionModel &OnlineGmmDecodingModels::GetTransitionModel() const {
   return tmodel_;
 }

 // Returns the online alignment model if it contains any pdfs, otherwise the
 // main model.  (Presumably it has no pdfs exactly when no online alignment
 // model file was supplied -- the constructor only reads it in that case.)
 const AmDiagGmm &OnlineGmmDecodingModels::GetOnlineAlignmentModel() const {
   const bool have_alignment_model = (online_alignment_model_.NumPdfs() != 0);
   return have_alignment_model ? online_alignment_model_ : model_;
 }

 // Returns the main GMM, i.e. the one read from config.model_rxfilename.
 const AmDiagGmm &OnlineGmmDecodingModels::GetModel() const {
   return model_;
 }

 // Returns the rescoring model if it contains any pdfs, otherwise the main
 // model.
 const AmDiagGmm &OnlineGmmDecodingModels::GetFinalModel() const {
   if (rescore_model_.NumPdfs() == 0)
     return model_;  // no rescoring model available; use the main model.
   return rescore_model_;
 }

 // Returns the fMLLR basis.  The constructor only reads it when
 // config.fmllr_basis_rxfilename is non-empty.
 const BasisFmllrEstimate &OnlineGmmDecodingModels::GetFmllrBasis() const {
   return fmllr_basis_;
 }


 // Checks that the configuration values make sense: both delays must be
 // positive and both ratios must be greater than 1.  DoAdapt() repeatedly
 // multiplies a delay by its ratio until it reaches a target time, so it relies
 // on these conditions for its loops to make progress.
 void OnlineGmmDecodingAdaptationPolicyConfig::Check() const {
   // Schedule used for the first utterance of a speaker.
   KALDI_ASSERT(adaptation_first_utt_delay > 0.0 &&
                adaptation_first_utt_ratio > 1.0);
   // Schedule used for subsequent utterances.
   KALDI_ASSERT(adaptation_delay > 0.0 &&
                adaptation_ratio > 1.0);
 }

 // Returns true if fMLLR re-estimation is scheduled somewhere in the interval
 // [ chunk_begin_secs, chunk_end_secs ).
 //
 // The schedule is the geometric sequence  delay * ratio^n,  n = 0, 1, 2, ...
 // where (delay, ratio) is (adaptation_first_utt_delay,
 // adaptation_first_utt_ratio) when is_first_utterance is true and
 // (adaptation_delay, adaptation_ratio) otherwise.  Check() is called first to
 // validate those values.
 bool OnlineGmmDecodingAdaptationPolicyConfig::DoAdapt(
     BaseFloat chunk_begin_secs,
     BaseFloat chunk_end_secs,
     bool is_first_utterance) const {
   Check();

   if (!is_first_utterance) {
     // Walk the regular schedule up to the first point that is not before the
     // start of the chunk, then see whether it lies before the chunk's end.
     BaseFloat scheduled_time = adaptation_delay;
     for (; scheduled_time < chunk_begin_secs;
          scheduled_time *= adaptation_ratio) { }
     return (scheduled_time < chunk_end_secs);
   }

   // Same procedure with the first-utterance schedule.
   BaseFloat scheduled_time = adaptation_first_utt_delay;
   for (; scheduled_time < chunk_begin_secs;
        scheduled_time *= adaptation_first_utt_ratio) { }
   return (scheduled_time < chunk_end_secs);
 }


 }  // namespace kaldi
kaldi::OnlineFeaturePipeline::FreezeCmvn
void FreezeCmvn()
Definition: online-feature-pipeline.cc:237

kaldi::OnlineGmmDecodingModels::GetModel
const AmDiagGmm & GetModel() const
Definition: online-gmm-decoding.cc:400

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::OnlineFeaturePipeline::Dim
virtual int32 Dim() const
Member functions from OnlineFeatureInterface:
Definition: online-feature-pipeline.cc:241

kaldi::SingleUtteranceGmmDecoder::config_
OnlineGmmDecodingConfig config_
Definition: online-gmm-decoding.h:293

kaldi::MatrixBase::Write
void Write(std::ostream &out, bool binary) const
write to stream.
Definition: kaldi-matrix.cc:1379

kaldi::OnlineGmmDecodingModels::GetFmllrBasis
const BasisFmllrEstimate & GetFmllrBasis() const
Definition: online-gmm-decoding.cc:411

kaldi::OnlineGmmDecodingConfig::fmllr_lattice_beam
BaseFloat fmllr_lattice_beam
Definition: online-gmm-decoding.h:96

rnnlm::j
int j
Definition: mikolov-rnnlm-lib.cc:66

kaldi::Input
Definition: kaldi-io.h:190

kaldi::LatticeFasterDecoderConfig::lattice_beam
BaseFloat lattice_beam
Definition: lattice-faster-decoder.h:42

kaldi::OnlineCmvnState::Write
void Write(std::ostream &os, bool binary) const
Definition: online-feature.cc:216

kaldi::LatticeFasterDecoderTpl::GetRawLattice
bool GetRawLattice(Lattice *ofst, bool use_final_probs=true) const
Outputs an FST corresponding to the raw, state-level tracebacks.
Definition: lattice-faster-decoder.cc:106

kaldi::SingleUtteranceGmmDecoder::AdvanceDecoding
void AdvanceDecoding()
advance the decoding as far as we can.
Definition: online-gmm-decoding.cc:69

kaldi::OnlineGmmDecodingModels
This class is used to read, store and give access to the models used for 3 phases of decoding (first-...
Definition: online-gmm-decoding.h:166

kaldi::SingleUtteranceGmmDecoder::SingleUtteranceGmmDecoder
SingleUtteranceGmmDecoder(const OnlineGmmDecodingConfig &config, const OnlineGmmDecodingModels &models, const OnlineFeaturePipeline &feature_prototype, const fst::Fst< fst::StdArc > &fst, const OnlineGmmAdaptationState &adaptation_state)
Definition: online-gmm-decoding.cc:48

kaldi::OnlineFeaturePipeline::HaveFmllrTransform
bool HaveFmllrTransform()
Definition: online-feature-pipeline.h:201

kaldi::OnlineGmmAdaptationState
Definition: online-gmm-decoding.h:199

fst::DeterminizeLatticePruned
bool DeterminizeLatticePruned(const ExpandedFst< ArcTpl< Weight > > &ifst, double beam, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, IntType > > > *ofst, DeterminizeLatticePrunedOptions opts)
Definition: determinize-lattice-pruned.cc:1196

fst
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21

kaldi::SingleUtteranceGmmDecoder::RescoringIsNeeded
bool RescoringIsNeeded() const
Returns true if doing a lattice rescoring pass would have any point, i.e.
Definition: online-gmm-decoding.cc:293

kaldi::OnlineGmmDecodingConfig::silence_phones
std::string silence_phones
Definition: online-gmm-decoding.h:118

kaldi::OnlineGmmAdaptationState::transform
Matrix< BaseFloat > transform
Definition: online-gmm-decoding.h:202

kaldi::SplitStringToIntegers
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68

kaldi::GetVerboseLevel
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
Definition: kaldi-error.h:60

kaldi::SingleUtteranceGmmDecoder::GetAdaptationState
void GetAdaptationState(OnlineGmmAdaptationState *adaptation_state) const
Definition: online-gmm-decoding.cc:287

kaldi::MatrixBase::ApproxEqual
bool ApproxEqual(const MatrixBase< Real > &other, float tol=0.01) const
Returns true if ((*this)-other).FrobeniusNorm() <= tol * (*this).FrobeniusNorm(). ...
Definition: kaldi-matrix.cc:1915

kaldi::FmllrDiagGmmAccs
This does not work with multiple feature transforms.
Definition: fmllr-diag-gmm.h:61

kaldi::TopSortLatticeIfNeeded
void TopSortLatticeIfNeeded(Lattice *lat)
Topologically sort the lattice if not already topologically sorted.
Definition: lattice-functions.cc:616

kaldi::FmllrDiagGmmAccs::AccumulateFromPosteriors
void AccumulateFromPosteriors(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
Accumulate stats for a GMM, given supplied posteriors.
Definition: fmllr-diag-gmm.cc:30

kaldi::OnlineGmmDecodingModels::OnlineGmmDecodingModels
OnlineGmmDecodingModels(const OnlineGmmDecodingConfig &config)
Definition: online-gmm-decoding.cc:346

kaldi::SingleUtteranceGmmDecoder::adaptation_state_
OnlineGmmAdaptationState adaptation_state_
Definition: online-gmm-decoding.h:303

kaldi::OnlineEndpointConfig
Definition: online-endpoint.h:127

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::LatticeFasterDecoderTpl::FinalizeDecoding
void FinalizeDecoding()
This function may be optionally called after AdvanceDecoding(), when you do not plan to decode any fu...
Definition: lattice-faster-decoder.cc:625

kaldi::SingleUtteranceGmmDecoder::GetGaussianPosteriors
bool GetGaussianPosteriors(bool end_of_utterance, GaussPost *gpost)
Definition: online-gmm-decoding.cc:111

kaldi::Matrix< BaseFloat >

kaldi::EndpointDetected
bool EndpointDetected(const OnlineEndpointConfig &config, int32 num_frames_decoded, int32 trailing_silence_frames, BaseFloat frame_shift_in_seconds, BaseFloat final_relative_cost)
This function returns true if this set of endpointing rules thinks we should terminate decoding...
Definition: online-endpoint.cc:46

kaldi::OnlineGmmDecodingAdaptationPolicyConfig::Check
void Check() const
Check that configuration values make sense.
Definition: online-gmm-decoding.cc:416

kaldi::SortAndUniq
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq's (removes duplicates from) a vector.
Definition: stl-utils.h:39

kaldi::BasisFmllrEstimate::ComputeTransform
double ComputeTransform(const AffineXformStats &spk_stats, Matrix< BaseFloat > *out_xform, Vector< BaseFloat > *coefficients, BasisFmllrOptions options) const
This function performs speaker adaptation, computing the fMLLR matrix based on speaker statistics...
Definition: basis-fmllr-diag-gmm.cc:270

kaldi::AmDiagGmm
Definition: am-diag-gmm.h:36

kaldi::OnlineFeaturePipeline
OnlineFeaturePipeline is a class that's responsible for putting together the various stages of the fe...
Definition: online-feature-pipeline.h:157

kaldi::TransitionModel
Definition: transition-model.h:123

kaldi::WeightSilencePost
void WeightSilencePost(const TransitionModel &trans_model, const ConstIntegerSet< int32 > &silence_set, BaseFloat silence_scale, Posterior *post)
Weight any silence phones in the posterior (i.e.
Definition: posterior.cc:375

lattice-functions.h

kaldi::OnlineGmmDecodingModels::GetOnlineAlignmentModel
const AmDiagGmm & GetOnlineAlignmentModel() const
Definition: online-gmm-decoding.cc:393

kaldi::OnlineFeaturePipeline::GetCmvnState
void GetCmvnState(OnlineCmvnState *cmvn_state)
Definition: online-feature-pipeline.cc:149

kaldi::Input::Stream
std::istream & Stream()
Definition: kaldi-io.cc:826

kaldi::Matrix::Read
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
Definition: kaldi-matrix.cc:1450

kaldi::DiagGmm::ComponentPosteriors
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
Definition: diag-gmm.cc:601

kaldi::ConstIntegerSet< int32 >

kaldi::Posterior
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42

kaldi::LatticeFasterDecoderTpl::InitDecoding
void InitDecoding()
InitDecoding initializes the decoding, and should only be used if you intend to call AdvanceDecoding(...
Definition: lattice-faster-decoder.cc:56

kaldi::OnlineCmvnState::Read
void Read(std::istream &is, bool binary)
Definition: online-feature.cc:227

kaldi::LatticeForwardBackward
BaseFloat LatticeForwardBackward(const Lattice &lat, Posterior *post, double *acoustic_like_sum)
This function does the forward-backward over lattices and computes the posterior probabilities of the...
Definition: lattice-functions.cc:314

kaldi::OnlineFeaturePipeline::GetFrame
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
Definition: online-feature-pipeline.cc:251

float

kaldi::SingleUtteranceGmmDecoder::FinalizeDecoding
void FinalizeDecoding()
Finalize the decoding.
Definition: online-gmm-decoding.cc:105

kaldi::LatticeFasterDecoderTpl::NumFramesDecoded
int32 NumFramesDecoded() const
Definition: lattice-faster-decoder.h:340

kaldi::OnlineGmmDecodingConfig::fmllr_basis_rxfilename
std::string fmllr_basis_rxfilename
Definition: online-gmm-decoding.h:114

kaldi::ExpectToken
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191

kaldi::TransitionModel::Read
void Read(std::istream &is, bool binary)
Definition: transition-model.cc:394

kaldi::OnlineFeaturePipeline::FrameShiftInSeconds
BaseFloat FrameShiftInSeconds() const
Definition: online-feature-pipeline.h:184

kaldi::OnlineGmmDecodingConfig::rescore_model_rxfilename
std::string rescore_model_rxfilename
Definition: online-gmm-decoding.h:111

kaldi::Lattice
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44

determinize-lattice-pruned.h

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::TransitionModel::Compatible
bool Compatible(const TransitionModel &other) const
returns true if all the integer class members are identical (but does not compare the transition prob...
Definition: transition-model.cc:906

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::OnlineGmmDecodingConfig::model_rxfilename
std::string model_rxfilename
Definition: online-gmm-decoding.h:108

kaldi::AffineXformStats::Write
void Write(std::ostream &out, bool binary) const
Definition: transform-common.cc:48

online-gmm-decoding.h

kaldi::OnlineFeaturePipeline::GetAsMatrix
void GetAsMatrix(Matrix< BaseFloat > *feats)
Definition: online-feature-pipeline.cc:299

kaldi::SingleUtteranceGmmDecoder::~SingleUtteranceGmmDecoder
~SingleUtteranceGmmDecoder()
Definition: online-gmm-decoding.cc:305

kaldi::OnlineGmmDecodingConfig::faster_decoder_opts
LatticeFasterDecoderConfig faster_decoder_opts
Definition: online-gmm-decoding.h:100

kaldi::WriteToken
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134

kaldi::OnlineGmmDecodingConfig
Definition: online-gmm-decoding.h:95

kaldi::SingleUtteranceGmmDecoder::feature_pipeline_
OnlineFeaturePipeline * feature_pipeline_
Definition: online-gmm-decoding.h:297

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

kaldi::VectorBase::Scale
void Scale(Real alpha)
Multiplies all elements by this constant.
Definition: kaldi-vector.cc:963

kaldi::CompactLattice
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46

kaldi::BasisFmllrEstimate::Dim
int32 Dim() const
Definition: basis-fmllr-diag-gmm.h:137

kaldi::OnlineGmmDecodingAdaptationPolicyConfig::DoAdapt
bool DoAdapt(BaseFloat chunk_begin_secs, BaseFloat chunk_end_secs, bool is_first_utterance) const
This function returns true if we are scheduled to re-estimate fMLLR somewhere in the interval [ chunk...
Definition: online-gmm-decoding.cc:423

kaldi::OnlineGmmDecodingConfig::acoustic_scale
BaseFloat acoustic_scale
Definition: online-gmm-decoding.h:116

kaldi::OnlineGmmDecodingConfig::adaptation_policy_opts
OnlineGmmDecodingAdaptationPolicyConfig adaptation_policy_opts
Definition: online-gmm-decoding.h:102

kaldi::SingleUtteranceGmmDecoder::EstimateFmllr
void EstimateFmllr(bool end_of_utterance)
Estimate the [basis-]fMLLR transform and apply it to the features.
Definition: online-gmm-decoding.cc:208

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::LatticeFasterDecoderConfig::det_opts
fst::DeterminizeLatticePhonePrunedOptions det_opts
Definition: lattice-faster-decoder.h:56

kaldi::AmDiagGmm::GetPdf
DiagGmm & GetPdf(int32 pdf_index)
Accessors.
Definition: am-diag-gmm.h:119

kaldi::OnlineGmmDecodingModels::GetTransitionModel
const TransitionModel & GetTransitionModel() const
Definition: online-gmm-decoding.cc:389

kaldi::LatticeFasterDecoderTpl::AdvanceDecoding
void AdvanceDecoding(DecodableInterface *decodable, int32 max_num_frames=-1)
This will decode until there are no more frames ready in the decodable object.
Definition: lattice-faster-decoder.cc:580

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::PruneLattice
bool PruneLattice(BaseFloat beam, LatType *lat)
Definition: lattice-functions.cc:229

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::OnlineGmmDecodingConfig::online_alimdl_rxfilename
std::string online_alimdl_rxfilename
Definition: online-gmm-decoding.h:106

kaldi::OnlineGmmAdaptationState::Read
void Read(std::istream &in_stream, bool binary)
Definition: online-gmm-decoding.cc:26

kaldi::SingleUtteranceGmmDecoder::GetBestPath
void GetBestPath(bool end_of_utterance, Lattice *best_path) const
Outputs an FST corresponding to the single best path through the current lattice. ...
Definition: online-gmm-decoding.cc:341

kaldi::OnlineFeaturePipeline::SetTransform
void SetTransform(const MatrixBase< BaseFloat > &transform)
Definition: online-feature-pipeline.cc:223

KALDI_VLOG
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156

kaldi::DiagGmm
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42

kaldi::OnlineGmmDecodingConfig::silence_weight
BaseFloat silence_weight
Definition: online-gmm-decoding.h:119

kaldi::FmllrDiagGmmAccs::Init
void Init(size_t dim)
Definition: fmllr-diag-gmm.h:81

kaldi::SingleUtteranceGmmDecoder::models_
const OnlineGmmDecodingModels & models_
Definition: online-gmm-decoding.h:296

kaldi::OnlineGmmAdaptationState::Write
void Write(std::ostream &out_stream, bool binary) const
Definition: online-gmm-decoding.cc:37

kaldi::LatticeFasterOnlineDecoderTpl::GetBestPath
bool GetBestPath(Lattice *ofst, bool use_final_probs=true) const
Outputs an FST corresponding to the single best path through the lattice.
Definition: lattice-faster-online-decoder.cc:56

kaldi::SingleUtteranceGmmDecoder::EndpointDetected
bool EndpointDetected(const OnlineEndpointConfig &config)
This function calls EndpointDetected from online-endpoint.h, with the required arguments.
Definition: online-gmm-decoding.cc:310

kaldi::SingleUtteranceGmmDecoder::orig_adaptation_state_
const OnlineGmmAdaptationState & orig_adaptation_state_
Definition: online-gmm-decoding.h:298

kaldi::GaussPost
std::vector< std::vector< std::pair< int32, Vector< BaseFloat > > > > GaussPost
GaussPost is a typedef for storing Gaussian-level posteriors for an utterance.
Definition: posterior.h:51

kaldi::OnlineGmmDecodingModels::GetFinalModel
const AmDiagGmm & GetFinalModel() const
Definition: online-gmm-decoding.cc:404

kaldi::OnlineGmmAdaptationState::spk_stats
FmllrDiagGmmAccs spk_stats
Definition: online-gmm-decoding.h:201

kaldi::AffineXformStats::Dim
int32 Dim() const
Definition: transform-common.h:46

kaldi::ConvertPosteriorToPdfs
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over pdf-ids.
Definition: posterior.cc:322

kaldi::SingleUtteranceGmmDecoder::GetLattice
void GetLattice(bool rescore_if_needed, bool end_of_utterance, CompactLattice *clat) const
Gets the lattice.
Definition: online-gmm-decoding.cc:318

kaldi::SingleUtteranceGmmDecoder::decoder_
LatticeFasterOnlineDecoder decoder_
Definition: online-gmm-decoding.h:304

kaldi::AffineXformStats::beta_
double beta_
beta_ is the occupation count.
Definition: transform-common.h:33

kaldi::RescoreLattice
bool RescoreLattice(DecodableInterface *decodable, Lattice *lat)
This function *adds* the negated scores obtained from the Decodable object, to the acoustic scores on...
Definition: lattice-functions.cc:1360

kaldi::SingleUtteranceGmmDecoder::silence_phones_
std::vector< int32 > silence_phones_
Definition: online-gmm-decoding.h:294

fst::DeterminizeLatticePhonePrunedWrapper
bool DeterminizeLatticePhonePrunedWrapper(const kaldi::TransitionModel &trans_model, MutableFst< kaldi::LatticeArc > *ifst, double beam, MutableFst< kaldi::CompactLatticeArc > *ofst, DeterminizeLatticePhonePrunedOptions opts)
This function is a wrapper of DeterminizeLatticePhonePruned() that works for Lattice type FSTs...
Definition: determinize-lattice-pruned.cc:1488

kaldi::OnlineGmmAdaptationState::cmvn_state
OnlineCmvnState cmvn_state
Definition: online-gmm-decoding.h:200

kaldi::DecodableDiagGmmScaledOnline
Definition: online-gmm-decodable.h:35

kaldi::LatticeFasterOnlineDecoderTpl::GetRawLatticePruned
bool GetRawLatticePruned(Lattice *ofst, bool use_final_probs, BaseFloat beam) const
Behaves the same as GetRawLattice but only processes tokens whose extra_cost is smaller than the best...
Definition: lattice-faster-online-decoder.cc:168

kaldi::SingleUtteranceGmmDecoder::HaveTransform
bool HaveTransform() const
Returns true if we already have an fMLLR transform.
Definition: online-gmm-decoding.cc:283

kaldi::OnlineGmmDecodingConfig::basis_opts
BasisFmllrOptions basis_opts
Definition: online-gmm-decoding.h:98

kaldi::FmllrDiagGmmAccs::Read
void Read(std::istream &in, bool binary, bool add)
Definition: fmllr-diag-gmm.h:84

kaldi::BasisFmllrEstimate
Estimation functions for basis fMLLR.
Definition: basis-fmllr-diag-gmm.h:107