27 ExpectToken(in_stream, binary,
"<ONLINEGMMADAPTATIONSTATE>");
34 ExpectToken(in_stream, binary,
"</ONLINEGMMADAPTATIONSTATE>");
38 WriteToken(out_stream, binary,
"<ONLINEGMMADAPTATIONSTATE>");
45 WriteToken(out_stream, binary,
"</ONLINEGMMADAPTATIONSTATE>");
52 const fst::Fst<fst::StdArc> &
fst,
54 config_(config), models_(models),
55 feature_pipeline_(feature_prototype.New()),
56 orig_adaptation_state_(adaptation_state),
57 adaptation_state_(adaptation_state),
58 decoder_(fst, config.faster_decoder_opts) {
61 KALDI_ERR <<
"Bad --silence-phones option '" 95 bool is_first_utterance_of_speaker =
97 bool end_of_utterance =
false;
99 new_frames * frame_shift,
100 is_first_utterance_of_speaker))
117 KALDI_WARN <<
"You have decoded no data so cannot estimate fMLLR.";
137 #if 1 // Do determinization. 143 fst::Invert(&raw_lat);
144 fst::ILabelCompare<kaldi::LatticeArc> ilabel_comp;
145 fst::ArcSort(&raw_lat, ilabel_comp);
151 fst::Invert(&det_lat);
153 if (det_lat.NumStates() == 0) {
155 KALDI_WARN <<
"Got empty lattice. Not estimating fMLLR.";
167 KALDI_VLOG(3) <<
"Lattice forward-backward likelihood was " 168 << (tot_fb_like / post.size()) <<
" per frame over " << post.size()
185 double tot_like = 0.0, tot_weight = 0.0;
186 gpost->resize(pdf_post.size());
187 for (
size_t i = 0;
i < pdf_post.size();
i++) {
189 for (
size_t j = 0;
j < pdf_post[
i].size();
j++) {
190 int32 pdf_id = pdf_post[
i][
j].first;
195 this_post_vec.
Scale(weight);
196 tot_like += like * weight;
197 tot_weight += weight;
198 (*gpost)[
i].push_back(std::make_pair(pdf_id, this_post_vec));
201 KALDI_VLOG(3) <<
"Average likelihood weighted by posterior was " 202 << (tot_like / tot_weight) <<
" over " << tot_weight
203 <<
" frames (after downweighting silence).";
210 KALDI_WARN <<
"You have decoded no data so cannot estimate fMLLR.";
225 if (spk_stats.
beta_ !=
235 if (spk_stats.
Dim() == 0)
255 for (
size_t i = 0;
i < gpost.size();
i++) {
257 for (
size_t j = 0;
j < gpost[
i].size();
j++) {
258 int32 pdf_id = gpost[
i][
j].first;
268 if (basis.
Dim() == 0)
269 KALDI_ERR <<
"In order to estimate fMLLR, you need to supply the " 270 <<
"--fmllr-basis option.";
275 KALDI_VLOG(3) <<
"Objective function improvement from basis-fMLLR is " 276 << (impr / spk_stats.
beta_) <<
" per frame, over " 277 << spk_stats.
beta_ <<
" frames, #params estimated is " 278 << basis_coeffs.
Dim();
319 bool end_of_utterance,
336 &lat, lat_beam, clat,
349 "You must supply the --model option");
354 tmodel_.Read(ki.
Stream(), binary);
355 model_.Read(ki.
Stream(), binary);
364 KALDI_ERR <<
"Incompatible models given to the --model and " 365 <<
"--online-alignment-model options";
366 online_alignment_model_.Read(ki.
Stream(), binary);
375 KALDI_ERR <<
"Incompatible models given to the --model and " 376 <<
"--final-model options";
377 rescore_model_.Read(ki.
Stream(), binary);
384 fmllr_basis_.Read(ki.
Stream(), binary);
394 if (online_alignment_model_.NumPdfs() != 0)
395 return online_alignment_model_;
405 if (rescore_model_.NumPdfs() != 0)
406 return rescore_model_;
418 adaptation_first_utt_ratio > 1.0);
420 adaptation_ratio > 1.0);
426 bool is_first_utterance)
const {
428 if (is_first_utterance) {
433 BaseFloat delay = adaptation_first_utt_delay;
434 while (delay < chunk_begin_secs)
435 delay *= adaptation_first_utt_ratio;
436 return (delay < chunk_end_secs);
440 while (delay < chunk_begin_secs)
441 delay *= adaptation_ratio;
442 return (delay < chunk_end_secs);
const AmDiagGmm & GetModel() const
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
virtual int32 Dim() const
Member functions from OnlineFeatureInterface:
OnlineGmmDecodingConfig config_
void Write(std::ostream &out, bool binary) const
write to stream.
const BasisFmllrEstimate & GetFmllrBasis() const
BaseFloat fmllr_lattice_beam
void Write(std::ostream &os, bool binary) const
bool GetRawLattice(Lattice *ofst, bool use_final_probs=true) const
Outputs an FST corresponding to the raw, state-level tracebacks.
void AdvanceDecoding()
advance the decoding as far as we can.
This class is used to read, store and give access to the models used for 3 phases of decoding (first-...
SingleUtteranceGmmDecoder(const OnlineGmmDecodingConfig &config, const OnlineGmmDecodingModels &models, const OnlineFeaturePipeline &feature_prototype, const fst::Fst< fst::StdArc > &fst, const OnlineGmmAdaptationState &adaptation_state)
bool HaveFmllrTransform()
bool DeterminizeLatticePruned(const ExpandedFst< ArcTpl< Weight > > &ifst, double beam, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, IntType > > > *ofst, DeterminizeLatticePrunedOptions opts)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
bool RescoringIsNeeded() const
Returns true if doing a lattice rescoring pass would have any point.
std::string silence_phones
Matrix< BaseFloat > transform
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. "1:2:3") on delim into a vector of integers.
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
void GetAdaptationState(OnlineGmmAdaptationState *adaptation_state) const
bool ApproxEqual(const MatrixBase< Real > &other, float tol=0.01) const
Returns true if ((*this)-other).FrobeniusNorm() <= tol * (*this).FrobeniusNorm(). ...
This does not work with multiple feature transforms.
void TopSortLatticeIfNeeded(Lattice *lat)
Topologically sort the lattice if not already topologically sorted.
void AccumulateFromPosteriors(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
Accumulate stats for a GMM, given supplied posteriors.
OnlineGmmDecodingModels(const OnlineGmmDecodingConfig &config)
OnlineGmmAdaptationState adaptation_state_
void FinalizeDecoding()
This function may be optionally called after AdvanceDecoding(), when you do not plan to decode any fu...
bool GetGaussianPosteriors(bool end_of_utterance, GaussPost *gpost)
bool EndpointDetected(const OnlineEndpointConfig &config, int32 num_frames_decoded, int32 trailing_silence_frames, BaseFloat frame_shift_in_seconds, BaseFloat final_relative_cost)
This function returns true if this set of endpointing rules thinks we should terminate decoding...
void Check() const
Check that configuration values make sense.
void SortAndUniq(std::vector< T > *vec)
Sorts a vector and removes duplicate elements from it.
double ComputeTransform(const AffineXformStats &spk_stats, Matrix< BaseFloat > *out_xform, Vector< BaseFloat > *coefficients, BasisFmllrOptions options) const
This function performs speaker adaptation, computing the fMLLR matrix based on speaker statistics...
OnlineFeaturePipeline is a class that's responsible for putting together the various stages of the fe...
void WeightSilencePost(const TransitionModel &trans_model, const ConstIntegerSet< int32 > &silence_set, BaseFloat silence_scale, Posterior *post)
Weight any silence phones in the posterior (i.e. any phones in silence_set) by silence_scale.
const AmDiagGmm & GetOnlineAlignmentModel() const
void GetCmvnState(OnlineCmvnState *cmvn_state)
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
void InitDecoding()
InitDecoding initializes the decoding, and should only be used if you intend to call AdvanceDecoding(...
void Read(std::istream &is, bool binary)
BaseFloat LatticeForwardBackward(const Lattice &lat, Posterior *post, double *acoustic_like_sum)
This function does the forward-backward over lattices and computes the posterior probabilities of the...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
void FinalizeDecoding()
Finalize the decoding.
int32 NumFramesDecoded() const
std::string fmllr_basis_rxfilename
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
void Read(std::istream &is, bool binary)
BaseFloat FrameShiftInSeconds() const
std::string rescore_model_rxfilename
fst::VectorFst< LatticeArc > Lattice
bool Compatible(const TransitionModel &other) const
returns true if all the integer class members are identical (but does not compare the transition prob...
std::string model_rxfilename
void GetAsMatrix(Matrix< BaseFloat > *feats)
~SingleUtteranceGmmDecoder()
LatticeFasterDecoderConfig faster_decoder_opts
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
OnlineFeaturePipeline * feature_pipeline_
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
fst::VectorFst< CompactLatticeArc > CompactLattice
bool DoAdapt(BaseFloat chunk_begin_secs, BaseFloat chunk_end_secs, bool is_first_utterance) const
This function returns true if we are scheduled to re-estimate fMLLR somewhere in the interval [chunk_begin_secs, chunk_end_secs).
OnlineGmmDecodingAdaptationPolicyConfig adaptation_policy_opts
void EstimateFmllr(bool end_of_utterance)
Estimate the [basis-]fMLLR transform and apply it to the features.
fst::DeterminizeLatticePhonePrunedOptions det_opts
DiagGmm & GetPdf(int32 pdf_index)
Accessors.
const TransitionModel & GetTransitionModel() const
void AdvanceDecoding(DecodableInterface *decodable, int32 max_num_frames=-1)
This will decode until there are no more frames ready in the decodable object.
A class representing a vector.
bool PruneLattice(BaseFloat beam, LatType *lat)
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
std::string online_alimdl_rxfilename
void Read(std::istream &in_stream, bool binary)
void GetBestPath(bool end_of_utterance, Lattice *best_path) const
Outputs an FST corresponding to the single best path through the current lattice. ...
void SetTransform(const MatrixBase< BaseFloat > &transform)
Definition for Gaussian Mixture Model with diagonal covariances.
const OnlineGmmDecodingModels & models_
void Write(std::ostream &out_stream, bool binary) const
bool GetBestPath(Lattice *ofst, bool use_final_probs=true) const
Outputs an FST corresponding to the single best path through the lattice.
bool EndpointDetected(const OnlineEndpointConfig &config)
This function calls EndpointDetected from online-endpoint.h, with the required arguments.
const OnlineGmmAdaptationState & orig_adaptation_state_
std::vector< std::vector< std::pair< int32, Vector< BaseFloat > > > > GaussPost
GaussPost is a typedef for storing Gaussian-level posteriors for an utterance.
const AmDiagGmm & GetFinalModel() const
FmllrDiagGmmAccs spk_stats
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over pdf-ids.
void GetLattice(bool rescore_if_needed, bool end_of_utterance, CompactLattice *clat) const
Gets the lattice.
LatticeFasterOnlineDecoder decoder_
bool RescoreLattice(DecodableInterface *decodable, Lattice *lat)
This function *adds* the negated scores obtained from the Decodable object, to the acoustic scores on...
std::vector< int32 > silence_phones_
bool DeterminizeLatticePhonePrunedWrapper(const kaldi::TransitionModel &trans_model, MutableFst< kaldi::LatticeArc > *ifst, double beam, MutableFst< kaldi::CompactLatticeArc > *ofst, DeterminizeLatticePhonePrunedOptions opts)
This function is a wrapper of DeterminizeLatticePhonePruned() that works for Lattice type FSTs...
OnlineCmvnState cmvn_state
bool GetRawLatticePruned(Lattice *ofst, bool use_final_probs, BaseFloat beam) const
Behaves the same as GetRawLattice but only processes tokens whose extra_cost is smaller than the best...
bool HaveTransform() const
Returns true if we already have an fMLLR transform.
BasisFmllrOptions basis_opts
void Read(std::istream &in, bool binary, bool add)
Estimation functions for basis fMLLR.