21 #ifndef KALDI_ONLINE2_ONLINE_GMM_DECODING_H_ 22 #define KALDI_ONLINE2_ONLINE_GMM_DECODING_H_ 62 adaptation_first_utt_delay(2.0),
63 adaptation_first_utt_ratio(1.5),
64 adaptation_delay(5.0),
65 adaptation_ratio(2.0) { }
68 opts->
Register(
"adaptation-first-utt-delay", &adaptation_first_utt_delay,
69 "Delay before first basis-fMLLR adaptation for first utterance " 70 "of each speaker"); 71 opts->
Register(
"adaptation-first-utt-ratio", &adaptation_first_utt_ratio,
72 "Ratio that controls frequency of fMLLR adaptation for first " 73 "utterance of each speaker");
74 opts->
Register(
"adaptation-delay", &adaptation_delay,
75 "Delay before first basis-fMLLR adaptation for not-first " 76 "utterances of each speaker");
77 opts->
Register(
"adaptation-ratio", &adaptation_ratio,
78 "Ratio that controls frequency of fMLLR adaptation for " 79 "not-first utterances of each speaker");
90 bool is_first_utterance)
const;
123 silence_weight(0.1) { }
131 adaptation_policy_opts.
Register(opts);
133 opts->
Register(
"acoustic-scale", &acoustic_scale,
134 "Scaling factor for acoustic likelihoods");
135 opts->
Register(
"silence-phones", &silence_phones,
136 "Colon-separated list of integer ids of silence phones, e.g. " 137 "1:2:3 (affects adaptation).");
138 opts->
Register(
"silence-weight", &silence_weight,
139 "Weight applied to silence frames for fMLLR estimation (if " 140 "--silence-phones option is supplied)");
141 opts->
Register(
"fmllr-lattice-beam", &fmllr_lattice_beam,
"Beam used in " 142 "pruning lattices for fMLLR estimation");
143 opts->
Register(
"online-alignment-model", &online_alimdl_rxfilename,
144 "(Extended) filename for model trained with online CMN " 145 "features, e.g. from apply-cmvn-online.");
146 opts->
Register(
"model", &model_rxfilename,
"(Extended) filename for model, " 147 "typically the one used for fMLLR computation. Required option.");
148 opts->
Register(
"rescore-model", &rescore_model_rxfilename,
"(Extended) filename " 149 "for model to rescore lattices with, e.g. discriminatively trained " 150 "model, if it differs from that supplied to --model option. Must " 151 "have the same tree.");
152 opts->
Register(
"fmllr-basis", &fmllr_basis_rxfilename,
"(Extended) filename " 153 "of fMLLR basis object, as output by gmm-basis-fmllr-training");
172 const AmDiagGmm &GetOnlineAlignmentModel()
const;
205 void Write(std::ostream &out_stream,
bool binary)
const;
206 void Read(std::istream &in_stream,
bool binary);
221 const fst::Fst<fst::StdArc> &
fst,
231 void AdvanceDecoding();
235 void FinalizeDecoding();
239 bool HaveTransform()
const;
248 void EstimateFmllr(
bool end_of_utterance);
259 void GetLattice(
bool rescore_if_needed,
260 bool end_of_utterance,
267 void GetBestPath(
bool end_of_utterance,
285 bool GetGaussianPosteriors(
bool end_of_utterance,
GaussPost *gpost);
291 bool RescoringIsNeeded()
const;
314 #endif // KALDI_ONLINE2_ONLINE_GMM_DECODING_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
OnlineGmmDecodingConfig config_
BaseFloat fmllr_lattice_beam
BaseFloat adaptation_first_utt_delay
This class is used to read, store and give access to the models used for 3 phases of decoding (first-...
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
std::string silence_phones
void Register(OptionsItf *opts)
Matrix< BaseFloat > transform
This does not work with multiple feature transforms.
BaseFloat adaptation_ratio
OnlineGmmAdaptationState adaptation_state_
This file contains a class OnlineFeaturePipeline for online feature extraction, which puts together v...
OnlineGmmDecodingAdaptationPolicyConfig()
bool EndpointDetected(const OnlineEndpointConfig &config, int32 num_frames_decoded, int32 trailing_silence_frames, BaseFloat frame_shift_in_seconds, BaseFloat final_relative_cost)
This function returns true if this set of endpointing rules thinks we should terminate decoding...
BasisFmllrEstimate fmllr_basis_
void Check() const
Check that configuration values make sense.
OnlineGmmDecodingConfig()
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
OnlineFeaturePipeline is a class that's responsible for putting together the various stages of the fe...
You will instantiate this class when you want to decode a single utterance using the online-decoding ...
BaseFloat adaptation_delay
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
AmDiagGmm online_alignment_model_
std::string fmllr_basis_rxfilename
BaseFloat adaptation_first_utt_ratio
std::string rescore_model_rxfilename
void Register(OptionsItf *opts)
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
fst::VectorFst< LatticeArc > Lattice
std::string model_rxfilename
LatticeFasterDecoderConfig faster_decoder_opts
OnlineFeaturePipeline * feature_pipeline_
This configuration class controls when to re-estimate the basis-fMLLR during online decoding...
fst::VectorFst< CompactLatticeArc > CompactLattice
BaseFloat FinalRelativeCost()
This function outputs to "final_relative_cost", if non-NULL, a number >= 0 that will be close to zero...
bool DoAdapt(BaseFloat chunk_begin_secs, BaseFloat chunk_end_secs, bool is_first_utterance) const
This function returns true if we are scheduled to re-estimate fMLLR somewhere in the interval [ chunk...
OnlineGmmDecodingAdaptationPolicyConfig adaptation_policy_opts
OnlineFeaturePipeline & FeaturePipeline()
std::string online_alimdl_rxfilename
const OnlineGmmDecodingModels & models_
const OnlineGmmAdaptationState & orig_adaptation_state_
std::vector< std::vector< std::pair< int32, Vector< BaseFloat > > > > GaussPost
GaussPost is a typedef for storing Gaussian-level posteriors for an utterance.
FmllrDiagGmmAccs spk_stats
LatticeFasterOnlineDecoder decoder_
void Register(OptionsItf *opts)
void Register(OptionsItf *opts)
std::vector< int32 > silence_phones_
OnlineCmvnState cmvn_state
BasisFmllrOptions basis_opts
Estimation functions for basis fMLLR.