86 using namespace kaldi;
88 "Estimate global fMLLR transforms, either per utterance or for the supplied\n" 89 "set of speakers (spk2utt option). Reads features, and (with --weights option)\n" 90 "weights for each frame (also see --gselect option)\n" 91 "Usage: gmm-global-est-fmllr [options] <gmm-in> <feature-rspecifier> <transform-wspecifier>\n";
95 string spk2utt_rspecifier, gselect_rspecifier, weights_rspecifier,
99 po.
Register(
"spk2utt", &spk2utt_rspecifier,
"rspecifier for speaker to " 100 "utterance-list map");
101 po.Register(
"gselect", &gselect_rspecifier,
"rspecifier for gselect objects " 102 "to limit the #Gaussians accessed on each frame.");
103 po.Register(
"weights", &weights_rspecifier,
"rspecifier for a vector of floats " 104 "for each utterance, that's a per-frame weight.");
105 po.Register(
"align-model", &alignment_model,
"rxfilename for a model in the " 106 "speaker-independent space, to get Gaussian alignments from");
112 if (po.NumArgs() != 3) {
117 string gmm_rxfilename = po.GetArg(1),
118 feature_rspecifier = po.GetArg(2),
119 trans_wspecifier = po.GetArg(3);
124 if (alignment_model !=
"") {
126 Input ki(gmm_rxfilename, &binary);
127 ali_gmm_read.
Read(ki.Stream(), binary);
129 DiagGmm &ali_gmm = (alignment_model !=
"" ? ali_gmm_read : gmm);
134 double tot_impr = 0.0, tot_t = 0.0;
138 int32 num_done = 0, num_err = 0;
140 if (spk2utt_rspecifier !=
"") {
144 for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
146 string spk = spk2utt_reader.Key();
147 const vector<string> &uttlist = spk2utt_reader.Value();
148 for (
size_t i = 0;
i < uttlist.size();
i++) {
149 std::string utt = uttlist[
i];
150 if (!feature_reader.HasKey(utt)) {
151 KALDI_WARN <<
"Did not find features for utterance " << utt;
157 &gselect_reader, &fullcov_stats)) num_done++;
166 spk_stats.Update(fmllr_opts, &transform, &impr, &spk_tot_t);
167 transform_writer.Write(spk, transform);
169 KALDI_LOG <<
"For speaker " << spk <<
", auxf-impr from fMLLR is " 170 << (impr/spk_tot_t) <<
", over " << spk_tot_t <<
" frames.";
176 for (; !feature_reader.Done(); feature_reader.Next()) {
177 string utt = feature_reader.Key();
184 &gselect_reader, &fullcov_stats)) {
190 spk_stats.Update(fmllr_opts, &transform, &impr, &utt_tot_t);
191 transform_writer.Write(utt, transform);
193 KALDI_LOG <<
"For utterance " << utt <<
", auxf-impr from fMLLR is " 194 << (impr/utt_tot_t) <<
", over " << utt_tot_t <<
" frames.";
203 KALDI_LOG <<
"Done " << num_done <<
" files, " << num_err
205 KALDI_LOG <<
"Overall fMLLR auxf impr per frame is " 206 << (tot_impr / tot_t) <<
" over " << tot_t <<
" frames.";
207 return (num_done != 0 ? 0 : 1);
208 }
catch(
const std::exception &e) {
209 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
This does not work with multiple feature transforms.
void AccumulateForUtterance(const Matrix< BaseFloat > &feats, const GaussPost &gpost, const TransitionModel &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats)
A templated class for writing objects to an archive or script file; see The Table concept.
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Allows random access to a collection of objects in an archive or script file; see The Table concept.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
Class for computing the maximum-likelihood estimates of the parameters of a Gaussian mixture model.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
void Register(OptionsItf *opts)
void Read(std::istream &in, bool binary)
Definition for Gaussian Mixture Model with diagonal covariances.