34 const std::string &key,
39 if (weights_reader->
IsOpen()) {
40 if (!weights_reader->
HasKey(key)) {
41 KALDI_WARN <<
"No weights present for utterance " << key;
44 weights = weights_reader->
Value(key);
47 if (gselect_reader->
IsOpen()) {
48 if (!gselect_reader->
HasKey(key)) {
49 KALDI_WARN <<
"No gselect information present for utterance " << key;
52 const std::vector<std::vector<int32> > &gselect(gselect_reader->
Value(key));
53 if (gselect.size() != num_frames) {
54 KALDI_WARN <<
"gselect information has wrong size for utterance " << key;
57 for (
int32 t = 0; t < num_frames; t++) {
58 const std::vector<int32> &this_gselect(gselect[t]);
59 BaseFloat weight = (weights.
Dim() != 0 ? weights(t) : 1.0);
65 for (
size_t i = 0;
i < this_gselect.size();
i++)
67 this_gselect[
i], post(
i));
71 for (
int32 t = 0; t < num_frames; t++) {
72 BaseFloat weight = (weights.
Dim() != 0 ? weights(t) : 1.0);
83 int main(
int argc,
char *argv[]) {
86 using namespace kaldi;
88 "Estimate global fMLLR transforms, either per utterance or for the supplied\n" 89 "set of speakers (spk2utt option). Reads features, and (with --weights option)\n" 90 "weights for each frame (also see --gselect option)\n" 91 "Usage: gmm-global-est-fmllr [options] <gmm-in> <feature-rspecifier> <transform-wspecifier>\n";
95 string spk2utt_rspecifier, gselect_rspecifier, weights_rspecifier,
99 po.
Register(
"spk2utt", &spk2utt_rspecifier,
"rspecifier for speaker to " 100 "utterance-list map");
101 po.
Register(
"gselect", &gselect_rspecifier,
"rspecifier for gselect objects " 102 "to limit the #Gaussians accessed on each frame.");
103 po.
Register(
"weights", &weights_rspecifier,
"rspecifier for a vector of floats " 104 "for each utterance, that's a per-frame weight.");
105 po.
Register(
"align-model", &alignment_model,
"rxfilename for a model in the " 106 "speaker-independent space, to get Gaussian alignments from");
117 string gmm_rxfilename = po.
GetArg(1),
118 feature_rspecifier = po.
GetArg(2),
119 trans_wspecifier = po.
GetArg(3);
124 if (alignment_model !=
"") {
126 Input ki(gmm_rxfilename, &binary);
129 DiagGmm &ali_gmm = (alignment_model !=
"" ? ali_gmm_read : gmm);
134 double tot_impr = 0.0, tot_t = 0.0;
138 int32 num_done = 0, num_err = 0;
140 if (spk2utt_rspecifier !=
"") {
144 for (; !spk2utt_reader.
Done(); spk2utt_reader.
Next()) {
146 string spk = spk2utt_reader.
Key();
147 const vector<string> &uttlist = spk2utt_reader.
Value();
148 for (
size_t i = 0;
i < uttlist.size();
i++) {
149 std::string utt = uttlist[
i];
150 if (!feature_reader.
HasKey(utt)) {
151 KALDI_WARN <<
"Did not find features for utterance " << utt;
157 &gselect_reader, &fullcov_stats)) num_done++;
166 spk_stats.
Update(fmllr_opts, &transform, &impr, &spk_tot_t);
167 transform_writer.
Write(spk, transform);
169 KALDI_LOG <<
"For speaker " << spk <<
", auxf-impr from fMLLR is " 170 << (impr/spk_tot_t) <<
", over " << spk_tot_t <<
" frames.";
176 for (; !feature_reader.
Done(); feature_reader.
Next()) {
177 string utt = feature_reader.
Key();
184 &gselect_reader, &fullcov_stats)) {
190 spk_stats.
Update(fmllr_opts, &transform, &impr, &utt_tot_t);
191 transform_writer.
Write(utt, transform);
193 KALDI_LOG <<
"For utterance " << utt <<
", auxf-impr from fMLLR is " 194 << (impr/utt_tot_t) <<
", over " << utt_tot_t <<
" frames.";
203 KALDI_LOG <<
"Done " << num_done <<
" files, " << num_err
205 KALDI_LOG <<
"Overall fMLLR auxf impr per frame is " 206 << (tot_impr / tot_t) <<
" over " << tot_t <<
" frames.";
207 return (num_done != 0 ? 0 : 1);
208 }
catch(
const std::exception &e) {
209 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
int main(int argc, char *argv[])
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
BaseFloat AccumulateFromDiag(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, BaseFloat frame_posterior)
Accumulate for all components given a diagonal-covariance GMM.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
This does not work with multiple feature transforms.
void AccumulateForUtterance(const Matrix< BaseFloat > &feats, const GaussPost &gpost, const TransitionModel &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats)
A templated class for writing objects to an archive or script file; see The Table concept...
void SetUnit()
Sets the matrix to zero, except for ones along the diagonal [works for non-square matrices too].
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
Class for computing the maximum-likelihood estimates of the parameters of a Gaussian mixture model...
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
MatrixIndexT Dim() const
Returns the dimension of the vector.
bool HasKey(const std::string &key)
void Register(OptionsItf *opts)
int NumArgs() const
Number of positional parameters (cf. argc - 1).
void Read(std::istream &in, bool binary)
A class representing a vector.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition for Gaussian Mixture Model with diagonal covariances.
void AccumulateForComponent(const VectorBase< BaseFloat > &data, int32 comp_index, BaseFloat weight)
Accumulate for a single component, given the posterior.
void Update(const FmllrOptions &opts, MatrixBase< BaseFloat > *fmllr_mat, BaseFloat *objf_impr, BaseFloat *count)
Update.