34 using namespace kaldi;
38 "Accumulate stats from posteriors and features for instantiating " 39 "a full-covariance GMM. See also fgmm-global-acc-stats.\n" 40 "Usage: fgmm-global-acc-stats-post [options] <posterior-rspecifier> " 41 "<number-of-components> <feature-rspecifier> <stats-out>\n" 42 "e.g.: fgmm-global-acc-stats-post scp:post.scp 2048 " 43 "scp:train.scp 1.acc\n";
47 std::string update_flags_str =
"mvw";
48 std::string weights_rspecifier;
49 po.Register(
"binary", &binary,
"Write output in binary mode");
50 po.Register(
"update-flags", &update_flags_str,
"Which GMM parameters will be " 51 "updated: subset of mvw.");
52 po.Register(
"weights", &weights_rspecifier,
"rspecifier for a vector of floats " 53 "for each utterance, that's a per-frame weight.");
56 if (po.NumArgs() != 4) {
61 std::string post_rspecifier = po.GetArg(1),
62 feature_rspecifier = po.GetArg(3),
63 accs_wxfilename = po.GetArg(4);
65 int32 num_components = atoi(po.GetArg(2).c_str());
69 double tot_like = 0.0, tot_weight = 0.0;
74 int32 num_done = 0, num_err = 0;
76 for (; !post_reader.Done(); post_reader.Next()) {
77 std::string key = post_reader.Key();
79 if (!feature_reader.HasKey(key)) {
80 KALDI_WARN <<
"No features available for utterance " 86 int32 file_frames = mat.
NumRows();
90 fgmm_accs.Resize(num_components, mat.
NumCols(),
98 if (weights_rspecifier !=
"") {
99 if (!weights_reader.HasKey(key)) {
100 KALDI_WARN <<
"No per-frame weights available for utterance " 105 weights = weights_reader.Value(key);
106 if (weights.
Dim() != file_frames) {
107 KALDI_WARN <<
"Weights for utterance " << key <<
" have wrong dim " 108 << weights.
Dim() <<
" vs. " << file_frames;
114 if (post.size() !=
static_cast<size_t>(file_frames)) {
115 KALDI_WARN <<
"posterior information for utterance " << key
116 <<
" has wrong size " << post.size() <<
" vs. " 122 for (int32
i = 0;
i < file_frames;
i++) {
124 if (weight == 0.0)
continue;
125 file_weight += weight;
129 for (int32
j = 0;
j < post[
i].size();
j++)
130 fgmm_accs.AccumulateForComponent(data, post[
i][
j].first,
134 KALDI_VLOG(2) <<
"File '" << key <<
"': Average likelihood = " 135 << (file_like/file_weight) <<
" over " 136 << file_weight <<
" frames.";
137 tot_like += file_like;
138 tot_weight += file_weight;
141 KALDI_LOG <<
"Done " << num_done <<
" files; " 142 << num_err <<
" with errors.";
144 <<
"frame = " << (tot_like/tot_weight) <<
" over " 145 << tot_weight <<
" (weighted) frames.";
148 KALDI_LOG <<
"Written accs to " << accs_wxfilename;
149 return (num_done != 0 ? 0 : 1);
150 }
catch(
const std::exception &e) {
151 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
GmmFlagsType StringToGmmFlags(std::string str)
Converts a string that is some subset of "mvwt" (or "a" for all) to GMM update flags.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
BaseFloat TotalPosterior(const Posterior &post)
Returns the total of all the weights in "post".
Allows random access to a collection of objects in an archive or script file; see The Table concept...
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Class for computing the maximum-likelihood estimates of the parameters of a Gaussian mixture model...
MatrixIndexT Dim() const
Returns the dimension of the vector.
void ScalePosterior(BaseFloat scale, Posterior *post)
Scales the BaseFloat (weight) element in the posterior entries.
A class representing a vector.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...