gmm-global-get-post.cc
Go to the documentation of this file.
1 // gmmbin/gmm-global-get-post.cc
2 
3 // Copyright 2009-2011 Saarland University; Microsoft Corporation
4 // 2013-2014 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "gmm/diag-gmm.h"
25 #include "hmm/posterior.h"
26 
27 int main(int argc, char *argv[]) {
28  try {
29  using namespace kaldi;
30  using std::vector;
31  typedef kaldi::int32 int32;
32  const char *usage =
33  "Precompute Gaussian indices and convert immediately to top-n\n"
34  "posteriors (useful in iVector extraction with diagonal UBMs)\n"
35  "See also: gmm-gselect, fgmm-gselect, fgmm-global-gselect-to-post\n"
36  " (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
37  " For each frame, gives a list of the n best Gaussian indices,\n"
38  " sorted from best to worst.\n"
39  "Usage: gmm-global-get-post [options] <model-in> <feature-rspecifier> <post-wspecifier>\n"
40  "e.g.: gmm-global-get-post --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >post.1.gz\"\n";
41 
42  ParseOptions po(usage);
43  int32 num_post = 50;
44  BaseFloat min_post = 0.0;
45  po.Register("n", &num_post, "Number of Gaussians to keep per frame\n");
46  po.Register("min-post", &min_post, "Minimum posterior we will output "
47  "before pruning and renormalizing (e.g. 0.01)");
48  po.Read(argc, argv);
49 
50  if (po.NumArgs() != 3) {
51  po.PrintUsage();
52  exit(1);
53  }
54 
55  std::string model_filename = po.GetArg(1),
56  feature_rspecifier = po.GetArg(2),
57  post_wspecifier = po.GetArg(3);
58 
59  DiagGmm gmm;
60  ReadKaldiObject(model_filename, &gmm);
61  KALDI_ASSERT(num_post > 0);
62  KALDI_ASSERT(min_post < 1.0);
63  int32 num_gauss = gmm.NumGauss();
64  if (num_post > num_gauss) {
65  KALDI_WARN << "You asked for " << num_post << " Gaussians but GMM "
66  << "only has " << num_gauss << ", returning this many. ";
67  num_post = num_gauss;
68  }
69 
70  double tot_like = 0.0;
71  kaldi::int64 tot_t = 0;
72 
73  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
74  PosteriorWriter post_writer(post_wspecifier);
75 
76  int32 num_done = 0, num_err = 0;
77  for (; !feature_reader.Done(); feature_reader.Next()) {
78  std::string utt = feature_reader.Key();
79  const Matrix<BaseFloat> &feats = feature_reader.Value();
80  int32 T = feats.NumRows();
81  if (T == 0) {
82  KALDI_WARN << "Empty features for utterance " << utt;
83  num_err++;
84  continue;
85  }
86  if (feats.NumCols() != gmm.Dim()) {
87  KALDI_WARN << "Dimension mismatch for utterance " << utt
88  << ": got " << feats.NumCols() << ", expected " << gmm.Dim();
89  num_err++;
90  continue;
91  }
92  vector<vector<int32> > gselect(T);
93 
94  Matrix<BaseFloat> loglikes;
95 
96  gmm.LogLikelihoods(feats, &loglikes);
97 
98  Posterior post(T);
99 
100  double log_like_this_file = 0.0;
101  for (int32 t = 0; t < T; t++) {
102  log_like_this_file +=
103  VectorToPosteriorEntry(loglikes.Row(t), num_post,
104  min_post, &(post[t]));
105  }
106  KALDI_VLOG(1) << "Processed utterance " << utt << ", average likelihood "
107  << (log_like_this_file / T) << " over " << T << " frames";
108  tot_like += log_like_this_file;
109  tot_t += T;
110 
111  post_writer.Write(utt, post);
112  num_done++;
113  }
114 
115  KALDI_LOG << "Done " << num_done << " files, " << num_err
116  << " with errors, average UBM log-likelihood is "
117  << (tot_like/tot_t) << " over " << tot_t << " frames.";
118 
119  if (num_done != 0) return 0;
120  else return 1;
121  } catch(const std::exception &e) {
122  std::cerr << e.what();
123  return -1;
124  }
125 }
126 
127 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
BaseFloat VectorToPosteriorEntry(const VectorBase< BaseFloat > &log_likes, int32 num_gselect, BaseFloat min_post, std::vector< std::pair< int32, BaseFloat > > *post_entry)
Given a vector of log-likelihoods (typically of Gaussians in a GMM but could be of pdf-ids)...
Definition: posterior.cc:440
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
int main(int argc, char *argv[])
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
void LogLikelihoods(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods.
Definition: diag-gmm.cc:528
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
#define KALDI_LOG
Definition: kaldi-error.h:153