gmm-gselect.cc
Go to the documentation of this file.
1 // gmmbin/gmm-gselect.cc
2 
3 // Copyright 2009-2011 Saarland University; Microsoft Corporation
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "gmm/diag-gmm.h"
25 #include "hmm/transition-model.h"
26 
27 int main(int argc, char *argv[]) {
28  try {
29  using namespace kaldi;
30  using std::vector;
31  typedef kaldi::int32 int32;
32  const char *usage =
33  "Precompute Gaussian indices for pruning\n"
34  " (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
35  " For each frame, gives a list of the n best Gaussian indices,\n"
36  " sorted from best to worst.\n"
37  "See also: gmm-global-get-post, fgmm-global-gselect-to-post,\n"
38  "copy-gselect, fgmm-gselect\n"
39  "Usage: gmm-gselect [options] <model-in> <feature-rspecifier> <gselect-wspecifier>\n"
40  "The --gselect option (which takes an rspecifier) limits selection to a subset\n"
41  "of indices:\n"
42  "e.g.: gmm-gselect \"--gselect=ark:gunzip -c bigger.gselect.gz|\" --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >gselect.1.gz\"\n";
43 
44  ParseOptions po(usage);
45  int32 num_gselect = 50;
46  std::string gselect_rspecifier;
47  std::string likelihood_wspecifier;
48  po.Register("n", &num_gselect, "Number of Gaussians to keep per frame\n");
49  po.Register("write-likes", &likelihood_wspecifier, "rspecifier for likelihoods per "
50  "utterance");
51  po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
52  "to limit the search to");
53  po.Read(argc, argv);
54 
55  if (po.NumArgs() != 3) {
56  po.PrintUsage();
57  exit(1);
58  }
59 
60  std::string model_filename = po.GetArg(1),
61  feature_rspecifier = po.GetArg(2),
62  gselect_wspecifier = po.GetArg(3);
63 
64  DiagGmm gmm;
65  ReadKaldiObject(model_filename, &gmm);
66  KALDI_ASSERT(num_gselect > 0);
67  int32 num_gauss = gmm.NumGauss();
68  if (num_gselect > num_gauss) {
69  KALDI_WARN << "You asked for " << num_gselect << " Gaussians but GMM "
70  << "only has " << num_gauss << ", returning this many. "
71  << "Note: this means the Gaussian selection is pointless.";
72  num_gselect = num_gauss;
73  }
74 
75  double tot_like = 0.0;
76  kaldi::int64 tot_t = 0;
77 
78  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
79  Int32VectorVectorWriter gselect_writer(gselect_wspecifier);
80  RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); // may be ""
81  BaseFloatWriter likelihood_writer(likelihood_wspecifier); // may be ""
82 
83  int32 num_done = 0, num_err = 0;
84  for (; !feature_reader.Done(); feature_reader.Next()) {
85  int32 tot_t_this_file = 0; double tot_like_this_file = 0;
86  std::string utt = feature_reader.Key();
87  const Matrix<BaseFloat> &mat = feature_reader.Value();
88  vector<vector<int32> > gselect(mat.NumRows());
89  tot_t_this_file += mat.NumRows();
90  if(gselect_rspecifier != "") { // Limit Gaussians to preselected group...
91  if (!gselect_reader.HasKey(utt)) {
92  KALDI_WARN << "No gselect information for utterance " << utt;
93  num_err++;
94  continue;
95  }
96  const vector<vector<int32> > &preselect = gselect_reader.Value(utt);
97  if (preselect.size() != static_cast<size_t>(mat.NumRows())) {
98  KALDI_WARN << "Input gselect for utterance " << utt << " has wrong size "
99  << preselect.size() << " vs. " << mat.NumRows();
100  num_err++;
101  continue;
102  }
103  for (int32 i = 0; i < mat.NumRows(); i++)
104  tot_like_this_file +=
105  gmm.GaussianSelectionPreselect(mat.Row(i), preselect[i],
106  num_gselect, &(gselect[i]));
107  } else { // No "preselect" [i.e. no existing gselect]: simple case.
108  tot_like_this_file =
109  gmm.GaussianSelection(mat, num_gselect, &gselect);
110  }
111 
112  gselect_writer.Write(utt, gselect);
113  if (num_done % 10 == 0)
114  KALDI_LOG << "For " << num_done << "'th file, average UBM likelihood over "
115  << tot_t_this_file << " frames is "
116  << (tot_like_this_file/tot_t_this_file);
117  tot_t += tot_t_this_file;
118  tot_like += tot_like_this_file;
119 
120  if(likelihood_wspecifier != "")
121  likelihood_writer.Write(utt, tot_like_this_file);
122  num_done++;
123  }
124 
125  KALDI_LOG << "Done " << num_done << " files, " << num_err
126  << " with errors, average UBM log-likelihood is "
127  << (tot_like/tot_t) << " over " << tot_t << " frames.";
128 
129  if (num_done != 0) return 0;
130  else return 1;
131  } catch(const std::exception &e) {
132  std::cerr << e.what();
133  return -1;
134  }
135 }
136 
137 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
int main(int argc, char *argv[])
Definition: gmm-gselect.cc:27
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
bool HasKey(const std::string &key)
BaseFloat GaussianSelectionPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &preselect, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition: diag-gmm.cc:875
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
BaseFloat GaussianSelection(const VectorBase< BaseFloat > &data, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition: diag-gmm.cc:765
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
#define KALDI_LOG
Definition: kaldi-error.h:153