fgmm-gselect.cc
Go to the documentation of this file.
1 // fgmmbin/fgmm-gselect.cc
2 
3 // Copyright 2009-2011 Saarland University; Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "gmm/full-gmm.h"
24 #include "hmm/transition-model.h"
25 
26 int main(int argc, char *argv[]) {
27  try {
28  using namespace kaldi;
29  using std::vector;
30  typedef kaldi::int32 int32;
31  const char *usage =
32  "Precompute Gaussian indices for pruning\n"
33  " (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
34  " For each frame, gives a list of the n best Gaussian indices,\n"
35  " sorted from best to worst.\n"
36  "See also: gmm-gselect, copy-gselect, fgmm-gselect-to-post\n"
37  "Usage: fgmm-gselect [options] <model-in> <feature-rspecifier> <gselect-wspecifier>\n"
38  "The --gselect option (which takes an rspecifier) limits selection to a subset\n"
39  "of indices:\n"
40  "e.g.: fgmm-gselect \"--gselect=ark:gunzip -c bigger.gselect.gz|\" --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >1.gselect.gz\"\n";
41 
42  ParseOptions po(usage);
43  int32 num_gselect = 50;
44  std::string gselect_rspecifier;
45  std::string likelihood_wspecifier;
46  po.Register("n", &num_gselect, "Number of Gaussians to keep per frame\n");
47  po.Register("write-likes", &likelihood_wspecifier, "Wspecifier for likelihoods per "
48  "utterance");
49  po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
50  "to limit the search to");
51  po.Read(argc, argv);
52 
53  if (po.NumArgs() != 3) {
54  po.PrintUsage();
55  exit(1);
56  }
57 
58  std::string model_filename = po.GetArg(1),
59  feature_rspecifier = po.GetArg(2),
60  gselect_wspecifier = po.GetArg(3);
61 
62  FullGmm fgmm;
63  ReadKaldiObject(model_filename, &fgmm);
64  KALDI_ASSERT(num_gselect > 0);
65  int32 num_gauss = fgmm.NumGauss();
66  KALDI_ASSERT(num_gauss);
67  if (num_gselect > num_gauss) {
68  KALDI_WARN << "You asked for " << num_gselect << " Gaussians but GMM "
69  << "only has " << num_gauss << ", returning this many. "
70  << "Note: this means the Gaussian selection is pointless.";
71  num_gselect = num_gauss;
72  }
73 
74  double tot_like = 0.0;
75  kaldi::int64 tot_t = 0;
76 
77  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
78  Int32VectorVectorWriter gselect_writer(gselect_wspecifier);
79  RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); // may be ""
80  BaseFloatWriter likelihood_writer(likelihood_wspecifier); // may be ""
81 
82  int32 num_done = 0, num_err = 0;
83  for (; !feature_reader.Done(); feature_reader.Next()) {
84  int32 tot_t_this_file = 0; double tot_like_this_file = 0;
85  std::string utt = feature_reader.Key();
86  const Matrix<BaseFloat> &mat = feature_reader.Value();
87  vector<vector<int32> > gselect(mat.NumRows());
88  tot_t_this_file += mat.NumRows();
89  if(gselect_rspecifier != "") { // Limit Gaussians to preselected group...
90  if (!gselect_reader.HasKey(utt)) {
91  KALDI_WARN << "No gselect information for utterance " << utt;
92  num_err++;
93  continue;
94  }
95  const vector<vector<int32> > &preselect = gselect_reader.Value(utt);
96  if (preselect.size() != static_cast<size_t>(mat.NumRows())) {
97  KALDI_WARN << "Input gselect for utterance " << utt << " has wrong size "
98  << preselect.size() << " vs. " << mat.NumRows();
99  num_err++;
100  continue;
101  }
102  for (int32 i = 0; i < mat.NumRows(); i++)
103  tot_like_this_file +=
104  fgmm.GaussianSelectionPreselect(mat.Row(i), preselect[i],
105  num_gselect, &(gselect[i]));
106  } else { // No "preselect" [i.e. no existing gselect]: simple case.
107  for (int32 i = 0; i < mat.NumRows(); i++)
108  tot_like_this_file +=
109  fgmm.GaussianSelection(mat.Row(i), num_gselect, &(gselect[i]));
110  }
111 
112  gselect_writer.Write(utt, gselect);
113  if (num_done % 10 == 0)
114  KALDI_LOG << "For " << num_done << "'th file, average UBM likelihood over "
115  << tot_t_this_file << " frames is "
116  << (tot_like_this_file/tot_t_this_file);
117  tot_t += tot_t_this_file;
118  tot_like += tot_like_this_file;
119 
120  if(likelihood_wspecifier != "")
121  likelihood_writer.Write(utt, tot_like_this_file);
122  num_done++;
123  }
124 
125  KALDI_LOG << "Done " << num_done << " files, " << num_err
126  << " with errors, average UBM log-likelihood is "
127  << (tot_like/tot_t) << " over " << tot_t << " frames.";
128 
129  if (num_done != 0) return 0;
130  else return 1;
131  } catch(const std::exception &e) {
132  std::cerr << e.what();
133  return -1;
134  }
135 }
136 
137 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int main(int argc, char *argv[])
Definition: fgmm-gselect.cc:26
Definition for Gaussian Mixture Model with full covariances.
Definition: full-gmm.h:40
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
const T & Value(const std::string &key)
BaseFloat GaussianSelectionPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &preselect, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition: full-gmm.cc:674
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
BaseFloat GaussianSelection(const VectorBase< BaseFloat > &data, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition: full-gmm.cc:637
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: full-gmm.h:58
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153