gmm-basis-fmllr-training.cc
Go to the documentation of this file.
1 // gmmbin/gmm-basis-fmllr-training.cc
2 
3 // Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <string>
21 using std::string;
22 #include <vector>
23 using std::vector;
24 
25 #include "base/kaldi-common.h"
26 #include "util/common-utils.h"
27 #include "gmm/am-diag-gmm.h"
28 #include "hmm/transition-model.h"
31 
32 int main(int argc, char *argv[]) {
33  try {
34  typedef kaldi::int32 int32;
35  using namespace kaldi;
36  const char *usage =
37  "Estimate fMLLR basis representation. Reads a set of gradient scatter\n"
38  "accumulations. Outputs basis matrices.\n"
39  "Usage: gmm-basis-fmllr-training [options] <model-in> <basis-wspecifier> "
40  "<accs-in1> <accs-in2> ...\n";
41 
42  bool binary_write = true;
43  ParseOptions po(usage);
44  po.Register("binary", &binary_write, "Write output in binary mode");
45 
46  po.Read(argc, argv);
47  if (po.NumArgs() < 3) {
48  po.PrintUsage();
49  exit(1);
50  }
51 
52  string
53  model_rxfilename = po.GetArg(1),
54  basis_wspecifier = po.GetArg(2);
55 
56  TransitionModel trans_model;
57  AmDiagGmm am_gmm;
58  {
59  bool binary;
60  Input ki(model_rxfilename, &binary);
61  trans_model.Read(ki.Stream(), binary);
62  am_gmm.Read(ki.Stream(), binary);
63  }
64 
65  BasisFmllrAccus basis_accs(am_gmm.Dim());
66  int num_accs = po.NumArgs() - 2;
67 
68  for (int i = 3, max = po.NumArgs(); i <= max; ++i) {
69  std::string accs_in_filename = po.GetArg(i);
70  bool binary_read;
71  kaldi::Input ki(accs_in_filename, &binary_read);
72  basis_accs.Read(ki.Stream(), binary_read, true /* add read values*/);
73  }
74 
75  // Estimate the basis matrices
76  BasisFmllrEstimate basis_est(am_gmm.Dim());
77  basis_est.EstimateFmllrBasis(am_gmm, basis_accs);
78  WriteKaldiObject(basis_est, basis_wspecifier, binary_write);
79 
80  KALDI_LOG << "Summed " << num_accs << " gradient scatter stats";
81  KALDI_LOG << "Generate " << basis_est.BasisSize() << " bases, written to "
82  << basis_wspecifier;
83  return 0;
84  } catch(const std::exception& e) {
85  std::cerr << e.what();
86  return -1;
87  }
88 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
kaldi::int32 int32
int main(int argc, char *argv[])
void Register(const std::string &name, bool *ptr, const std::string &doc)
std::istream & Stream()
Definition: kaldi-io.cc:826
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
Stats for fMLLR subspace estimation.
void Read(std::istream &is, bool binary)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int32 Dim() const
Definition: am-diag-gmm.h:79
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257
void EstimateFmllrBasis(const AmDiagGmm &am_gmm, const BasisFmllrAccus &basis_accus)
Estimate the base matrices efficiently in a Maximum Likelihood manner.
#define KALDI_LOG
Definition: kaldi-error.h:153
void Read(std::istream &in_stream, bool binary)
Definition: am-diag-gmm.cc:147
Estimation functions for basis fMLLR.