gmm-init-model-flat.cc
Go to the documentation of this file.
1 // gmmbin/gmm-init-model-flat.cc
2 
3 // Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "gmm/am-diag-gmm.h"
24 #include "hmm/transition-model.h"
25 #include "gmm/mle-am-diag-gmm.h"
26 #include "tree/build-tree-utils.h"
27 #include "tree/context-dep.h"
29 #include "util/text-utils.h"
30 
31 namespace kaldi {
32 
33 void GetFeatureMeanAndVariance(const std::string &feat_rspecifier,
34  Vector<BaseFloat> *inv_var_out,
35  Vector<BaseFloat> *mean_out) {
36  double count = 0.0;
37  Vector<double> x_stats, x2_stats;
38 
39  SequentialDoubleMatrixReader feat_reader(feat_rspecifier);
40  for (; !feat_reader.Done(); feat_reader.Next()) {
41  const Matrix<double> &mat = feat_reader.Value();
42  if (x_stats.Dim() == 0) {
43  int32 dim = mat.NumCols();
44  x_stats.Resize(dim);
45  x2_stats.Resize(dim);
46  }
47  for (int32 i = 0; i < mat.NumRows(); i++) {
48  count += 1.0;
49  x_stats.AddVec(1.0, mat.Row(i));
50  x2_stats.AddVec2(1.0, mat.Row(i));
51  }
52  }
53  if (count == 0) { KALDI_ERR << "No features were read!"; }
54  x_stats.Scale(1.0/count);
55  x2_stats.Scale(1.0/count);
56  x2_stats.AddVec2(-1.0, x_stats);
57  if (x2_stats.Min() <= 0.0)
58  KALDI_ERR << "Variance is zero or negative!";
59  x2_stats.InvertElements();
60  int32 dim = x_stats.Dim();
61  inv_var_out->Resize(dim);
62  mean_out->Resize(dim);
63  inv_var_out->CopyFromVec(x2_stats);
64  mean_out->CopyFromVec(x_stats);
65 }
66 
67 
68 }
69 
70 int main(int argc, char *argv[]) {
71  using namespace kaldi;
72  try {
73  using namespace kaldi;
74  typedef kaldi::int32 int32;
75 
76  const char *usage =
77  "Initialize GMM, with Gaussians initialized to mean and variance\n"
78  "of some provided example data (or to 0,1 if not provided: in that\n"
79  "case, provide --dim option)\n"
80  "Usage: gmm-init-model-flat [options] <tree-in> <topo-file> <model-out> [<features-rspecifier>]\n"
81  "e.g.: \n"
82  " gmm-init-model-flat tree topo 1.mdl ark:feats.scp\n";
83 
84  bool binary = true;
85  int32 dim = 40;
86 
87  ParseOptions po(usage);
88  po.Register("binary", &binary, "Write output in binary mode");
89  po.Register("dim", &dim, "Dimension of model (this matters only if not providing features).");
90 
91  po.Read(argc, argv);
92 
93  if (po.NumArgs() < 3 || po.NumArgs() > 4) {
94  po.PrintUsage();
95  exit(1);
96  }
97 
98  std::string
99  tree_filename = po.GetArg(1),
100  topo_filename = po.GetArg(2),
101  model_out_filename = po.GetArg(3),
102  feats_rspecifier = po.GetOptArg(4);
103 
104  ContextDependency ctx_dep;
105  ReadKaldiObject(tree_filename, &ctx_dep);
106 
107  HmmTopology topo;
108  ReadKaldiObject(topo_filename, &topo);
109 
110  Vector<BaseFloat> global_inverse_var, global_mean;
111  if (po.NumArgs() == 4) {
112  GetFeatureMeanAndVariance(feats_rspecifier,
113  &global_inverse_var,
114  &global_mean);
115  dim = global_mean.Dim();
116  } else {
117  global_inverse_var.Resize(dim);
118  global_inverse_var.Set(1.0);
119  global_mean.Resize(dim); // leave it at zero.
120  }
121 
122  int32 num_pdfs = ctx_dep.NumPdfs();
123 
124  AmDiagGmm am_gmm;
125  DiagGmm gmm;
126  gmm.Resize(1, dim);
127  { // Initialize the gmm.
128  Matrix<BaseFloat> inv_var(1, dim);
129  inv_var.Row(0).CopyFromVec(global_inverse_var);
130  Matrix<BaseFloat> mu(1, dim);
131  mu.Row(0).CopyFromVec(global_mean);
132  Vector<BaseFloat> weights(1);
133  weights.Set(1.0);
134  gmm.SetInvVarsAndMeans(inv_var, mu);
135  gmm.SetWeights(weights);
136  gmm.ComputeGconsts();
137  }
138  for (int i = 0; i < num_pdfs; i++)
139  am_gmm.AddPdf(gmm);
140 
141  TransitionModel trans_model(ctx_dep, topo);
142 
143  {
144  Output ko(model_out_filename, binary);
145  trans_model.Write(ko.Stream(), binary);
146  am_gmm.Write(ko.Stream(), binary);
147  }
148  KALDI_LOG << "Wrote model.";
149  } catch(const std::exception &e) {
150  std::cerr << e.what();
151  return -1;
152  }
153 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void AddPdf(const DiagGmm &gmm)
Adds a GMM to the model, and increments the total number of PDFs.
Definition: am-diag-gmm.cc:57
void SetInvVarsAndMeans(const MatrixBase< Real > &invvars, const MatrixBase< Real > &means)
Use SetInvVarsAndMeans if updating both means and (inverse) variances.
Definition: diag-gmm-inl.h:63
A class for storing topology information for phones.
Definition: hmm-topology.h:93
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: diag-gmm.cc:66
int32 ComputeGconsts()
Sets the gconsts.
Definition: diag-gmm.cc:114
kaldi::int32 int32
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
Real Min() const
Returns the minimum value of any element, or +infinity for the empty vector.
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
void AddVec2(const Real alpha, const VectorBase< Real > &v)
Add vector : *this = *this + alpha * rv^2 [element-wise squaring].
virtual int32 NumPdfs() const
NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
Definition: context-dep.h:71
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
const size_t count
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Scale(Real alpha)
Multiplies all elements by this constant.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void Write(std::ostream &os, bool binary) const
A class representing a vector.
Definition: kaldi-vector.h:406
void InvertElements()
Invert all elements.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Set(Real f)
Set all members of a vector to a specified value.
void Write(std::ostream &out_stream, bool binary) const
Definition: am-diag-gmm.cc:163
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void SetWeights(const VectorBase< Real > &w)
Mutators for both float or double.
Definition: diag-gmm-inl.h:28
int main(int argc, char *argv[])
#define KALDI_LOG
Definition: kaldi-error.h:153
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
void GetFeatureMeanAndVariance(const std::string &feat_rspecifier, Vector< BaseFloat > *inv_var_out, Vector< BaseFloat > *mean_out)
std::string GetOptArg(int param) const