gmm-acc-stats-twofeats.cc
Go to the documentation of this file.
1 // gmmbin/gmm-acc-stats-twofeats.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2014 Guoguo Chen
5 // 2014 Johns Hopkins University (author: Daniel Povey)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABILITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 
23 #include "base/kaldi-common.h"
24 #include "util/common-utils.h"
25 #include "gmm/am-diag-gmm.h"
26 #include "hmm/transition-model.h"
27 #include "gmm/mle-am-diag-gmm.h"
28 #include "hmm/posterior.h"
29 
30 
31 int main(int argc, char *argv[]) {
32  using namespace kaldi;
33  try {
34  const char *usage =
35  "Accumulate stats for GMM training, computing posteriors with one set of features\n"
36  "but accumulating statistics with another.\n"
37  "First features are used to get posteriors, second to accumulate stats\n"
38  "Usage: gmm-acc-stats-twofeats [options] <model-in> <feature1-rspecifier> <feature2-rspecifier> <posteriors-rspecifier> <stats-out>\n"
39  "e.g.: \n"
40  " gmm-acc-stats-twofeats 1.mdl 1.ali scp:train.scp scp:train_new.scp ark:1.ali 1.acc\n";
41 
42  ParseOptions po(usage);
43  bool binary = true;
44  po.Register("binary", &binary, "Write output in binary mode");
45  po.Read(argc, argv);
46 
47  if (po.NumArgs() != 5) {
48  po.PrintUsage();
49  exit(1);
50  }
51 
52  std::string model_filename = po.GetArg(1),
53  feature1_rspecifier = po.GetArg(2),
54  feature2_rspecifier = po.GetArg(3),
55  posteriors_rspecifier = po.GetArg(4),
56  accs_wxfilename = po.GetArg(5);
57 
58  using namespace kaldi;
59  typedef kaldi::int32 int32;
60 
61  AmDiagGmm am_gmm;
62  TransitionModel trans_model;
63  {
64  bool binary;
65  Input ki(model_filename, &binary);
66  trans_model.Read(ki.Stream(), binary);
67  am_gmm.Read(ki.Stream(), binary);
68  }
69 
70  Vector<double> transition_accs;
71  trans_model.InitStats(&transition_accs);
72  int32 new_dim = 0;
73  AccumAmDiagGmm gmm_accs;
74  // will initialize once we know new_dim.
75 
76  double tot_like = 0.0;
77  double tot_t = 0.0;
78 
79  SequentialBaseFloatMatrixReader feature1_reader(feature1_rspecifier);
80  RandomAccessBaseFloatMatrixReader feature2_reader(feature2_rspecifier);
81  RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
82 
83  int32 num_done = 0, num_no2ndfeats = 0, num_no_posterior = 0, num_other_error = 0;
84  for (; !feature1_reader.Done(); feature1_reader.Next()) {
85  std::string key = feature1_reader.Key();
86  if (!feature2_reader.HasKey(key)) {
87  KALDI_WARN << "For utterance " << key << ", second features not present.";
88  num_no2ndfeats ++;
89  } else if (!posteriors_reader.HasKey(key)) {
90  num_no_posterior++;
91  } else {
92  const Matrix<BaseFloat> &mat1 = feature1_reader.Value();
93  const Matrix<BaseFloat> &mat2 = feature2_reader.Value(key);
94  KALDI_ASSERT(mat1.NumRows() == mat2.NumRows());
95  if (new_dim == 0) {
96  new_dim = mat2.NumCols();
97  gmm_accs.Init(am_gmm, new_dim, kGmmAll);
98  }
99  const Posterior &posterior = posteriors_reader.Value(key);
100 
101  if (posterior.size() != mat1.NumRows()) {
102  KALDI_WARN << "Posteriors has wrong size "<< (posterior.size()) << " vs. "<< (mat1.NumRows());
103  num_other_error++;
104  continue;
105  }
106  if (mat1.NumRows() != mat2.NumRows()) {
107  KALDI_WARN << "Features have mismatched numbers of frames "
108  << mat1.NumRows() << " vs. " << mat2.NumRows();
109  num_other_error++;
110  continue;
111  }
112 
113  num_done++;
114  BaseFloat tot_like_this_file = 0.0,
115  tot_weight_this_file = 0.0;
116 
117  Posterior pdf_posterior;
118  ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
119  for (size_t i = 0; i < posterior.size(); i++) {
120  // Accumulates for GMM.
121  for (size_t j = 0; j <pdf_posterior[i].size(); j++) {
122  int32 pdf_id = pdf_posterior[i][j].first;
123  BaseFloat weight = pdf_posterior[i][j].second;
124  tot_like_this_file += weight *
125  gmm_accs.AccumulateForGmmTwofeats(am_gmm,
126  mat1.Row(i),
127  mat2.Row(i),
128  pdf_id,
129  weight);
130  tot_weight_this_file += weight;
131  }
132 
133  // Accumulates for transitions.
134  for (size_t j = 0; j < posterior[i].size(); j++) {
135  int32 tid = posterior[i][j].first;
136  BaseFloat weight = posterior[i][j].second;
137  trans_model.Accumulate(weight, tid, &transition_accs);
138  }
139  }
140  KALDI_LOG << "Average like for this file is "
141  << (tot_like_this_file/tot_weight_this_file) << " over "
142  << tot_weight_this_file <<" frames.";
143  tot_like += tot_like_this_file;
144  tot_t += tot_weight_this_file;
145  if (num_done % 10 == 0)
146  KALDI_LOG << "Avg like per frame so far is " << (tot_like/tot_t);
147  }
148  }
149 
150  KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior
151  << " with no posteriors, " << num_no2ndfeats
152  << " with no second features, " << num_other_error
153  << " with other errors.";
154 
155  KALDI_LOG << "Overall avg like per frame (Gaussian only) = "
156  << (tot_like/tot_t) << " over " << tot_t << " frames.";
157 
158  {
159  Output ko(accs_wxfilename, binary);
160  transition_accs.Write(ko.Stream(), binary);
161  gmm_accs.Write(ko.Stream(), binary);
162  }
163  KALDI_LOG << "Written accs.";
164  if (num_done != 0) return 0;
165  else return 1;
166  } catch(const std::exception &e) {
167  std::cerr << e.what();
168  return -1;
169  }
170 }
171 
172 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
BaseFloat AccumulateForGmmTwofeats(const AmDiagGmm &model, const VectorBase< BaseFloat > &data1, const VectorBase< BaseFloat > &data2, int32 gmm_index, BaseFloat weight)
Accumulate stats for a single GMM in the model; uses data1 for getting posteriors and data2 for stats...
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary).
kaldi::int32 int32
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
std::istream & Stream()
Definition: kaldi-io.cc:826
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
void InitStats(Vector< double > *stats) const
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
const T & Value(const std::string &key)
void Read(std::istream &is, bool binary)
void Accumulate(BaseFloat prob, int32 trans_id, Vector< double > *stats) const
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Write(std::ostream &out_stream, bool binary) const
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over pdf-ids.
Definition: posterior.cc:322
#define KALDI_LOG
Definition: kaldi-error.h:153
void Read(std::istream &in_stream, bool binary)
Definition: am-diag-gmm.cc:147
void Init(const AmDiagGmm &model, GmmFlagsType flags)
Initializes accumulators for each GMM based on the number of components and dimension.
int main(int argc, char *argv[])