post-to-tacc.cc
Go to the documentation of this file.
1 // bin/post-to-tacc.cc
2 
3 // Copyright 2009-2011 Chao Weng Microsoft Corporation
4 // 2015 Minhua Wu
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "hmm/transition-model.h"
25 #include "hmm/posterior.h"
26 
27 int main(int argc, char *argv[]) {
28  try {
29  using namespace kaldi;
30  typedef kaldi::int32 int32;
31 
32  const char *usage =
33  "From posteriors, compute transition-accumulators\n"
34  "The output is a vector of counts/soft-counts, indexed by transition-id)\n"
35  "Note: the model is only read in order to get the size of the vector\n"
36  "\n"
37  "Usage: post-to-tacc [options] <model> <post-rspecifier> <accs>\n"
38  " e.g.: post-to-tacc --binary=false 1.mdl \"ark:ali-to-post 1.ali|\" 1.tacc\n"
39  "See also: get-post-on-ali\n";
40 
41  bool binary = true;
42  bool per_pdf = false;
43  ParseOptions po(usage);
44  po.Register("binary", &binary, "Write output in binary mode.");
45  po.Register("per-pdf", &per_pdf, "if ture, accumulate counts per pdf-id"
46  " rather than transition-id. (default: false)");
47  po.Read(argc, argv);
48 
49  if (po.NumArgs() != 3) {
50  po.PrintUsage();
51  exit(1);
52  }
53 
54  std::string model_rxfilename = po.GetArg(1),
55  post_rspecifier = po.GetArg(2),
56  accs_wxfilename = po.GetArg(3);
57 
58  kaldi::SequentialPosteriorReader posterior_reader(post_rspecifier);
59 
60  int32 num_transition_ids;
61 
62  bool binary_in;
63  Input ki(model_rxfilename, &binary_in);
64  TransitionModel trans_model;
65  trans_model.Read(ki.Stream(), binary_in);
66  num_transition_ids = trans_model.NumTransitionIds();
67 
68  Vector<double> transition_accs(num_transition_ids+1); // +1 because they're
69  // 1-based; position zero is empty. We'll write as float.
70  int32 num_done = 0;
71 
72  for (; !posterior_reader.Done(); posterior_reader.Next()) {
73  const kaldi::Posterior &posterior = posterior_reader.Value();
74  int32 num_frames = static_cast<int32>(posterior.size());
75  for (int32 i = 0; i < num_frames; i++) {
76  for (int32 j = 0; j < static_cast<int32>(posterior[i].size()); j++) {
77  int32 tid = posterior[i][j].first;
78  if (tid <= 0 || tid > num_transition_ids)
79  KALDI_ERR << "Invalid transition-id " << tid
80  << " encountered for utterance "
81  << posterior_reader.Key();
82  transition_accs(tid) += posterior[i][j].second;
83  }
84  }
85  num_done++;
86  }
87 
88  if (per_pdf) {
89  KALDI_LOG << "accumulate counts per pdf-id";
90  int32 num_pdf_ids = trans_model.NumPdfs();
91  Vector<double> pdf_accs(num_pdf_ids);
92  for (int32 i = 1; i < num_transition_ids; i++) {
93  int32 pid = trans_model.TransitionIdToPdf(i);
94  pdf_accs(pid) += transition_accs(i);
95  }
96  Vector<BaseFloat> pdf_accs_float(pdf_accs);
97  Output ko(accs_wxfilename, binary);
98  pdf_accs_float.Write(ko.Stream(), binary);
99  } else {
100  Vector<BaseFloat> transition_accs_float(transition_accs);
101  Output ko(accs_wxfilename, binary);
102  transition_accs_float.Write(ko.Stream(), binary);
103  }
104  KALDI_LOG << "Done computing transition stats over "
105  << num_done << " utterances; wrote stats to "
106  << accs_wxfilename;
107  return (num_done != 0 ? 0 : 1);
108  } catch(const std::exception &e) {
109  std::cerr << e.what();
110  return -1;
111  }
112 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary).
kaldi::int32 int32
int32 TransitionIdToPdf(int32 trans_id) const
int main(int argc, char *argv[])
Definition: post-to-tacc.cc:27
void Register(const std::string &name, bool *ptr, const std::string &doc)
std::istream & Stream()
Definition: kaldi-io.cc:826
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_LOG
Definition: kaldi-error.h:153