post-to-phone-post.cc
Go to the documentation of this file.
1 // bin/post-to-phone-post.cc
2 
3 // Copyright 2012-2013 Johns Hopkins University (author: Daniel Povey)
4 // 2019 Daniel Povey
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "hmm/transition-model.h"
25 #include "hmm/posterior.h"
26 
27 int main(int argc, char *argv[]) {
28  try {
29  using namespace kaldi;
30  typedef kaldi::int32 int32;
31 
32  const char *usage =
33  "Convert posteriors (or pdf-level posteriors) to phone-level posteriors\n"
34  "See also: post-to-pdf-post, post-to-weights, get-post-on-ali\n"
35  "\n"
36  "First, the usage when your posteriors are on transition-ids (the normal case):\n"
37  "Usage: post-to-phone-post [options] <model> <post-rspecifier> <phone-post-wspecifier>\n"
38  " e.g.: post-to-phone-post --binary=false 1.mdl \"ark:ali-to-post 1.ali|\" ark,t:-\n"
39  "\n"
40  "Next, the usage when your posteriors are on pdfs (e.g. if they are neural-net\n"
41  "posteriors)\n"
42  "post-to-phone-post --transition-id-counts=final.tacc 1.mdl ark:pdf_post.ark ark,t:-\n"
43  "See documentation of --transition-id-counts option for more details.";
44 
45  std::string tacc_rxfilename;
46 
47  ParseOptions po(usage);
48 
49  po.Register("transition-id-counts", &tacc_rxfilename, "Rxfilename where vector of counts\n"
50  "for transition-ids can be read (would normally come from training data\n"
51  "alignments, e.g. from ali-to-post and then post-to-tacc with --per-pdf=false)\n");
52 
53  po.Read(argc, argv);
54 
55  if (po.NumArgs() != 3) {
56  po.PrintUsage();
57  exit(1);
58  }
59 
60  std::string model_rxfilename = po.GetArg(1),
61  post_rspecifier = po.GetArg(2),
62  phone_post_wspecifier = po.GetArg(3);
63 
64  kaldi::SequentialPosteriorReader posterior_reader(post_rspecifier);
65  kaldi::PosteriorWriter posterior_writer(phone_post_wspecifier);
66 
67  TransitionModel trans_model;
68  {
69  bool binary_in;
70  Input ki(model_rxfilename, &binary_in);
71  trans_model.Read(ki.Stream(), binary_in);
72  }
73  int32 num_done = 0;
74 
75 
76  if (tacc_rxfilename.empty()) {
77  // Input is transition-ids
78  for (; !posterior_reader.Done(); posterior_reader.Next()) {
79  const kaldi::Posterior &posterior = posterior_reader.Value();
80  kaldi::Posterior phone_posterior;
81  ConvertPosteriorToPhones(trans_model, posterior, &phone_posterior);
82  posterior_writer.Write(posterior_reader.Key(), phone_posterior);
83  num_done++;
84  }
85  } else {
86  Vector<BaseFloat> transition_counts;
87  ReadKaldiObject(tacc_rxfilename, &transition_counts);
88  int32 num_pdfs = trans_model.NumPdfs(),
89  num_tids = trans_model.NumTransitionIds();
90  if (transition_counts.Dim() != num_tids + 1) {
91  KALDI_ERR << "Wrong size for transition counts in " << tacc_rxfilename
92  << ", expected " << num_tids << " + 1, got "
93  << transition_counts.Dim();
94  }
95  // Maps from pdf-id to a map from phone -> count associated with that
96  // phone.
97  std::vector<std::unordered_map<int32, BaseFloat> > pdf_to_phones(num_pdfs);
98 
99  for (int32 i = 1; i <= num_tids; i++) {
100  BaseFloat count = transition_counts(i);
101  int32 phone = trans_model.TransitionIdToPhone(i),
102  pdf_id = trans_model.TransitionIdToPdf(i);
103  // Relying on C++11 value-initialization thingies that should make the
104  // map's elements default to zero.
105  pdf_to_phones[pdf_id][phone] += count;
106  }
107 
108  for (int32 i = 0; i < num_pdfs; i++) {
109  BaseFloat denominator = 0.0;
110  for (auto p: pdf_to_phones[i])
111  denominator += p.second;
112  for (auto iter = pdf_to_phones[i].begin(); iter != pdf_to_phones[i].end();
113  ++iter) {
114  if (denominator != 0.0)
115  iter->second /= denominator;
116  else
117  iter->second = 1.0 / pdf_to_phones[i].size();
118  }
119  }
120 
121  // Input is pdf-ids
122  for (; !posterior_reader.Done(); posterior_reader.Next()) {
123  const kaldi::Posterior &posterior = posterior_reader.Value();
124  int32 T = posterior.size();
125  kaldi::Posterior phone_posterior(T);
126  std::unordered_map<int32, BaseFloat> phone_to_count;
127  for (int32 t = 0; t < T; t++) {
128  phone_to_count.clear();
129  for (auto p : posterior[t]) {
130  int32 pdf_id = p.first;
131  BaseFloat count = p.second;
132  if (pdf_id < 0 || pdf_id >= num_pdfs)
133  KALDI_ERR << "pdf-id on input out of range, expected [0.." << (num_pdfs-1)
134  << ", got: " << pdf_id;
135  for (auto q: pdf_to_phones[pdf_id]) {
136  int32 phone = q.first;
137  BaseFloat prob = q.second;
138  if (prob != 0.0)
139  phone_to_count[phone] += count * prob;
140  }
141  }
142  for (auto p : phone_to_count) {
143  phone_posterior[t].push_back(
144  std::pair<int32, BaseFloat>(p.first, p.second));
145  }
146  }
147  posterior_writer.Write(posterior_reader.Key(), phone_posterior);
148  num_done++;
149  }
150  }
151  KALDI_LOG << "Done converting posteriors to phone posteriors for "
152  << num_done << " utterances.";
153  return (num_done != 0 ? 0 : 1);
154  } catch(const std::exception &e) {
155  std::cerr << e.what();
156  return -1;
157  }
158 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
int32 TransitionIdToPdf(int32 trans_id) const
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
const size_t count
std::istream & Stream()
Definition: kaldi-io.cc:826
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int main(int argc, char *argv[])
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
Definition: kaldi-vector.h:406
void ConvertPosteriorToPhones(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over phones.
Definition: posterior.cc:348
#define KALDI_LOG
Definition: kaldi-error.h:153
int32 TransitionIdToPhone(int32 trans_id) const