logprob-to-post.cc
Go to the documentation of this file.
1 // bin/logprob-to-post.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "gmm/am-diag-gmm.h"
24 #include "hmm/transition-model.h"
25 #include "hmm/hmm-utils.h"
26 #include "hmm/posterior.h"
27 
28 /* Convert a matrix of log-probabilities
29  to something of type Posterior, i.e. for each utterance, a
30  vector<vector<pair<int32, BaseFloat> > >, which is a sparse representation
31  of the probabilities.
32  To avoid getting very tiny values making it non-sparse, we support
33  thresholding, and this can either be done as a simple threshold, or (the
34  default) a pseudo-random thing where you preserve the expectation, e.g.
35  if the threshold is 0.01 and the value is 0.001, it will be zero with
36  probability 0.9 and 0.01 with probability 0.1.
37 */
38 
39 int main(int argc, char *argv[]) {
40  using namespace kaldi;
41  typedef kaldi::int32 int32;
42  try {
43  const char *usage =
44  "Convert a matrix of log-probabilities (e.g. from nnet-logprob) to posteriors\n"
45  "Usage: logprob-to-post [options] <logprob-matrix-rspecifier> <posteriors-wspecifier>\n"
46  "e.g.:\n"
47  " nnet-logprob [args] | logprob-to-post ark:- ark:1.post\n"
48  "Caution: in this particular example, the output would be posteriors of pdf-ids,\n"
49  "rather than transition-ids (c.f. post-to-pdf-post)\n";
50 
51  ParseOptions po(usage);
52 
53  BaseFloat min_post = 0.01;
54  bool random_prune = true; // preserve expectations.
55 
56  po.Register("min-post", &min_post, "Minimum posterior we will output (smaller "
57  "ones are pruned). Also see --random-prune");
58  po.Register("random-prune", &random_prune, "If true, prune posteriors with a "
59  "randomized method that preserves expectations.");
60 
61  po.Read(argc, argv);
62 
63  if (po.NumArgs() != 2) {
64  po.PrintUsage();
65  exit(1);
66  }
67 
68  std::string logprob_rspecifier = po.GetArg(1);
69  std::string posteriors_wspecifier = po.GetArg(2);
70 
71  int32 num_done = 0;
72  SequentialBaseFloatMatrixReader logprob_reader(logprob_rspecifier);
73  PosteriorWriter posterior_writer(posteriors_wspecifier);
74 
75  for (; !logprob_reader.Done(); logprob_reader.Next()) {
76  num_done++;
77  const Matrix<BaseFloat> &logprobs = logprob_reader.Value();
78  // Posterior is vector<vector<pair<int32, BaseFloat> > >
79  Posterior post(logprobs.NumRows());
80  for (int32 i = 0; i < logprobs.NumRows(); i++) {
81  SubVector<BaseFloat> row(logprobs, i);
82  for (int32 j = 0; j < row.Dim(); j++) {
83  BaseFloat p = Exp(row(j));
84  if (p >= min_post) {
85  post[i].push_back(std::make_pair(j, p));
86  } else if (random_prune && (p / min_post) >= RandUniform()) {
87  post[i].push_back(std::make_pair(j, min_post));
88  }
89  }
90  }
91  posterior_writer.Write(logprob_reader.Key(), post);
92  }
93  KALDI_LOG << "Converted " << num_done << " log-prob matrices to posteriors.";
94  return (num_done != 0 ? 0 : 1);
95  } catch(const std::exception &e) {
96  std::cerr << e.what();
97  return -1;
98  }
99 }
100 
101 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
double Exp(double x)
Definition: kaldi-math.h:83
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
int NumArgs() const
Number of positional parameters (c.f. argc-1).
int main(int argc, char *argv[])
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501