get-post-on-ali.cc File Reference
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/am-diag-gmm.h"
#include "hmm/transition-model.h"
#include "hmm/hmm-utils.h"
#include "hmm/posterior.h"
Include dependency graph for get-post-on-ali.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 30 of file get-post-on-ali.cc.

References SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_LOG, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

// Body of main() from get-post-on-ali.cc (the leading numbers are the line
// numbers of the original source file).
// For every utterance in the posteriors table that also has an alignment, it
// writes a vector holding, for each frame, the posterior probability of the
// index chosen by the alignment (zero if that index is absent on the frame).
// Returns 0 on normal completion and -1 if a std::exception is caught; calls
// exit(1) after printing the usage message if the argument count is not three.
30  {
31  using namespace kaldi;
32  typedef kaldi::int32 int32;
33  try {
34  const char *usage =
35  "Given input posteriors, e.g. derived from lattice-to-post, and an alignment\n"
36  "typically derived from the best path of a lattice, outputs the probability in\n"
37  "the posterior of the corresponding index in the alignment, or zero if it was\n"
38  "not there. These are output as a vector of weights, one per utterance.\n"
39  "While, by default, lattice-to-post (as a source of posteriors) and sources of\n"
40  "alignments such as lattice-best-path will output transition-ids as the index,\n"
41  "it will generally make sense to either convert these to pdf-ids using\n"
42  "post-to-pdf-post and ali-to-pdf respectively, or to phones using post-to-phone-post\n"
43  "and (ali-to-phones --per-frame=true). Since this program only sees the integer\n"
44  "indexes, it does not care what they represent-- but of course they should match\n"
45  "(e.g. don't input posteriors with transition-ids and alignments with pdf-ids).\n"
46  "See http://kaldi-asr.org/doc/hmm.html#transition_model_identifiers for an\n"
47  "explanation of these types of indexes.\n"
48  "\n"
49  "See also: post-to-tacc, weight-post, post-to-weights, reverse-weights\n"
50  "\n"
51  "Usage: get-post-on-ali [options] <posteriors-rspecifier> <ali-rspecifier> <weights-wspecifier>\n"
52  "e.g.: get-post-on-ali ark:post.ark ark,s,cs:ali.ark ark:weights.ark\n";
53 
54  ParseOptions po(usage);
55 
56  po.Read(argc, argv);
57 
58  if (po.NumArgs() != 3) {  // exactly three positional arguments are required
59  po.PrintUsage();
60  exit(1);
61  }
62 
63  std::string posteriors_rspecifier = po.GetArg(1),
64  alignments_rspecifier = po.GetArg(2),
65  confidences_wspecifier = po.GetArg(3);
66 
67  int32 num_done = 0, num_no_alignment = 0;
68  SequentialPosteriorReader posterior_reader(posteriors_rspecifier);
69  RandomAccessInt32VectorReader alignments_reader(alignments_rspecifier);
70  BaseFloatVectorWriter confidences_writer(confidences_wspecifier);
71 
72  for (; !posterior_reader.Done(); posterior_reader.Next()) {  // the posteriors table drives the loop
73  std::string key = posterior_reader.Key();
74  if (!alignments_reader.HasKey(key)) {
75  num_no_alignment++;  // nothing is written for this key; it is only counted for the final log line
76  } else {
77  // Posterior for the current key: one list of (index, probability) pairs per frame.
78  const kaldi::Posterior &posterior = posterior_reader.Value();
79  int32 num_frames = static_cast<int32>(posterior.size());
80  // Alignment stored under the same key: one integer index per frame.
81  const std::vector<int32> &alignment = alignments_reader.Value(key);
82  // The posterior and the alignment must cover the same number of frames.
83  KALDI_ASSERT(num_frames == alignment.size());
84 
85  // For each frame, look up the posterior probability of the index chosen by the alignment.
86  Vector<BaseFloat> confidence(num_frames);
87  for(int32 i = 0; i < num_frames; i++) {
88  BaseFloat post_i = 0.0;  // stays zero when the aligned index is not among this frame's entries
89  for(int32 j = 0; j < posterior[i].size(); j++) {
90  if(alignment[i] == posterior[i][j].first) {
91  post_i = posterior[i][j].second;
92  break;  // the first matching entry is used
93  }
94  }
95  confidence(i) = post_i;
96  }
97 
98  // Write the per-frame values for this utterance under its key.
99  confidences_writer.Write(key,confidence);
100  num_done++;
101  }
102  }
103  KALDI_LOG << "Done getting the posteriors under the alignment path for "
104  << num_done << " utterances. " << num_no_alignment << " with missing alignments.";
105  return 0;
106  } catch(const std::exception &e) {  // report the exception text on stderr and signal failure
107  std::cerr << e.what();
108  return -1;
109  }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
Definition: chain.dox:20
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_LOG
Definition: kaldi-error.h:153