lattice-arc-post.cc
Go to the documentation of this file.
1 // latbin/lattice-arc-post.cc
2 
3 // Copyright 2015 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
21 #include "util/common-utils.h"
22 #include "fstext/fstext-lib.h"
23 #include "lat/kaldi-lattice.h"
24 #include "lat/lattice-functions.h"
25 
26 namespace kaldi {
27 
28 // This class computes and outputs
29 // the information about arc posteriors.
30 
32  public:
33  // Note: 'clat' must be topologically sorted.
35  BaseFloat min_post,
36  bool print_alignment,
37  const TransitionModel *trans_model = NULL):
38  clat_(clat), min_post_(min_post), print_alignment_(print_alignment),
39  trans_model_(trans_model) { }
40 
41  // returns the number of arc posteriors that it output.
42  int32 OutputPosteriors(const std::string &utterance,
43  std::ostream &os) {
44  int32 num_post = 0;
46  return num_post;
48  return num_post;
49 
51  if (clat_.Start() < 0)
52  return 0;
53  double tot_like = beta_[clat_.Start()];
54 
55  int32 num_states = clat_.NumStates();
56  for (int32 state = 0; state < num_states; state++) {
57  for (fst::ArcIterator<CompactLattice> aiter(clat_, state);
58  !aiter.Done(); aiter.Next()) {
59  const CompactLatticeArc &arc = aiter.Value();
60  double arc_loglike = -ConvertToCost(arc.weight) +
61  alpha_[state] + beta_[arc.nextstate] - tot_like;
62  KALDI_ASSERT(arc_loglike < 0.1 &&
63  "Bad arc posterior in forward-backward computation");
64  if (arc_loglike > 0.0) arc_loglike = 0.0;
65  int32 num_frames = arc.weight.String().size(),
66  word = arc.ilabel;
67  BaseFloat arc_post = exp(arc_loglike);
68  if (arc_post <= min_post_) continue;
69  os << utterance << '\t' << state_times_[state] << '\t' << num_frames
70  << '\t' << arc_post << '\t' << word;
71  if (print_alignment_) {
72  os << '\t';
73  const std::vector<int32> &ali = arc.weight.String();
74  for (int32 frame = 0; frame < num_frames; frame++) {
75  os << ali[frame];
76  if (frame + 1 < num_frames) os << ',';
77  }
78  }
79  if (trans_model_ != NULL) {
80  // we want to print the phone sequence too.
81  os << '\t';
82  const std::vector<int32> &ali = arc.weight.String();
83  bool first_phone = true;
84  for (int32 frame = 0; frame < num_frames; frame++) {
85  if (trans_model_->IsFinal(ali[frame])) {
86  if (first_phone) first_phone = false;
87  else os << ' ';
88  os << trans_model_->TransitionIdToPhone(ali[frame]);
89  }
90  }
91  }
92  os << std::endl;
93  num_post++;
94  }
95  }
96  return num_post;
97  }
98  private:
100  std::vector<double> alpha_;
101  std::vector<double> beta_;
102  std::vector<int32> state_times_;
103 
107 };
108 
109 }
110 
111 
112 int main(int argc, char *argv[]) {
113  try {
114  typedef kaldi::int32 int32;
115  using fst::SymbolTable;
116  using fst::VectorFst;
117  using fst::StdArc;
118 
119  const char *usage =
120  "Print out information regarding posteriors of lattice arcs\n"
121  "This program computes posteriors from a lattice and prints out\n"
122  "information for each arc (the format is reminiscent of ctm, but\n"
123  "contains information from multiple paths). Each line is:\n"
124  " <utterance-id> <start-frame> <num-frames> <posterior> <word> [<ali>] [<phone1> <phone2>...]\n"
125  "for instance:\n"
126  "2013a04-bk42\t104\t26\t0.95\t0\t11,242,242,242,71,894,894,62,63,63,63,63\t2 8 9\n"
127  "where the --print-alignment option determines whether the alignments (i.e. the\n"
128  "sequences of transition-ids) are printed, and the phones are printed only if the\n"
129  "<model> is supplied on the command line. Note, there are tabs between the major\n"
130  "fields, but the phones are separated by spaces.\n"
131  "Usage: lattice-arc-post [<model>] <lattices-rspecifier> <output-wxfilename>\n"
132  "e.g.: lattice-arc-post --acoustic-scale=0.1 final.mdl 'ark:gunzip -c lat.1.gz|' post.txt\n"
133  "You will probably want to word-align the lattices (e.g. lattice-align-words or\n"
134  "lattice-align-words-lexicon) before this program, apply an acoustic scale either\n"
135  "via the --acoustic-scale option or using lattice-scale.\n"
136  "See also: lattice-post, lattice-to-ctm-conf, nbest-to-ctm\n";
137 
138  kaldi::BaseFloat acoustic_scale = 1.0, lm_scale = 1.0;
139  kaldi::BaseFloat min_post = 0.0001;
140  bool print_alignment = false;
141 
142  kaldi::ParseOptions po(usage);
143  po.Register("acoustic-scale", &acoustic_scale,
144  "Scaling factor for acoustic likelihoods");
145  po.Register("lm-scale", &lm_scale,
146  "Scaling factor for \"graph costs\" (including LM costs)");
147  po.Register("print-alignment", &print_alignment,
148  "If true, print alignments (i.e. sequences of transition-ids) for each\n"
149  "arc.");
150  po.Register("min-post", &min_post,
151  "Arc posteriors below this value will be pruned away");
152  po.Read(argc, argv);
153 
154  if (po.NumArgs() < 2 || po.NumArgs() > 3) {
155  po.PrintUsage();
156  exit(1);
157  }
158 
159  if (acoustic_scale == 0.0)
160  KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
161 
162  kaldi::TransitionModel trans_model;
163 
164  std::string lats_rspecifier, output_wxfilename;
165  if (po.NumArgs() == 3) {
166  ReadKaldiObject(po.GetArg(1), &trans_model);
167  lats_rspecifier = po.GetArg(2);
168  output_wxfilename = po.GetArg(3);
169  } else {
170  lats_rspecifier = po.GetArg(1);
171  output_wxfilename = po.GetArg(2);
172  }
173 
174 
175  kaldi::Output output(output_wxfilename, false);
176 
177  // Read as regular lattice
178  kaldi::SequentialCompactLatticeReader clat_reader(lats_rspecifier);
179 
180  int64 tot_post = 0;
181  int32 num_lat_done = 0, num_lat_err = 0;
182 
183  for (; !clat_reader.Done(); clat_reader.Next()) {
184  std::string key = clat_reader.Key();
185  kaldi::CompactLattice clat = clat_reader.Value();
186  // FreeCurrent() is an optimization that prevents the lattice from being
187  // copied unnecessarily (OpenFst does copy-on-write).
188  clat_reader.FreeCurrent();
189  fst::ScaleLattice(fst::LatticeScale(lm_scale, acoustic_scale), &clat);
191 
193  clat, min_post, print_alignment,
194  (po.NumArgs() == 3 ? &trans_model : NULL));
195 
196  int32 num_post = computer.OutputPosteriors(key, output.Stream());
197  if (num_post != 0) {
198  num_lat_done++;
199  tot_post += num_post;
200  } else {
201  num_lat_err++;
202  KALDI_WARN << "No posterior printed for " << key;
203  }
204  }
205  KALDI_LOG << "Printed posteriors for " << num_lat_done << " lattices ("
206  << num_lat_err << " with errors); on average printed "
207  << (tot_post / (num_lat_done == 0 ? 1 : num_lat_done))
208  << " posteriors per lattice.";
209  return (num_lat_done > 0 ? 0 : 1);
210  } catch(const std::exception &e) {
211  std::cerr << e.what();
212  return -1;
213  }
214 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
std::vector< int32 > state_times_
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
fst::StdArc StdArc
std::vector< double > alpha_
ArcPosteriorComputer(const CompactLattice &clat, BaseFloat min_post, bool print_alignment, const TransitionModel *trans_model=NULL)
kaldi::int32 int32
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
bool ComputeCompactLatticeBetas(const CompactLattice &clat, vector< double > *beta)
double ConvertToCost(const LatticeWeightTpl< Float > &w)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
std::vector< std::vector< double > > LatticeScale(double lmwt, double acwt)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
int32 CompactLatticeStateTimes(const CompactLattice &lat, vector< int32 > *times)
As LatticeStateTimes, but in the CompactLattice format.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int32 OutputPosteriors(const std::string &utterance, std::ostream &os)
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
Definition: kaldi-lattice.h:42
void TopSortCompactLatticeIfNeeded(CompactLattice *clat)
Topologically sort the compact lattice if not already topologically sorted.
int main(int argc, char *argv[])
std::vector< double > beta_
bool ComputeCompactLatticeAlphas(const CompactLattice &clat, vector< double > *alpha)
#define KALDI_LOG
Definition: kaldi-error.h:153
bool IsFinal(int32 trans_id) const
int32 TransitionIdToPhone(int32 trans_id) const
const CompactLattice & clat_
const TransitionModel * trans_model_