lattice-to-ctm-conf.cc
Go to the documentation of this file.
1 // latbin/lattice-to-ctm-conf.cc
2 
3 // Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey)
4 // 2015 Guoguo Chen
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include "util/common-utils.h"
22 #include "util/kaldi-table.h"
23 #include "lat/sausages.h"
24 #include <numeric>
25 
26 int main(int argc, char *argv[]) {
27  try {
28  using namespace kaldi;
29  typedef kaldi::int32 int32;
30 
31  const char *usage =
32  "This tool turns a lattice into a ctm with confidences, based on the\n"
33  "posterior probabilities in the lattice. The word sequence in the\n"
34  "ctm is determined as follows. Firstly we determine the initial word\n"
35  "sequence. In the 3-argument form, we read it from the\n"
36  "<1best-rspecifier> input; otherwise it is the 1-best of the lattice.\n"
37  "Then, if --decode-mbr=true, we iteratively refine the hypothesis\n"
38  "using Minimum Bayes Risk decoding. (Note that the default value of decode_mbr\n"
39  "is true. If you provide <1best-rspecifier> from MAP decoding, the output ctm\n"
40  "from MBR decoding may be mismatched with the provided 1best hypothesis (the\n"
41  "starting point of optimization). If you don't need confidences,\n"
42  "you can do lattice-1best and pipe to nbest-to-ctm. The ctm this\n"
43  "program produces will be relative to the utterance-id; a standard\n"
44  "ctm relative to the filename can be obtained using\n"
45  "utils/convert_ctm.pl. The times produced by this program will only\n"
46  "be meaningful if you do lattice-align-words on the input. The\n"
47  "<1-best-rspecifier> could be the output of utils/int2sym.pl or\n"
48  "nbest-to-linear.\n"
49  "\n"
50  "Usage: lattice-to-ctm-conf [options] <lattice-rspecifier> \\\n"
51  " <ctm-wxfilename>\n"
52  "Usage: lattice-to-ctm-conf [options] <lattice-rspecifier> \\\n"
53  " [<1best-rspecifier> [<times-rspecifier]] <ctm-wxfilename>\n"
54  " e.g.: lattice-to-ctm-conf --acoustic-scale=0.1 ark:1.lats 1.ctm\n"
55  " or: lattice-to-ctm-conf --acoustic-scale=0.1 --decode-mbr=false\\\n"
56  " ark:1.lats ark:1.1best 1.ctm\n"
57  "See also: lattice-mbr-decode, nbest-to-ctm, lattice-arc-post,\n"
58  " steps/get_ctm.sh, steps/get_train_ctm.sh and utils/convert_ctm.pl.\n";
59 
60  ParseOptions po(usage);
61  BaseFloat acoustic_scale = 1.0, inv_acoustic_scale = 1.0, lm_scale = 1.0;
62  BaseFloat frame_shift = 0.01;
63  int32 confidence_digits = 2;
64 
65  std::string word_syms_filename;
66  po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for "
67  "acoustic likelihoods");
68  po.Register("inv-acoustic-scale", &inv_acoustic_scale, "An alternative way "
69  "of setting the acoustic scale: you can set its inverse.");
70  po.Register("lm-scale", &lm_scale, "Scaling factor for language model "
71  "probabilities");
72  po.Register("frame-shift", &frame_shift, "Time in seconds between frames.");
73  po.Register("confidence-digits", &confidence_digits, "Number of decimal digits for confidences in 'ctm'.");
74 
75 
76  MinimumBayesRiskOptions mbr_opts;
77  mbr_opts.Register(&po);
78 
79  po.Read(argc, argv);
80 
81  if (po.NumArgs() != 2 && po.NumArgs() != 3 && po.NumArgs() != 4) {
82  po.PrintUsage();
83  exit(1);
84  }
85 
86  KALDI_ASSERT(acoustic_scale == 1.0 || inv_acoustic_scale == 1.0);
87  if (inv_acoustic_scale != 1.0)
88  acoustic_scale = 1.0 / inv_acoustic_scale;
89 
90  std::string lats_rspecifier, one_best_rspecifier,
91  times_rspecifier, ctm_wxfilename;
92 
93  if (po.NumArgs() == 2) {
94  lats_rspecifier = po.GetArg(1);
95  one_best_rspecifier = "";
96  ctm_wxfilename = po.GetArg(2);
97  } else if (po.NumArgs() == 3) {
98  lats_rspecifier = po.GetArg(1);
99  one_best_rspecifier = po.GetArg(2);
100  ctm_wxfilename = po.GetArg(3);
101  } else if (po.NumArgs() == 4) {
102  lats_rspecifier = po.GetArg(1);
103  one_best_rspecifier = po.GetArg(2);
104  times_rspecifier = po.GetArg(3);
105  ctm_wxfilename = po.GetArg(4);
106  }
107 
108 
109  // Ensure the output ctm file is not a wspecifier
110  WspecifierType ctm_wx_type;
111  ctm_wx_type = ClassifyWspecifier(ctm_wxfilename, NULL, NULL, NULL);
112  if(ctm_wx_type != kNoWspecifier){
113  KALDI_ERR << "The output ctm file should not be a wspecifier. "
114  << "Please use things like 1.ctm istead of ark:-";
115  exit(1);
116  }
117 
118  // Read as compact lattice.
119  SequentialCompactLatticeReader clat_reader(lats_rspecifier);
120 
121  RandomAccessInt32VectorReader one_best_reader(one_best_rspecifier);
122  RandomAccessBaseFloatPairVectorReader times_reader(times_rspecifier);
123 
124  Output ko(ctm_wxfilename, false); // false == non-binary writing mode.
125  ko.Stream() << std::fixed; // Set to "fixed" floating point model, where precision() specifies
126  // the #digits after the decimal point.
127  ko.Stream().precision(confidence_digits);
128 
129  int32 n_done = 0, n_words = 0;
130  BaseFloat tot_bayes_risk = 0.0;
131 
132  for (; !clat_reader.Done(); clat_reader.Next()) {
133  std::string key = clat_reader.Key();
134  CompactLattice clat = clat_reader.Value();
135  clat_reader.FreeCurrent();
136  fst::ScaleLattice(fst::LatticeScale(lm_scale, acoustic_scale), &clat);
137 
138  MinimumBayesRisk *mbr = NULL;
139 
140  if (one_best_rspecifier == "") {
141  mbr = new MinimumBayesRisk(clat, mbr_opts);
142  } else {
143  // check,
144  if (!one_best_reader.HasKey(key)) {
145  KALDI_WARN << "No 1-best present for utterance " << key;
146  continue;
147  }
148  if (times_rspecifier != "" && !times_reader.HasKey(key)) {
149  KALDI_WARN << "No 'times' present for utterance " << key;
150  continue;
151  }
152  // get the 'mbr' decoding object,
153  if (times_rspecifier == "") {
154  const std::vector<int32> &one_best = one_best_reader.Value(key);
155  mbr = new MinimumBayesRisk(clat, one_best, mbr_opts); // no 'times',
156  } else {
157  // with initial 'times' of the bins,
158  const std::vector<int32> &one_best = one_best_reader.Value(key);
159  const std::vector<std::pair<BaseFloat,BaseFloat> > &times = times_reader.Value(key);
160  mbr = new MinimumBayesRisk(clat, one_best, times, mbr_opts);
161  }
162  }
163 
164  const std::vector<BaseFloat> &conf = mbr->GetOneBestConfidences();
165  const std::vector<int32> &words = mbr->GetOneBest();
166  const std::vector<std::pair<BaseFloat, BaseFloat> > &times =
167  mbr->GetOneBestTimes();
168  KALDI_ASSERT(conf.size() == words.size() && words.size() == times.size());
169  for (size_t i = 0; i < words.size(); i++) {
170  KALDI_ASSERT(words[i] != 0 || mbr_opts.print_silence); // Should not have epsilons.
171  ko.Stream() << key << " 1 " << (frame_shift * times[i].first) << ' '
172  << (frame_shift * (times[i].second-times[i].first)) << ' '
173  << words[i] << ' ' << conf[i] << '\n';
174  }
175  KALDI_LOG << "For utterance " << key << ", Bayes Risk "
176  << mbr->GetBayesRisk() << ", avg. confidence per-word "
177  << std::accumulate(conf.begin(),conf.end(),0.0) / words.size();
178  n_done++;
179  n_words += mbr->GetOneBest().size();
180  tot_bayes_risk += mbr->GetBayesRisk();
181  delete mbr;
182  }
183 
184  KALDI_LOG << "Done " << n_done << " lattices.";
185  KALDI_LOG << "Overall average Bayes Risk per sentence is "
186  << (tot_bayes_risk / n_done) << " and per word, "
187  << (tot_bayes_risk / n_words);
188 
189  return (n_done != 0 ? 0 : 1);
190  } catch(const std::exception &e) {
191  std::cerr << e.what();
192  return -1;
193  }
194 }
int32 words[kMaxOrder]
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
const std::vector< BaseFloat > & GetOneBestConfidences() const
Outputs the confidences for the one-best transcript.
Definition: sausages.h:132
kaldi::int32 int32
The implementation of the Minimum Bayes Risk decoding method described in "Minimum Bayes Risk decodin...
Definition: sausages.h:56
bool print_silence
Boolean configuration parameter: if true, the 1-best path will 'keep' the <eps> bins.
Definition: sausages.h:62
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
int main(int argc, char *argv[])
const T & Value(const std::string &key)
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
const std::vector< std::pair< BaseFloat, BaseFloat > > & GetOneBestTimes() const
Definition: sausages.h:122
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
const std::vector< int32 > & GetOneBest() const
Definition: sausages.h:104
std::vector< std::vector< double > > LatticeScale(double lmwt, double acwt)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
This class does the word-level Minimum Bayes Risk computation, and gives you either the 1-best MBR ou...
Definition: sausages.h:77
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
WspecifierType
Definition: kaldi-table.h:106
bool HasKey(const std::string &key)
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
WspecifierType ClassifyWspecifier(const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
Definition: kaldi-table.cc:135
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Register(OptionsItf *opts)
Definition: sausages.h:66
BaseFloat GetBayesRisk() const
Returns the expected WER over this sentence (assuming model correctness).
Definition: sausages.h:137
#define KALDI_LOG
Definition: kaldi-error.h:153