26 int main(
int argc,
char *argv[]) {
28 using namespace kaldi;
32 "This tool turns a lattice into a ctm with confidences, based on the\n" 33 "posterior probabilities in the lattice. The word sequence in the\n" 34 "ctm is determined as follows. Firstly we determine the initial word\n" 35 "sequence. In the 3-argument form, we read it from the\n" 36 "<1best-rspecifier> input; otherwise it is the 1-best of the lattice.\n" 37 "Then, if --decode-mbr=true, we iteratively refine the hypothesis\n" 38 "using Minimum Bayes Risk decoding. (Note that the default value of decode_mbr\n" 39 "is true. If you provide <1best-rspecifier> from MAP decoding, the output ctm\n" 40 "from MBR decoding may be mismatched with the provided 1best hypothesis (the\n" 41 "starting point of optimization). If you don't need confidences,\n" 42 "you can do lattice-1best and pipe to nbest-to-ctm. The ctm this\n" 43 "program produces will be relative to the utterance-id; a standard\n" 44 "ctm relative to the filename can be obtained using\n" 45 "utils/convert_ctm.pl. The times produced by this program will only\n" 46 "be meaningful if you do lattice-align-words on the input. The\n" 47 "<1-best-rspecifier> could be the output of utils/int2sym.pl or\n" 50 "Usage: lattice-to-ctm-conf [options] <lattice-rspecifier> \\\n" 52 "Usage: lattice-to-ctm-conf [options] <lattice-rspecifier> \\\n" 53 " [<1best-rspecifier> [<times-rspecifier]] <ctm-wxfilename>\n" 54 " e.g.: lattice-to-ctm-conf --acoustic-scale=0.1 ark:1.lats 1.ctm\n" 55 " or: lattice-to-ctm-conf --acoustic-scale=0.1 --decode-mbr=false\\\n" 56 " ark:1.lats ark:1.1best 1.ctm\n" 57 "See also: lattice-mbr-decode, nbest-to-ctm, lattice-arc-post,\n" 58 " steps/get_ctm.sh, steps/get_train_ctm.sh and utils/convert_ctm.pl.\n";
61 BaseFloat acoustic_scale = 1.0, inv_acoustic_scale = 1.0, lm_scale = 1.0;
63 int32 confidence_digits = 2;
65 std::string word_syms_filename;
66 po.
Register(
"acoustic-scale", &acoustic_scale,
"Scaling factor for " 67 "acoustic likelihoods");
68 po.
Register(
"inv-acoustic-scale", &inv_acoustic_scale,
"An alternative way " 69 "of setting the acoustic scale: you can set its inverse.");
70 po.
Register(
"lm-scale", &lm_scale,
"Scaling factor for language model " 72 po.
Register(
"frame-shift", &frame_shift,
"Time in seconds between frames.");
73 po.
Register(
"confidence-digits", &confidence_digits,
"Number of decimal digits for confidences in 'ctm'.");
86 KALDI_ASSERT(acoustic_scale == 1.0 || inv_acoustic_scale == 1.0);
87 if (inv_acoustic_scale != 1.0)
88 acoustic_scale = 1.0 / inv_acoustic_scale;
90 std::string lats_rspecifier, one_best_rspecifier,
91 times_rspecifier, ctm_wxfilename;
94 lats_rspecifier = po.
GetArg(1);
95 one_best_rspecifier =
"";
96 ctm_wxfilename = po.
GetArg(2);
98 lats_rspecifier = po.
GetArg(1);
99 one_best_rspecifier = po.
GetArg(2);
100 ctm_wxfilename = po.
GetArg(3);
101 }
else if (po.
NumArgs() == 4) {
102 lats_rspecifier = po.
GetArg(1);
103 one_best_rspecifier = po.
GetArg(2);
104 times_rspecifier = po.
GetArg(3);
105 ctm_wxfilename = po.
GetArg(4);
113 KALDI_ERR <<
"The output ctm file should not be a wspecifier. " 114 <<
"Please use things like 1.ctm istead of ark:-";
124 Output ko(ctm_wxfilename,
false);
125 ko.
Stream() << std::fixed;
127 ko.
Stream().precision(confidence_digits);
129 int32 n_done = 0, n_words = 0;
132 for (; !clat_reader.
Done(); clat_reader.
Next()) {
133 std::string key = clat_reader.
Key();
140 if (one_best_rspecifier ==
"") {
144 if (!one_best_reader.
HasKey(key)) {
145 KALDI_WARN <<
"No 1-best present for utterance " << key;
148 if (times_rspecifier !=
"" && !times_reader.
HasKey(key)) {
149 KALDI_WARN <<
"No 'times' present for utterance " << key;
153 if (times_rspecifier ==
"") {
154 const std::vector<int32> &one_best = one_best_reader.
Value(key);
158 const std::vector<int32> &one_best = one_best_reader.
Value(key);
159 const std::vector<std::pair<BaseFloat,BaseFloat> > &times = times_reader.
Value(key);
166 const std::vector<std::pair<BaseFloat, BaseFloat> > &times =
168 KALDI_ASSERT(conf.size() == words.size() && words.size() == times.size());
169 for (
size_t i = 0;
i < words.size();
i++) {
171 ko.
Stream() << key <<
" 1 " << (frame_shift * times[
i].first) <<
' ' 172 << (frame_shift * (times[
i].second-times[
i].first)) <<
' ' 173 << words[
i] <<
' ' << conf[
i] <<
'\n';
175 KALDI_LOG <<
"For utterance " << key <<
", Bayes Risk " 177 << std::accumulate(conf.begin(),conf.end(),0.0) / words.size();
184 KALDI_LOG <<
"Done " << n_done <<
" lattices.";
185 KALDI_LOG <<
"Overall average Bayes Risk per sentence is " 186 << (tot_bayes_risk / n_done) <<
" and per word, " 187 << (tot_bayes_risk / n_words);
189 return (n_done != 0 ? 0 : 1);
190 }
catch(
const std::exception &e) {
191 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
const std::vector< BaseFloat > & GetOneBestConfidences() const
Outputs the confidences for the one-best transcript.
The implementation of the Minimum Bayes Risk decoding method described in "Minimum Bayes Risk decodin...
bool print_silence
Boolean configuration parameter: if true, the 1-best path will 'keep' the <eps> bins,...
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
int main(int argc, char *argv[])
const T & Value(const std::string &key)
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
const std::vector< std::pair< BaseFloat, BaseFloat > > & GetOneBestTimes() const
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
const std::vector< int32 > & GetOneBest() const
std::vector< std::vector< double > > LatticeScale(double lmwt, double acwt)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
This class does the word-level Minimum Bayes Risk computation, and gives you either the 1-best MBR ou...
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
fst::VectorFst< CompactLatticeArc > CompactLattice
WspecifierType ClassifyWspecifier(const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
void Register(OptionsItf *opts)
BaseFloat GetBayesRisk() const
Returns the expected WER over this sentence (assuming model correctness).