nbest-to-prons.cc File Reference
Include dependency graph for nbest-to-prons.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 25 of file nbest-to-prons.cc.

References Output::Close(), kaldi::CompactLatticeToWordProns(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), kaldi::ReadKaldiObject(), ParseOptions::Register(), Output::Stream(), SequentialTableReader< Holder >::Value(), and words.

25  {
26  try {
27  using namespace kaldi;
28  typedef kaldi::int32 int32;
29 
30  const char *usage =
31  "Reads lattices which must be linear (single path), and must be in\n"
32  "CompactLattice form where the transition-ids on the arcs\n"
33  "have been aligned with the word boundaries (see lattice-align-words*)\n"
34  "and outputs a vaguely ctm-like format where each line is of the form:\n"
35  "<utterance-id> <begin-frame> <num-frames> <word> <phone1> <phone2> ... <phoneN>\n"
36  "where the words and phones will both be written as integers. For arcs\n"
37  "in the input lattice that don't correspond to words, <word> may be zero; this\n"
38  "will typically be the case for the optional silences.\n"
39  "\n"
40  "Usage: nbest-to-prons [options] <model> <aligned-linear-lattice-rspecifier> <output-wxfilename>\n"
41  "e.g.: lattice-1best --acoustic-weight=0.08333 ark:1.lats | \\\n"
42  " lattice-align-words data/lang/phones/word_boundary.int exp/dir/final.mdl ark:- ark:- | \\\n"
43  " nbest-to-prons exp/dir/final.mdl ark:- 1.prons\n"
44  "Note: the type of the model doesn't matter as only the transition-model is read.\n";
45 
46  ParseOptions po(usage);
47 
48  bool print_lengths_per_phone = false;
49  po.Register("print-lengths-per-phone", &print_lengths_per_phone,
50  "If true, in place of the length of the word, "
51  "print out a comma-separated list of the lengths of each phone in the word.");
52 
53 
54  po.Read(argc, argv);
55 
56  if (po.NumArgs() != 3) {
57  po.PrintUsage();
58  exit(1);
59  }
60 
61  std::string model_rxfilename = po.GetArg(1),
62  lats_rspecifier = po.GetArg(2),
63  wxfilename = po.GetArg(3);
64 
65 
66  TransitionModel trans_model;
67  ReadKaldiObject(model_rxfilename, &trans_model);
68 
69  SequentialCompactLatticeReader clat_reader(lats_rspecifier);
70 
71  int32 n_done = 0, n_err = 0;
72 
73  Output ko(wxfilename, false); // false == non-binary write mode.
74 
75  for (; !clat_reader.Done(); clat_reader.Next()) {
76  std::string utt = clat_reader.Key();
77  CompactLattice clat = clat_reader.Value();
78 
79  std::vector<int32> words, times, lengths;
80  std::vector<std::vector<int32> > prons;
81  std::vector<std::vector<int32> > phone_lengths;
82 
83  if (!CompactLatticeToWordProns(trans_model, clat, &words, &times, &lengths,
84  &prons, &phone_lengths)) {
85  n_err++;
86  KALDI_WARN << "Format conversion failed for utterance " << utt;
87  } else {
88  KALDI_ASSERT(words.size() == times.size() &&
89  words.size() == lengths.size() &&
90  words.size() == prons.size());
91  for (size_t i = 0; i < words.size(); i++) {
92  int32 sum_of_plengths = 0;
93  for (size_t j = 0; j < phone_lengths[i].size(); j++)
94  sum_of_plengths += phone_lengths[i][j];
95  KALDI_ASSERT(lengths[i] == sum_of_plengths);
96 
97  if (!print_lengths_per_phone)
98  ko.Stream() << utt << ' ' << times[i] << ' ' << lengths[i] << ' '
99  << words[i];
100  else {
101  ko.Stream() << utt << ' ' << times[i] << ' ';
102  for (size_t pl = 0; pl < phone_lengths[i].size()-1; pl++)
103  ko.Stream() << phone_lengths[i][pl] << ',';
104  ko.Stream() << phone_lengths[i][phone_lengths[i].size()-1]
105  << ' ' << words[i];
106  }
107  for (size_t j = 0; j < prons[i].size(); j++)
108  ko.Stream() << ' ' << prons[i][j];
109  ko.Stream() << std::endl;
110  }
111  n_done++;
112  }
113  }
114  ko.Close(); // Note: we don't normally call Close() on these things,
115  // we just let them go out of scope and it happens automatically.
116  // We do it this time in order to avoid wrongly printing out a success message
117  // if the stream was going to fail to close
118 
119  KALDI_LOG << "Printed prons for " << n_done << " linear lattices; "
120  << n_err << " had errors.";
121  return (n_done != 0 ? 0 : 1);
122  } catch(const std::exception &e) {
123  std::cerr << e.what();
124  return -1;
125  }
126 }
int32 words[kMaxOrder]
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
kaldi::int32 int32
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_WARN
Definition: kaldi-error.h:150
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_LOG
Definition: kaldi-error.h:153
bool CompactLatticeToWordProns(const TransitionModel &tmodel, const CompactLattice &clat, std::vector< int32 > *words, std::vector< int32 > *begin_times, std::vector< int32 > *lengths, std::vector< std::vector< int32 > > *prons, std::vector< std::vector< int32 > > *phone_lengths)
This function takes a CompactLattice that should only contain a single linear sequence (e...