27 using namespace kaldi;
31 "Takes as input lattices which must be linear (single path),\n" 32 "and must be in CompactLattice form where the transition-ids on the arcs\n" 33 "have been aligned with the word boundaries... typically the input will\n" 34 "be a lattice that has been piped through lattice-1best and then\n" 35 "lattice-align-words. On the other hand, whenever we directly pipe\n" 36 "the output of lattice-align-words-lexicon into nbest-to-ctm,\n" 37 "we need to put the command `lattice-1best ark:- ark:-` between them,\n" 38 "because even for linear lattices, lattice-align-words-lexicon can\n" 39 "in certain cases produce non-linear outputs (due to disambiguity\n" 40 "in the lexicon). It outputs ctm format (with integers in place of words),\n" 41 "assuming the frame length is 0.01 seconds by default (change this with the\n" 42 "--frame-length option). Note: the output is in the form\n" 43 "<utterance-id> 1 <begin-time> <end-time> <word-id>\n" 44 "and you can post-process this to account for segmentation issues and to \n" 45 "convert ints to words; note, the times are relative to start of the utterance.\n" 47 "Usage: nbest-to-ctm [options] <aligned-linear-lattice-rspecifier> <ctm-wxfilename>\n" 48 "e.g.: lattice-1best --acoustic-weight=0.08333 ark:1.lats | \\\n" 49 " lattice-align-words data/lang/phones/word_boundary.int exp/dir/final.mdl ark:- ark:- | \\\n" 50 " nbest-to-ctm ark:- 1.ctm\n" 51 "e.g.: lattice-align-words-lexicon data/lang/phones/align_lexicon.int exp/dir/final.mdl ark:1.lats ark:- | \\\n" 52 " lattice-1best ark:- ark:- | \\\n" 53 " nbest-to-ctm ark:- 1.ctm\n";
57 bool print_silence =
false;
60 po.Register(
"print-silence", &print_silence,
"If true, print optional-silence " 62 po.Register(
"frame-shift", &frame_shift,
"Time in seconds between frames.\n");
63 po.Register(
"precision", &precision,
64 "Number of decimal places for start duration times (note: we " 65 "may use a higher value than this if it's obvious from " 66 "--frame-shift that this value is too small");
70 if (po.NumArgs() != 2) {
75 std::string lats_rspecifier = po.GetArg(1),
76 ctm_wxfilename = po.GetArg(2);
78 if (frame_shift < 0.01 && precision <= 2)
80 if (frame_shift < 0.001 && precision <= 3)
86 int32 n_done = 0, n_err = 0;
88 Output ko(ctm_wxfilename,
false);
89 ko.Stream() << std::fixed;
91 ko.Stream().precision(precision);
93 for (; !clat_reader.Done(); clat_reader.Next()) {
94 std::string key = clat_reader.Key();
97 std::vector<int32>
words, times, lengths;
101 KALDI_WARN <<
"Format conversion failed for key " << key;
104 words.size() == lengths.size());
105 for (
size_t i = 0;
i < words.size();
i++) {
106 if (words[
i] == 0 && !print_silence)
108 ko.Stream() << key <<
" 1 " << (frame_shift * times[
i]) <<
' ' 109 << (frame_shift * lengths[
i]) <<
' ' << words[
i] <<std::endl;
119 KALDI_LOG <<
"Converted " << n_done <<
" linear lattices to ctm format; " 120 << n_err <<
" had errors.";
121 return (n_done != 0 ? 0 : 1);
122 }
catch(
const std::exception &e) {
123 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
fst::VectorFst< CompactLatticeArc > CompactLattice
#define KALDI_ASSERT(cond)
bool CompactLatticeToWordAlignment(const CompactLattice &clat, std::vector< int32 > *words, std::vector< int32 > *begin_times, std::vector< int32 > *lengths)
This function takes a CompactLattice that should only contain a single linear sequence (e...