linear-to-nbest.cc File Reference
Include dependency graph for linear-to-nbest.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks, the reference:
 

Functions

void MakeLatticeFromLinear (const std::vector< int32 > &ali, const std::vector< int32 > &words, BaseFloat lm_cost, BaseFloat ac_cost, Lattice *lat_out)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 50 of file linear-to-nbest.cc.

References fst::ConvertLattice(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ERR, KALDI_LOG, SequentialTableReader< Holder >::Key(), kaldi::MakeLatticeFromLinear(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), words, and TableWriter< Holder >::Write().

50  {
51  try {
52  using namespace kaldi;
53  typedef kaldi::int32 int32;
54  typedef kaldi::int64 int64;
55  using fst::SymbolTable;
56  using fst::VectorFst;
57  using fst::StdArc;
58 
59  const char *usage =
60  "This does the opposite of nbest-to-linear. It takes 4 archives,\n"
61  "containing alignments, word-sequences, and acoustic and LM costs,\n"
62  "and turns it into a single archive containing FSTs with a linear\n"
63  "structure. The program is called linear-to-nbest because very often\n"
64  "the archives concerned will represent N-best lists\n"
65  "Usage: linear-to-nbest [options] <alignments-rspecifier> "
66  "<transcriptions-rspecifier> (<lm-cost-rspecifier>|'') (<ac-cost-rspecifier>|'') "
67  "<nbest-wspecifier>\n"
68  "Note: if the rspecifiers for lm-cost or ac-cost are the empty string,\n"
69  "these value will default to zero.\n"
70  " e.g.: linear-to-nbest ark:1.ali 'ark:sym2int.pl -f 2- words.txt text|' "
71  "ark:1.lmscore ark:1.acscore "
72  "ark:1.nbest\n";
73 
74  ParseOptions po(usage);
75 
76  po.Read(argc, argv);
77 
78  if (po.NumArgs() != 5) {
79  po.PrintUsage();
80  exit(1);
81  }
82 
83  std::string ali_rspecifier = po.GetArg(1),
84  trans_rspecifier = po.GetArg(2),
85  lm_cost_rspecifier = po.GetArg(3),
86  ac_cost_rspecifier = po.GetArg(4),
87  lats_wspecifier = po.GetArg(5); // will probably represent N-best.
88 
89 
90 
91  SequentialInt32VectorReader ali_reader(ali_rspecifier);
92  RandomAccessInt32VectorReader trans_reader(trans_rspecifier);
93  RandomAccessBaseFloatReader lm_cost_reader(lm_cost_rspecifier);
94  RandomAccessBaseFloatReader ac_cost_reader(ac_cost_rspecifier);
95 
96  CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
97 
98  int32 n_done = 0, n_err = 0;
99 
100  for (; !ali_reader.Done(); ali_reader.Next()) {
101  std::string key = ali_reader.Key();
102  if (!trans_reader.HasKey(key)) {
103  KALDI_ERR << "No transcription for key " << key;
104  n_err++;
105  continue;
106  }
107  if (lm_cost_rspecifier != "" && !lm_cost_reader.HasKey(key)) {
108  KALDI_ERR << "No LM cost for key " << key;
109  n_err++;
110  continue;
111  }
112  if (ac_cost_rspecifier != "" && !ac_cost_reader.HasKey(key)) {
113  KALDI_ERR << "No acoustic cost for key " << key;
114  n_err++;
115  continue;
116  }
117  const std::vector<int32> &ali = ali_reader.Value();
118  const std::vector<int32> &words = trans_reader.Value(key);
119  BaseFloat
120  ac_cost = (ac_cost_rspecifier == "") ? 0.0 : ac_cost_reader.Value(key),
121  lm_cost = (lm_cost_rspecifier == "") ? 0.0 : lm_cost_reader.Value(key);
122  Lattice lat;
123  MakeLatticeFromLinear(ali, words, lm_cost, ac_cost, &lat);
124  CompactLattice clat;
125  ConvertLattice(lat, &clat);
126 
127  compact_lattice_writer.Write(key, clat);
128  n_done++;
129  }
130  KALDI_LOG << "Done " << n_done << " n-best entries ,"
131  << n_err << " had errors.";
132  return (n_done != 0 ? 0 : 1);
133  } catch(const std::exception &e) {
134  std::cerr << e.what();
135  return -1;
136  }
137 }
int32 words[kMaxOrder]
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
Definition: chain.dox:20
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
void MakeLatticeFromLinear(const std::vector< int32 > &ali, const std::vector< int32 > &words, BaseFloat lm_cost, BaseFloat ac_cost, Lattice *lat_out)
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
#define KALDI_ERR
Definition: kaldi-error.h:147
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
#define KALDI_LOG
Definition: kaldi-error.h:153