gmm-latgen-biglm-faster.cc File Reference
Include dependency graph for gmm-latgen-biglm-faster.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks; the reference:
 

Functions

bool DecodeUtterance (LatticeBiglmFasterDecoder &decoder, DecodableInterface &decodable, const TransitionModel &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, bool determinize, bool allow_partial, Int32VectorWriter *alignment_writer, Int32VectorWriter *words_writer, CompactLatticeWriter *compact_lattice_writer, LatticeWriter *lattice_writer, double *like_ptr)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 143 of file gmm-latgen-biglm-faster.cc.

References fst::ApplyProbabilityScale(), fst::CastOrConvertToVectorFst(), kaldi::ClassifyRspecifier(), kaldi::DecodeUtterance(), LatticeFasterDecoderConfig::determinize_lattice, SequentialTableReader< Holder >::Done(), Timer::Elapsed(), SequentialTableReader< Holder >::FreeCurrent(), ParseOptions::GetArg(), ParseOptions::GetOptArg(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), kaldi::kNoRspecifier, SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), TableWriter< Holder >::Open(), ParseOptions::PrintUsage(), AmDiagGmm::Read(), ParseOptions::Read(), TransitionModel::Read(), fst::ReadFstKaldi(), fst::ReadFstKaldiGeneric(), LatticeFasterDecoderConfig::Register(), ParseOptions::Register(), Input::Stream(), RandomAccessTableReader< Holder >::Value(), and SequentialTableReader< Holder >::Value().

// Body of main() for gmm-latgen-biglm-faster, as rendered in this listing; the
// leading number on each line is the line number in the original .cc file.
// Flow: parse options and positional arguments, read the model and the two LM
// FSTs, decode every utterance, then log summary statistics.
// Returns 0 if at least one utterance decoded successfully, 1 if none did, and
// -1 if a std::exception was caught.
143  {
144  try {
145  using namespace kaldi;
146  typedef kaldi::int32 int32;
147  using fst::SymbolTable;
148  using fst::VectorFst;
149  using fst::Fst;
150  using fst::StdArc;
151  using fst::ReadFstKaldi;
152 
153  const char *usage =
154  "Generate lattices using GMM-based model.\n"
155  "User supplies LM used to generate decoding graph, and desired LM;\n"
156  "this decoder applies the difference during decoding\n"
157  "Usage: gmm-latgen-biglm-faster [options] model-in (fst-in|fsts-rspecifier) "
158  "oldlm-fst-in newlm-fst-in features-rspecifier"
159  " lattice-wspecifier [ words-wspecifier [alignments-wspecifier] ]\n";
160  ParseOptions po(usage);
161  Timer timer;
162  bool allow_partial = false;
163  BaseFloat acoustic_scale = 0.1;
// NOTE(review): the listing jumps from original line 163 to 165, and `config`
// (used below) is not declared in any visible line. Presumably its declaration
// sits on the omitted line 164 -- confirm against the actual source file.
165 
166  std::string word_syms_filename;
167  config.Register(&po);
168  po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
169 
170  po.Register("word-symbol-table", &word_syms_filename, "Symbol table for words [for debug output]");
171  po.Register("allow-partial", &allow_partial, "If true, produce output even if end state was not reached.");
172 
173  po.Read(argc, argv);
174 
// Between 6 and 8 positional arguments are accepted; anything else prints the
// usage message and exits with status 1.
175  if (po.NumArgs() < 6 || po.NumArgs() > 8) {
176  po.PrintUsage();
177  exit(1);
178  }
179 
// Arguments 1-6 are mandatory; 7 and 8 are fetched with GetOptArg because the
// usage string marks them as optional.
180  std::string model_in_filename = po.GetArg(1),
181  fst_in_str = po.GetArg(2),
182  old_lm_fst_rxfilename = po.GetArg(3),
183  new_lm_fst_rxfilename = po.GetArg(4),
184  feature_rspecifier = po.GetArg(5),
185  lattice_wspecifier = po.GetArg(6),
186  words_wspecifier = po.GetOptArg(7),
187  alignment_wspecifier = po.GetOptArg(8);
188 
// The transition model and then the GMM acoustic model are read, in that
// order, from the same input stream.
189  TransitionModel trans_model;
190  AmDiagGmm am_gmm;
191  {
192  bool binary;
193  Input ki(model_in_filename, &binary);
194  trans_model.Read(ki.Stream(), binary);
195  am_gmm.Read(ki.Stream(), binary);
196  }
197 
// NOTE(review): old_lm_fst and new_lm_fst are raw pointers with no matching
// `delete` anywhere in the visible listing (unlike decode_fst and word_syms).
// If CastOrConvertToVectorFst hands ownership to the caller, they are only
// reclaimed at process exit -- confirm whether that is intentional.
198  VectorFst<StdArc> *old_lm_fst = fst::CastOrConvertToVectorFst(
199  fst::ReadFstKaldiGeneric(old_lm_fst_rxfilename));
200  ApplyProbabilityScale(-1.0, old_lm_fst); // Negate old LM probs...
201 
202  VectorFst<StdArc> *new_lm_fst = fst::CastOrConvertToVectorFst(
203  fst::ReadFstKaldiGeneric(new_lm_fst_rxfilename));
204 
// Wrap both LMs, compose the (scaled) old LM with the new LM, and put a cache
// in front of the composition; &cache_dfst is what every decoder below gets.
205  fst::BackoffDeterministicOnDemandFst<StdArc> old_lm_dfst(*old_lm_fst);
206  fst::BackoffDeterministicOnDemandFst<StdArc> new_lm_dfst(*new_lm_fst);
207  fst::ComposeDeterministicOnDemandFst<StdArc> compose_dfst(&old_lm_dfst,
208  &new_lm_dfst);
209  fst::CacheDeterministicOnDemandFst<StdArc> cache_dfst(&compose_dfst);
210 
// Only one of the two lattice writers is opened on lattice_wspecifier: the
// compact writer when config.determinize_lattice is true, otherwise the plain
// one.
211  bool determinize = config.determinize_lattice;
212  CompactLatticeWriter compact_lattice_writer;
213  LatticeWriter lattice_writer;
214  if (! (determinize ? compact_lattice_writer.Open(lattice_wspecifier)
215  : lattice_writer.Open(lattice_wspecifier)))
216  KALDI_ERR << "Could not open table for writing lattices: "
217  << lattice_wspecifier;
218 
219  Int32VectorWriter words_writer(words_wspecifier);
220 
221  Int32VectorWriter alignment_writer(alignment_wspecifier);
222 
// The word symbol table is optional: word_syms stays NULL unless
// --word-symbol-table was supplied.
223  fst::SymbolTable *word_syms = NULL;
224  if (word_syms_filename != "")
225  if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
226  KALDI_ERR << "Could not read symbol table from file "
227  << word_syms_filename;
228 
// Running totals for the summary logged after decoding.
229  double tot_like = 0.0;
230  kaldi::int64 frame_count = 0;
231  int num_success = 0, num_fail = 0;
232 
233 
// Two modes, selected by whether fst_in_str classifies as an rspecifier.
234  if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) {
235  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
236  // Input FST is just one FST, not a table of FSTs.
237  Fst<StdArc> *decode_fst = fst::ReadFstKaldiGeneric(fst_in_str);
238 
// Inner scope so the decoder is destroyed before decode_fst is deleted below.
239  {
240  LatticeBiglmFasterDecoder decoder(*decode_fst, config, &cache_dfst);
241 
242  for (; !feature_reader.Done(); feature_reader.Next()) {
243  std::string utt = feature_reader.Key();
// The features are copied into a local matrix so the reader's current entry
// can be released right away.
244  Matrix<BaseFloat> features (feature_reader.Value());
245  feature_reader.FreeCurrent();
// An utterance with no feature rows is counted as a failure and skipped.
246  if (features.NumRows() == 0) {
247  KALDI_WARN << "Zero-length utterance: " << utt;
248  num_fail++;
249  continue;
250  }
251 
252  DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model, features,
253  acoustic_scale);
254 
255 
// `like` is only read when DecodeUtterance returns true.
256  double like;
257  if (DecodeUtterance(decoder, gmm_decodable, trans_model, word_syms,
258  utt, acoustic_scale, determinize, allow_partial,
259  &alignment_writer, &words_writer,
260  &compact_lattice_writer, &lattice_writer,
261  &like)) {
262  tot_like += like;
263  frame_count += features.NumRows();
264  num_success++;
265  } else num_fail++;
266  }
267  }
268  delete decode_fst; // delete this only after decoder goes out of scope.
269  } else { // We have different FSTs for different utterances.
// The FST table drives the loop; features are looked up by utterance key.
270  SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_in_str);
271  RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
272  for (; !fst_reader.Done(); fst_reader.Next()) {
273  std::string utt = fst_reader.Key();
274  if (!feature_reader.HasKey(utt)) {
275  KALDI_WARN << "Not decoding utterance " << utt
276  << " because no features available.";
277  num_fail++;
278  continue;
279  }
280  const Matrix<BaseFloat> &features = feature_reader.Value(utt);
281  if (features.NumRows() == 0) {
282  KALDI_WARN << "Zero-length utterance: " << utt;
283  num_fail++;
284  continue;
285  }
// A fresh decoder is constructed for each utterance's own FST.
286  LatticeBiglmFasterDecoder decoder(fst_reader.Value(), config,
287  &cache_dfst);
288  DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model, features,
289  acoustic_scale);
290  double like;
291  if (DecodeUtterance(decoder, gmm_decodable, trans_model, word_syms, utt,
292  acoustic_scale, determinize, allow_partial,
293  &alignment_writer, &words_writer,
294  &compact_lattice_writer, &lattice_writer,
295  &like)) {
296  tot_like += like;
297  frame_count += features.NumRows();
298  num_success++;
299  } else num_fail++;
300  }
301  }
302 
// NOTE(review): frame_count is only incremented for successfully decoded
// utterances, so it is still 0 when none succeed; both divisions below then
// divide a double by zero and the logged figures are not meaningful.
303  double elapsed = timer.Elapsed();
304  KALDI_LOG << "Time taken "<< elapsed
305  << "s: real-time factor assuming 100 frames/sec is "
306  << (elapsed*100.0/frame_count);
307  KALDI_LOG << "Done " << num_success << " utterances, failed for "
308  << num_fail;
309  KALDI_LOG << "Overall log-likelihood per frame is " << (tot_like/frame_count) << " over "
310  << frame_count<<" frames.";
311 
312  delete word_syms;
// Exit status reflects whether anything was decoded at all.
313  if (num_success != 0) return 0;
314  else return 1;
315  } catch(const std::exception &e) {
// The exception message is written to stderr with no trailing newline.
316  std::cerr << e.what();
317  return -1;
318  }
319 }
This class wraps an Fst, representing a language model, using the interface for "BackoffDeterministic...
bool DecodeUtterance(LatticeBiglmFasterDecoder &decoder, DecodableInterface &decodable, const TransitionModel &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, bool determinize, bool allow_partial, Int32VectorWriter *alignment_writer, Int32VectorWriter *words_writer, CompactLatticeWriter *compact_lattice_writer, LatticeWriter *lattice_writer, double *like_ptr)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool Open(const std::string &wspecifier)
Fst< StdArc > * ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err)
Definition: kaldi-fst-io.cc:45
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
This is as LatticeFasterDecoder, but does online composition between HCLG and the "difference languag...
void ApplyProbabilityScale(float scale, MutableFst< Arc > *fst)
ApplyProbabilityScale is applicable to FSTs in the log or tropical semiring.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
VectorFst< StdArc > * CastOrConvertToVectorFst(Fst< StdArc > *fst)
Definition: kaldi-fst-io.cc:94
#define KALDI_LOG
Definition: kaldi-error.h:153
double Elapsed() const
Returns time in seconds.
Definition: timer.h:74
void Read(std::istream &in_stream, bool binary)
Definition: am-diag-gmm.cc:147