lattice-rescore-mapped.cc
Go to the documentation of this file.
1 // latbin/lattice-rescore-mapped.cc
2 
3 // Copyright 2009-2012 Saarland University (author: Arnab Ghoshal)
4 // Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "util/stl-utils.h"
24 #include "hmm/transition-model.h"
25 #include "fstext/fstext-lib.h"
26 #include "lat/kaldi-lattice.h"
27 #include "lat/lattice-functions.h"
28 
29 namespace kaldi {
30 
31 void LatticeAcousticRescore(const TransitionModel &trans_model,
32  const Matrix<BaseFloat> &log_likes,
33  const std::vector<int32> &state_times,
34  Lattice *lat) {
35  kaldi::uint64 props = lat->Properties(fst::kFstProperties, false);
36  if (!(props & fst::kTopSorted))
37  KALDI_ERR << "Input lattice must be topologically sorted.";
38 
39  KALDI_ASSERT(!state_times.empty());
40  std::vector<std::vector<int32> > time_to_state(log_likes.NumRows());
41  for (size_t i = 0; i < state_times.size(); i++) {
42  KALDI_ASSERT(state_times[i] >= 0);
43  if (state_times[i] < log_likes.NumRows()) // end state may be past this..
44  time_to_state[state_times[i]].push_back(i);
45  else
46  KALDI_ASSERT(state_times[i] == log_likes.NumRows()
47  && "There appears to be lattice/feature mismatch.");
48  }
49 
50  for (int32 t = 0; t < log_likes.NumRows(); t++) {
51  for (size_t i = 0; i < time_to_state[t].size(); i++) {
52  int32 state = time_to_state[t][i];
53  for (fst::MutableArcIterator<Lattice> aiter(lat, state); !aiter.Done();
54  aiter.Next()) {
55  LatticeArc arc = aiter.Value();
56  int32 trans_id = arc.ilabel;
57  if (trans_id != 0) { // Non-epsilon input label on arc
58  int32 pdf_id = trans_model.TransitionIdToPdf(trans_id);
59  if (pdf_id > log_likes.NumCols())
60  KALDI_ERR << "Pdf-id " << pdf_id << " is out of the range of "
61  << "input log-likelihoods " << log_likes.NumCols()
62  << " (probably some kind of mismatch).";
63  BaseFloat ll = log_likes(t, pdf_id);
64  arc.weight.SetValue2(-ll + arc.weight.Value2());
65  aiter.SetValue(arc);
66  }
67  }
68  }
69  }
70 }
71 
72 } // namespace kaldi
73 
74 int main(int argc, char *argv[]) {
75  try {
76  using namespace kaldi;
77  typedef kaldi::int32 int32;
78  typedef kaldi::int64 int64;
79  using fst::SymbolTable;
80  using fst::VectorFst;
81  using fst::StdArc;
82 
83  const char *usage =
84  "Replace the acoustic scores on a lattice using log-likelihoods read in\n"
85  "as a matrix for each utterance, indexed (frame, pdf-id). This does the same\n"
86  "as (e.g.) gmm-rescore-lattice, but from a matrix. The \"mapped\" means that\n"
87  "the transition-model is used to map transition-ids to pdf-ids. (c.f.\n"
88  "latgen-faster-mapped). Note: <transition-model-in> can be any type of\n"
89  "model file, e.g. GMM-based or neural-net based; only the transition model is read.\n"
90  "\n"
91  "Usage: lattice-rescore-mapped [options] <transition-model-in> <lattice-rspecifier> "
92  "<loglikes-rspecifier> <lattice-wspecifier>\n"
93  " e.g.: nnet-logprob [args] .. | lattice-rescore-mapped final.mdl ark:1.lats ark:- ark:2.lats\n";
94 
95  kaldi::BaseFloat old_acoustic_scale = 0.0;
96  kaldi::ParseOptions po(usage);
97  po.Register("old-acoustic-scale", &old_acoustic_scale,
98  "Add in the scores in the input lattices with this scale, rather "
99  "than discarding them.");
100  po.Read(argc, argv);
101 
102  if (po.NumArgs() != 4) {
103  po.PrintUsage();
104  exit(1);
105  }
106 
107  std::string model_filename = po.GetArg(1),
108  lats_rspecifier = po.GetArg(2),
109  loglike_rspecifier = po.GetArg(3),
110  lats_wspecifier = po.GetArg(4);
111 
112  TransitionModel trans_model;
113  {
114  bool binary;
115  Input ki(model_filename, &binary);
116  trans_model.Read(ki.Stream(), binary);
117  // Ignore what follows it in the model.
118  }
119 
120  RandomAccessBaseFloatMatrixReader loglike_reader(loglike_rspecifier);
121  // Read as regular lattice
122  SequentialLatticeReader lattice_reader(lats_rspecifier);
123  // Write as compact lattice.
124  CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
125 
126  int32 num_done = 0, num_err = 0;
127  int64 num_frames = 0;
128  for (; !lattice_reader.Done(); lattice_reader.Next()) {
129  std::string key = lattice_reader.Key();
130  if (!loglike_reader.HasKey(key)) {
131  KALDI_WARN << "No log-likes found for utterance " << key << ". Skipping";
132  num_err++;
133  continue;
134  }
135 
136  Lattice lat = lattice_reader.Value();
137  lattice_reader.FreeCurrent();
138  if (old_acoustic_scale != 1.0)
139  fst::ScaleLattice(fst::AcousticLatticeScale(old_acoustic_scale), &lat);
140 
141  kaldi::uint64 props = lat.Properties(fst::kFstProperties, false);
142  if (!(props & fst::kTopSorted)) {
143  if (fst::TopSort(&lat) == false)
144  KALDI_ERR << "Cycles detected in lattice.";
145  }
146 
147  std::vector<int32> state_times;
148  int32 max_time = kaldi::LatticeStateTimes(lat, &state_times);
149  const Matrix<BaseFloat> &log_likes = loglike_reader.Value(key);
150  if (log_likes.NumRows() != max_time) {
151  KALDI_WARN << "Skipping utterance " << key << " since number of time "
152  << "frames in lattice ("<< max_time << ") differ from "
153  << "number of frames in log-likelihoods (" << log_likes.NumRows() << ").";
154  num_err++;
155  continue;
156  }
157 
158  kaldi::LatticeAcousticRescore(trans_model, log_likes, state_times,
159  &lat);
160  CompactLattice clat_out;
161  ConvertLattice(lat, &clat_out);
162  compact_lattice_writer.Write(key, clat_out);
163  num_done++;
164  num_frames += log_likes.NumRows();
165  }
166 
167  KALDI_LOG << "Done " << num_done << " lattices, " << num_err
168  << " with errors, #frames is " << num_frames;
169  return (num_done != 0 ? 0 : 1);
170  } catch(const std::exception &e) {
171  std::cerr << e.what();
172  return -1;
173  }
174 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
fst::ArcTpl< LatticeWeight > LatticeArc
Definition: kaldi-lattice.h:40
int32 LatticeStateTimes(const Lattice &lat, vector< int32 > *times)
This function iterates over the states of a topologically sorted lattice and counts the time instance...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void LatticeAcousticRescore(const TransitionModel &trans_model, const Matrix< BaseFloat > &log_likes, const std::vector< int32 > &state_times, Lattice *lat)
int32 TransitionIdToPdf(int32 trans_id) const
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
std::istream & Stream()
Definition: kaldi-io.cc:826
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const T & Value(const std::string &key)
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
void Read(std::istream &is, bool binary)
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int main(int argc, char *argv[])
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153