kaldi-lattice.cc
Go to the documentation of this file.
1 // lat/kaldi-lattice.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "lat/kaldi-lattice.h"
23 #include "fst/script/print-impl.h"
24 
25 namespace kaldi {
26 
28 template<class OrigWeightType>
29 CompactLattice* ConvertToCompactLattice(fst::VectorFst<OrigWeightType> *ifst) {
30  if (!ifst) return NULL;
31  CompactLattice *ofst = new CompactLattice();
32  ConvertLattice(*ifst, ofst);
33  delete ifst;
34  return ofst;
35 }
36 
37 // This overrides the template if there is no type conversion going on
38 // (for efficiency).
39 template<>
41  return ifst;
42 }
43 
45 template<class OrigWeightType>
46 Lattice* ConvertToLattice(fst::VectorFst<OrigWeightType> *ifst) {
47  if (!ifst) return NULL;
48  Lattice *ofst = new Lattice();
49  ConvertLattice(*ifst, ofst);
50  delete ifst;
51  return ofst;
52 }
53 
54 // This overrides the template if there is no type conversion going on
55 // (for efficiency).
56 template<>
58  return ifst;
59 }
60 
61 
62 bool WriteCompactLattice(std::ostream &os, bool binary,
63  const CompactLattice &t) {
64  if (binary) {
65  fst::FstWriteOptions opts;
66  // Leave all the options default. Normally these lattices wouldn't have any
67  // osymbols/isymbols so no point directing it not to write them (who knows what
68  // we'd want to if we had them).
69  return t.Write(os, opts);
70  } else {
71  // Text-mode output. Note: we expect that t.InputSymbols() and
72  // t.OutputSymbols() would always return NULL. The corresponding input
73  // routine would not work if the FST actually had symbols attached.
74  // Write a newline after the key, so the first line of the FST appears
75  // on its own line.
76  os << '\n';
77  bool acceptor = true, write_one = false;
78  fst::FstPrinter<CompactLatticeArc> printer(t, t.InputSymbols(),
79  t.OutputSymbols(),
80  NULL, acceptor, write_one, "\t");
81  printer.Print(&os, "<unknown>");
82  if (os.fail())
83  KALDI_WARN << "Stream failure detected.";
84  // Write another newline as a terminating character. The read routine will
85  // detect this [this is a Kaldi mechanism, not somethig in the original
86  // OpenFst code].
87  os << '\n';
88  return os.good();
89  }
90 }
91 
95  typedef LatticeArc Arc;
99  typedef Arc::Label Label;
101  public:
102  // everything is static in this class.
103 
108  static std::pair<Lattice*, CompactLattice*> ReadText(
109  std::istream &is) {
110  typedef std::pair<Lattice*, CompactLattice*> PairT;
111  using std::string;
112  using std::vector;
113  Lattice *fst = new Lattice();
114  CompactLattice *cfst = new CompactLattice();
115  string line;
116  size_t nline = 0;
117  string separator = FLAGS_fst_field_separator + "\r\n";
118  while (std::getline(is, line)) {
119  nline++;
120  vector<string> col;
121  // on Windows we'll write in text and read in binary mode.
122  SplitStringToVector(line, separator.c_str(), true, &col);
123  if (col.size() == 0) break; // Empty line is a signal to stop, in our
124  // archive format.
125  if (col.size() > 5) {
126  KALDI_WARN << "Reading lattice: bad line in FST: " << line;
127  delete fst;
128  delete cfst;
129  return PairT(static_cast<Lattice*>(NULL),
130  static_cast<CompactLattice*>(NULL));
131  }
132  StateId s;
133  if (!ConvertStringToInteger(col[0], &s)) {
134  KALDI_WARN << "FstCompiler: bad line in FST: " << line;
135  delete fst;
136  delete cfst;
137  return PairT(static_cast<Lattice*>(NULL),
138  static_cast<CompactLattice*>(NULL));
139  }
140  if (fst)
141  while (s >= fst->NumStates())
142  fst->AddState();
143  if (cfst)
144  while (s >= cfst->NumStates())
145  cfst->AddState();
146  if (nline == 1) {
147  if (fst) fst->SetStart(s);
148  if (cfst) cfst->SetStart(s);
149  }
150 
151  if (fst) { // we still have fst; try to read that arc.
152  bool ok = true;
153  Arc arc;
154  Weight w;
155  StateId d = s;
156  switch (col.size()) {
157  case 1 :
158  fst->SetFinal(s, Weight::One());
159  break;
160  case 2:
161  if (!StrToWeight(col[1], true, &w)) ok = false;
162  else fst->SetFinal(s, w);
163  break;
164  case 3: // 3 columns not ok for Lattice format; it's not an acceptor.
165  ok = false;
166  break;
167  case 4:
168  ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
169  ConvertStringToInteger(col[2], &arc.ilabel) &&
170  ConvertStringToInteger(col[3], &arc.olabel);
171  if (ok) {
172  d = arc.nextstate;
173  arc.weight = Weight::One();
174  fst->AddArc(s, arc);
175  }
176  break;
177  case 5:
178  ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
179  ConvertStringToInteger(col[2], &arc.ilabel) &&
180  ConvertStringToInteger(col[3], &arc.olabel) &&
181  StrToWeight(col[4], false, &arc.weight);
182  if (ok) {
183  d = arc.nextstate;
184  fst->AddArc(s, arc);
185  }
186  break;
187  default:
188  ok = false;
189  }
190  while (d >= fst->NumStates())
191  fst->AddState();
192  if (!ok) {
193  delete fst;
194  fst = NULL;
195  }
196  }
197  if (cfst) {
198  bool ok = true;
199  CArc arc;
200  CWeight w;
201  StateId d = s;
202  switch (col.size()) {
203  case 1 :
204  cfst->SetFinal(s, CWeight::One());
205  break;
206  case 2:
207  if (!StrToCWeight(col[1], true, &w)) ok = false;
208  else cfst->SetFinal(s, w);
209  break;
210  case 3: // compact-lattice is acceptor format: state, next-state, label.
211  ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
212  ConvertStringToInteger(col[2], &arc.ilabel);
213  if (ok) {
214  d = arc.nextstate;
215  arc.olabel = arc.ilabel;
216  arc.weight = CWeight::One();
217  cfst->AddArc(s, arc);
218  }
219  break;
220  case 4:
221  ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
222  ConvertStringToInteger(col[2], &arc.ilabel) &&
223  StrToCWeight(col[3], false, &arc.weight);
224  if (ok) {
225  d = arc.nextstate;
226  arc.olabel = arc.ilabel;
227  cfst->AddArc(s, arc);
228  }
229  break;
230  case 5: default:
231  ok = false;
232  }
233  while (d >= cfst->NumStates())
234  cfst->AddState();
235  if (!ok) {
236  delete cfst;
237  cfst = NULL;
238  }
239  }
240  if (!fst && !cfst) {
241  KALDI_WARN << "Bad line in lattice text format: " << line;
242  // read until we get an empty line, so at least we
243  // have a chance to read the next one (although this might
244  // be a bit futile since the calling code will get unhappy
245  // about failing to read this one.
246  while (std::getline(is, line)) {
247  SplitStringToVector(line, separator.c_str(), true, &col);
248  if (col.empty()) break;
249  }
250  return PairT(static_cast<Lattice*>(NULL),
251  static_cast<CompactLattice*>(NULL));
252  }
253  }
254  return PairT(fst, cfst);
255  }
256 
257  static bool StrToWeight(const std::string &s, bool allow_zero, Weight *w) {
258  std::istringstream strm(s);
259  strm >> *w;
260  if (!strm || (!allow_zero && *w == Weight::Zero())) {
261  return false;
262  }
263  return true;
264  }
265 
266  static bool StrToCWeight(const std::string &s, bool allow_zero, CWeight *w) {
267  std::istringstream strm(s);
268  strm >> *w;
269  if (!strm || (!allow_zero && *w == CWeight::Zero())) {
270  return false;
271  }
272  return true;
273  }
274 };
275 
276 
278  std::pair<Lattice*, CompactLattice*> lat_pair = LatticeReader::ReadText(is);
279  if (lat_pair.second != NULL) {
280  delete lat_pair.first;
281  return lat_pair.second;
282  } else if (lat_pair.first != NULL) {
283  // note: ConvertToCompactLattice frees its input.
284  return ConvertToCompactLattice(lat_pair.first);
285  } else {
286  return NULL;
287  }
288 }
289 
290 
291 Lattice *ReadLatticeText(std::istream &is) {
292  std::pair<Lattice*, CompactLattice*> lat_pair = LatticeReader::ReadText(is);
293  if (lat_pair.first != NULL) {
294  delete lat_pair.second;
295  return lat_pair.first;
296  } else if (lat_pair.second != NULL) {
297  // note: ConvertToLattice frees its input.
298  return ConvertToLattice(lat_pair.second);
299  } else {
300  return NULL;
301  }
302 }
303 
// Reads a compact lattice from `is` into *clat, which must be NULL on entry
// (this is asserted).  Returns true on success, in which case *clat points to
// the lattice that was read; returns false (after a warning) on failure.
//  - binary == true: reads an FST header, requires the FST type "vector",
//    then picks the reader by comparing the header's arc type with
//    T1..T4::Type() and passes the result through ConvertToCompactLattice.
//  - binary == false: skips whitespace up to a newline, consumes the newline
//    and hands the rest to ReadCompactLatticeText.
304 bool ReadCompactLattice(std::istream &is, bool binary,
305  CompactLattice **clat) {
306  KALDI_ASSERT(*clat == NULL);
307  if (binary) {
308  fst::FstHeader hdr;
309  if (!hdr.Read(is, "<unknown>")) {
310  KALDI_WARN << "Reading compact lattice: error reading FST header.";
311  return false;
312  }
313  if (hdr.FstType() != "vector") {
314  KALDI_WARN << "Reading compact lattice: unsupported FST type: "
315  << hdr.FstType();
316  return false;
317  }
// The header has already been consumed above, so it is handed to the reader
// through the read options.
318  fst::FstReadOptions ropts("<unspecified>",
319  &hdr);
320 
// NOTE(review): only the T3 typedef is visible in this listing; T1, T2 and T4
// are used below but their typedef lines are absent here -- confirm against
// the original source.
323  typedef fst::LatticeWeightTpl<float> T3;
325  typedef fst::VectorFst<fst::ArcTpl<T1> > F1;
326  typedef fst::VectorFst<fst::ArcTpl<T2> > F2;
327  typedef fst::VectorFst<fst::ArcTpl<T3> > F3;
328  typedef fst::VectorFst<fst::ArcTpl<T4> > F4;
329 
// Dispatch on the arc type recorded in the header; each branch reads the FST
// with the matching weight type and converts it to a CompactLattice.
330  CompactLattice *ans = NULL;
331  if (hdr.ArcType() == T1::Type()) {
332  ans = ConvertToCompactLattice(F1::Read(is, ropts));
333  } else if (hdr.ArcType() == T2::Type()) {
334  ans = ConvertToCompactLattice(F2::Read(is, ropts));
335  } else if (hdr.ArcType() == T3::Type()) {
336  ans = ConvertToCompactLattice(F3::Read(is, ropts));
337  } else if (hdr.ArcType() == T4::Type()) {
338  ans = ConvertToCompactLattice(F4::Read(is, ropts));
339  } else {
340  KALDI_WARN << "FST with arc type " << hdr.ArcType()
341  << " cannot be converted to CompactLattice.\n";
342  return false;
343  }
// ConvertToCompactLattice returns NULL when given NULL, so a NULL here means
// the Read() call produced no FST.
344  if (ans == NULL) {
345  KALDI_WARN << "Error reading compact lattice (after reading header).";
346  return false;
347  }
348  *clat = ans;
349  return true;
350  } else {
351  // The next line would normally consume the \r on Windows, plus any
352  // extra spaces that might have got in there somehow.
353  while (std::isspace(is.peek()) && is.peek() != '\n') is.get();
354  if (is.peek() == '\n') is.get(); // consume the newline.
355  else { // saw spaces but no newline.. this is not expected.
356  KALDI_WARN << "Reading compact lattice: unexpected sequence of spaces "
357  << " at file position " << is.tellg();
358  return false;
359  }
360  *clat = ReadCompactLatticeText(is); // that routine will warn on error.
361  return (*clat != NULL);
362  }
363 }
364 
365 
366 bool CompactLatticeHolder::Read(std::istream &is) {
367  Clear(); // in case anything currently stored.
368  int c = is.peek();
369  if (c == -1) {
370  KALDI_WARN << "End of stream detected reading CompactLattice.";
371  return false;
372  } else if (isspace(c)) { // The text form of the lattice begins
373  // with space (normally, '\n'), so this means it's text (the binary form
374  // cannot begin with space because it starts with the FST Type() which is not
375  // space).
376  return ReadCompactLattice(is, false, &t_);
377  } else if (c != 214) { // 214 is first char of FST magic number,
378  // on little-endian machines which is all we support (\326 octal)
379  KALDI_WARN << "Reading compact lattice: does not appear to be an FST "
380  << " [non-space but no magic number detected], file pos is "
381  << is.tellg();
382  return false;
383  } else {
384  return ReadCompactLattice(is, true, &t_);
385  }
386 }
387 
388 bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) {
389  if (binary) {
390  fst::FstWriteOptions opts;
391  // Leave all the options default. Normally these lattices wouldn't have any
392  // osymbols/isymbols so no point directing it not to write them (who knows what
393  // we'd want to do if we had them).
394  return t.Write(os, opts);
395  } else {
396  // Text-mode output. Note: we expect that t.InputSymbols() and
397  // t.OutputSymbols() would always return NULL. The corresponding input
398  // routine would not work if the FST actually had symbols attached.
399  // Write a newline after the key, so the first line of the FST appears
400  // on its own line.
401  os << '\n';
402  bool acceptor = false, write_one = false;
403  fst::FstPrinter<LatticeArc> printer(t, t.InputSymbols(),
404  t.OutputSymbols(),
405  NULL, acceptor, write_one, "\t");
406  printer.Print(&os, "<unknown>");
407  if (os.fail())
408  KALDI_WARN << "Stream failure detected.";
409  // Write another newline as a terminating character. The read routine will
410  // detect this [this is a Kaldi mechanism, not somethig in the original
411  // OpenFst code].
412  os << '\n';
413  return os.good();
414  }
415 }
416 
// Reads a lattice from `is` into *lat, which must be NULL on entry (this is
// asserted).  Returns true on success, in which case *lat points to the
// lattice that was read; returns false (after a warning) on failure.
//  - binary == true: reads an FST header, requires the FST type "vector",
//    then picks the reader by comparing the header's arc type with
//    T1..T4::Type() and passes the result through ConvertToLattice.
//  - binary == false: skips whitespace up to a newline, consumes the newline
//    and hands the rest to ReadLatticeText.
417 bool ReadLattice(std::istream &is, bool binary,
418  Lattice **lat) {
419  KALDI_ASSERT(*lat == NULL);
420  if (binary) {
421  fst::FstHeader hdr;
422  if (!hdr.Read(is, "<unknown>")) {
423  KALDI_WARN << "Reading lattice: error reading FST header.";
424  return false;
425  }
426  if (hdr.FstType() != "vector") {
427  KALDI_WARN << "Reading lattice: unsupported FST type: "
428  << hdr.FstType();
429  return false;
430  }
// The header has already been consumed above, so it is handed to the reader
// through the read options.
431  fst::FstReadOptions ropts("<unspecified>",
432  &hdr);
433 
// NOTE(review): only the T3 typedef is visible in this listing; T1, T2 and T4
// are used below but their typedef lines are absent here -- confirm against
// the original source.
436  typedef fst::LatticeWeightTpl<float> T3;
438  typedef fst::VectorFst<fst::ArcTpl<T1> > F1;
439  typedef fst::VectorFst<fst::ArcTpl<T2> > F2;
440  typedef fst::VectorFst<fst::ArcTpl<T3> > F3;
441  typedef fst::VectorFst<fst::ArcTpl<T4> > F4;
442 
// Dispatch on the arc type recorded in the header; each branch reads the FST
// with the matching weight type and converts it to a Lattice.
443  Lattice *ans = NULL;
444  if (hdr.ArcType() == T1::Type()) {
445  ans = ConvertToLattice(F1::Read(is, ropts));
446  } else if (hdr.ArcType() == T2::Type()) {
447  ans = ConvertToLattice(F2::Read(is, ropts));
448  } else if (hdr.ArcType() == T3::Type()) {
449  ans = ConvertToLattice(F3::Read(is, ropts));
450  } else if (hdr.ArcType() == T4::Type()) {
451  ans = ConvertToLattice(F4::Read(is, ropts));
452  } else {
453  KALDI_WARN << "FST with arc type " << hdr.ArcType()
454  << " cannot be converted to Lattice.\n";
455  return false;
456  }
// ConvertToLattice returns NULL when given NULL, so a NULL here means the
// Read() call produced no FST.
457  if (ans == NULL) {
458  KALDI_WARN << "Error reading lattice (after reading header).";
459  return false;
460  }
461  *lat = ans;
462  return true;
463  } else {
464  // The next line would normally consume the \r on Windows, plus any
465  // extra spaces that might have got in there somehow.
466  while (std::isspace(is.peek()) && is.peek() != '\n') is.get();
467  if (is.peek() == '\n') is.get(); // consume the newline.
468  else { // saw spaces but no newline.. this is not expected.
// NOTE(review): this warning says "compact lattice" although this function
// reads a Lattice; it looks copied from ReadCompactLattice.
469  KALDI_WARN << "Reading compact lattice: unexpected sequence of spaces "
470  << " at file position " << is.tellg();
471  return false;
472  }
473  *lat = ReadLatticeText(is); // that routine will warn on error.
474  return (*lat != NULL);
475  }
476 }
477 
478 
479 /* Since we don't write the binary headers for this type of holder,
480  we use a different method to work out whether we're in binary mode.
481  */
482 bool LatticeHolder::Read(std::istream &is) {
483  Clear(); // in case anything currently stored.
484  int c = is.peek();
485  if (c == -1) {
486  KALDI_WARN << "End of stream detected reading Lattice.";
487  return false;
488  } else if (isspace(c)) { // The text form of the lattice begins
489  // with space (normally, '\n'), so this means it's text (the binary form
490  // cannot begin with space because it starts with the FST Type() which is not
491  // space).
492  return ReadLattice(is, false, &t_);
493  } else if (c != 214) { // 214 is first char of FST magic number,
494  // on little-endian machines which is all we support (\326 octal)
495  KALDI_WARN << "Reading compact lattice: does not appear to be an FST "
496  << " [non-space but no magic number detected], file pos is "
497  << is.tellg();
498  return false;
499  } else {
500  return ReadLattice(is, true, &t_);
501  }
502 }
503 
504 
505 
506 } // end namespace kaldi
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
fst::ArcTpl< LatticeWeight > LatticeArc
Definition: kaldi-lattice.h:40
bool ReadLattice(std::istream &is, bool binary, Lattice **lat)
static const LatticeWeightTpl One()
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
kaldi::int32 int32
LatticeWeight Weight
static bool StrToWeight(const std::string &s, bool allow_zero, Weight *w)
bool Read(std::istream &is)
Lattice * ReadLatticeText(std::istream &is)
static const CompactLatticeWeightTpl< WeightType, IntType > One()
CompactLattice * ConvertToCompactLattice(fst::VectorFst< OrigWeightType > *ifst)
Converts lattice types if necessary, deleting its input.
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
static const LatticeWeightTpl Zero()
static std::pair< Lattice *, CompactLattice * > ReadText(std::istream &is)
This function reads from the FST text format; it does not know in advance whether it's a Lattice or C...
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
#define KALDI_WARN
Definition: kaldi-error.h:150
static bool StrToCWeight(const std::string &s, bool allow_zero, CWeight *w)
fst::StdArc::Label Label
LatticeReader provides (static) functions for reading both Lattice and CompactLattice, in text form.
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
CompactLattice * ReadCompactLatticeText(std::istream &is)
bool WriteLattice(std::ostream &os, bool binary, const Lattice &t)
CompactLatticeArc CArc
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
bool WriteCompactLattice(std::ostream &os, bool binary, const CompactLattice &t)
static const CompactLatticeWeightTpl< WeightType, IntType > Zero()
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
Definition: kaldi-lattice.h:42
bool Read(std::istream &is)
CompactLatticeWeight CWeight
Lattice * ConvertToLattice(fst::VectorFst< OrigWeightType > *ifst)
Converts lattice types if necessary, deleting its input.
bool ReadCompactLattice(std::istream &is, bool binary, CompactLattice **clat)