kaldi-fst-io-inl.h
Go to the documentation of this file.
1 // fstext/kaldi-fst-io-inl.h
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2012-2015 Johns Hopkins University (Author: Daniel Povey)
5 // 2013 Guoguo Chen
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABILITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #ifndef KALDI_FSTEXT_KALDI_FST_IO_INL_H_
23 #define KALDI_FSTEXT_KALDI_FST_IO_INL_H_
24 
25 #include "util/text-utils.h"
26 
27 namespace fst {
28 
29 
30 template <class Arc>
31 void WriteFstKaldi(std::ostream &os, bool binary,
32  const VectorFst<Arc> &t) {
33  bool ok;
34  if (binary) {
35  // Binary-mode writing.
36  ok = t.Write(os, FstWriteOptions());
37  } else {
38  // Text-mode output. Note: we expect that t.InputSymbols() and
39  // t.OutputSymbols() would always return NULL. The corresponding input
40  // routine would not work if the FST actually had symbols attached. Write a
41  // newline to start the FST; in a table, the first line of the FST will
42  // appear on its own line.
43  os << '\n';
44  bool acceptor = false, write_one = false;
45  FstPrinter<Arc> printer(t, t.InputSymbols(), t.OutputSymbols(),
46  NULL, acceptor, write_one, "\t");
47  printer.Print(&os, "<unknown>");
48  if (os.fail())
49  KALDI_ERR << "Stream failure detected writing FST to stream";
50  // Write another newline as a terminating character. The read routine will
51  // detect this [this is a Kaldi mechanism, not something in the original
52  // OpenFst code].
53  os << '\n';
54  ok = os.good();
55  }
56  if (!ok) {
57  KALDI_ERR << "Error writing FST to stream";
58  }
59 }
60 
/// Utility function used in ReadFstKaldi: parses the weight in `s` into `*w`
/// using the weight type's stream extraction operator.
///
/// @param s           Text to parse.
/// @param allow_zero  If false, a parsed value equal to W::Zero() is rejected.
/// @param w           Output; written by the extraction operator.
/// @return true if extraction did not fail and the value was not a
///         disallowed zero; false otherwise.
template <class W>
inline bool StrToWeight(const std::string &s, bool allow_zero, W *w) {
  std::istringstream parser(s);
  parser >> *w;
  if (parser.fail()) return false;
  // Only compare against Zero() when zero is not acceptable.
  const bool rejected_zero = !allow_zero && *w == W::Zero();
  return !rejected_zero;
}
71 
72 template <class Arc>
73 void ReadFstKaldi(std::istream &is, bool binary,
74  VectorFst<Arc> *fst) {
75  typedef typename Arc::Weight Weight;
76  typedef typename Arc::StateId StateId;
77  if (binary) {
78  // We don't have access to the filename here, so write [unknown].
79  VectorFst<Arc> *ans =
80  VectorFst<Arc>::Read(is, fst::FstReadOptions(std::string("[unknown]")));
81  if (ans == NULL) {
82  KALDI_ERR << "Error reading FST from stream.";
83  }
84  *fst = *ans; // shallow copy.
85  delete ans;
86  } else {
87  // Consume the \r on Windows, the \n that the text-form FST format starts
88  // with, and any extra spaces that might have got in there somehow.
89  while (std::isspace(is.peek()) && is.peek() != '\n') is.get();
90  if (is.peek() == '\n') is.get(); // consume the newline.
91  else { // saw spaces but no newline.. this is not expected.
92  KALDI_ERR << "Reading FST: unexpected sequence of spaces "
93  << " at file position " << is.tellg();
94  }
95  using std::string;
96  using std::vector;
99  fst->DeleteStates();
100  string line;
101  size_t nline = 0;
102  string separator = FLAGS_fst_field_separator + "\r\n";
103  while (std::getline(is, line)) {
104  nline++;
105  vector<string> col;
106  // on Windows we'll write in text and read in binary mode.
107  kaldi::SplitStringToVector(line, separator.c_str(), true, &col);
108  if (col.size() == 0) break; // Empty line is a signal to stop, in our
109  // archive format.
110  if (col.size() > 5) {
111  KALDI_ERR << "Bad line in FST: " << line;
112  }
113  StateId s;
114  if (!ConvertStringToInteger(col[0], &s)) {
115  KALDI_ERR << "Bad line in FST: " << line;
116  }
117  while (s >= fst->NumStates())
118  fst->AddState();
119  if (nline == 1) fst->SetStart(s);
120 
121  bool ok = true;
122  Arc arc;
123  Weight w;
124  StateId d = s;
125  switch (col.size()) {
126  case 1:
127  fst->SetFinal(s, Weight::One());
128  break;
129  case 2:
130  if (!StrToWeight(col[1], true, &w)) ok = false;
131  else fst->SetFinal(s, w);
132  break;
133  case 3: // 3 columns not ok for Lattice format; it's not an acceptor.
134  ok = false;
135  break;
136  case 4:
137  ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
138  ConvertStringToInteger(col[2], &arc.ilabel) &&
139  ConvertStringToInteger(col[3], &arc.olabel);
140  if (ok) {
141  d = arc.nextstate;
142  arc.weight = Weight::One();
143  fst->AddArc(s, arc);
144  }
145  break;
146  case 5:
147  ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
148  ConvertStringToInteger(col[2], &arc.ilabel) &&
149  ConvertStringToInteger(col[3], &arc.olabel) &&
150  StrToWeight(col[4], false, &arc.weight);
151  if (ok) {
152  d = arc.nextstate;
153  fst->AddArc(s, arc);
154  }
155  break;
156  default:
157  ok = false;
158  }
159  while (d >= fst->NumStates()) fst->AddState();
160  if (!ok)
161  KALDI_ERR << "Bad line in FST: " << line;
162  }
163  }
164 }
165 
166 
167 
168 
169 template<class Arc> // static
170 bool VectorFstTplHolder<Arc>::Write(std::ostream &os, bool binary, const T &t) {
171  try {
172  WriteFstKaldi(os, binary, t);
173  return true;
174  } catch (...) {
175  return false;
176  }
177 }
178 
179 template<class Arc> // static
180 bool VectorFstTplHolder<Arc>::Read(std::istream &is) {
181  Clear();
182  int c = is.peek();
183  if (c == -1) {
184  KALDI_WARN << "End of stream detected reading Fst";
185  return false;
186  } else if (isspace(c)) { // The text form of the FST begins
187  // with space (normally, '\n'), so this means it's text (the binary form
188  // cannot begin with space because it starts with the FST Type() which is not
189  // space).
190  try {
191  t_ = new VectorFst<Arc>();
192  ReadFstKaldi(is, false, t_);
193  } catch (...) {
194  Clear();
195  return false;
196  }
197  } else { // reading a binary FST.
198  try {
199  t_ = new VectorFst<Arc>();
200  ReadFstKaldi(is, true, t_);
201  } catch (...) {
202  Clear();
203  return false;
204  }
205  }
206  return true;
207 }
208 
209 } // namespace fst.
210 
211 #endif // KALDI_FSTEXT_KALDI_FST_IO_INL_H_
fst::StdArc::StateId StateId
bool Read(std::istream &is)
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
static bool Write(std::ostream &os, bool binary, const T &t)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
VectorFst< Arc > T
Definition: kaldi-fst-io.h:98
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
bool StrToWeight(const std::string &s, bool allow_zero, W *w)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
fst::StdArc::Weight Weight
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst< Arc > &t)
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)