kaldi-table-inl.h
Go to the documentation of this file.
1 // util/kaldi-table-inl.h
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 // 2016 Xiaohui Zhang
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 
23 #ifndef KALDI_UTIL_KALDI_TABLE_INL_H_
24 #define KALDI_UTIL_KALDI_TABLE_INL_H_
25 
26 #include <algorithm>
27 #include <string>
28 #include <thread>
29 #include <utility>
30 #include <vector>
31 #include <errno.h>
32 #include "util/kaldi-io.h"
33 #include "util/kaldi-holder.h"
34 #include "util/text-utils.h"
35 #include "util/stl-utils.h" // for StringHasher.
36 #include "util/kaldi-semaphore.h"
37 
38 
39 namespace kaldi {
40 
43 
// Abstract interface implemented by the concrete readers that sit behind
// SequentialTableReader (script-file, archive and background variants).
template<class Holder>
class SequentialTableReaderImplBase {
 public:
  typedef typename Holder::T T;

  // Opens the reader.  Open() is only called on a just-allocated object.
  // NOTE(review): the parameter is named 'rxfilename' here, but the script
  // and archive implementations in this file hand the string to
  // ClassifyRspecifier(), i.e. they treat it as a full rspecifier.
  virtual bool Open(const std::string &rxfilename) = 0;

  // Returns true when there is nothing (more) to read.  Must be called on a
  // successfully opened, not-yet-closed object; it only throws when called at
  // the wrong time (i.e. on a code error).
  virtual bool Done() const = 0;

  // Returns true if the reader is open, i.e. Open() succeeded and the user
  // has not called Close().
  virtual bool IsOpen() const = 0;

  // Returns the current key.  Valid to call if Done() returned false; only
  // throws on a code error (called at the wrong time).
  virtual std::string Key() = 0;

  // Returns the value associated with the current key.  Valid to call if
  // Done() returned false.  Throws if the value could not be read; with the
  // ',p' (permissive) modifier it never throws unless called at the wrong
  // time.
  virtual T &Value() = 0;

  // Releases the value held for the current key.
  virtual void FreeCurrent() = 0;

  // Advances to the next object.  Does not throw unless called wrongly
  // (e.g. on a non-open archive).
  virtual void Next() = 0;

  // Closes the table and reports its status as a bool, so it does not throw
  // unless called wrongly (e.g. on a non-open archive).
  virtual bool Close() = 0;

  // Not part of the public interface of SequentialTableReader.  It should be
  // called when it would be valid to call Value() or FreeCurrent() (i.e. when
  // a value is stored); afterwards the value may not be retrieved again until
  // Next() has been called.  It swaps the contents of this->holder_ with
  // those of 'other_holder', and exists to support
  // SequentialTableReaderBackgroundImpl.
  virtual void SwapHolder(Holder *other_holder) = 0;

  // Virtual so that implementations can be deleted through this interface.
  // Destructors of derived classes may raise an error (via KALDI_ERR).
  virtual ~SequentialTableReaderImplBase() { }

 private:
};
83 
84 // This is the implementation for SequentialTableReader
85 // when it's actually a script file.
86 template<class Holder> class SequentialTableReaderScriptImpl:
87  public SequentialTableReaderImplBase<Holder> {
88  public:
89  typedef typename Holder::T T;
90 
91  SequentialTableReaderScriptImpl(): state_(kUninitialized) { }
92 
93  // You may call Open from states kUninitialized and kError.
94  // It may leave the object in any of the states.
95  virtual bool Open(const std::string &rspecifier) {
96  if (state_ != kUninitialized && state_ != kError)
97  if (!Close()) // call Close() yourself to suppress this exception.
98  KALDI_ERR << "Error closing previous input: "
99  << "rspecifier was " << rspecifier_;
100  bool binary;
101  rspecifier_ = rspecifier;
102  RspecifierType rs = ClassifyRspecifier(rspecifier, &script_rxfilename_,
103  &opts_);
105  if (!script_input_.Open(script_rxfilename_, &binary)) { // Failure on Open
106  KALDI_WARN << "Failed to open script file "
107  << PrintableRxfilename(script_rxfilename_);
108  state_ = kUninitialized;
109  return false;
110  } else { // Open succeeded.
111  if (binary) {
112  KALDI_WARN << "Script file should not be binary file.";
113  SetErrorState();
114  return false;
115  } else {
116  state_ = kFileStart;
117  Next();
118  if (state_ == kError)
119  return false;
120  // any other status, including kEof, is OK from the point of view of
121  // the 'open' function (empty scp file is not inherently an error).
122  return true;
123  }
124  }
125  }
126 
127  virtual bool IsOpen() const {
128  switch (state_) {
129  case kEof: case kHaveScpLine: case kHaveObject: case kHaveRange:
130  return true;
131  case kUninitialized: case kError:
132  return false;
133  default: KALDI_ERR << "IsOpen() called on invalid object.";
134  // note: kFileStart is not a valid state for the user to call a member
135  // function (we never return from a public function in this state).
136  return false;
137  }
138  }
139 
140  virtual bool Done() const {
141  switch (state_) {
142  case kHaveScpLine: case kHaveObject: case kHaveRange: return false;
143  case kEof: case kError: return true; // Error condition, like Eof, counts
144  // as Done(); the destructor/Close() will inform the user of the error.
145  default: KALDI_ERR << "Done() called on TableReader object at the wrong"
146  " time.";
147  return false;
148  }
149  }
150 
151  virtual std::string Key() {
152  // Valid to call this whenever Done() returns false.
153  switch (state_) {
154  case kHaveScpLine: case kHaveObject: case kHaveRange: break;
155  default:
156  // coding error.
157  KALDI_ERR << "Key() called on TableReader object at the wrong time.";
158  }
159  return key_;
160  }
161 
162  T &Value() {
163  if (!EnsureObjectLoaded())
164  KALDI_ERR << "Failed to load object from "
165  << PrintableRxfilename(data_rxfilename_)
166  << " (to suppress this error, add the permissive "
167  << "(p, ) option to the rspecifier.";
168  // Because EnsureObjectLoaded() returned with success, we know
169  // that if range_ is nonempty (i.e. a range was requested), the
170  // state will be kHaveRange.
171  if (state_ == kHaveRange) {
172  return range_holder_.Value();
173  } else {
174  KALDI_ASSERT(state_ == kHaveObject);
175  return holder_.Value();
176  }
177  }
178 
179  void FreeCurrent() {
180  if (state_ == kHaveObject) {
181  holder_.Clear();
182  state_ = kHaveScpLine;
183  } else if (state_ == kHaveRange) {
184  range_holder_.Clear();
185  state_ = kHaveObject;
186  } else {
187  KALDI_WARN << "FreeCurrent called at the wrong time.";
188  }
189  }
190 
191  void SwapHolder(Holder *other_holder) {
192  // call Value() to ensure we have a value, and ignore its return value while
193  // suppressing compiler warnings by casting to void. It will cause the
194  // program to die with KALDI_ERR if we couldn't get a value.
195  (void) Value();
196  // At this point we know that we successfully loaded an object,
197  // and if there was a range specified, it's in range_holder_.
198  if (state_ == kHaveObject) {
199  holder_.Swap(other_holder);
200  state_ = kHaveScpLine;
201  } else if (state_ == kHaveRange) {
202  range_holder_.Swap(other_holder);
203  state_ = kHaveObject;
204  // This indicates that we still have the base object (but no range).
205  } else {
206  KALDI_ERR << "Code error";
207  }
208  // Note: after this call there may be some junk left in range_holder_ or
209  // holder_, but it won't matter. We avoid calling Clear() on them, as this
210  // function needs to be lightweight for the 'bg' feature to work well.
211  }
212 
213  // Next goes to the next object.
214  // It can leave the object in most of the statuses, but
215  // the only circumstances under which it will return are:
216  // either:
217  // - if Done() returned true, i.e. kError or kEof.
218  // or:
219  // - in non-permissive mode, status kHaveScpLine or kHaveObjecct
220  // - in permissive mode, only when we successfully have an object,
221  // which means either (kHaveObject and range_.empty()), or
222  // kHaveRange.
223  void Next() {
224  while (1) {
225  NextScpLine();
226  if (Done()) return;
227  if (opts_.permissive) {
228  // Permissive mode means, when reading scp files, we treat keys whose
229  // scp entry cannot be read as nonexistent. This means trying to read.
230  if (EnsureObjectLoaded()) return; // Success.
231  // else try the next scp line.
232  } else {
233  return; // We go the next key; Value() will crash if we can't read the
234  // object on the scp line.
235  }
236  }
237  }
238 
239  // This function may be entered at in any state. At exit, the object will be
240  // in state kUninitialized. It only returns false in the situation where we
241  // were at the end of the stream (kEof) and the script_input_ was a pipe and
242  // it ended with error status; this is so that we can catch errors from
243  // programs that we invoked via a pipe.
244  virtual bool Close() {
245  int32 status = 0;
246  if (script_input_.IsOpen())
247  status = script_input_.Close();
248  if (data_input_.IsOpen())
249  data_input_.Close();
250  range_holder_.Clear();
251  holder_.Clear();
252  if (!this->IsOpen())
253  KALDI_ERR << "Close() called on input that was not open.";
254  StateType old_state = state_;
255  state_ = kUninitialized;
256  if (old_state == kError || (old_state == kEof && status != 0)) {
257  if (opts_.permissive) {
258  KALDI_WARN << "Close() called on scp file with read error, ignoring the"
259  " error because permissive mode specified.";
260  return true;
261  } else {
262  return false; // User will do something with the error status.
263  }
264  } else {
265  return true;
266  }
267  // Possible states Return value
268  // kLoadSucceeded/kRangeSucceeded/kRangeFailed true
269  // kError (if opts_.permissive) true
270  // kError (if !opts_.permissive) false
271  // kEof (if script_input_.Close() && !opts.permissive) false
272  // kEof (if !script_input_.Close() || opts.permissive) true
273  // kUninitialized/kFileStart/kHaveScpLine true
274  // kUnitialized true
275  }
276 
278  if (this->IsOpen() && !Close())
279  KALDI_ERR << "TableReader: reading script file failed: from scp "
280  << PrintableRxfilename(script_rxfilename_);
281  }
282  private:
283 
284  // Function EnsureObjectLoaded() ensures that we have fully loaded any object
285  // (including object range) associated with the current key, and returns true
286  // on success (i.e. we have the object) and false on failure.
287  //
288  // Possible entry states: kHaveScpLine, kHaveObject, kHaveRange.
289  //
290  // Possible exit states: kHaveScpLine, kHaveObject, kHaveRange.
291  //
292  // Note: the return status has information that cannot be deduced from
293  // just the exit state. If the object could not be loaded we stay in state
294  // kHaveScpLine but return false; and if the range was requested but
295  // could not be extracted, we stay in state kHaveObject but return false.
297  if (!(state_ == kHaveScpLine || state_ == kHaveObject ||
298  state_ == kHaveRange))
299  KALDI_ERR << "Invalid state (code error)";
300 
301  if (state_ == kHaveScpLine) { // need to load the object into holder_.
302  bool ans;
303  // note, NULL means it doesn't read the binary-mode header
304  if (Holder::IsReadInBinary()) {
305  ans = data_input_.Open(data_rxfilename_, NULL);
306  } else {
307  ans = data_input_.OpenTextMode(data_rxfilename_);
308  }
309  if (!ans) {
310  KALDI_WARN << "Failed to open file "
311  << PrintableRxfilename(data_rxfilename_);
312  return false;
313  } else {
314  if (holder_.Read(data_input_.Stream())) {
315  state_ = kHaveObject;
316  } else { // holder_ will not contain data.
317  KALDI_WARN << "Failed to load object from "
318  << PrintableRxfilename(data_rxfilename_);
319  return false;
320  }
321  }
322  }
323  // OK, at this point the state must be either
324  // kHaveObject or kHaveRange.
325  if (range_.empty()) {
326  // if range_ is the empty string, we should not be in the state
327  // kHaveRange.
328  KALDI_ASSERT(state_ == kHaveObject);
329  return true;
330  }
331  // range_ is nonempty.
332  if (state_ == kHaveRange) {
333  // range was already extracted, so there nothing to do.
334  return true;
335  }
336  // OK, range_ is nonempty and state_ is kHaveObject. We attempt to extract
337  // the range object. Note: ExtractRange() will throw with KALDI_ERR if the
338  // object type doesn't support ranges.
339  if (!range_holder_.ExtractRange(holder_, range_)) {
340  KALDI_WARN << "Failed to load object from "
341  << PrintableRxfilename(data_rxfilename_)
342  << "[" << range_ << "]";
343  return false;
344  } else {
345  state_ = kHaveRange;
346  return true;
347  }
348  }
349 
350  void SetErrorState() {
351  state_ = kError;
352  script_input_.Close();
353  data_input_.Close();
354  holder_.Clear();
355  range_holder_.Clear();
356  }
357 
358  // Reads the next line in the script file.
359  // Possible entry states: kHaveObject, kHaveRange, kHaveScpLine, kFileStart.
360  // Possible exit states: kEof, kError, kHaveScpLine, kHaveObject.
361  void NextScpLine() {
362  switch (state_) { // Check and simplify the state.
363  case kHaveRange:
364  range_holder_.Clear();
365  state_ = kHaveObject;
366  break;
367  case kHaveScpLine: case kHaveObject: case kFileStart: break;
368  default:
369  // No other states are valid to call Next() from.
370  KALDI_ERR << "Reading script file: Next called wrongly.";
371  }
372  // at this point the state will be kHaveObject, kHaveScpLine, or kFileStart.
373  std::string line;
374  if (getline(script_input_.Stream(), line)) {
375  // After extracting "key" from "line", we put the rest
376  // of "line" into "rest", and then extract data_rxfilename_
377  // (e.g. 1.ark:100) and possibly the range_ specifer
378  // (e.g. [1:2,2:10]) from "rest".
379  std::string data_rxfilename, rest;
380  SplitStringOnFirstSpace(line, &key_, &rest);
381  if (!key_.empty() && !rest.empty()) {
382  // Got a valid line.
383  if (rest[rest.size()-1] == ']') {
384  if(!ExtractRangeSpecifier(rest, &data_rxfilename, &range_)) {
385  KALDI_WARN << "Reading rspecifier '" << rspecifier_
386  << ", cannot make sense of scp line "
387  << line;
388  SetErrorState();
389  return;
390  }
391  } else {
392  data_rxfilename = rest;
393  range_ = "";
394  }
395  bool filenames_equal = (data_rxfilename_ == data_rxfilename);
396  if (!filenames_equal)
397  data_rxfilename_ = data_rxfilename;
398  if (state_ == kHaveObject) {
399  if (!filenames_equal) {
400  holder_.Clear();
401  state_ = kHaveScpLine;
402  }
403  // else leave state_ at kHaveObject and leave the object in the
404  // holder.
405  } else {
406  state_ = kHaveScpLine;
407  }
408  } else {
409  KALDI_WARN << "We got an invalid line in the scp file. "
410  << "It should look like: some_key 1.ark:10, got: "
411  << line;
412  SetErrorState();
413  }
414  } else {
415  state_ = kEof; // there is nothing more in the scp file. Might as well
416  // close input streams as we don't need them.
417  script_input_.Close();
418  if (data_input_.IsOpen())
419  data_input_.Close();
420  holder_.Clear(); // clear the holder if it was nonempty.
421  range_holder_.Clear(); // clear the range holder if it was nonempty.
422  }
423  }
424 
425  std::string rspecifier_; // the rspecifier that this class was opened with.
427  std::string script_rxfilename_; // rxfilename of the script file.
428 
429  Input script_input_; // Input object for the .scp file
430  Input data_input_; // Input object for the entries in the script file;
431  // we make this a class member instead of a local variable,
432  // so that rspecifiers of the form filename:byte-offset,
433  // e.g. foo.ark:12345, can be handled using fseek().
434 
435  Holder holder_; // Holds the object.
436  Holder range_holder_; // Holds the partial object corresponding to the object
437  // range specifier 'range_'; this is only used when
438  // 'range_' is specified, i.e. when the .scp file
439  // contains lines of the form rspecifier[range], like
440  // foo.ark:242[0:9] (representing a row range of a
441  // matrix).
442 
443 
444  std::string key_; // the key of the current scp line we're processing
445  std::string data_rxfilename_; // the rxfilename corresponding to the current key
446  std::string range_; // the range of object corresponding to the current key, if an
447  // object range was specified in the script file, else "".
448 
// The states this object can be in.  The four flags listed for each state
// answer, in this order:
//   (1) does holder_ contain the object corresponding to data_rxfilename_?
//   (2) does range_holder_ contain a range object?
//   (3) is script_input_ open?
//   (4) are key_, data_rxfilename_ and range_ [if applicable] set?
enum StateType {
  kUninitialized,  // no,  no,  no,  no.  Uninitialized or closed object.
  kFileStart,      // no,  no,  yes, no.  We just opened the .scp file (we
                   //   are never in this state when a user-visible function
                   //   is called).
  kEof,            // no,  no,  no,  no.  We did Next() and found eof in the
                   //   script file.
  kError,          // no,  no,  no,  no.  Error reading or parsing the script
                   //   file.
  kHaveScpLine,    // no,  no,  yes, yes. Have a line of the script file but
                   //   nothing else.
  kHaveObject,     // yes, no,  yes, yes. holder_ contains an object but
                   //   range_holder_ does not.
  kHaveRange       // yes, yes, yes, yes. We have the range object in
                   //   range_holder_ (implies range_ nonempty).
};
StateType state_;  // Current state of the reader.
468 
469 
470 };
471 
472 
473 // This is the implementation for SequentialTableReader
474 // when it's an archive. Note that the archive format is:
475 // key1 [space] object1 key2 [space]
476 // object2 ... eof.
477 // "object1" is the output of the Holder::Write function and will
478 // typically contain a binary header (in binary mode) and then
479 // the output of object.Write(os, binary).
480 // The archive itself does not care whether it is in binary
481 // or text mode, for reading purposes.
482 
483 template<class Holder> class SequentialTableReaderArchiveImpl:
484  public SequentialTableReaderImplBase<Holder> {
485  public:
486  typedef typename Holder::T T;
487 
488  SequentialTableReaderArchiveImpl(): state_(kUninitialized) { }
489 
490  virtual bool Open(const std::string &rspecifier) {
491  if (state_ != kUninitialized) {
492  if (!Close()) { // call Close() yourself to suppress this exception.
493  if (opts_.permissive)
494  KALDI_WARN << "Error closing previous input "
495  "(only warning, since permissive mode).";
496  else
497  KALDI_ERR << "Error closing previous input.";
498  }
499  }
500  rspecifier_ = rspecifier;
501  RspecifierType rs = ClassifyRspecifier(rspecifier,
502  &archive_rxfilename_,
503  &opts_);
505 
506  bool ans;
507  // NULL means don't expect binary-mode header
508  if (Holder::IsReadInBinary())
509  ans = input_.Open(archive_rxfilename_, NULL);
510  else
511  ans = input_.OpenTextMode(archive_rxfilename_);
512  if (!ans) { // header.
513  KALDI_WARN << "Failed to open stream "
514  << PrintableRxfilename(archive_rxfilename_);
515  state_ = kUninitialized; // Failure on Open
516  return false; // User should print the error message.
517  }
518  state_ = kFileStart;
519  Next();
520  if (state_ == kError) {
521  KALDI_WARN << "Error beginning to read archive file (wrong filename?): "
522  << PrintableRxfilename(archive_rxfilename_);
523  input_.Close();
524  state_ = kUninitialized;
525  return false;
526  }
527  KALDI_ASSERT(state_ == kHaveObject || state_ == kEof);
528  return true;
529  }
530 
531  virtual void Next() {
532  switch (state_) {
533  case kHaveObject:
534  holder_.Clear();
535  break;
536  case kFileStart: case kFreedObject:
537  break;
538  default:
539  KALDI_ERR << "Next() called wrongly.";
540  }
541  std::istream &is = input_.Stream();
542  is.clear(); // Clear any fail bits that may have been set... just in case
543  // this happened in the Read function.
544  is >> key_; // This eats up any leading whitespace and gets the string.
545  if (is.eof()) {
546  state_ = kEof;
547  return;
548  }
549  if (is.fail()) { // This shouldn't really happen, barring file-system
550  // errors.
551  KALDI_WARN << "Error reading archive "
552  << PrintableRxfilename(archive_rxfilename_);
553  state_ = kError;
554  return;
555  }
556  int c;
557  if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a
558  // space ' ' after the key.
559  // We also allow tab [which is consumed] and newline [which is not], just
560  // so we can read archives generated by scripts that may not be fully
561  // aware of how this format works.
562  KALDI_WARN << "Invalid archive file format: expected space after key "
563  << key_ << ", got character "
564  << CharToString(static_cast<char>(is.peek())) << ", reading "
565  << PrintableRxfilename(archive_rxfilename_);
566  state_ = kError;
567  return;
568  }
569  if (c != '\n') is.get(); // Consume the space or tab.
570  if (holder_.Read(is)) {
571  state_ = kHaveObject;
572  return;
573  } else {
574  KALDI_WARN << "Object read failed, reading archive "
575  << PrintableRxfilename(archive_rxfilename_);
576  state_ = kError;
577  return;
578  }
579  }
580 
581  virtual bool IsOpen() const {
582  switch (state_) {
583  case kEof: case kError: case kHaveObject: case kFreedObject: return true;
584  case kUninitialized: return false;
585  default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart
586  // is not valid state for user to call something on.
587  return false;
588  }
589  }
590 
591  virtual bool Done() const {
592  switch (state_) {
593  case kHaveObject:
594  return false;
595  case kEof: case kError:
596  return true; // Error-state counts as Done(), but destructor
597  // will fail (unless you check the status with Close()).
598  default:
599  KALDI_ERR << "Done() called on TableReader object at the wrong time.";
600  return false;
601  }
602  }
603 
604  virtual std::string Key() {
605  // Valid to call this whenever Done() returns false
606  switch (state_) {
607  case kHaveObject: break; // only valid case.
608  default:
609  // coding error.
610  KALDI_ERR << "Key() called on TableReader object at the wrong time.";
611  }
612  return key_;
613  }
614 
615  T &Value() {
616  switch (state_) {
617  case kHaveObject:
618  break; // only valid case.
619  default:
620  // coding error.
621  KALDI_ERR << "Value() called on TableReader object at the wrong time.";
622  }
623  return holder_.Value();
624  }
625 
626  virtual void FreeCurrent() {
627  if (state_ == kHaveObject) {
628  holder_.Clear();
629  state_ = kFreedObject;
630  } else {
631  KALDI_WARN << "FreeCurrent called at the wrong time.";
632  }
633  }
634 
635  void SwapHolder(Holder *other_holder) {
636  // call Value() to ensure we have a value, and ignore its return value while
637  // suppressing compiler warnings by casting to void.
638  (void) Value();
639  if (state_ == kHaveObject) {
640  holder_.Swap(other_holder);
641  state_ = kFreedObject;
642  } else {
643  KALDI_ERR << "SwapHolder called at the wrong time "
644  "(error related to ',bg' modifier).";
645  }
646  }
647 
648  virtual bool Close() {
649  // To clean up, Close() also closes the Input object if
650  // it's open. It will succeed if the stream was not in an error state,
651  // and the Input object isn't in an error state we've found eof in the archive.
652  if (!this->IsOpen())
653  KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
654  int32 status = 0;
655  if (input_.IsOpen())
656  status = input_.Close();
657  if (state_ == kHaveObject)
658  holder_.Clear();
659  StateType old_state = state_;
660  state_ = kUninitialized;
661  if (old_state == kError || (old_state == kEof && status != 0)) {
662  if (opts_.permissive) {
663  KALDI_WARN << "Error detected closing TableReader for archive "
664  << PrintableRxfilename(archive_rxfilename_)
665  << " but ignoring "
666  << "it as permissive mode specified.";
667  return true;
668  } else {
669  return false;
670  }
671  } else {
672  return true;
673  }
674  }
675 
677  if (this->IsOpen() && !Close())
678  KALDI_ERR << "TableReader: error detected closing archive "
679  << PrintableRxfilename(archive_rxfilename_);
680  }
681  private:
682  Input input_; // Input object for the archive
683  Holder holder_; // Holds the object.
684  std::string key_;
685  std::string rspecifier_;
686  std::string archive_rxfilename_;
// The state of the reading process.
// NOTE(review): Next() does not close input_ when it reaches kEof or kError;
// the stream is only closed by Close() (or by Open() when the very first
// read fails), so in those two states input_ may still be open.
enum StateType {
  kUninitialized,  // Uninitialized or closed; no object held, input_ closed.
  kFileStart,      // [State we use internally: just opened.]  No object
                   //   held, input_ open.
  kEof,            // We did Next() and found eof in the archive; no object
                   //   held.
  kError,          // Some other error; holder_ should not be relied upon.
  kHaveObject,     // We read the key and the object after it; holder_ has
                   //   the object, input_ open.
  kFreedObject     // The user called FreeCurrent() (or the object was
                   //   swapped out); no object held, input_ open.
};
StateType state_;  // Current state of the reader.
697 };
698 
699 // this is for when someone adds the 'bg' modifier; it wraps around the basic
700 // implementation and allows it to do the reading in a background thread.
701 template<class Holder>
703  public SequentialTableReaderImplBase<Holder> {
704  public:
705  typedef typename Holder::T T;
706 
709  base_reader_(base_reader) {}
710 
711  // This function ignores the rxfilename argument.
712  // We use the same function signature as the regular Open(),
713  // for convenience.
714  virtual bool Open(const std::string &rxfilename) {
715  KALDI_ASSERT(base_reader_ != NULL &&
716  base_reader_->IsOpen()); // or code error.
717  {
719  this);
720  }
721 
722  if (!base_reader_->Done())
723  Next();
724  return true;
725  }
726 
727  virtual bool IsOpen() const {
728  // Close() sets base_reader_ to NULL, and we never initialize this object
729  // with a non-open base_reader_, so no need to check if it's open.
730  return base_reader_ != NULL;
731  }
732 
734  try {
735  // This function is called in the background thread. The whole point of
736  // the background thread is that we don't want to do the actual reading
737  // (inside Next()) in the foreground.
738  while (base_reader_ != NULL && !base_reader_->Done()) {
739  consumer_sem_.Signal();
740  // Here is where the consumer process (parent thread) gets to do its
741  // stuff. Principally it calls SwapHolder()-- a shallow swap that is
742  // cheap.
743  producer_sem_.Wait();
744  // we check that base_reader_ is not NULL in case Close() was
745  // called in the main thread.
746  if (base_reader_ != NULL)
747  base_reader_->Next(); // here is where the work happens.
748  }
749  // this signal will be waited on in the Next() function of the foreground
750  // thread if it is still running, or Close() otherwise.
751  consumer_sem_.Signal();
752  // this signal may be waited on in Close().
753  consumer_sem_.Signal();
754  } catch (...) {
755  // There is nothing we called above that could potentially throw due to
756  // user data. So we treat reaching this point as a code-error condition.
757  // Closing base_reader_ will trigger an exception in Next() in the main
758  // thread when it checks that base_reader_->IsOpen().
759  if (base_reader_->IsOpen()) {
760  base_reader_->Close();
761  delete base_reader_;
762  base_reader_ = NULL;
763  }
764  consumer_sem_.Signal();
765  return;
766  }
767  }
769  object->RunInBackground();
770  }
771  virtual bool Done() const {
772  return key_.empty();
773  }
774  virtual std::string Key() {
775  if (key_.empty())
776  KALDI_ERR << "Calling Key() at the wrong time.";
777  return key_;
778  }
779  virtual T &Value() {
780  if (key_.empty())
781  KALDI_ERR << "Calling Value() at the wrong time.";
782  return holder_.Value();
783  }
784  void SwapHolder(Holder *other_holder) {
785  KALDI_ERR << "SwapHolder() should not be called on this class.";
786  }
787  virtual void FreeCurrent() {
788  if (key_.empty())
789  KALDI_ERR << "Calling FreeCurrent() at the wrong time.";
790  // note: ideally a call to Value() should crash if you have just called
791  // FreeCurrent(). For typical holders such as KaldiObjectHolder this will
792  // happen inside the holder_.Value() call. This won't be the case for all
793  // holders, but it's not a great loss (just a missed opportunity to spot a
794  // code error).
795  holder_.Clear();
796  }
797  virtual void Next() {
798  consumer_sem_.Wait();
799  if (base_reader_ == NULL || !base_reader_->IsOpen())
800  KALDI_ERR << "Error detected (likely code error) in background "
801  << "reader (',bg' option)";
802  if (base_reader_->Done()) {
803  // there is nothing else to read.
804  key_ = "";
805  } else {
806  key_ = base_reader_->Key();
807  base_reader_->SwapHolder(&holder_);
808  }
809  // this Signal() tells the producer thread, in the background,
810  // that it's now safe to read the next value.
811  producer_sem_.Signal();
812  }
813 
814  // note: we can be sure that Close() won't be called twice, as the TableReader
815  // object will delete this object after calling Close.
816  virtual bool Close() {
817  KALDI_ASSERT(base_reader_ != NULL && thread_.joinable());
818  // wait until the producer thread is idle.
819  consumer_sem_.Wait();
820  bool ans = true;
821  try {
822  ans = base_reader_->Close();
823  } catch (...) {
824  ans = false;
825  }
826  delete base_reader_;
827  // setting base_reader_ to NULL will cause the loop in the producer thread
828  // to exit.
829  base_reader_ = NULL;
830  producer_sem_.Signal();
831 
832  thread_.join();
833  return ans;
834  }
836  if (base_reader_) {
837  if (!Close()) {
838  KALDI_ERR << "Error detected closing background reader "
839  << "(relates to ',bg' modifier)";
840  }
841  }
842  }
843  private:
844  std::string key_;
845  Holder holder_;
846  // I couldn't figure out what to call these semaphores. consumer_sem_ is the
847  // one that the consumer (main thread) waits on; producer_sem_ is the one
848  // that the producer (background thread) waits on.
851  std::thread thread_;
853 
854 };
855 
856 template<class Holder>
858  &rspecifier): impl_(NULL) {
859  if (rspecifier != "" && !Open(rspecifier))
860  KALDI_ERR << "Error constructing TableReader: rspecifier is " << rspecifier;
861 }
862 
863 template<class Holder>
864 bool SequentialTableReader<Holder>::Open(const std::string &rspecifier) {
865  if (IsOpen())
866  if (!Close())
867  KALDI_ERR << "Could not close previously open object.";
868  // now impl_ will be NULL.
869 
870  RspecifierOptions opts;
871  RspecifierType wt = ClassifyRspecifier(rspecifier, NULL, &opts);
872  switch (wt) {
873  case kArchiveRspecifier:
875  break;
876  case kScriptRspecifier:
878  break;
879  case kNoRspecifier: default:
880  KALDI_WARN << "Invalid rspecifier " << rspecifier;
881  return false;
882  }
883  if (!impl_->Open(rspecifier)) {
884  delete impl_;
885  impl_ = NULL;
886  return false; // sub-object will have printed warnings.
887  }
888  if (opts.background) {
890  impl_);
891  if (!impl_->Open("")) {
892  // the rxfilename is ignored in that Open() call.
893  // It should only return false on code error.
894  return false;
895  }
896  }
897  return true;
898 }
899 
900 template<class Holder>
902  CheckImpl();
903  bool ans = impl_->Close();
904  delete impl_; // We don't keep around empty impl_ objects.
905  impl_ = NULL;
906  return ans;
907 }
908 
909 
910 template<class Holder>
912  return (impl_ != NULL); // Because we delete the object whenever
913  // that object is not open. Thus, the IsOpen functions of the
914  // Impl objects are not really needed.
915 }
916 
917 template<class Holder>
919  CheckImpl();
920  return impl_->Key(); // this call may throw if called wrongly in other ways,
921  // e.g. eof.
922 }
923 
924 
925 template<class Holder>
927  CheckImpl();
928  impl_->FreeCurrent();
929 }
930 
931 
932 template<class Holder>
935  CheckImpl();
936  return impl_->Value(); // This may throw (if EnsureObjectLoaded() returned false you
937  // are safe.).
938 }
939 
940 
941 template<class Holder>
943  CheckImpl();
944  impl_->Next();
945 }
946 
947 template<class Holder>
949  CheckImpl();
950  return impl_->Done();
951 }
952 
953 
954 template<class Holder>
956  delete impl_;
957  // Destructor of impl_ may throw.
958 }
959 
960 
961 
962 template<class Holder> class TableWriterImplBase {
963  public:
964  typedef typename Holder::T T;
965 
966  virtual bool Open(const std::string &wspecifier) = 0;
967 
968  // Write returns true on success, false on failure, but
969  // some errors may not be detected until we call Close().
970  // It throws (via KALDI_ERR) if called wrongly. We could
971  // have just thrown on all errors, since this is what
972  // TableWriter does; it was designed this way because originally
973  // TableWriter::Write returned an exit status.
974  virtual bool Write(const std::string &key, const T &value) = 0;
975 
976  // Flush will flush any archive; it does not return error status,
977  // any errors will be reported on the next Write or Close.
978  virtual void Flush() = 0;
979 
980  virtual bool Close() = 0;
981 
982  virtual bool IsOpen() const = 0;
983 
984  // May throw on write error if Close was not called.
985  virtual ~TableWriterImplBase() { }
986 
988  private:
990 };
991 
992 
993 // The implementation of TableWriter we use when writing directly
994 // to an archive with no associated scp.
995 template<class Holder>
997  public:
998  typedef typename Holder::T T;
999 
1000  virtual bool Open(const std::string &wspecifier) {
1001  switch (state_) {
1002  case kUninitialized:
1003  break;
1004  case kWriteError:
1005  KALDI_ERR << "Opening stream, already open with write error.";
1006  case kOpen: default:
1007  if (!Close()) // throw because this error may not have been previously
1008  // detected by the user.
1009  KALDI_ERR << "Opening stream, error closing previously open stream.";
1010  }
1011  wspecifier_ = wspecifier;
1012  WspecifierType ws = ClassifyWspecifier(wspecifier,
1013  &archive_wxfilename_,
1014  NULL,
1015  &opts_);
1016  KALDI_ASSERT(ws == kArchiveWspecifier); // or wrongly called.
1017 
1018  if (output_.Open(archive_wxfilename_, opts_.binary, false)) { // false
1019  // means no binary header.
1020  state_ = kOpen;
1021  return true;
1022  } else {
1023  // stream will not be open. User will report this error
1024  // (we return bool), so don't bother printing anything.
1025  state_ = kUninitialized;
1026  return false;
1027  }
1028  }
1029 
1030  virtual bool IsOpen() const {
1031  switch (state_) {
1032  case kUninitialized: return false;
1033  case kOpen: case kWriteError: return true;
1034  default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
1035  }
1036  return false;
1037  }
1038 
1039  // Write returns true on success, false on failure, but
1040  // some errors may not be detected till we call Close().
1041  virtual bool Write(const std::string &key, const T &value) {
1042  switch (state_) {
1043  case kOpen: break;
1044  case kWriteError:
1045  // user should have known from the last
1046  // call to Write that there was a problem.
1047  KALDI_WARN << "Attempting to write to invalid stream.";
1048  return false;
1049  case kUninitialized: default:
1050  KALDI_ERR << "Write called on invalid stream";
1051  }
1052  // state is now kOpen or kWriteError.
1053  if (!IsToken(key)) // e.g. empty string or has spaces...
1054  KALDI_ERR << "Using invalid key " << key;
1055  output_.Stream() << key << ' ';
1056  if (!Holder::Write(output_.Stream(), opts_.binary, value)) {
1057  KALDI_WARN << "Write failure to "
1058  << PrintableWxfilename(archive_wxfilename_);
1059  state_ = kWriteError;
1060  return false;
1061  }
1062  if (state_ == kWriteError) return false; // Even if this Write seems to
1063  // have succeeded, we fail because a previous Write failed and the archive
1064  // may be corrupted and unreadable.
1065 
1066  if (opts_.flush)
1067  Flush();
1068  return true;
1069  }
1070 
1071  // Flush will flush any archive; it does not return error status,
1072  // any errors will be reported on the next Write or Close.
1073  virtual void Flush() {
1074  switch (state_) {
1075  case kWriteError: case kOpen:
1076  output_.Stream().flush(); // Don't check error status.
1077  return;
1078  default:
1079  KALDI_WARN << "Flush called on not-open writer.";
1080  }
1081  }
1082 
1083  virtual bool Close() {
1084  if (!this->IsOpen() || !output_.IsOpen())
1085  KALDI_ERR << "Close called on a stream that was not open."
1086  << this->IsOpen() << ", " << output_.IsOpen();
1087  bool close_success = output_.Close();
1088  if (!close_success) {
1089  KALDI_WARN << "Error closing stream: wspecifier is " << wspecifier_;
1090  state_ = kUninitialized;
1091  return false;
1092  }
1093  if (state_ == kWriteError) {
1094  KALDI_WARN << "Closing writer in error state: wspecifier is "
1095  << wspecifier_;
1096  state_ = kUninitialized;
1097  return false;
1098  }
1099  state_ = kUninitialized;
1100  return true;
1101  }
1102 
1103  TableWriterArchiveImpl(): state_(kUninitialized) {}
1104 
1105  // May throw on write error if Close was not called.
1107  if (!IsOpen()) return;
1108  else if (!Close())
1109  KALDI_ERR << "At TableWriter destructor: Write failed or stream close "
1110  << "failed: wspecifier is "<< wspecifier_;
1111  }
1112 
1113  private:
1116  std::string wspecifier_;
1117  std::string archive_wxfilename_;
1118  enum { // is stream open?
1120  kOpen, // yes
1121  kWriteError, // yes
1122  } state_;
1123 };
1124 
1125 
1126 
1127 
1128 // The implementation of TableWriter we use when writing to
1129 // individual files (more generally, wxfilenames) specified
1130 // in an scp file that we read.
1131 
1132 // Note: the code for this class is similar to
1133 // RandomAccessTableReaderScriptImpl; try to keep them in sync.
1134 
1135 template<class Holder>
1137  public:
1138  typedef typename Holder::T T;
1139 
1140  TableWriterScriptImpl(): last_found_(0), state_(kUninitialized) {}
1141 
1142  virtual bool Open(const std::string &wspecifier) {
1143  switch (state_) {
1144  case kReadScript:
1145  KALDI_ERR << " Opening already open TableWriter: call Close first.";
1146  case kUninitialized: case kNotReadScript:
1147  break;
1148  }
1149  wspecifier_ = wspecifier;
1150  WspecifierType ws = ClassifyWspecifier(wspecifier,
1151  NULL,
1152  &script_rxfilename_,
1153  &opts_);
1154  KALDI_ASSERT(ws == kScriptWspecifier); // or wrongly called.
1155  KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point.
1156 
1157  if (!ReadScriptFile(script_rxfilename_,
1158  true, // print any warnings
1159  &script_)) { // error reading script file or invalid
1160  // format
1161  state_ = kNotReadScript;
1162  return false; // no need to print further warnings. user gets the error.
1163  }
1164  std::sort(script_.begin(), script_.end());
1165  for (size_t i = 0; i+1 < script_.size(); i++) {
1166  if (script_[i].first.compare(script_[i+1].first) >= 0) {
1167  // script[i] not < script[i+1] in lexical order...
1168  KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
1169  << " contains duplicate key " << script_[i].first;
1170  state_ = kNotReadScript;
1171  return false;
1172  }
1173  }
1174  state_ = kReadScript;
1175  return true;
1176  }
1177 
1178  virtual bool IsOpen() const { return (state_ == kReadScript); }
1179 
1180  virtual bool Close() {
1181  if (!IsOpen())
1182  KALDI_ERR << "Close() called on TableWriter that was not open.";
1183  state_ = kUninitialized;
1184  last_found_ = 0;
1185  script_.clear();
1186  return true;
1187  }
1188 
1189  // Write returns true on success, false on failure, but
1190  // some errors may not be detected till we call Close().
1191  virtual bool Write(const std::string &key, const T &value) {
1192  if (!IsOpen())
1193  KALDI_ERR << "Write called on invalid stream";
1194 
1195  if (!IsToken(key)) // e.g. empty string or has spaces...
1196  KALDI_ERR << "Using invalid key " << key;
1197 
1198  std::string wxfilename;
1199  if (!LookupFilename(key, &wxfilename)) {
1200  if (opts_.permissive) {
1201  return true; // In permissive mode, it's as if we're writing to
1202  // /dev/null for missing keys.
1203  } else {
1204  KALDI_WARN << "Script file "
1205  << PrintableRxfilename(script_rxfilename_)
1206  << " has no entry for key " <<key;
1207  return false;
1208  }
1209  }
1210  Output output;
1211  if (!output.Open(wxfilename, opts_.binary, false)) {
1212  // Open in the text/binary mode (on Windows) given by member var. "binary"
1213  // (obtained from wspecifier), but do not put the binary-mode header (it
1214  // will be written, if needed, by the Holder::Write function.)
1215  KALDI_WARN << "Failed to open stream: "
1216  << PrintableWxfilename(wxfilename);
1217  return false;
1218  }
1219  if (!Holder::Write(output.Stream(), opts_.binary, value)
1220  || !output.Close()) {
1221  KALDI_WARN << "Failed to write data to "
1222  << PrintableWxfilename(wxfilename);
1223 
1224  return false;
1225  }
1226  return true;
1227  }
1228 
1229  // Flush does nothing in this implementation, there is nothing to flush.
1230  virtual void Flush() { }
1231 
1232 
1234  // Nothing to do in destructor.
1235  }
1236 
1237  private:
1238  // Note: this function is almost the same as in
1239  // RandomAccessTableReaderScriptImpl.
1240  bool LookupFilename(const std::string &key, std::string *wxfilename) {
1241  // First, an optimization: if we're going consecutively, this will
1242  // make the lookup very fast.
1243  last_found_++;
1244  if (last_found_ < script_.size() && script_[last_found_].first == key) {
1245  *wxfilename = script_[last_found_].second;
1246  return true;
1247  }
1248  std::pair<std::string, std::string> pr(key, ""); // Important that ""
1249  // compares less than or equal to any string, so lower_bound points to the
1250  // element that has the same key.
1251  typedef typename std::vector<std::pair<std::string, std::string> >
1252  ::const_iterator IterType;
1253  IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
1254  if (iter != script_.end() && iter->first == key) {
1255  last_found_ = iter - script_.begin();
1256  *wxfilename = iter->second;
1257  return true;
1258  } else {
1259  return false;
1260  }
1261  }
1262 
1263 
1265  std::string wspecifier_;
1266  std::string script_rxfilename_;
1267 
1268  // the script_ variable contains pairs of (key, filename), sorted using
1269  // std::sort. This can be used with binary_search to look up filenames for
1270  // writing. If this becomes inefficient we can use std::unordered_map (but I
1271  // suspect this wouldn't be significantly faster & would use more memory).
1272  // If memory becomes a problem here, the user should probably be passing
1273  // only the relevant part of the scp file rather than expecting us to get too
1274  // clever in the code.
1275  std::vector<std::pair<std::string, std::string> > script_;
1276  size_t last_found_; // This is for an optimization used in LookupFilename.
1277 
1278  enum {
1281  kNotReadScript, // read of script failed.
1282  } state_;
1283 };
1284 
1285 
1286 // The implementation of TableWriter we use when writing directly
1287 // to an archive plus an associated scp.
1288 template<class Holder>
1290  public:
1291  typedef typename Holder::T T;
1292 
1293  virtual bool Open(const std::string &wspecifier) {
1294  switch (state_) {
1295  case kUninitialized:
1296  break;
1297  case kWriteError:
1298  KALDI_ERR << "Opening stream, already open with write error.";
1299  case kOpen: default:
1300  if (!Close()) // throw because this error may not have been previously
1301  // detected by user.
1302  KALDI_ERR << "Opening stream, error closing previously open stream.";
1303  }
1304  wspecifier_ = wspecifier;
1305  WspecifierType ws = ClassifyWspecifier(wspecifier,
1306  &archive_wxfilename_,
1307  &script_wxfilename_,
1308  &opts_);
1309  KALDI_ASSERT(ws == kBothWspecifier); // or wrongly called.
1310  if (ClassifyWxfilename(archive_wxfilename_) != kFileOutput)
1311  KALDI_WARN << "When writing to both archive and script, the script file "
1312  "will generally not be interpreted correctly unless the archive is "
1313  "an actual file: wspecifier = " << wspecifier;
1314 
1315  if (!archive_output_.Open(archive_wxfilename_, opts_.binary, false)) {
1316  // false means no binary header.
1317  state_ = kUninitialized;
1318  return false;
1319  }
1320  if (!script_output_.Open(script_wxfilename_, false, false)) { // first
1321  // false means text mode: script files always text-mode. second false
1322  // means don't write header (doesn't matter for text mode).
1323  archive_output_.Close(); // Don't care about status: error anyway.
1324  state_ = kUninitialized;
1325  return false;
1326  }
1327  state_ = kOpen;
1328  return true;
1329  }
1330 
1331  virtual bool IsOpen() const {
1332  switch (state_) {
1333  case kUninitialized: return false;
1334  case kOpen: case kWriteError: return true;
1335  default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
1336  }
1337  return false;
1338  }
1339 
1340  void MakeFilename(typename std::ostream::pos_type streampos,
1341  std::string *output) const {
1342  std::ostringstream ss;
1343  ss << ':' << streampos;
1344  KALDI_ASSERT(ss.str() != ":-1");
1345  *output = archive_wxfilename_ + ss.str();
1346 
1347  // e.g. /some/file:12302.
1348  // Note that we warned if archive_wxfilename_ is not an actual filename;
1349  // the philosophy is we give the user rope and if they want to hang
1350  // themselves, with it, fine.
1351  }
1352 
1353  // Write returns true on success, false on failure, but
1354  // some errors may not be detected till we call Close().
1355  virtual bool Write(const std::string &key, const T &value) {
1356  switch (state_) {
1357  case kOpen: break;
1358  case kWriteError:
1359  // user should have known from the last
1360  // call to Write that there was a problem. Warn about it.
1361  KALDI_WARN << "Writing to non-open TableWriter object.";
1362  return false;
1363  case kUninitialized: default:
1364  KALDI_ERR << "Write called on invalid stream";
1365  }
1366  // state is now kOpen or kWriteError.
1367  if (!IsToken(key)) // e.g. empty string or has spaces...
1368  KALDI_ERR << "Using invalid key " << key;
1369  std::ostream &archive_os = archive_output_.Stream();
1370  archive_os << key << ' ';
1371  typename std::ostream::pos_type archive_os_pos = archive_os.tellp();
1372  // position at start of Write() to archive. We will record this in the
1373  // script file.
1374  std::string offset_rxfilename; // rxfilename with offset into the archive,
1375  // e.g. some_archive_name.ark:431541423
1376  MakeFilename(archive_os_pos, &offset_rxfilename);
1377 
1378  // Write to the script file first.
1379  // The idea is that we want to get all the information possible into the
1380  // script file, to make it easier to unwind errors later.
1381  std::ostream &script_os = script_output_.Stream();
1382  script_output_.Stream() << key << ' ' << offset_rxfilename << '\n';
1383 
1384  if (!Holder::Write(archive_output_.Stream(), opts_.binary, value)) {
1385  KALDI_WARN << "Write failure to"
1386  << PrintableWxfilename(archive_wxfilename_);
1387  state_ = kWriteError;
1388  return false;
1389  }
1390 
1391  if (script_os.fail()) {
1392  KALDI_WARN << "Write failure to script file detected: "
1393  << PrintableWxfilename(script_wxfilename_);
1394  state_ = kWriteError;
1395  return false;
1396  }
1397 
1398  if (archive_os.fail()) {
1399  KALDI_WARN << "Write failure to archive file detected: "
1400  << PrintableWxfilename(archive_wxfilename_);
1401  state_ = kWriteError;
1402  return false;
1403  }
1404 
1405  if (state_ == kWriteError) return false; // Even if this Write seems to
1406  // have succeeded, we fail because a previous Write failed and the archive
1407  // may be corrupted and unreadable.
1408 
1409  if (opts_.flush)
1410  Flush();
1411  return true;
1412  }
1413 
1414  // Flush will flush any archive; it does not return error status,
1415  // any errors will be reported on the next Write or Close.
1416  virtual void Flush() {
1417  switch (state_) {
1418  case kWriteError: case kOpen:
1419  archive_output_.Stream().flush(); // Don't check error status.
1420  script_output_.Stream().flush(); // Don't check error status.
1421  return;
1422  default:
1423  KALDI_WARN << "Flush called on not-open writer.";
1424  }
1425  }
1426 
1427  virtual bool Close() {
1428  if (!this->IsOpen())
1429  KALDI_ERR << "Close called on a stream that was not open.";
1430  bool close_success = true;
1431  if (archive_output_.IsOpen())
1432  if (!archive_output_.Close()) close_success = false;
1433  if (script_output_.IsOpen())
1434  if (!script_output_.Close()) close_success = false;
1435  bool ans = close_success && (state_ != kWriteError);
1436  state_ = kUninitialized;
1437  return ans;
1438  }
1439 
1440  TableWriterBothImpl(): state_(kUninitialized) {}
1441 
1442  // May throw on write error if Close() was not called.
1443  // User can get the error status by calling Close().
1445  if (!IsOpen()) return;
1446  else if (!Close())
1447  KALDI_ERR << "Write failed or stream close failed: "
1448  << wspecifier_;
1449  }
1450 
1451  private:
1455  std::string archive_wxfilename_;
1456  std::string script_wxfilename_;
1457  std::string wspecifier_;
1458  enum { // is stream open?
1460  kOpen, // yes
1461  kWriteError, // yes
1462  } state_;
1463 };
1464 
1465 
1466 template<class Holder>
1467 TableWriter<Holder>::TableWriter(const std::string &wspecifier): impl_(NULL) {
1468  if (wspecifier != "" && !Open(wspecifier))
1469  KALDI_ERR << "Failed to open table for writing with wspecifier: " << wspecifier
1470  << ": errno (in case it's relevant) is: " << strerror(errno);
1471 }
1472 
1473 template<class Holder>
1475  return (impl_ != NULL);
1476 }
1477 
1478 
1479 template<class Holder>
1480 bool TableWriter<Holder>::Open(const std::string &wspecifier) {
1481  if (IsOpen()) {
1482  if (!Close()) // call Close() yourself to suppress this exception.
1483  KALDI_ERR << "Failed to close previously open writer.";
1484  }
1485  KALDI_ASSERT(impl_ == NULL);
1486  WspecifierType wtype = ClassifyWspecifier(wspecifier, NULL, NULL, NULL);
1487  switch (wtype) {
1488  case kBothWspecifier:
1490  break;
1491  case kArchiveWspecifier:
1493  break;
1494  case kScriptWspecifier:
1496  break;
1497  case kNoWspecifier: default:
1498  KALDI_WARN << "ClassifyWspecifier: invalid wspecifier " << wspecifier;
1499  return false;
1500  }
1501  if (impl_->Open(wspecifier)) {
1502  return true;
1503  } else { // The class will have printed a more specific warning.
1504  delete impl_;
1505  impl_ = NULL;
1506  return false;
1507  }
1508 }
1509 
1510 template<class Holder>
1511 void TableWriter<Holder>::Write(const std::string &key,
1512  const T &value) const {
1513  CheckImpl();
1514  if (!impl_->Write(key, value))
1515  KALDI_ERR << "Error in TableWriter::Write";
1516  // More specific warning will have
1517  // been printed in the Write function.
1518 }
1519 
1520 template<class Holder>
1522  CheckImpl();
1523  impl_->Flush();
1524 }
1525 
1526 template<class Holder>
1528  CheckImpl();
1529  bool ans = impl_->Close();
1530  delete impl_; // We don't keep around non-open impl_ objects
1531  // [c.f. definition of IsOpen()]
1532  impl_ = NULL;
1533  return ans;
1534 }
1535 
1536 template<class Holder>
1538  if (IsOpen() && !Close()) {
1539  KALDI_ERR << "Error closing TableWriter [in destructor].";
1540  }
1541 }
1542 
1543 
1544 // Types of RandomAccessTableReader:
1545 // In principle, we would like to have four types of RandomAccessTableReader:
1546 // the 4 combinations [scp, archive], [seekable, not-seekable],
1547 // where if something is seekable we only store a file offset. However,
1548 // it seems sufficient for now to only implement two of these, in both
1549 // cases assuming it's not seekable so we never store file offsets and always
1550 // store either the scp line or the data in the archive. The reasons are:
1551 // (1)
1552 // For scp files, storing the actual entry is not that much more expensive
1553 // than storing the file offsets (since the entries are just filenames), and
1554 // avoids a lot of fseek operations that might be expensive.
1555 // (2)
1556 // For archive files, there is no real reason, if you have the archive file
1557 // on disk somewhere, why you wouldn't access it via its associated scp.
1558 // [i.e. write it as ark, scp]. The main reason to read archives directly
1559 // is if they are part of a pipe, and in this case it's not seekable, so
1560 // we implement only this case.
1561 //
1562 // Note that we will rarely in practice have to keep in memory everything in
1563 // the archive, as long as things are only read once from the archive (the
1564 // "o, " or "once" option) and as long as we keep our keys in sorted order;
1565 // to take advantage of this we need the "s, " (sorted) option, so we would
1566 // read archives as e.g. "s, o, ark:-" (this is the rspecifier we would use if
1567 // it was the standard input and these conditions held).
1568 
1569 template<class Holder> class RandomAccessTableReaderImplBase {
1570  public:
1571  typedef typename Holder::T T;
1572 
1573  virtual bool Open(const std::string &rspecifier) = 0;
1574 
1575  virtual bool HasKey(const std::string &key) = 0;
1576 
1577  virtual const T &Value(const std::string &key) = 0;
1578 
1579  virtual bool Close() = 0;
1580 
1582 };
1583 
1584 
1585 // Implementation of RandomAccessTableReader for a script file; for simplicity
1586 // we just read it in all in one go, as it's unlikely someone would generate
1587 // this from a pipe. In principle we could read it on-demand as for the
1588 // archives, but this would probably be overkill.
1589 
// Note: the code for this class is similar to TableWriterScriptImpl:
1591 // try to keep them in sync.
1592 template<class Holder>
1594  public RandomAccessTableReaderImplBase<Holder> {
1595  public:
1596  typedef typename Holder::T T;
1597 
1598  RandomAccessTableReaderScriptImpl(): last_found_(0), state_(kUninitialized) {}
1599 
1600  virtual bool Open(const std::string &rspecifier) {
1601  switch (state_) {
1602  case kNotHaveObject: case kHaveObject: case kHaveRange:
1603  KALDI_ERR << " Opening already open RandomAccessTableReader:"
1604  " call Close first.";
1605  case kUninitialized: case kNotReadScript:
1606  break;
1607  }
1608  rspecifier_ = rspecifier;
1609  RspecifierType rs = ClassifyRspecifier(rspecifier,
1610  &script_rxfilename_,
1611  &opts_);
1612  KALDI_ASSERT(rs == kScriptRspecifier); // or wrongly called.
1613  KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point
1614 
1615  if (!ReadScriptFile(script_rxfilename_,
1616  true, // print any warnings
1617  &script_)) { // error reading script file or invalid
1618  // format
1619  state_ = kNotReadScript;
1620  return false; // no need to print further warnings. user gets the error.
1621  }
1622 
1623  rspecifier_ = rspecifier;
1624  // If opts_.sorted, the user has asserted that the keys are already sorted.
1625  // Although we could easily sort them, we want to let the user know of this
1626  // mistake. This same mistake could have serious effects if used with an
1627  // archive rather than a script.
1628  if (!opts_.sorted)
1629  std::sort(script_.begin(), script_.end());
1630  for (size_t i = 0; i + 1 < script_.size(); i++) {
1631  if (script_[i].first.compare(script_[i+1].first) >= 0) {
1632  // script[i] not < script[i+1] in lexical order...
1633  bool same = (script_[i].first == script_[i+1].first);
1634  KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
1635  << (same ? " contains duplicate key: " :
1636  " is not sorted (remove s, option or add ns, option):"
1637  " key is ") << script_[i].first;
1638  state_ = kNotReadScript;
1639  return false;
1640  }
1641  }
1642  state_ = kNotHaveObject;
1643  key_ = ""; // make sure we don't have a key set
1644  return true;
1645  }
1646 
1647  virtual bool IsOpen() const {
1648  return (state_ == kNotHaveObject || state_ == kHaveObject ||
1649  state_ == kHaveRange);
1650  }
1651 
1652  virtual bool Close() {
1653  if (!IsOpen())
1654  KALDI_ERR << "Close() called on RandomAccessTableReader that was not"
1655  " open.";
1656  holder_.Clear();
1657  range_holder_.Clear();
1658  state_ = kUninitialized;
1659  last_found_ = 0;
1660  script_.clear();
1661  key_ = "";
1662  range_ = "";
1663  data_rxfilename_ = "";
1664  // This cannot fail because any errors of a "global" nature would have been
1665  // detected when we did Open(). With archives it's different.
1666  return true;
1667  }
1668 
1669  virtual bool HasKey(const std::string &key) {
1670  bool preload = opts_.permissive;
1671  // In permissive mode, we have to check that we can read
1672  // the scp entry before we assert that the key is there.
1673  return HasKeyInternal(key, preload);
1674  }
1675 
1676 
1677  // Write returns true on success, false on failure, but
1678  // some errors may not be detected till we call Close().
1679  virtual const T& Value(const std::string &key) {
1680  if (!HasKeyInternal(key, true)) // true == preload.
1681  KALDI_ERR << "Could not get item for key " << key
1682  << ", rspecifier is " << rspecifier_ << " [to ignore this, "
1683  << "add the p, (permissive) option to the rspecifier.";
1684  KALDI_ASSERT(key_ == key);
1685  if (state_ == kHaveObject) {
1686  return holder_.Value();
1687  } else {
1688  KALDI_ASSERT(state_ == kHaveRange);
1689  return range_holder_.Value();
1690  }
1691  }
1692 
1694 
1695  private:
1696 
1697  // HasKeyInternal when called with preload == false just tells us whether the
1698  // key is in the scp. With preload == true, which happens when the ,p
1699  // (permissive) option is given in the rspecifier (or when called from
1700  // Value()), it will also check that we can preload the object from disk
1701  // (loading from the rxfilename in the scp), and only return true if we can.
1702  // This function is called both from HasKey and from Value().
1703  virtual bool HasKeyInternal(const std::string &key, bool preload) {
1704  switch (state_) {
1705  case kUninitialized: case kNotReadScript:
1706  KALDI_ERR << "HasKey called on RandomAccessTableReader object that is"
1707  " not open.";
1708  case kHaveObject:
1709  if (key == key_ && range_.empty())
1710  return true;
1711  break;
1712  case kHaveRange:
1713  if (key == key_)
1714  return true;
1715  break;
1716  case kNotHaveObject: default: break;
1717  }
1718  KALDI_ASSERT(IsToken(key));
1719  size_t key_pos = 0;
1720  if (!LookupKey(key, &key_pos)) {
1721  return false;
1722  } else {
1723  if (!preload) {
1724  return true; // we have the key, and were not asked to verify that the
1725  // object could be read.
1726  } else { // preload specified, so we have to attempt to pre-load the
1727  // object before returning.
1728  std::string data_rxfilename, range; // We will split
1729  // script_[key_pos].second (e.g. "1.ark:100[0:2]" into data_rxfilename
1730  // (e.g. "1.ark:100") and range (if any), e.g. "0:2".
1731  if (script_[key_pos].second[script_[key_pos].second.size()-1] == ']') {
1732  if(!ExtractRangeSpecifier(script_[key_pos].second,
1733  &data_rxfilename,
1734  &range)) {
1735  KALDI_ERR << "TableReader: failed to parse range in '"
1736  << script_[key_pos].second << "'";
1737  }
1738  } else {
1739  data_rxfilename = script_[key_pos].second;
1740  }
1741  if (state_ == kHaveRange) {
1742  if (data_rxfilename_ == data_rxfilename && range_ == range) {
1743  // the odd situation where two keys had the same rxfilename and range:
1744  // just change the key and keep the object.
1745  key_ = key;
1746  return true;
1747  } else {
1748  range_holder_.Clear();
1749  state_ = kHaveObject;
1750  }
1751  }
1752  // OK, at this point the state will be kHaveObject or kNotHaveObject.
1753  if (state_ == kHaveObject) {
1754  if (data_rxfilename_ != data_rxfilename) {
1755  // clear out the object.
1756  state_ = kNotHaveObject;
1757  holder_.Clear();
1758  }
1759  }
1760  // At this point we can safely switch to the new key, data_rxfilename
1761  // and range, and we know that if we have an object, it will already be
1762  // the correct one. The state is now kHaveObject or kNotHaveObject.
1763  key_ = key;
1764  data_rxfilename_ = data_rxfilename;
1765  range_ = range;
1766  if (state_ == kNotHaveObject) {
1767  // we need to read the object.
1768  if (!input_.Open(data_rxfilename)) {
1769  KALDI_WARN << "Error opening stream "
1770  << PrintableRxfilename(data_rxfilename);
1771  return false;
1772  } else {
1773  if (holder_.Read(input_.Stream())) {
1774  state_ = kHaveObject;
1775  } else {
1776  KALDI_WARN << "Error reading object from "
1777  "stream " << PrintableRxfilename(data_rxfilename);
1778  return false;
1779  }
1780  }
1781  }
1782  // At this point the state is kHaveObject.
1783  if (range.empty())
1784  return true; // we're done: no range was requested.
1785  if (range_holder_.ExtractRange(holder_, range)) {
1786  state_ = kHaveRange;
1787  return true;
1788  } else {
1789  KALDI_WARN << "Failed to load object from "
1790  << PrintableRxfilename(data_rxfilename)
1791  << "[" << range << "]";
1792  // leave state at kHaveObject.
1793  return false;
1794  }
1795  }
1796  }
1797  }
1798 
1799  // This function attempts to look up the key "key" in the sorted array
1800  // script_. If it was found it returns true and puts the array offset into
1801  // 'script_offset'; otherwise it returns false.
1802  bool LookupKey(const std::string &key, size_t *script_offset) {
1803  // First, an optimization: if we're going consecutively, this will
1804  // make the lookup very fast. Since we may call HasKey and then
1805  // Value(), which both may look up the key, we test if either the
1806  // current or next position are correct.
1807  if (last_found_ < script_.size() && script_[last_found_].first == key) {
1808  *script_offset = last_found_;
1809  return true;
1810  }
1811  last_found_++;
1812  if (last_found_ < script_.size() && script_[last_found_].first == key) {
1813  *script_offset = last_found_;
1814  return true;
1815  }
1816  std::pair<std::string, std::string> pr(key, ""); // Important that ""
1817  // compares less than or equal to any string, so lower_bound points to the
1818  // element that has the same key.
1819  typedef typename std::vector<std::pair<std::string, std::string> >
1820  ::const_iterator IterType;
1821  IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
1822  if (iter != script_.end() && iter->first == key) {
1823  last_found_ = *script_offset = iter - script_.begin();
1824  return true;
1825  } else {
1826  return false;
1827  }
1828  }
1829 
1830 
1831  Input input_; // Use the same input_ object for reading each file, in case
1832  // the scp specifies offsets in an archive so we can keep the
1833  // same file open.
1835  std::string rspecifier_; // rspecifier used to open this object; used in
1836  // debug messages
1837  std::string script_rxfilename_; // rxfilename of script file that we read.
1838 
1839  std::string key_; // The current key of the object that we have, but see the
1840  // notes regarding states_ for more explanation of the
1841  // semantics.
1842 
1843  Holder holder_;
1844  Holder range_holder_; // Holds the partial object corresponding to the object
1845  // range specifier 'range_'. this is only used when
1846  // 'range_' is specified.
1847  std::string range_; // range within which we read the object from holder_.
1848  // If key_ is set, always correspond to the key.
1849  std::string data_rxfilename_; // the rxfilename corresponding to key_,
1850  // always set when key_ is set.
1851 
1852 
1853  // the script_ variable contains pairs of (key, filename), sorted using
1854  // std::sort. This can be used with binary_search to look up filenames for
1855  // writing. If this becomes inefficient we can use std::unordered_map (but I
1856  // suspect this wouldn't be significantly faster & would use more memory).
1857  // If memory becomes a problem here, the user should probably be passing
1858  // only the relevant part of the scp file rather than expecting us to get too
1859  // clever in the code.
1860  std::vector<std::pair<std::string, std::string> > script_;
1861  size_t last_found_; // This is for an optimization used in FindFilename.
1862 
1863  enum {
1864  // (*) is script_ set up?
1865  // (*) does holder_ contain an object?
1866  // (*) does range_holder_ contain an object?
1867  //
1868  //
1869  kUninitialized, // no no no
1870  kNotReadScript, // no no no
1871  kNotHaveObject, // yes no no
1872  kHaveObject, // yes yes no
1873  kHaveRange, // yes yes yes
1874 
1875  // If we are in a state where holder_ contains an object, it always contains
1876  // the object from 'key_', and the corresponding rxfilename is always
1877  // 'data_rxfilename_'. If range_holder_ contains an object, it always
1878  // corresponds to the range 'range_' of the object in 'holder_', and always
1879  // corresponds to the current key.
1880  } state_;
1881 };
1882 
1883 
1884 
1885 
1886 // This is the base-class (with some implemented functions) for the
1887 // implementations of RandomAccessTableReader when it's an archive. This
1888 // base-class handles opening the files, storing the state of the reading
1889 // process, and loading objects. This is the only case in which we have
1890 // an intermediate class in the hierarchy between the virtual ImplBase
1891 // class and the actual Impl classes.
1892 // The child classes vary in the assumptions regarding sorting, etc.
1893 
1894 template<class Holder>
1896  public RandomAccessTableReaderImplBase<Holder> {
1897  public:
1898  typedef typename Holder::T T;
1899 
1901  state_(kUninitialized) { }
1902 
1903  virtual bool Open(const std::string &rspecifier) {
1904  if (state_ != kUninitialized) {
1905  if (!this->Close()) // call Close() yourself to suppress this exception.
1906  KALDI_ERR << "Error closing previous input.";
1907  }
1908  rspecifier_ = rspecifier;
1909  RspecifierType rs = ClassifyRspecifier(rspecifier, &archive_rxfilename_,
1910  &opts_);
1912 
1913  // NULL means don't expect binary-mode header
1914  bool ans;
1915  if (Holder::IsReadInBinary())
1916  ans = input_.Open(archive_rxfilename_, NULL);
1917  else
1918  ans = input_.OpenTextMode(archive_rxfilename_);
1919  if (!ans) { // header.
1920  KALDI_WARN << "Failed to open stream "
1921  << PrintableRxfilename(archive_rxfilename_);
1922  state_ = kUninitialized; // Failure on Open
1923  return false; // User should print the error message.
1924  } else {
1925  state_ = kNoObject;
1926  }
1927  return true;
1928  }
1929 
1930  // ReadNextObject() requires that the state be kNoObject,
1931  // and it will try read the next object. If it succeeds,
1932  // it sets the state to kHaveObject, and
1933  // cur_key_ and holder_ have the key and value. If it fails,
1934  // it sets the state to kError or kEof.
1936  if (state_ != kNoObject)
1937  KALDI_ERR << "ReadNextObject() called from wrong state.";
1938  // Code error somewhere in this class or a child class.
1939  std::istream &is = input_.Stream();
1940  is.clear(); // Clear any fail bits that may have been set... just in case
1941  // this happened in the Read function.
1942  is >> cur_key_; // This eats up any leading whitespace and gets the string.
1943  if (is.eof()) {
1944  state_ = kEof;
1945  return;
1946  }
1947  if (is.fail()) { // This shouldn't really happen, barring file-system
1948  // errors.
1949  KALDI_WARN << "Error reading archive: rspecifier is " << rspecifier_;
1950  state_ = kError;
1951  return;
1952  }
1953  int c;
1954  if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a
1955  // space ' ' after the key.
1956  // We also allow tab, just so we can read archives generated by scripts
1957  // that may not be fully aware of how this format works.
1958  KALDI_WARN << "Invalid archive file format: expected space after key "
1959  <<cur_key_
1960  <<", got character "
1961  << CharToString(static_cast<char>(is.peek()))
1962  << ", reading archive "
1963  << PrintableRxfilename(archive_rxfilename_);
1964  state_ = kError;
1965  return;
1966  }
1967  if (c != '\n') is.get(); // Consume the space or tab.
1968  holder_ = new Holder;
1969  if (holder_->Read(is)) {
1970  state_ = kHaveObject;
1971  return;
1972  } else {
1973  KALDI_WARN << "Object read failed, reading archive "
1974  << PrintableRxfilename(archive_rxfilename_);
1975  state_ = kError;
1976  delete holder_;
1977  holder_ = NULL;
1978  return;
1979  }
1980  }
1981 
1982  virtual bool IsOpen() const {
1983  switch (state_) {
1984  case kEof: case kError: case kHaveObject: case kNoObject: return true;
1985  case kUninitialized: return false;
1986  default: KALDI_ERR << "IsOpen() called on invalid object.";
1987  return false;
1988  }
1989  }
1990 
1991  // Called by the child-class virutal Close() functions; does the
1992  // shared parts of the cleanup.
1993  bool CloseInternal() {
1994  if (!this->IsOpen())
1995  KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
1996  if (input_.IsOpen())
1997  input_.Close();
1998  if (state_ == kHaveObject) {
1999  KALDI_ASSERT(holder_ != NULL);
2000  delete holder_;
2001  holder_ = NULL;
2002  } else {
2003  KALDI_ASSERT(holder_ == NULL);
2004  }
2005  bool ans = (state_ != kError);
2006  state_ = kUninitialized;
2007  if (!ans && opts_.permissive) {
2008  KALDI_WARN << "Error state detected closing reader. "
2009  << "Ignoring it because you specified permissive mode.";
2010  return true;
2011  }
2012  return ans;
2013  }
2014 
2016  // The child class has the responsibility to call CloseInternal().
2017  KALDI_ASSERT(state_ == kUninitialized && holder_ == NULL);
2018  }
2019  private:
2020  Input input_; // Input object for the archive
2021  protected:
2022  // The variables below are accessed by child classes.
2023 
2024  std::string cur_key_; // current key (if state == kHaveObject).
2025  Holder *holder_; // Holds the object we just read (if state == kHaveObject)
2026 
2027  std::string rspecifier_;
2028  std::string archive_rxfilename_;
2030 
2031  enum { // [The state of the reading process] [does holder_ [is input_
2032  // have object] open]
2033  kUninitialized, // Uninitialized or closed no no
2034  kNoObject, // Do not have object in holder_ no yes
2035  kHaveObject, // Have object in holder_ yes yes
2036  kEof, // End of file no yes
2037  kError, // Some kind of error-state in the reading. no yes
2038  } state_;
2039 };
2040 
2041 
2042 // RandomAccessTableReaderDSortedArchiveImpl (DSorted for "doubly sorted") is
2043 // the implementation for random-access reading of archives when both the
2044 // archive, and the calling code, are in sorted order (i.e. we ask for the keys
2045 // in sorted order). This is when the s and cs options are both given. It only
2046 // ever has to keep one object in memory. It inherits from
2047 // RandomAccessTableReaderArchiveImplBase which implements the common parts of
2048 // RandomAccessTableReader that are used when it's an archive we're reading from
2049 
2050 template<class Holder>
2065  public:
2066  typedef typename Holder::T T;
2067 
2069 
2070  virtual bool Close() {
2071  // We don't have anything additional to clean up, so just
2072  // call generic base-class one.
2073  return this->CloseInternal();
2074  }
2075 
2076  virtual bool HasKey(const std::string &key) {
2077  return FindKeyInternal(key);
2078  }
2079  virtual const T & Value(const std::string &key) {
2080  if (!FindKeyInternal(key)) {
2081  KALDI_ERR << "Value() called but no such key " << key
2082  << " in archive " << PrintableRxfilename(archive_rxfilename_);
2083  }
2084  KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_
2085  && holder_ != NULL);
2086  return this->holder_->Value();
2087  }
2088 
2090  if (this->IsOpen())
2091  if (!Close()) // more specific warning will already have been printed.
2092  // we are in some kind of error state & user did not find out by
2093  // calling Close().
2094  KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
2095  << rspecifier_;
2096  }
2097  private:
2098  // FindKeyInternal tries to find the key by calling "ReadNextObject()"
2099  // as many times as necessary till we get to it. It is called from
2100  // both FindKey and Value().
2101  bool FindKeyInternal(const std::string &key) {
2102  // First check that the user is calling us right: should be
2103  // in sorted order. If not, error.
2104  if (!last_requested_key_.empty()) {
2105  if (key.compare(last_requested_key_) < 0) { // key < last_requested_key_
2106  KALDI_ERR << "You provided the \"cs\" option "
2107  << "but are not calling with keys in sorted order: "
2108  << key << " < " << last_requested_key_ << ": rspecifier is "
2109  << rspecifier_;
2110  }
2111  }
2112  // last_requested_key_ is just for debugging of order of calling.
2113  last_requested_key_ = key;
2114 
2115  if (state_ == kNoObject)
2116  ReadNextObject(); // This can only happen
2117  // once, the first time someone calls HasKey() or Value(). We don't
2118  // do it in the initializer to stop the program hanging too soon,
2119  // if reading from a pipe.
2120 
2121  if (state_ == kEof || state_ == kError) return false;
2122 
2123  if (state_ == kUninitialized)
2124  KALDI_ERR << "Trying to access a RandomAccessTableReader object that is"
2125  " not open.";
2126 
2127  std::string last_key_; // To check that
2128  // the archive we're reading is in sorted order.
2129  while (1) {
2130  KALDI_ASSERT(state_ == kHaveObject);
2131  int compare = key.compare(cur_key_);
2132  if (compare == 0) { // key == key_
2133  return true; // we got it..
2134  } else if (compare < 0) { // key < cur_key_, so we already read past the
2135  // place where we want to be. This implies that we will never find it
2136  // [due to the sorting etc., this means it just isn't in the archive].
2137  return false;
2138  } else { // compare > 0, key > cur_key_. We need to read further ahead.
2139  last_key_ = cur_key_;
2140  // read next object.. we have to set state to kNoObject first.
2141  KALDI_ASSERT(holder_ != NULL);
2142  delete holder_;
2143  holder_ = NULL;
2144  state_ = kNoObject;
2145  ReadNextObject();
2146  if (state_ != kHaveObject)
2147  return false; // eof or read error.
2148  if (cur_key_.compare(last_key_) <= 0) {
2149  KALDI_ERR << "You provided the \"s\" option "
2150  << " (sorted order), but keys are out of order or"
2151  " duplicated: "
2152  << last_key_ << " is followed by " << cur_key_
2153  << ": rspecifier is " << rspecifier_;
2154  }
2155  }
2156  }
2157  }
2158 
2160  std::string last_requested_key_;
2161 };
2162 
2163 // RandomAccessTableReaderSortedArchiveImpl is for random-access reading of
2164 // archives when the user specified the sorted (s) option but not the
2165 // called-sorted (cs) options.
2166 template<class Holder>
2181 
2182  public:
2183  typedef typename Holder::T T;
2184 
2186  last_found_index_(static_cast<size_t>(-1)),
2187  pending_delete_(static_cast<size_t>(-1)) { }
2188 
2189  virtual bool Close() {
2190  for (size_t i = 0; i < seen_pairs_.size(); i++)
2191  delete seen_pairs_[i].second;
2192  seen_pairs_.clear();
2193 
2194  pending_delete_ = static_cast<size_t>(-1);
2195  last_found_index_ = static_cast<size_t>(-1);
2196 
2197  return this->CloseInternal();
2198  }
2199  virtual bool HasKey(const std::string &key) {
2200  HandlePendingDelete();
2201  size_t index;
2202  bool ans = FindKeyInternal(key, &index);
2203  if (ans && opts_.once && seen_pairs_[index].second == NULL) {
2204  // Just do a check RE the once option. "&&opts_.once" is for
2205  // efficiency since this can only happen in that case.
2206  KALDI_ERR << "Error: HasKey called after Value() already called for "
2207  << " that key, and once (o) option specified: rspecifier is "
2208  << rspecifier_;
2209  }
2210  return ans;
2211  }
2212  virtual const T & Value(const std::string &key) {
2213  HandlePendingDelete();
2214  size_t index;
2215  if (!FindKeyInternal(key, &index)) {
2216  KALDI_ERR << "Value() called but no such key " << key
2217  << " in archive " << PrintableRxfilename(archive_rxfilename_);
2218  }
2219  if (seen_pairs_[index].second == NULL) { // can happen if opts.once_
2220  KALDI_ERR << "Error: Value() called more than once for key "
2221  << key << " and once (o) option specified: rspecifier is "
2222  << rspecifier_;
2223  }
2224  if (opts_.once)
2225  pending_delete_ = index; // mark this index to be deleted on next call.
2226  return seen_pairs_[index].second->Value();
2227  }
2229  if (this->IsOpen())
2230  if (!Close()) // more specific warning will already have been printed.
2231  // we are in some kind of error state & user did not find out by
2232  // calling Close().
2233  KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
2234  << rspecifier_;
2235  }
2236  private:
2238  const size_t npos = static_cast<size_t>(-1);
2239  if (pending_delete_ != npos) {
2240  KALDI_ASSERT(pending_delete_ < seen_pairs_.size());
2241  KALDI_ASSERT(seen_pairs_[pending_delete_].second != NULL);
2242  delete seen_pairs_[pending_delete_].second;
2243  seen_pairs_[pending_delete_].second = NULL;
2244  pending_delete_ = npos;
2245  }
2246  }
2247 
2248  // FindKeyInternal tries to find the key in the array "seen_pairs_".
2249  // If it is not already there, it reads ahead as far as necessary
2250  // to determine whether we have the key or not. On success it returns
2251  // true and puts the index into the array seen_pairs_, into "index";
2252  // on failure it returns false.
2253  // It will leave the state as either kNoObject, kEof or kError.
2254  // FindKeyInternal does not do any checking about whether you are asking
2255  // about a key that has been already given (with the "once" option).
2256  // That is the user's responsibility.
2257 
2258  bool FindKeyInternal(const std::string &key, size_t *index) {
2259  // First, an optimization in case the previous call was for the
2260  // same key, and we found it.
2261  if (last_found_index_ < seen_pairs_.size()
2262  && seen_pairs_[last_found_index_].first == key) {
2263  *index = last_found_index_;
2264  return true;
2265  }
2266 
2267  if (state_ == kUninitialized)
2268  KALDI_ERR << "Trying to access a RandomAccessTableReader object that is"
2269  " not open.";
2270 
2271  // Step one is to see whether we have to read ahead for the object..
2272  // Note, the possible states right now are kNoObject, kEof or kError.
2273  // We are never in the state kHaveObject except just after calling
2274  // ReadNextObject().
2275  bool looped = false;
2276  while (state_ == kNoObject &&
2277  (seen_pairs_.empty() || key.compare(seen_pairs_.back().first) > 0)) {
2278  looped = true;
2279  // Read this as:
2280  // while ( the stream is potentially good for reading &&
2281  // ([got no keys] || key > most_recent_key) ) { ...
2282  // Try to read a new object.
2283  // Note that the keys in seen_pairs_ are ordered from least to greatest.
2284  ReadNextObject();
2285  if (state_ == kHaveObject) { // Successfully read object.
2286  if (!seen_pairs_.empty() && // This is just a check.
2287  cur_key_.compare(seen_pairs_.back().first) <= 0) {
2288  // read the expression above as: !( cur_key_ > previous_key).
2289  // it means we are not in sorted order [the user specified that we
2290  // are, or we would not be using this implementation].
2291  KALDI_ERR << "You provided the sorted (s) option but keys in archive "
2292  << PrintableRxfilename(archive_rxfilename_) << " are not "
2293  << "in sorted order: " << seen_pairs_.back().first
2294  << " is followed by " << cur_key_;
2295  }
2296  KALDI_ASSERT(holder_ != NULL);
2297  seen_pairs_.push_back(std::make_pair(cur_key_, holder_));
2298  holder_ = NULL;
2299  state_ = kNoObject;
2300  }
2301  }
2302  if (looped) { // We only need to check the last element of the seen_pairs_
2303  // array, since we would not have read more after getting "key".
2304  if (!seen_pairs_.empty() && seen_pairs_.back().first == key) {
2305  last_found_index_ = *index = seen_pairs_.size() - 1;
2306  return true;
2307  } else {
2308  return false;
2309  }
2310  }
2311  // Now we have do an actual binary search in the seen_pairs_ array.
2312  std::pair<std::string, Holder*> pr(key, static_cast<Holder*>(NULL));
2313  typename std::vector<std::pair<std::string, Holder*> >::iterator
2314  iter = std::lower_bound(seen_pairs_.begin(), seen_pairs_.end(),
2315  pr, PairCompare());
2316  if (iter != seen_pairs_.end() &&
2317  key == iter->first) {
2318  last_found_index_ = *index = (iter - seen_pairs_.begin());
2319  return true;
2320  } else {
2321  return false;
2322  }
2323  }
2324 
2325  // These are the pairs of (key, object) we have read. We keep all the keys we
2326  // have read but the actual objects (if they are stored with pointers inside
2327  // the Holder object) may be deallocated if once == true, and the Holder
2328  // pointer set to NULL.
2329  std::vector<std::pair<std::string, Holder*> > seen_pairs_;
2330  size_t last_found_index_; // An optimization s.t. if FindKeyInternal called
2331  // twice with same key (as it often will), it doesn't have to do the key
2332  // search twice.
2333  size_t pending_delete_; // If opts_.once == true, this is the index of
2334  // element of seen_pairs_ that is pending deletion.
2335  struct PairCompare {
2336  // PairCompare is the Less-than operator for the pairs of(key, Holder).
2337  // compares the keys.
2338  inline bool operator() (const std::pair<std::string, Holder*> &pr1,
2339  const std::pair<std::string, Holder*> &pr2) {
2340  return (pr1.first.compare(pr2.first) < 0);
2341  }
2342  };
2343 };
2344 
2345 
2346 
2347 // RandomAccessTableReaderUnsortedArchiveImpl is for random-access reading of
2348 // archives when the user does not specify the sorted (s) option (in this case
2349 // the called-sorted, or "cs" option, is ignored). This is the least efficient
2350 // of the random access archive readers, in general, but it can be as efficient
2351 // as the others, in speed, memory and latency, if the "once" option is
2352 // specified and it happens that the keys of the archive are the same as the
2353 // keys the code is called with (to HasKey() and Value()), and in the same
2354 // order. However, if you ask it for a key that's not present it will have to
2355 // read the archive till the end and store it all in memory.
2356 
2357 template<class Holder>
2372 
2373  typedef typename Holder::T T;
2374 
2375  public:
2376  RandomAccessTableReaderUnsortedArchiveImpl(): to_delete_iter_(map_.end()),
2377  to_delete_iter_valid_(false) {
2378  map_.max_load_factor(0.5); // make it quite empty -> quite efficient.
2379  // default seems to be 1.
2380  }
2381 
2382  virtual bool Close() {
2383  for (typename MapType::iterator iter = map_.begin();
2384  iter != map_.end();
2385  ++iter) {
2386  delete iter->second;
2387  }
2388  map_.clear();
2389  first_deleted_string_ = "";
2390  to_delete_iter_valid_ = false;
2391  return this->CloseInternal();
2392  }
2393 
2394  virtual bool HasKey(const std::string &key) {
2395  HandlePendingDelete();
2396  return FindKeyInternal(key, NULL);
2397  }
2398  virtual const T & Value(const std::string &key) {
2399  HandlePendingDelete();
2400  const T *ans_ptr = NULL;
2401  if (!FindKeyInternal(key, &ans_ptr))
2402  KALDI_ERR << "Value() called but no such key " << key
2403  << " in archive " << PrintableRxfilename(archive_rxfilename_);
2404  return *ans_ptr;
2405  }
2407  if (this->IsOpen())
2408  if (!Close()) // more specific warning will already have been printed.
2409  // we are in some kind of error state & user did not find out by
2410  // calling Close().
2411  KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
2412  << rspecifier_;
2413  }
2414  private:
2416  if (to_delete_iter_valid_) {
2417  to_delete_iter_valid_ = false;
2418  delete to_delete_iter_->second; // Delete Holder object.
2419  if (first_deleted_string_.length() == 0)
2420  first_deleted_string_ = to_delete_iter_->first;
2421  map_.erase(to_delete_iter_); // delete that element.
2422  }
2423  }
2424 
2425  // FindKeyInternal tries to find the key in the map "map_"
2426  // If it is not already there, it reads ahead either until it finds the
2427  // key, or until end of file. If called with value_ptr == NULL,
2428  // it assumes it's called from HasKey() and just returns true or false
2429  // and doesn't otherwise have side effects. If called with value_ptr !=
2430  // NULL, it assumes it's called from Value(). Thus, it will crash
2431  // if it cannot find the key. If it can find it it puts its address in
2432  // *value_ptr, and if opts_once == true it will mark that element of the
2433  // map to be deleted.
2434 
2435  bool FindKeyInternal(const std::string &key, const T **value_ptr = NULL) {
2436  typename MapType::iterator iter = map_.find(key);
2437  if (iter != map_.end()) { // Found in the map...
2438  if (value_ptr == NULL) { // called from HasKey
2439  return true; // this is all we have to do.
2440  } else {
2441  *value_ptr = &(iter->second->Value());
2442  if (opts_.once) { // value won't be needed again, so mark
2443  // for deletion.
2444  to_delete_iter_ = iter; // pending delete.
2445  KALDI_ASSERT(!to_delete_iter_valid_);
2446  to_delete_iter_valid_ = true;
2447  }
2448  return true;
2449  }
2450  }
2451  while (state_ == kNoObject) {
2452  ReadNextObject();
2453  if (state_ == kHaveObject) { // Successfully read object.
2454  state_ = kNoObject; // we are about to transfer ownership
2455  // of the object in holder_ to map_.
2456  // Insert it into map_.
2457  std::pair<typename MapType::iterator, bool> pr =
2458  map_.insert(typename MapType::value_type(cur_key_, holder_));
2459 
2460  if (!pr.second) { // Was not inserted-- previous element w/ same key
2461  delete holder_; // map was not changed, no ownership transferred.
2462  holder_ = NULL;
2463  KALDI_ERR << "Error in RandomAccessTableReader: duplicate key "
2464  << cur_key_ << " in archive " << archive_rxfilename_;
2465  }
2466  holder_ = NULL; // ownership transferred to map_.
2467  if (cur_key_ == key) { // the one we wanted..
2468  if (value_ptr == NULL) { // called from HasKey
2469  return true;
2470  } else { // called from Value()
2471  *value_ptr = &(pr.first->second->Value()); // this gives us the
2472  // Value() from the Holder in the map.
2473  if (opts_.once) { // mark for deletion, as won't be needed again.
2474  to_delete_iter_ = pr.first;
2475  KALDI_ASSERT(!to_delete_iter_valid_);
2476  to_delete_iter_valid_ = true;
2477  }
2478  return true;
2479  }
2480  }
2481  }
2482  }
2483  if (opts_.once && key == first_deleted_string_) {
2484  KALDI_ERR << "You specified the once (o) option but "
2485  << "you are calling using key " << key
2486  << " more than once: rspecifier is " << rspecifier_;
2487  }
2488  return false; // We read the entire archive (or got to error state) and
2489  // didn't find it.
2490  }
2491 
2492  typedef unordered_map<std::string, Holder*, StringHasher> MapType;
2493  MapType map_;
2494 
2495  typename MapType::iterator to_delete_iter_;
2497 
2498  std::string first_deleted_string_; // keep the first string we deleted
2499  // from map_ (if opts_.once == true). It's for an inexact spot-check that the
2500  // "once" option isn't being used incorrectly.
2501 };
2502 
2503 
2504 
2505 
2506 
2507 template<class Holder>
2509  std::string &rspecifier):
2510  impl_(NULL) {
2511  if (rspecifier != "" && !Open(rspecifier))
2512  KALDI_ERR << "Error opening RandomAccessTableReader object "
2513  " (rspecifier is: " << rspecifier << ")";
2514 }
2515 
2516 template<class Holder>
2517 bool RandomAccessTableReader<Holder>::Open(const std::string &rspecifier) {
2518  if (IsOpen())
2519  KALDI_ERR << "Already open.";
2520  RspecifierOptions opts;
2521  RspecifierType rs = ClassifyRspecifier(rspecifier, NULL, &opts);
2522  switch (rs) {
2523  case kScriptRspecifier:
2525  break;
2526  case kArchiveRspecifier:
2527  if (opts.sorted) {
2528  if (opts.called_sorted) // "doubly" sorted case.
2530  else
2532  } else {
2534  }
2535  break;
2536  case kNoRspecifier: default:
2537  KALDI_WARN << "Invalid rspecifier: "
2538  << rspecifier;
2539  return false;
2540  }
2541  if (!impl_->Open(rspecifier)) {
2542  // A warning will already have been printed.
2543  delete impl_;
2544  impl_ = NULL;
2545  return false;
2546  }
2547  return true;
2548 }
2549 
2550 template<class Holder>
2551 bool RandomAccessTableReader<Holder>::HasKey(const std::string &key) {
2552  CheckImpl();
2553  if (!IsToken(key))
2554  KALDI_ERR << "Invalid key \"" << key << '"';
2555  return impl_->HasKey(key);
2556 }
2557 
2558 
2559 template<class Holder>
2560 const typename RandomAccessTableReader<Holder>::T&
2562  CheckImpl();
2563  return impl_->Value(key);
2564 }
2565 
2566 template<class Holder>
2568  CheckImpl();
2569  bool ans =impl_->Close();
2570  delete impl_;
2571  impl_ = NULL;
2572  return ans;
2573 }
2574 
2575 template<class Holder>
2577  if (IsOpen() && !Close()) // call Close() yourself to stop this being thrown.
2578  KALDI_ERR << "failure detected in destructor.";
2579 }
2580 
2581 template<class Holder>
2583  if (!impl_) {
2584  KALDI_ERR << "Trying to use empty SequentialTableReader (perhaps you "
2585  << "passed the empty string as an argument to a program?)";
2586  }
2587 }
2588 
2589 template<class Holder>
2591  if (!impl_) {
2592  KALDI_ERR << "Trying to use empty RandomAccessTableReader (perhaps you "
2593  << "passed the empty string as an argument to a program?)";
2594  }
2595 }
2596 
2597 template<class Holder>
2599  if (!impl_) {
2600  KALDI_ERR << "Trying to use empty TableWriter (perhaps you "
2601  << "passed the empty string as an argument to a program?)";
2602  }
2603 }
2604 
2605 template<class Holder>
2607  const std::string &table_rxfilename,
2608  const std::string &utt2spk_rxfilename):
2609  reader_(table_rxfilename), token_reader_(table_rxfilename.empty() ? "" :
2610  utt2spk_rxfilename),
2611  utt2spk_rxfilename_(utt2spk_rxfilename) { }
2612 
2613 template<class Holder>
2615  const std::string &table_rxfilename,
2616  const std::string &utt2spk_rxfilename) {
2617  if (reader_.IsOpen()) reader_.Close();
2618  if (token_reader_.IsOpen()) token_reader_.Close();
2619  KALDI_ASSERT(!table_rxfilename.empty());
2620  if (!reader_.Open(table_rxfilename)) return false; // will have printed
2621  // warning internally, probably.
2622  if (!utt2spk_rxfilename.empty()) {
2623  if (!token_reader_.Open(utt2spk_rxfilename)) {
2624  reader_.Close();
2625  return false;
2626  }
2627  }
2628  return true;
2629 }
2630 
2631 
2632 template<class Holder>
2634  // We don't check IsOpen, we let the call go through to the member variable
2635  // (reader_), which will crash with a more informative error message than
2636  // we can give here, as we don't any longer know the rxfilename.
2637  if (token_reader_.IsOpen()) { // We need to map the key from utt to spk.
2638  if (!token_reader_.HasKey(utt))
2639  KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
2640  << "in utt2spk map or similar map being read from "
2642  const std::string &spk = token_reader_.Value(utt);
2643  return reader_.HasKey(spk);
2644  } else {
2645  return reader_.HasKey(utt);
2646  }
2647 }
2648 
2649 template<class Holder>
2651  const std::string &utt) {
2652  if (token_reader_.IsOpen()) { // We need to map the key from utt to spk.
2653  if (!token_reader_.HasKey(utt))
2654  KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
2655  << "in utt2spk map or similar map being read from "
2657  const std::string &spk = token_reader_.Value(utt);
2658  return reader_.Value(spk);
2659  } else {
2660  return reader_.Value(utt);
2661  }
2662 }
2663 
2664 
2665 
2667 
2668 } // end namespace kaldi
2669 
2670 
2671 
2672 #endif // KALDI_UTIL_KALDI_TABLE_INL_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual bool Open(const std::string &rspecifier)
virtual bool Open(const std::string &rspecifier)
virtual bool IsOpen() const
virtual bool Open(const std::string &wspecifier)
bool Open(const std::string &wspecifier)
SequentialTableReaderImplBase< Holder > * base_reader_
virtual const T & Value(const std::string &key)
virtual bool HasKeyInternal(const std::string &key, bool preload)
RandomAccessTableReader< TokenHolder > token_reader_
Definition: kaldi-table.h:461
virtual bool Open(const std::string &rxfilename)
This class is for when you are reading something in random access, but it may actually be stored per-...
Definition: kaldi-table.h:432
static void run(SequentialTableReaderBackgroundImpl< Holder > *object)
virtual void SwapHolder(Holder *other_holder)=0
bool Open(const std::string &rspecifier)
bool LookupFilename(const std::string &key, std::string *wxfilename)
virtual bool Open(const std::string &wspecifier)
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
SequentialTableReaderImplBase< Holder > * impl_
Definition: kaldi-table.h:361
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121
virtual bool HasKey(const std::string &key)
void Write(const std::string &key, const T &value) const
std::vector< std::pair< std::string, std::string > > script_
void MakeFilename(typename std::ostream::pos_type streampos, std::string *output) const
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
bool IsToken(const std::string &token)
Returns true if "token" is nonempty, and all characters are printable and whitespace-free.
Definition: text-utils.cc:105
std::vector< std::pair< std::string, std::string > > script_
std::vector< std::pair< std::string, Holder * > > seen_pairs_
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
unordered_map< std::string, Holder *, StringHasher > MapType
bool FindKeyInternal(const std::string &key, const T **value_ptr=NULL)
virtual bool Open(const std::string &rxfilename)=0
virtual bool Open(const std::string &rspecifier)
std::ostream & Stream()
Definition: kaldi-io.cc:701
virtual const T & Value(const std::string &key)
virtual bool Write(const std::string &key, const T &value)
void SplitStringOnFirstSpace(const std::string &str, std::string *first, std::string *rest)
Removes leading and trailing white space from the string, then splits on the first section of whitesp...
Definition: text-utils.cc:120
bool Open(const std::string &table_rxfilename, const std::string &utt2spk_rxfilename)
Note: when calling Open, utt2spk_rxfilename may be empty.
RspecifierType
Definition: kaldi-table.h:219
virtual bool Write(const std::string &key, const T &value)
virtual bool Write(const std::string &key, const T &value)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
virtual const T & Value(const std::string &key)
virtual const T & Value(const std::string &key)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
virtual bool Done() const =0
WspecifierType
Definition: kaldi-table.h:106
bool HasKey(const std::string &key)
virtual bool Open(const std::string &rspecifier)
KALDI_DISALLOW_COPY_AND_ASSIGN(SequentialTableReaderImplBase)
RandomAccessTableReader< Holder > reader_
Definition: kaldi-table.h:460
WspecifierType ClassifyWspecifier(const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
Definition: kaldi-table.cc:135
std::string CharToString(const char &c)
Definition: kaldi-utils.cc:36
virtual bool IsOpen() const =0
bool Open(const std::string &wxfilename, bool binary, bool write_header)
This opens the stream, with the given mode (binary or text).
Definition: kaldi-io.cc:707
bool ReadScriptFile(const std::string &rxfilename, bool warn, std::vector< std::pair< std::string, std::string > > *script_out)
Definition: kaldi-table.cc:26
virtual bool HasKey(const std::string &key)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
virtual bool IsOpen() const
std::string last_requested_key_
Last string provided to HasKey() or Value().
OutputType ClassifyWxfilename(const std::string &filename)
ClassifyWxfilename interprets filenames as follows:
Definition: kaldi-io.cc:85
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:61
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
void SwapHolder(Holder *other_holder)
bool ExtractRangeSpecifier(const std::string &rxfilename_with_range, std::string *data_rxfilename, std::string *range)
const T & Value(const std::string &key)
bool FindKeyInternal(const std::string &key, size_t *index)
TableWriterImplBase< Holder > * impl_
Definition: kaldi-table.h:414
virtual bool Open(const std::string &wspecifier)
void SwapHolder(Holder *other_holder)
bool LookupKey(const std::string &key, size_t *script_offset)
SequentialTableReaderBackgroundImpl(SequentialTableReaderImplBase< Holder > *base_reader)
virtual bool HasKey(const std::string &key)
virtual bool HasKey(const std::string &key)
bool Close()
Definition: kaldi-io.cc:677