kaldi-io.cc
Go to the documentation of this file.
1 // util/kaldi-io.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation; Jan Silovsky
4 // 2016 Xiaohui Zhang
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 
12 // http://www.apache.org/licenses/LICENSE-2.0
13 
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 #include "util/kaldi-io.h"
21 #include <errno.h>
22 #include <cstdlib>
23 #include "base/kaldi-math.h"
24 #include "util/text-utils.h"
25 #include "util/parse-options.h"
26 #include "util/kaldi-holder.h"
27 #include "util/kaldi-pipebuf.h"
28 #include "util/kaldi-table.h" // for Classify{W,R}specifier
29 #include <stdio.h>
30 #include <stdlib.h>
31 
32 #ifdef KALDI_CYGWIN_COMPAT
34 #define MapOsPath(x) MapCygwinPath(x)
35 #else // KALDI_CYGWIN_COMPAT
36 #define MapOsPath(x) x
37 #endif // KALDI_CYGWIN_COMPAT
38 
39 
40 #if defined(_MSC_VER)
41 static FILE *popen(const char* command, const char* mode) {
42 #ifdef KALDI_CYGWIN_COMPAT
43  return kaldi::CygwinCompatPopen(command, mode);
44 #else // KALDI_CYGWIN_COMPAT
45  return _popen(command, mode);
46 #endif // KALDI_CYGWIN_COMPAT
47 }
48 #endif // _MSC_VER
49 
50 namespace kaldi {
51 
52 #ifndef _MSC_VER // on VS, we don't need this type.
53 // could replace basic_pipebuf<char> with stdio_filebuf<char> on some platforms.
54 // Would mean we could use less of our own code.
56 #endif
57 }
58 
59 namespace kaldi {
60 
61 std::string PrintableRxfilename(const std::string &rxfilename) {
62  if (rxfilename == "" || rxfilename == "-") {
63  return "standard input";
64  } else {
65  // If this call to Escape later causes compilation issues,
66  // just replace it with "return rxfilename"; it's only a
67  // pretty-printing issue.
68  return ParseOptions::Escape(rxfilename);
69  }
70 }
71 
72 
73 std::string PrintableWxfilename(const std::string &wxfilename) {
74  if (wxfilename == "" || wxfilename == "-") {
75  return "standard output";
76  } else {
77  // If this call to Escape later causes compilation issues,
78  // just replace it with "return wxfilename"; it's only a
79  // pretty-printing issue.
80  return ParseOptions::Escape(wxfilename);
81  }
82 }
83 
84 
85 OutputType ClassifyWxfilename(const std::string &filename) {
86  const char *c = filename.c_str();
87  size_t length = filename.length();
88  char first_char = c[0],
89  last_char = (length == 0 ? '\0' : c[filename.length()-1]);
90 
91  // if 'filename' is "" or "-", return kStandardOutput.
92  if (length == 0 || (length == 1 && first_char == '-'))
93  return kStandardOutput;
94  else if (first_char == '|') return kPipeOutput; // An output pipe like "|blah".
95  else if (isspace(first_char) || isspace(last_char) || last_char == '|') {
96  return kNoOutput; // Leading or trailing space: can't interpret this.
97  // Final '|' would represent an input pipe, not an
98  // output pipe.
99  } else if ((first_char == 'a' || first_char == 's') &&
100  strchr(c, ':') != NULL &&
101  (ClassifyWspecifier(filename, NULL, NULL, NULL) != kNoWspecifier ||
102  ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) {
103  // e.g. ark:something or scp:something... this is almost certainly a
104  // scripting error, so call it an error rather than treating it as a file.
105  // In practice in modern kaldi scripts all (r,w)filenames begin with "ark"
106  // or "scp", even though technically speaking options like "b", "t", "s" or
107  // "cs" can appear before the ark or scp, like "b,ark". For efficiency,
108  // and because this code is really just a nicety to catch errors earlier
109  // than they would otherwise be caught, we only call those extra functions
110  // for filenames beginning with 'a' or 's'.
111  return kNoOutput;
112  } else if (isdigit(last_char)) {
113  // This could be a file, but we have to see if it's an offset into a file
114  // (like foo.ark:4314328), which is not allowed for writing (but is
115  // allowed for reaching). This eliminates some things which would be
116  // valid UNIX filenames but are not allowed by Kaldi. (Even if we allowed
117  // such filenames for writing, we woudln't be able to correctly read them).
118  const char *d = c + length - 1;
119  while (isdigit(*d) && d > c) d--;
120  if (*d == ':') return kNoOutput;
121  // else it could still be a filename; continue to the next check.
122  }
123 
124  // At this point it matched no other pattern so we assume a filename, but we
125  // check for internal '|' as it's a common source of errors to have pipe
126  // commands without the pipe in the right place. Say that it can't be
127  // classified.
128  if (strchr(c, '|') != NULL) {
129  KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the"
130  " wrong place (pipe without | at the beginning?): " <<
131  filename;
132  return kNoOutput;
133  }
134  return kFileOutput; // It matched no other pattern: assume it's a filename.
135 }
136 
137 
138 InputType ClassifyRxfilename(const std::string &filename) {
139  const char *c = filename.c_str();
140  size_t length = filename.length();
141  char first_char = c[0],
142  last_char = (length == 0 ? '\0' : c[filename.length()-1]);
143 
144  // if 'filename' is "" or "-", return kStandardInput.
145  if (length == 0 || (length == 1 && first_char == '-')) {
146  return kStandardInput;
147  } else if (first_char == '|') {
148  return kNoInput; // An output pipe like "|blah": not
149  // valid for input.
150  } else if (last_char == '|') {
151  return kPipeInput;
152  } else if (isspace(first_char) || isspace(last_char)) {
153  return kNoInput; // We don't allow leading or trailing space in a filename.
154  } else if ((first_char == 'a' || first_char == 's') &&
155  strchr(c, ':') != NULL &&
156  (ClassifyWspecifier(filename, NULL, NULL, NULL) != kNoWspecifier ||
157  ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) {
158  // e.g. ark:something or scp:something... this is almost certainly a
159  // scripting error, so call it an error rather than treating it as a file.
160  // In practice in modern kaldi scripts all (r,w)filenames begin with "ark"
161  // or "scp", even though technically speaking options like "b", "t", "s" or
162  // "cs" can appear before the ark or scp, like "b,ark". For efficiency,
163  // and because this code is really just a nicety to catch errors earlier
164  // than they would otherwise be caught, we only call those extra functions
165  // for filenames beginning with 'a' or 's'.
166  return kNoInput;
167  } else if (isdigit(last_char)) {
168  const char *d = c + length - 1;
169  while (isdigit(*d) && d > c) d--;
170  if (*d == ':') return kOffsetFileInput; // Filename is like
171  // some_file:12345
172  // otherwise it could still be a filename; continue to the next check.
173  }
174 
175 
176  // At this point it matched no other pattern so we assume a filename, but
177  // we check for '|' as it's a common source of errors to have pipe
178  // commands without the pipe in the right place. Say that it can't be
179  // classified in this case.
180  if (strchr(c, '|') != NULL) {
181  KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the"
182  " wrong place (pipe without | at the end?): " << filename;
183  return kNoInput;
184  }
185  return kFileInput; // It matched no other pattern: assume it's a filename.
186 }
187 
189  public:
190  // Open will open it as a file (no header), and return true
191  // on success. It cannot be called on an already open stream.
192  virtual bool Open(const std::string &filename, bool binary) = 0;
193  virtual std::ostream &Stream() = 0;
194  virtual bool Close() = 0;
195  virtual ~OutputImplBase() { }
196 };
197 
198 
200  public:
201  virtual bool Open(const std::string &filename, bool binary) {
202  if (os_.is_open()) KALDI_ERR << "FileOutputImpl::Open(), "
203  << "open called on already open file.";
204  filename_ = filename;
205  os_.open(MapOsPath(filename_).c_str(),
206  binary ? std::ios_base::out | std::ios_base::binary
207  : std::ios_base::out);
208  return os_.is_open();
209  }
210 
211  virtual std::ostream &Stream() {
212  if (!os_.is_open())
213  KALDI_ERR << "FileOutputImpl::Stream(), file is not open.";
214  // I believe this error can only arise from coding error.
215  return os_;
216  }
217 
218  virtual bool Close() {
219  if (!os_.is_open())
220  KALDI_ERR << "FileOutputImpl::Close(), file is not open.";
221  // I believe this error can only arise from coding error.
222  os_.close();
223  return !(os_.fail());
224  }
225  virtual ~FileOutputImpl() {
226  if (os_.is_open()) {
227  os_.close();
228  if (os_.fail())
229  KALDI_ERR << "Error closing output file " << filename_;
230  }
231  }
232  private:
233  std::string filename_;
234  std::ofstream os_;
235 };
236 
238  public:
239  StandardOutputImpl(): is_open_(false) { }
240 
241  virtual bool Open(const std::string &filename, bool binary) {
242  if (is_open_) KALDI_ERR << "StandardOutputImpl::Open(), "
243  "open called on already open file.";
244 #ifdef _MSC_VER
245  _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT);
246 #endif
247  is_open_ = std::cout.good();
248  return is_open_;
249  }
250 
251  virtual std::ostream &Stream() {
252  if (!is_open_)
253  KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized.";
254  // I believe this error can only arise from coding error.
255  return std::cout;
256  }
257 
258  virtual bool Close() {
259  if (!is_open_)
260  KALDI_ERR << "StandardOutputImpl::Close(), file is not open.";
261  is_open_ = false;
262  std::cout << std::flush;
263  return !(std::cout.fail());
264  }
266  if (is_open_) {
267  std::cout << std::flush;
268  if (std::cout.fail())
269  KALDI_ERR << "Error writing to standard output";
270  }
271  }
272  private:
273  bool is_open_;
274 };
275 
277  public:
278  PipeOutputImpl(): f_(NULL), os_(NULL) { }
279 
280  virtual bool Open(const std::string &wxfilename, bool binary) {
281  filename_ = wxfilename;
282  KALDI_ASSERT(f_ == NULL); // Make sure closed.
283  KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|'); // should
284  // start with '|'
285  std::string cmd_name(wxfilename, 1);
286 #if defined(_MSC_VER) || defined(__CYGWIN__)
287  f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w"));
288 #else
289  f_ = popen(cmd_name.c_str(), "w");
290 #endif
291  if (!f_) { // Failure.
292  KALDI_WARN << "Failed opening pipe for writing, command is: "
293  << cmd_name << ", errno is " << strerror(errno);
294  return false;
295  } else {
296 #ifndef _MSC_VER
297  fb_ = new PipebufType(f_, // Using this constructor won't make the
298  // destructor try to close the stream when
299  // we're done.
300  (binary ? std::ios_base::out|
301  std::ios_base::binary
302  :std::ios_base::out));
303  KALDI_ASSERT(fb_ != NULL); // or would be alloc error.
304  os_ = new std::ostream(fb_);
305 #else
306  os_ = new std::ofstream(f_);
307 #endif
308  return os_->good();
309  }
310  }
311 
312  virtual std::ostream &Stream() {
313  if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Stream(),"
314  " object not initialized.";
315  // I believe this error can only arise from coding error.
316  return *os_;
317  }
318 
319  virtual bool Close() {
320  if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open.";
321  bool ok = true;
322  os_->flush();
323  if (os_->fail()) ok = false;
324  delete os_;
325  os_ = NULL;
326  int status;
327 #ifdef _MSC_VER
328  status = _pclose(f_);
329 #else
330  status = pclose(f_);
331 #endif
332  if (status)
333  KALDI_WARN << "Pipe " << filename_ << " had nonzero return status "
334  << status;
335  f_ = NULL;
336 #ifndef _MSC_VER
337  delete fb_;
338  fb_ = NULL;
339 #endif
340  return ok;
341  }
342  virtual ~PipeOutputImpl() {
343  if (os_) {
344  if (!Close())
345  KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_);
346  }
347  }
348  private:
349  std::string filename_;
350  FILE *f_;
351 #ifndef _MSC_VER
353 #endif
354  std::ostream *os_;
355 };
356 
357 
358 
360  public:
361  // Open will open it as a file, and return true on success.
362  // May be called twice only for kOffsetFileInput (otherwise,
363  // if called twice, we just create a new Input object, to avoid
364  // having to deal with the extra hassle of reopening with the
365  // same object).
366  // Note that we will call Open with true (binary)
367  // for text-mode Kaldi files; the only actual text-mode input
368  // is for non-Kaldi files.
369  virtual bool Open(const std::string &filename, bool binary) = 0;
370  virtual std::istream &Stream() = 0;
371  virtual int32 Close() = 0; // We only need to check failure in the case of
372  // kPipeInput; the other input types do not check
373  // for errors on close.
374  virtual InputType MyType() = 0; // Because if it's kOffsetFileInput, we may
375  // call Open twice
376  // (has efficiency benefits).
377 
378  virtual ~InputImplBase() { }
379 };
380 
382  public:
383  virtual bool Open(const std::string &filename, bool binary) {
384  if (is_.is_open()) KALDI_ERR << "FileInputImpl::Open(), "
385  << "open called on already open file.";
386  is_.open(MapOsPath(filename).c_str(),
387  binary ? std::ios_base::in | std::ios_base::binary
388  : std::ios_base::in);
389  return is_.is_open();
390  }
391 
392  virtual std::istream &Stream() {
393  if (!is_.is_open())
394  KALDI_ERR << "FileInputImpl::Stream(), file is not open.";
395  // I believe this error can only arise from coding error.
396  return is_;
397  }
398 
399  virtual int32 Close() {
400  if (!is_.is_open())
401  KALDI_ERR << "FileInputImpl::Close(), file is not open.";
402  // I believe this error can only arise from coding error.
403  is_.close();
404  // Don't check status.
405  return 0;
406  }
407 
408  virtual InputType MyType() { return kFileInput; }
409 
410  virtual ~FileInputImpl() {
411  // Stream will automatically be closed, and we don't care about
412  // whether it fails.
413  }
414  private:
415  std::ifstream is_;
416 };
417 
418 
420  public:
421  StandardInputImpl(): is_open_(false) { }
422 
423  virtual bool Open(const std::string &filename, bool binary) {
424  if (is_open_) KALDI_ERR << "StandardInputImpl::Open(), "
425  "open called on already open file.";
426  is_open_ = true;
427 #ifdef _MSC_VER
428  _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT);
429 #endif
430  return true; // Don't check good() because would be false if
431  // eof, which may be valid input.
432  }
433 
434  virtual std::istream &Stream() {
435  if (!is_open_)
436  KALDI_ERR << "StandardInputImpl::Stream(), object not initialized.";
437  // I believe this error can only arise from coding error.
438  return std::cin;
439  }
440 
441  virtual InputType MyType() { return kStandardInput; }
442 
443  virtual int32 Close() {
444  if (!is_open_) KALDI_ERR << "StandardInputImpl::Close(), file is not open.";
445  is_open_ = false;
446  return 0;
447  }
448  virtual ~StandardInputImpl() { }
449  private:
450  bool is_open_;
451 };
452 
454  public:
455  PipeInputImpl(): f_(NULL), is_(NULL) { }
456 
457  virtual bool Open(const std::string &rxfilename, bool binary) {
458  filename_ = rxfilename;
459  KALDI_ASSERT(f_ == NULL); // Make sure closed.
460  KALDI_ASSERT(rxfilename.length() != 0 &&
461  rxfilename[rxfilename.length()-1] == '|'); // should end with '|'
462  std::string cmd_name(rxfilename, 0, rxfilename.length()-1);
463 #if defined(_MSC_VER) || defined(__CYGWIN__)
464  f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r"));
465 #else
466  f_ = popen(cmd_name.c_str(), "r");
467 #endif
468 
469  if (!f_) { // Failure.
470  KALDI_WARN << "Failed opening pipe for reading, command is: "
471  << cmd_name << ", errno is " << strerror(errno);
472  return false;
473  } else {
474 #ifndef _MSC_VER
475  fb_ = new PipebufType(f_, // Using this constructor won't lead the
476  // destructor to close the stream.
477  (binary ? std::ios_base::in|
478  std::ios_base::binary
479  :std::ios_base::in));
480  KALDI_ASSERT(fb_ != NULL); // or would be alloc error.
481  is_ = new std::istream(fb_);
482 #else
483  is_ = new std::ifstream(f_);
484 #endif
485  if (is_->fail() || is_->bad()) return false;
486  if (is_->eof()) {
487  KALDI_WARN << "Pipe opened with command "
488  << PrintableRxfilename(rxfilename)
489  << " is empty.";
490  // don't return false: empty may be valid.
491  }
492  return true;
493  }
494  }
495 
496  virtual std::istream &Stream() {
497  if (is_ == NULL)
498  KALDI_ERR << "PipeInputImpl::Stream(), object not initialized.";
499  // I believe this error can only arise from coding error.
500  return *is_;
501  }
502 
503  virtual int32 Close() {
504  if (is_ == NULL)
505  KALDI_ERR << "PipeInputImpl::Close(), file is not open.";
506  delete is_;
507  is_ = NULL;
508  int32 status;
509 #ifdef _MSC_VER
510  status = _pclose(f_);
511 #else
512  status = pclose(f_);
513 #endif
514  if (status)
515  KALDI_WARN << "Pipe " << filename_ << " had nonzero return status "
516  << status;
517  f_ = NULL;
518 #ifndef _MSC_VER
519  delete fb_;
520  fb_ = NULL;
521 #endif
522  return status;
523  }
524  virtual ~PipeInputImpl() {
525  if (is_)
526  Close();
527  }
528  virtual InputType MyType() { return kPipeInput; }
529  private:
530  std::string filename_;
531  FILE *f_;
532 #ifndef _MSC_VER
534 #endif
535  std::istream *is_;
536 };
537 
538 /*
539 #else
540 
541 // Just have an empty implementation of the pipe input that crashes if
542 // called.
543 class PipeInputImpl: public InputImplBase {
544  public:
545  PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this
546  platform."); }
547  virtual bool Open(const std::string, bool) { return 0; }
548  virtual std::istream &Stream() const { return NULL; }
549  virtual void Close() {}
550  virtual InputType MyType() { return kPipeInput; }
551 };
552 
553 #endif
554 */
555 
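557  // This class is a bit more complicated than the other input
557  // implementations: its Open() may be called on an already-open stream,
557  // in which case it may just seek within the file.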
558 
559  public:
560  // splits a filename like /my/file:123 into /my/file and the
561  // number 123. Crashes if not this format.
562  static void SplitFilename(const std::string &rxfilename,
563  std::string *filename,
564  size_t *offset) {
565  size_t pos = rxfilename.find_last_of(':');
566  KALDI_ASSERT(pos != std::string::npos); // would indicate error in calling
567  // code, as the filename is supposed to be of the correct form at this
568  // point.
569  *filename = std::string(rxfilename, 0, pos);
570  std::string number(rxfilename, pos+1);
571  bool ans = ConvertStringToInteger(number, offset);
572  if (!ans)
573  KALDI_ERR << "Cannot get offset from filename " << rxfilename
574  << " (possibly you compiled in 32-bit and have a >32-bit"
575  << " byte offset into a file; you'll have to compile 64-bit.";
576  }
577 
578  bool Seek(size_t offset) {
579  size_t cur_pos = is_.tellg();
580  if (cur_pos == offset) return true;
581  else if (cur_pos<offset && cur_pos+100 > offset) {
582  // We're close enough that it may be faster to just
583  // read that data, rather than seek.
584  for (size_t i = cur_pos; i < offset; i++)
585  is_.get();
586  return (is_.tellg() == std::streampos(offset));
587  }
588  // Try to actually seek.
589  is_.seekg(offset, std::ios_base::beg);
590  if (is_.fail()) { // failbit or badbit is set [error happened]
591  is_.close();
592  return false; // failure.
593  } else {
594  is_.clear(); // Clear any failure bits (e.g. eof).
595  return true; // success.
596  }
597  }
598 
599  // This Open routine is unusual in that it is designed to work even
600  // if it was already open. This for efficiency when seeking multiple
601  // times.
602  virtual bool Open(const std::string &rxfilename, bool binary) {
603  if (is_.is_open()) {
604  // We are opening when we have an already-open file.
605  // We may have to seek within this file, or else close it and
606  // open a different one.
607  std::string tmp_filename;
608  size_t offset;
609  SplitFilename(rxfilename, &tmp_filename, &offset);
610  if (tmp_filename == filename_ && binary == binary_) { // Just seek
611  is_.clear(); // clear fail bit, etc.
612  return Seek(offset);
613  } else {
614  is_.close(); // don't bother checking error status of is_.
615  filename_ = tmp_filename;
616  is_.open(MapOsPath(filename_).c_str(),
617  binary ? std::ios_base::in | std::ios_base::binary
618  : std::ios_base::in);
619  if (!is_.is_open()) return false;
620  else
621  return Seek(offset);
622  }
623  } else {
624  size_t offset;
625  SplitFilename(rxfilename, &filename_, &offset);
626  binary_ = binary;
627  is_.open(MapOsPath(filename_).c_str(),
628  binary ? std::ios_base::in | std::ios_base::binary
629  : std::ios_base::in);
630  if (!is_.is_open()) return false;
631  else
632  return Seek(offset);
633  }
634  }
635 
636  virtual std::istream &Stream() {
637  if (!is_.is_open())
638  KALDI_ERR << "FileInputImpl::Stream(), file is not open.";
639  // I believe this error can only arise from coding error.
640  return is_;
641  }
642 
643  virtual int32 Close() {
644  if (!is_.is_open())
645  KALDI_ERR << "FileInputImpl::Close(), file is not open.";
646  // I believe this error can only arise from coding error.
647  is_.close();
648  // Don't check status.
649  return 0;
650  }
651 
652  virtual InputType MyType() { return kOffsetFileInput; }
653 
655  // Stream will automatically be closed, and we don't care about
656  // whether it fails.
657  }
658  private:
659  std::string filename_; // the actual filename
660  bool binary_; // true if was opened in binary mode.
661  std::ifstream is_;
662 };
663 
664 
665 Output::Output(const std::string &wxfilename, bool binary,
666  bool write_header):impl_(NULL) {
667  if (!Open(wxfilename, binary, write_header)) {
668  if (impl_) {
669  delete impl_;
670  impl_ = NULL;
671  }
672  KALDI_ERR << "Error opening output stream " <<
673  PrintableWxfilename(wxfilename);
674  }
675 }
676 
678  if (!impl_) {
679  return false; // error to call Close if not open.
680  } else {
681  bool ans = impl_->Close();
682  delete impl_;
683  impl_ = NULL;
684  return ans;
685  }
686 }
687 
689  if (impl_) {
690  bool ok = impl_->Close();
691  delete impl_;
692  impl_ = NULL;
693  if (!ok)
694  KALDI_ERR << "Error closing output file "
697  " (disk full?)" : "");
698  }
699 }
700 
701 std::ostream &Output::Stream() { // will throw if not open; else returns
702  // stream.
703  if (!impl_) KALDI_ERR << "Output::Stream() called but not open.";
704  return impl_->Stream();
705 }
706 
707 bool Output::Open(const std::string &wxfn, bool binary, bool header) {
708  if (IsOpen()) {
709  if (!Close()) { // Throw here rather than return status, as it's an error
710  // about something else: if the user wanted to avoid the exception he/she
711  // could have called Close().
712  KALDI_ERR << "Output::Open(), failed to close output stream: "
714  }
715  }
716 
717  filename_ = wxfn;
718 
719  OutputType type = ClassifyWxfilename(wxfn);
720  KALDI_ASSERT(impl_ == NULL);
721 
722  if (type == kFileOutput) {
723  impl_ = new FileOutputImpl();
724  } else if (type == kStandardOutput) {
725  impl_ = new StandardOutputImpl();
726  } else if (type == kPipeOutput) {
727  impl_ = new PipeOutputImpl();
728  } else { // type == kNoOutput
729  KALDI_WARN << "Invalid output filename format "<<
730  PrintableWxfilename(wxfn);
731  return false;
732  }
733  if (!impl_->Open(wxfn, binary)) {
734  delete impl_;
735  impl_ = NULL;
736  return false; // failed to open.
737  } else { // successfully opened it.
738  if (header) {
739  InitKaldiOutputStream(impl_->Stream(), binary);
740  bool ok = impl_->Stream().good(); // still OK?
741  if (!ok) {
742  delete impl_;
743  impl_ = NULL;
744  return false;
745  }
746  return true;
747  } else {
748  return true;
749  }
750  }
751 }
752 
753 
754 Input::Input(const std::string &rxfilename, bool *binary): impl_(NULL) {
755  if (!Open(rxfilename, binary)) {
756  KALDI_ERR << "Error opening input stream "
757  << PrintableRxfilename(rxfilename);
758  }
759 }
760 
762  if (impl_) {
763  int32 ans = impl_->Close();
764  delete impl_;
765  impl_ = NULL;
766  return ans;
767  } else {
768  return 0;
769  }
770 }
771 
772 bool Input::OpenInternal(const std::string &rxfilename,
773  bool file_binary,
774  bool *contents_binary) {
775  InputType type = ClassifyRxfilename(rxfilename);
776  if (IsOpen()) {
777  // May have to close the stream first.
778  if (type == kOffsetFileInput && impl_->MyType() == kOffsetFileInput) {
779  // We want to use the same object to Open... this is in case
780  // the files are the same, so we can just seek.
781  if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode--
782  // always open in binary.
783  delete impl_;
784  impl_ = NULL;
785  return false;
786  }
787  // read the binary header, if requested.
788  if (contents_binary != NULL)
789  return InitKaldiInputStream(impl_->Stream(), contents_binary);
790  else
791  return true;
792  } else {
793  Close();
794  // and fall through to code below which actually opens the file.
795  }
796  }
797  if (type == kFileInput) {
798  impl_ = new FileInputImpl();
799  } else if (type == kStandardInput) {
800  impl_ = new StandardInputImpl();
801  } else if (type == kPipeInput) {
802  impl_ = new PipeInputImpl();
803  } else if (type == kOffsetFileInput) {
804  impl_ = new OffsetFileInputImpl();
805  } else { // type == kNoInput
806  KALDI_WARN << "Invalid input filename format "<<
807  PrintableRxfilename(rxfilename);
808  return false;
809  }
810  if (!impl_->Open(rxfilename, file_binary)) { // true is binary mode--
811  // always read in binary.
812  delete impl_;
813  impl_ = NULL;
814  return false;
815  }
816  if (contents_binary != NULL)
817  return InitKaldiInputStream(impl_->Stream(), contents_binary);
818  else
819  return true;
820 }
821 
822 
823 Input::~Input() { if (impl_) Close(); }
824 
825 
826 std::istream &Input::Stream() {
827  if (!IsOpen()) KALDI_ERR << "Input::Stream(), not open.";
828  return impl_->Stream();
829 }
830 
831 
832 template <> void ReadKaldiObject(const std::string &filename,
833  Matrix<float> *m) {
834  if (!filename.empty() && filename[filename.size() - 1] == ']') {
835  // This filename seems to have a 'range'... like foo.ark:4312423[20:30].
836  // (the bit in square brackets is the range).
837  std::string rxfilename, range;
838  if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) {
839  KALDI_ERR << "Could not make sense of possible range specifier in filename "
840  << "while reading matrix: " << filename;
841  }
842  Matrix<float> temp;
843  bool binary_in;
844  Input ki(rxfilename, &binary_in);
845  temp.Read(ki.Stream(), binary_in);
846  if (!ExtractObjectRange(temp, range, m)) {
847  KALDI_ERR << "Error extracting range of object: " << filename;
848  }
849  } else {
850  // The normal case, there is no range.
851  bool binary_in;
852  Input ki(filename, &binary_in);
853  m->Read(ki.Stream(), binary_in);
854  }
855 }
856 
857 template <> void ReadKaldiObject(const std::string &filename,
858  Matrix<double> *m) {
859  if (!filename.empty() && filename[filename.size() - 1] == ']') {
860  // This filename seems to have a 'range'... like foo.ark:4312423[20:30].
861  // (the bit in square brackets is the range).
862  std::string rxfilename, range;
863  if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) {
864  KALDI_ERR << "Could not make sense of possible range specifier in filename "
865  << "while reading matrix: " << filename;
866  }
867  Matrix<double> temp;
868  bool binary_in;
869  Input ki(rxfilename, &binary_in);
870  temp.Read(ki.Stream(), binary_in);
871  if (!ExtractObjectRange(temp, range, m)) {
872  KALDI_ERR << "Error extracting range of object: " << filename;
873  }
874  } else {
875  // The normal case, there is no range.
876  bool binary_in;
877  Input ki(filename, &binary_in);
878  m->Read(ki.Stream(), binary_in);
879  }
880 }
881 
882 
883 
884 } // end namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
InputType ClassifyRxfilename(const std::string &filename)
ClassifyRxfilenames interprets filenames for reading as follows:
Definition: kaldi-io.cc:138
bool InitKaldiInputStream(std::istream &is, bool *binary)
Initialize an opened stream for reading by detecting the binary header and.
Definition: io-funcs-inl.h:306
virtual bool Open(const std::string &filename, bool binary)=0
virtual ~FileOutputImpl()
Definition: kaldi-io.cc:225
virtual ~InputImplBase()
Definition: kaldi-io.cc:378
virtual int32 Close()
Definition: kaldi-io.cc:503
virtual ~OffsetFileInputImpl()
Definition: kaldi-io.cc:654
virtual bool Close()
Definition: kaldi-io.cc:319
virtual std::istream & Stream()
Definition: kaldi-io.cc:636
bool IsOpen()
Definition: kaldi-io-inl.h:34
virtual InputType MyType()
Definition: kaldi-io.cc:408
virtual bool Open(const std::string &filename, bool binary)
Definition: kaldi-io.cc:423
InputImplBase * impl_
Definition: kaldi-io.h:235
std::ifstream is_
Definition: kaldi-io.cc:415
virtual InputType MyType()=0
bool Open(const std::string &rxfilename, bool *contents_binary=NULL)
Definition: kaldi-io-inl.h:26
virtual std::ostream & Stream()=0
virtual std::ostream & Stream()
Definition: kaldi-io.cc:251
#define MapOsPath(x)
Definition: kaldi-io.cc:36
virtual std::istream & Stream()
Definition: kaldi-io.cc:392
static std::string Escape(const std::string &str)
The following function will return a possibly quoted and escaped version of "str", according to the current shell.
std::string filename_
Definition: kaldi-io.cc:233
kaldi::int32 int32
std::string filename_
Definition: kaldi-io.cc:530
virtual ~FileInputImpl()
Definition: kaldi-io.cc:410
virtual ~PipeOutputImpl()
Definition: kaldi-io.cc:342
basic_pipebuf< char > PipebufType
Definition: kaldi-io.cc:55
virtual std::istream & Stream()
Definition: kaldi-io.cc:434
virtual int32 Close()=0
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
virtual int32 Close()
Definition: kaldi-io.cc:643
virtual std::istream & Stream()
Definition: kaldi-io.cc:496
std::ofstream os_
Definition: kaldi-io.cc:234
std::istream & Stream()
Definition: kaldi-io.cc:826
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
bool ExtractObjectRange(const GeneralMatrix &input, const std::string &range, GeneralMatrix *output)
GeneralMatrix is always of type BaseFloat.
Definition: kaldi-holder.cc:88
virtual bool Open(const std::string &filename, bool binary)
Definition: kaldi-io.cc:383
std::ostream & Stream()
Definition: kaldi-io.cc:701
virtual ~StandardInputImpl()
Definition: kaldi-io.cc:448
virtual bool Close()
Definition: kaldi-io.cc:258
virtual bool Open(const std::string &rxfilename, bool binary)
Definition: kaldi-io.cc:457
virtual bool Open(const std::string &wxfilename, bool binary)
Definition: kaldi-io.cc:280
virtual bool Open(const std::string &filename, bool binary)
Definition: kaldi-io.cc:201
static void SplitFilename(const std::string &rxfilename, std::string *filename, size_t *offset)
Definition: kaldi-io.cc:562
OutputType
Definition: kaldi-io.h:89
bool OpenInternal(const std::string &rxfilename, bool file_binary, bool *contents_binary)
Definition: kaldi-io.cc:772
virtual int32 Close()
Definition: kaldi-io.cc:399
virtual bool Open(const std::string &filename, bool binary)=0
virtual std::istream & Stream()=0
PipebufType * fb_
Definition: kaldi-io.cc:533
#define KALDI_ERR
Definition: kaldi-error.h:147
virtual InputType MyType()
Definition: kaldi-io.cc:652
#define KALDI_WARN
Definition: kaldi-error.h:150
int32 Close()
Definition: kaldi-io.cc:761
virtual ~StandardOutputImpl()
Definition: kaldi-io.cc:265
static FILE * CygwinCompatPopen(const char *command, const char *mode)
virtual int32 Close()
Definition: kaldi-io.cc:443
virtual bool Close()=0
std::ostream * os_
Definition: kaldi-io.cc:354
WspecifierType ClassifyWspecifier(const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
Definition: kaldi-table.cc:135
bool Open(const std::string &wxfilename, bool binary, bool write_header)
This opens the stream, with the given mode (binary or text).
Definition: kaldi-io.cc:707
virtual InputType MyType()
Definition: kaldi-io.cc:441
InputType
Definition: kaldi-io.h:105
PipebufType * fb_
Definition: kaldi-io.cc:352
virtual bool Close()
Definition: kaldi-io.cc:218
virtual std::ostream & Stream()
Definition: kaldi-io.cc:312
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::istream * is_
Definition: kaldi-io.cc:535
std::string filename_
Definition: kaldi-io.cc:349
OutputType ClassifyWxfilename(const std::string &filename)
ClassifyWxfilename interprets filenames as follows:
Definition: kaldi-io.cc:85
virtual std::ostream & Stream()
Definition: kaldi-io.cc:211
virtual ~OutputImplBase()
Definition: kaldi-io.cc:195
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:61
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
void InitKaldiOutputStream(std::ostream &os, bool binary)
InitKaldiOutputStream initializes an opened stream for writing by writing an optional binary header a...
Definition: io-funcs-inl.h:291
bool ExtractRangeSpecifier(const std::string &rxfilename_with_range, std::string *data_rxfilename, std::string *range)
virtual bool Open(const std::string &filename, bool binary)
Definition: kaldi-io.cc:241
OutputImplBase * impl_
Definition: kaldi-io.h:160
std::string filename_
Definition: kaldi-io.h:161
virtual bool Open(const std::string &rxfilename, bool binary)
Definition: kaldi-io.cc:602
This is a Kaldi C++ Library header.
virtual ~PipeInputImpl()
Definition: kaldi-io.cc:524
bool Close()
Definition: kaldi-io.cc:677
bool Seek(size_t offset)
Definition: kaldi-io.cc:578
virtual InputType MyType()
Definition: kaldi-io.cc:528