io-funcs-inl.h
Go to the documentation of this file.
1 // base/io-funcs-inl.h
2 
3 // Copyright 2009-2011 Microsoft Corporation; Saarland University;
4 // Jan Silovsky; Yanmin Qian;
5 // Johns Hopkins University (Author: Daniel Povey)
6 // 2016 Xiaohui Zhang
7 
8 // See ../../COPYING for clarification regarding multiple authors
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 
14 // http://www.apache.org/licenses/LICENSE-2.0
15 
16 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
18 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
19 // MERCHANTABLITY OR NON-INFRINGEMENT.
20 // See the Apache 2 License for the specific language governing permissions and
21 // limitations under the License.
22 
23 #ifndef KALDI_BASE_IO_FUNCS_INL_H_
24 #define KALDI_BASE_IO_FUNCS_INL_H_ 1
25 
26 // Do not include this file directly. It is included by base/io-funcs.h
27 
28 #include <limits>
29 #include <vector>
30 
31 namespace kaldi {
32 
33 // Template that covers integers.
34 template<class T> void WriteBasicType(std::ostream &os,
35  bool binary, T t) {
36  // Compile time assertion that this is not called with a wrong type.
38  if (binary) {
39  char len_c = (std::numeric_limits<T>::is_signed ? 1 : -1)
40  * static_cast<char>(sizeof(t));
41  os.put(len_c);
42  os.write(reinterpret_cast<const char *>(&t), sizeof(t));
43  } else {
44  if (sizeof(t) == 1)
45  os << static_cast<int16>(t) << " ";
46  else
47  os << t << " ";
48  }
49  if (os.fail()) {
50  KALDI_ERR << "Write failure in WriteBasicType.";
51  }
52 }
53 
54 // Template that covers integers.
55 template<class T> inline void ReadBasicType(std::istream &is,
56  bool binary, T *t) {
57  KALDI_PARANOID_ASSERT(t != NULL);
58  // Compile time assertion that this is not called with a wrong type.
60  if (binary) {
61  int len_c_in = is.get();
62  if (len_c_in == -1)
63  KALDI_ERR << "ReadBasicType: encountered end of stream.";
64  char len_c = static_cast<char>(len_c_in), len_c_expected
65  = (std::numeric_limits<T>::is_signed ? 1 : -1)
66  * static_cast<char>(sizeof(*t));
67  if (len_c != len_c_expected) {
68  KALDI_ERR << "ReadBasicType: did not get expected integer type, "
69  << static_cast<int>(len_c)
70  << " vs. " << static_cast<int>(len_c_expected)
71  << ". You can change this code to successfully"
72  << " read it later, if needed.";
73  // insert code here to read "wrong" type. Might have a switch statement.
74  }
75  is.read(reinterpret_cast<char *>(t), sizeof(*t));
76  } else {
77  if (sizeof(*t) == 1) {
78  int16 i;
79  is >> i;
80  *t = i;
81  } else {
82  is >> *t;
83  }
84  }
85  if (is.fail()) {
86  KALDI_ERR << "Read failure in ReadBasicType, file position is "
87  << is.tellg() << ", next char is " << is.peek();
88  }
89 }
90 
91 // Template that covers integers.
92 template<class T>
93 inline void WriteIntegerPairVector(std::ostream &os, bool binary,
94  const std::vector<std::pair<T, T> > &v) {
95  // Compile time assertion that this is not called with a wrong type.
97  if (binary) {
98  char sz = sizeof(T); // this is currently just a check.
99  os.write(&sz, 1);
100  int32 vecsz = static_cast<int32>(v.size());
101  KALDI_ASSERT((size_t)vecsz == v.size());
102  os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
103  if (vecsz != 0) {
104  os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T) * vecsz * 2);
105  }
106  } else {
107  // focus here is on prettiness of text form rather than
108  // efficiency of reading-in.
109  // reading-in is dominated by low-level operations anyway:
110  // for efficiency use binary.
111  os << "[ ";
112  typename std::vector<std::pair<T, T> >::const_iterator iter = v.begin(),
113  end = v.end();
114  for (; iter != end; ++iter) {
115  if (sizeof(T) == 1)
116  os << static_cast<int16>(iter->first) << ','
117  << static_cast<int16>(iter->second) << ' ';
118  else
119  os << iter->first << ','
120  << iter->second << ' ';
121  }
122  os << "]\n";
123  }
124  if (os.fail()) {
125  KALDI_ERR << "Write failure in WriteIntegerPairVector.";
126  }
127 }
128 
129 // Template that covers integers.
130 template<class T>
131 inline void ReadIntegerPairVector(std::istream &is, bool binary,
132  std::vector<std::pair<T, T> > *v) {
134  KALDI_ASSERT(v != NULL);
135  if (binary) {
136  int sz = is.peek();
137  if (sz == sizeof(T)) {
138  is.get();
139  } else { // this is currently just a check.
140  KALDI_ERR << "ReadIntegerPairVector: expected to see type of size "
141  << sizeof(T) << ", saw instead " << sz << ", at file position "
142  << is.tellg();
143  }
144  int32 vecsz;
145  is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
146  if (is.fail() || vecsz < 0) goto bad;
147  v->resize(vecsz);
148  if (vecsz > 0) {
149  is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz*2);
150  }
151  } else {
152  std::vector<std::pair<T, T> > tmp_v; // use temporary so v doesn't use extra memory
153  // due to resizing.
154  is >> std::ws;
155  if (is.peek() != static_cast<int>('[')) {
156  KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw "
157  << is.peek() << ", at file position " << is.tellg();
158  }
159  is.get(); // consume the '['.
160  is >> std::ws; // consume whitespace.
161  while (is.peek() != static_cast<int>(']')) {
162  if (sizeof(T) == 1) { // read/write chars as numbers.
163  int16 next_t1, next_t2;
164  is >> next_t1;
165  if (is.fail()) goto bad;
166  if (is.peek() != static_cast<int>(','))
167  KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
168  << is.peek() << ", at file position " << is.tellg();
169  is.get(); // consume the ','.
170  is >> next_t2 >> std::ws;
171  if (is.fail()) goto bad;
172  else
173  tmp_v.push_back(std::make_pair<T, T>((T)next_t1, (T)next_t2));
174  } else {
175  T next_t1, next_t2;
176  is >> next_t1;
177  if (is.fail()) goto bad;
178  if (is.peek() != static_cast<int>(','))
179  KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
180  << is.peek() << ", at file position " << is.tellg();
181  is.get(); // consume the ','.
182  is >> next_t2 >> std::ws;
183  if (is.fail()) goto bad;
184  else
185  tmp_v.push_back(std::pair<T, T>(next_t1, next_t2));
186  }
187  }
188  is.get(); // get the final ']'.
189  *v = tmp_v; // could use std::swap to use less temporary memory, but this
190  // uses less permanent memory.
191  }
192  if (!is.fail()) return;
193  bad:
194  KALDI_ERR << "ReadIntegerPairVector: read failure at file position "
195  << is.tellg();
196 }
197 
198 template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
199  const std::vector<T> &v) {
200  // Compile time assertion that this is not called with a wrong type.
202  if (binary) {
203  char sz = sizeof(T); // this is currently just a check.
204  os.write(&sz, 1);
205  int32 vecsz = static_cast<int32>(v.size());
206  KALDI_ASSERT((size_t)vecsz == v.size());
207  os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
208  if (vecsz != 0) {
209  os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T)*vecsz);
210  }
211  } else {
212  // focus here is on prettiness of text form rather than
213  // efficiency of reading-in.
214  // reading-in is dominated by low-level operations anyway:
215  // for efficiency use binary.
216  os << "[ ";
217  typename std::vector<T>::const_iterator iter = v.begin(), end = v.end();
218  for (; iter != end; ++iter) {
219  if (sizeof(T) == 1)
220  os << static_cast<int16>(*iter) << " ";
221  else
222  os << *iter << " ";
223  }
224  os << "]\n";
225  }
226  if (os.fail()) {
227  KALDI_ERR << "Write failure in WriteIntegerVector.";
228  }
229 }
230 
231 
232 template<class T> inline void ReadIntegerVector(std::istream &is,
233  bool binary,
234  std::vector<T> *v) {
236  KALDI_ASSERT(v != NULL);
237  if (binary) {
238  int sz = is.peek();
239  if (sz == sizeof(T)) {
240  is.get();
241  } else { // this is currently just a check.
242  KALDI_ERR << "ReadIntegerVector: expected to see type of size "
243  << sizeof(T) << ", saw instead " << sz << ", at file position "
244  << is.tellg();
245  }
246  int32 vecsz;
247  is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
248  if (is.fail() || vecsz < 0) goto bad;
249  v->resize(vecsz);
250  if (vecsz > 0) {
251  is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz);
252  }
253  } else {
254  std::vector<T> tmp_v; // use temporary so v doesn't use extra memory
255  // due to resizing.
256  is >> std::ws;
257  if (is.peek() != static_cast<int>('[')) {
258  KALDI_ERR << "ReadIntegerVector: expected to see [, saw "
259  << is.peek() << ", at file position " << is.tellg();
260  }
261  is.get(); // consume the '['.
262  is >> std::ws; // consume whitespace.
263  while (is.peek() != static_cast<int>(']')) {
264  if (sizeof(T) == 1) { // read/write chars as numbers.
265  int16 next_t;
266  is >> next_t >> std::ws;
267  if (is.fail()) goto bad;
268  else
269  tmp_v.push_back((T)next_t);
270  } else {
271  T next_t;
272  is >> next_t >> std::ws;
273  if (is.fail()) goto bad;
274  else
275  tmp_v.push_back(next_t);
276  }
277  }
278  is.get(); // get the final ']'.
279  *v = tmp_v; // could use std::swap to use less temporary memory, but this
280  // uses less permanent memory.
281  }
282  if (!is.fail()) return;
283  bad:
284  KALDI_ERR << "ReadIntegerVector: read failure at file position "
285  << is.tellg();
286 }
287 
288 
// Prepares an already-opened stream for writing.
//   os:     stream to prepare.
//   binary: when true, the two-byte header "\0B" is written first.
// In both modes the floating-point precision is raised to at least 7 (a bit
// more than the precision of float); a larger existing precision is kept.
// No error checking is done here and nothing is thrown by this function.
inline void InitKaldiOutputStream(std::ostream &os, bool binary) {
  if (binary)
    os.put('\0').put('B');
  // In non-binary mode the precision might be adjusted further at some point.
  const std::streamsize min_precision = 7;
  if (os.precision() < min_precision)
    os.precision(min_precision);
}
303 
// Prepares an already-opened stream for reading by detecting the binary
// header and setting the "binary" value appropriately.
//   is:     stream to inspect.
//   binary: output flag; set to true when the header "\0B" is found and
//           consumed, set to false when the stream does not start with '\0'
//           (in which case nothing is consumed).
// Returns true in both of those cases.  Returns false in the very unusual
// situation that the stream starts with '\0' but the next character is not
// 'B'; the '\0' has then been consumed and *binary is left unmodified.
inline bool InitKaldiInputStream(std::istream &is, bool *binary) {
  if (is.peek() != '\0') {  // no header: treat the stream as text.
    *binary = false;
    return true;
  }
  is.get();  // consume the '\0'; the stream seems to be binary.
  if (is.peek() != 'B')
    return false;
  is.get();  // consume the 'B'.
  *binary = true;
  return true;
}
324 
325 } // end namespace kaldi.
326 
327 #endif // KALDI_BASE_IO_FUNCS_INL_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool InitKaldiInputStream(std::istream &is, bool *binary)
Initialize an opened stream for reading by detecting the binary header and setting the "binary" value appropriately.
Definition: io-funcs-inl.h:306
void WriteIntegerPairVector(std::ostream &os, bool binary, const std::vector< std::pair< T, T > > &v)
Function for writing STL vectors of pairs of integer types.
Definition: io-funcs-inl.h:93
#define KALDI_ASSERT_IS_INTEGER_TYPE(I)
Definition: kaldi-utils.h:133
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
kaldi::int32 int32
void ReadIntegerPairVector(std::istream &is, bool binary, std::vector< std::pair< T, T > > *v)
Function for reading STL vector of pairs of integer types.
Definition: io-funcs-inl.h:131
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
Definition: io-funcs-inl.h:232
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_PARANOID_ASSERT(cond)
Definition: kaldi-error.h:206
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void InitKaldiOutputStream(std::ostream &os, bool binary)
InitKaldiOutputStream initializes an opened stream for writing by writing an optional binary header a...
Definition: io-funcs-inl.h:291