kaldi-io-test.cc
Go to the documentation of this file.
1 // util/kaldi-io-test.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 
11 // http://www.apache.org/licenses/LICENSE-2.0
12 
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 #ifndef _MSC_VER
20 #include <unistd.h>
21 #endif
22 #include "base/io-funcs.h"
23 #include "util/kaldi-io.h"
24 #include "base/kaldi-math.h"
25 #include "base/kaldi-utils.h"
26 
27 namespace kaldi {
28 
29 
30 
46  KALDI_ASSERT(ClassifyRxfilename("ark,s,cs:a b c") == kNoInput);
47  KALDI_ASSERT(ClassifyRxfilename("scp:a b c") == kNoInput);
48 
49 }
50 
51 
63  KALDI_ASSERT(ClassifyWxfilename("a b c:123") == kNoOutput);
64  KALDI_ASSERT(ClassifyWxfilename("ark,s,cs:a b c") == kNoOutput);
65  KALDI_ASSERT(ClassifyWxfilename("scp:a b c") == kNoOutput);
69 }
70 
71 void UnitTestIoNew(bool binary) {
72  {
73  const char *filename = "tmpf";
74 
75  Output ko(filename, binary);
76  std::ostream &outfile = ko.Stream();
77  if (!binary) outfile << "\t";
78  int64 i1 = Rand() % 10000;
79  WriteBasicType(outfile, binary, i1);
80  uint16 i2 = Rand() % 10000;
81  WriteBasicType(outfile, binary, i2);
82  if (!binary) outfile << "\t";
83  char c = Rand();
84  WriteBasicType(outfile, binary, c);
85  if (!binary && Rand()%2 == 0) outfile << " \n";
86  std::vector<int32> vec1;
87  WriteIntegerVector(outfile, binary, vec1);
88  if (!binary && Rand()%2 == 0) outfile << " \n";
89  std::vector<uint16> vec2;
90  for (size_t i = 0; i < 10; i++) vec2.push_back(Rand()%100 - 10);
91  WriteIntegerVector(outfile, binary, vec2);
92  if (!binary) outfile << " \n";
93  std::vector<char> vec3;
94  for (size_t i = 0; i < 10; i++) vec3.push_back(Rand()%100);
95  WriteIntegerVector(outfile, binary, vec3);
96  if (!binary && Rand()%2 == 0) outfile << " \n";
97  const char *token1 = "Hi";
98  WriteToken(outfile, binary, token1);
99  if (!binary) outfile << " \n";
100  std::string token2 = "There.";
101  WriteToken(outfile, binary, token2);
102  if (!binary && Rand()%2 == 0) outfile << " \n";
103  std::string token3 = "You.";
104  WriteToken(outfile, binary, token3);
105  if (!binary && Rand()%2 == 0) outfile << " ";
106  float f1 = RandUniform();
107  WriteBasicType(outfile, binary, f1);
108  if (!binary && Rand()%2 == 0) outfile << "\t";
109  float f2 = RandUniform();
110  WriteBasicType(outfile, binary, f2);
111  double d1 = RandUniform();
112  WriteBasicType(outfile, binary, d1);
113  if (!binary && Rand()%2 == 0) outfile << "\t";
114  double d2 = RandUniform();
115  WriteBasicType(outfile, binary, d2);
116  if (!binary && Rand()%2 == 0) outfile << "\t";
117  ko.Close();
118 
119  {
120  bool binary_in;
121  Input ki(filename, &binary_in);
122  std::istream &infile = ki.Stream();
123  int64 i1_in;
124  ReadBasicType(infile, binary_in, &i1_in);
125  KALDI_ASSERT(i1_in == i1);
126  uint16 i2_in;
127  ReadBasicType(infile, binary_in, &i2_in);
128  KALDI_ASSERT(i2_in == i2);
129  char c_in;
130  ReadBasicType(infile, binary_in, &c_in);
131  KALDI_ASSERT(c_in == c);
132  std::vector<int32> vec1_in;
133  ReadIntegerVector(infile, binary_in, &vec1_in);
134  KALDI_ASSERT(vec1_in == vec1);
135  std::vector<uint16> vec2_in;
136  ReadIntegerVector(infile, binary_in, &vec2_in);
137  KALDI_ASSERT(vec2_in == vec2);
138  std::vector<char> vec3_in;
139  ReadIntegerVector(infile, binary_in, &vec3_in);
140  KALDI_ASSERT(vec3_in == vec3);
141  std::string token1_in, token2_in;
142  KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
143  ReadToken(infile, binary_in, &token1_in);
144  KALDI_ASSERT(token1_in == (std::string)token1);
145  ReadToken(infile, binary_in, &token2_in);
146  KALDI_ASSERT(token2_in == token2);
147  if (Rand() % 2 == 0)
148  ExpectToken(infile, binary_in, token3.c_str());
149  else
150  ExpectToken(infile, binary_in, token3);
151  float f1_in; // same type.
152  ReadBasicType(infile, binary_in, &f1_in);
153  AssertEqual(f1_in, f1);
154  double f2_in; // wrong type.
155  ReadBasicType(infile, binary_in, &f2_in);
156  AssertEqual(f2_in, f2);
157  double d1_in; // same type.
158  ReadBasicType(infile, binary_in, &d1_in);
159  AssertEqual(d1_in, d1);
160  float d2_in; // wrong type.
161  ReadBasicType(infile, binary_in, &d2_in);
162  AssertEqual(d2_in, d2);
163  KALDI_ASSERT(Peek(infile, binary_in) == -1);
164  }
165  unlink(filename);
166  }
167 }
168 
169 void UnitTestIoPipe(bool binary) {
170  // This is as UnitTestIoNew except with different filenames.
171  {
172 #if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
173  // self-invocation on Windows that emulates cat(1)
174  const char *filename_out = "|kaldi-io-test cat > tmpf.gz",
175  *filename_in = "kaldi-io-test cat tmpf.gz|";
176 #else
177  const char *filename_out = "|gzip -c > tmpf.gz",
178  *filename_in = "gunzip -c tmpf.gz |";
179 #endif
180 
181  Output ko(filename_out, binary);
182  std::ostream &outfile = ko.Stream();
183  if (!binary) outfile << "\t";
184  int64 i1 = Rand() % 10000;
185  WriteBasicType(outfile, binary, i1);
186  uint16 i2 = Rand() % 10000;
187  WriteBasicType(outfile, binary, i2);
188  if (!binary) outfile << "\t";
189  char c = Rand();
190  WriteBasicType(outfile, binary, c);
191  if (!binary && Rand()%2 == 0) outfile << " \n";
192  std::vector<int32> vec1;
193  WriteIntegerVector(outfile, binary, vec1);
194  if (!binary && Rand()%2 == 0) outfile << " \n";
195  std::vector<uint16> vec2;
196  for (size_t i = 0; i < 10; i++) vec2.push_back(Rand()%100 - 10);
197  WriteIntegerVector(outfile, binary, vec2);
198  if (!binary) outfile << " \n";
199  WriteToken(outfile, binary, "<foo>");
200  std::vector<char> vec3;
201  for (size_t i = 0; i < 10; i++) vec3.push_back(Rand()%100);
202  WriteIntegerVector(outfile, binary, vec3);
203  if (!binary && Rand()%2 == 0) outfile << " \n";
204  const char *token1 = "Hi";
205  WriteToken(outfile, binary, token1);
206  if (!binary) outfile << " \n";
207  std::string token2 = "There.";
208  WriteToken(outfile, binary, token2);
209  if (!binary && Rand()%2 == 0) outfile << " \n";
210  std::string token3 = "You.";
211  WriteToken(outfile, binary, token3);
212  if (!binary && Rand()%2 == 0) outfile << " ";
213  float f1 = RandUniform();
214  WriteBasicType(outfile, binary, f1);
215  if (!binary && Rand()%2 == 0) outfile << "\t";
216  float f2 = RandUniform();
217  WriteBasicType(outfile, binary, f2);
218  double d1 = RandUniform();
219  WriteBasicType(outfile, binary, d1);
220  if (!binary && Rand()%2 == 0) outfile << "\t";
221  double d2 = RandUniform();
222  WriteBasicType(outfile, binary, d2);
223  if (!binary && Rand()%2 == 0) outfile << "\t";
224  bool ans = ko.Close();
225  KALDI_ASSERT(ans);
226 #ifndef _MSC_VER
227  Sleep(1); // This test does not work without this sleep:
228  // seems to be some kind of file-system latency.
229 #endif
230  {
231  bool binary_in;
232  Input ki(filename_in, &binary_in);
233  std::istream &infile = ki.Stream();
234  int64 i1_in;
235  ReadBasicType(infile, binary_in, &i1_in);
236  KALDI_ASSERT(i1_in == i1);
237  uint16 i2_in;
238  ReadBasicType(infile, binary_in, &i2_in);
239  KALDI_ASSERT(i2_in == i2);
240  char c_in;
241  ReadBasicType(infile, binary_in, &c_in);
242  KALDI_ASSERT(c_in == c);
243  std::vector<int32> vec1_in;
244  ReadIntegerVector(infile, binary_in, &vec1_in);
245  KALDI_ASSERT(vec1_in == vec1);
246  std::vector<uint16> vec2_in;
247  ReadIntegerVector(infile, binary_in, &vec2_in);
248  KALDI_ASSERT(vec2_in == vec2);
249  std::vector<char> vec3_in;
250  KALDI_ASSERT(PeekToken(infile, binary_in) == static_cast<int>('f'));
251  ExpectToken(infile, binary_in, "<foo>");
252  ReadIntegerVector(infile, binary_in, &vec3_in);
253  KALDI_ASSERT(vec3_in == vec3);
254  std::string token1_in, token2_in;
255  KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
256  ReadToken(infile, binary_in, &token1_in);
257  KALDI_ASSERT(token1_in == (std::string)token1);
258  ReadToken(infile, binary_in, &token2_in);
259  KALDI_ASSERT(token2_in == token2);
260  if (Rand() % 2 == 0)
261  ExpectToken(infile, binary_in, token3.c_str());
262  else
263  ExpectToken(infile, binary_in, token3);
264  float f1_in; // same type.
265  ReadBasicType(infile, binary_in, &f1_in);
266  AssertEqual(f1_in, f1);
267  double f2_in; // wrong type.
268  ReadBasicType(infile, binary_in, &f2_in);
269  AssertEqual(f2_in, f2);
270  double d1_in; // same type.
271  ReadBasicType(infile, binary_in, &d1_in);
272  AssertEqual(d1_in, d1);
273  float d2_in; // wrong type.
274  ReadBasicType(infile, binary_in, &d2_in);
275  AssertEqual(d2_in, d2);
276  KALDI_ASSERT(Peek(infile, binary_in) == -1);
277  }
278  }
279  unlink("tmpf.txt");
280  unlink("tmpf.gz");
281 }
282 
284  /*
285 Don't run the following part, because it would hang unless
286 standard input is piped from an empty file.
287  {
288  Input inp("", NULL); // standard input.
289  KALDI_ASSERT(inp.Stream().get() == -1);
290  }
291  {
292  Input inp("-", NULL); // standard input.
293  KALDI_ASSERT(inp.Stream().get() == -1);
294  }*/
295 
296  {
297  std::cout << "Should see: foo\n";
298  Output out("", false);
299  out.Stream() << "foo\n";
300  }
301  {
302  std::cout << "Should see: bar\n";
303  Output out("-", false);
304  out.Stream() << "bar\n";
305  }
306 }
307 
308 // This is Windows-specific.
310 #ifdef KALDI_CYGWIN_COMPAT
311  extern std::string MapCygwinPath(const std::string &filename);
312 
313  KALDI_ASSERT(MapCygwinPath("") == "");
314  KALDI_ASSERT(MapCygwinPath(".") == ".");
315  KALDI_ASSERT(MapCygwinPath("..") == "..");
316  KALDI_ASSERT(MapCygwinPath("/dev/null")[0] != '/');
317  KALDI_ASSERT(MapCygwinPath("/tmp")[1] == ':');
318  KALDI_ASSERT(MapCygwinPath("/tmp/")[1] == ':');
319  KALDI_ASSERT(MapCygwinPath("/tmp/foo")[1] == ':');
320  KALDI_ASSERT(MapCygwinPath("/cygdrive/c") == "c:/");
321  KALDI_ASSERT(MapCygwinPath("/cygdrive/c/") == "c:/");
322  KALDI_ASSERT(MapCygwinPath("/cygdrive/c/foo") == "c:/foo");
323 #endif
324 }
325 
326 } // end namespace kaldi.
327 
328 #if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
329 // Windows has no cat! There is probably no suitable tool to test popen I/O on
330 // Windows, so we emulate a lame version of cat(1).
331 static int TinyCat(int argc, const char** argv) {
332  const char* name_in = argc > 0 && strcmp(argv[0], "-") ? argv[0] : NULL;
333  int fd_in = name_in ? _open(name_in, _O_RDONLY) : _fileno(stdin);
334  if (fd_in < 0)
335  return 1;
336 
337  int fd_out = _fileno(stdout);
338  _setmode(fd_in, _O_BINARY);
339  _setmode(fd_out, _O_BINARY);
340 
341  char buffer[100];
342  int last_read;
343  while ((last_read = _read(fd_in, buffer, sizeof(buffer))) > 0)
344  _write(fd_out, buffer, last_read);
345 
346  if (name_in) _close(fd_in);
347  return 0;
348 }
349 #endif
350 
// Test entry point.  On MSVC builds without KALDI_CYGWIN_COMPAT, running the
// binary as "kaldi-io-test cat [file]" makes it act as a minimal cat(1) via
// TinyCat (the pipe commands in UnitTestIoPipe invoke it that way); otherwise
// the unit tests below are run.  Returns 0 once the tests have completed.
351 int main(int argc, const char** argv) {
352  using namespace kaldi;
353 #if defined(_MSC_VER) && !defined(KALDI_CYGWIN_COMPAT)
// Skip the program name and the "cat" word; TinyCat sees only what follows.
354  if (argc > 1 && strcmp(argv[1], "cat") == 0)
355  return TinyCat(argc - 2, argv + 2);
356 #endif
357 
// Each round-trip test is run once with binary == false and once with
// binary == true.
359  UnitTestIoNew(false);
360  UnitTestIoNew(true);
361  UnitTestIoPipe(true);
362  UnitTestIoPipe(false);
366 
367  KALDI_ASSERT(1); // just wanted to check that KALDI_ASSERT does not fail
368  // for 1.
369  return 0;
370 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void UnitTestClassifyWxfilename()
InputType ClassifyRxfilename(const std::string &filename)
ClassifyRxfilenames interprets filenames for reading as follows:
Definition: kaldi-io.cc:138
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void Sleep(float seconds)
Definition: kaldi-utils.cc:45
void UnitTestNativeFilename()
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
int Peek(std::istream &is, bool binary)
Peek consumes whitespace (if binary == false) and then returns the peek() value of the stream...
Definition: io-funcs.cc:145
std::istream & Stream()
Definition: kaldi-io.cc:826
std::ostream & Stream()
Definition: kaldi-io.cc:701
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
Definition: io-funcs-inl.h:232
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
Definition: io-funcs.cc:170
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
int main(int argc, const char **argv)
void UnitTestIoNew(bool binary)
void UnitTestIoStandard()
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Definition: kaldi-math.h:276
OutputType ClassifyWxfilename(const std::string &filename)
ClassifyWxfilename interprets filenames as follows:
Definition: kaldi-io.cc:85
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
std::string MapCygwinPath(const std::string &filename)
void UnitTestClassifyRxfilename()
void UnitTestIoPipe(bool binary)
bool Close()
Definition: kaldi-io.cc:677