kaldi-table-test.cc
Go to the documentation of this file.
1 // util/kaldi-table-test.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 
11 // http://www.apache.org/licenses/LICENSE-2.0
12 
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 #include "base/io-funcs.h"
20 #include "util/kaldi-io.h"
21 #include "base/kaldi-math.h"
22 #include "util/kaldi-table.h"
23 #include "util/kaldi-holder.h"
24 #include "util/table-types.h"
25 
26 namespace kaldi {
27 
29  typedef std::pair<std::string, std::string> pr;
30  {
31  std::stringstream ss;
32  ss << " a b\n";
33  ss << "c d \n";
34  ss << "c d e \n";
35  std::vector<pr> script;
36  bool ans = ReadScriptFile(ss, true, &script);
37  KALDI_ASSERT(ans);
38  std::vector<pr> script2;
39  script2.push_back(std::pair<std::string, std::string>("a", "b"));
40  script2.push_back(std::pair<std::string, std::string>("c", "d"));
41  script2.push_back(std::pair<std::string, std::string>("c", "d e"));
42  KALDI_ASSERT(script == script2);
43  }
44  {
45  typedef std::pair<std::string, std::string> pr;
46  std::stringstream ss;
47  ss << " a \n";
48  std::vector<pr> script;
49  // suppress the warning since I already checked it's OK.
50  KALDI_ASSERT(!ReadScriptFile(ss, false, &script));
51  }
52  {
53  typedef std::pair<std::string, std::string> pr;
54  std::stringstream ss;
55  ss << "\n";
56  std::vector<pr> script;
57  // suppress the warning since I already checked it's OK.
58  KALDI_ASSERT(!ReadScriptFile(ss, false, &script));
59  }
60 #if !defined(_MSC_VER) || defined(KALDI_CYGWIN_COMPAT)
61  {
62  Output ko("| gzip -c > tmpf.gz", false); // text mode.
63  ko.Stream() << "a b\n";
64  ko.Close();
65  std::vector<pr> script;
66  Sleep(1); // This test does not work without this sleep:
67  bool ans = ReadScriptFile("gunzip -c tmpf.gz |", true, &script);
68  KALDI_ASSERT(ans);
69  std::vector<pr> script2;
70  script2.push_back(std::pair<std::string, std::string>("a", "b"));
71  KALDI_ASSERT(script == script2);
72  }
73 
74  {
75  Output ko("| gzip -c > tmpf.gz", true); // binary mode w/ header:
76  // should fail, because script files should not have binary header.
77  ko.Stream() << "a b\n";
78  bool ans = ko.Close();
79  KALDI_ASSERT(ans);
80  Sleep(1); // This test does not work without this sleep:
81  // seems to be some kind of file-system latency.
82  std::vector<pr> script;
83  ans = ReadScriptFile("gunzip -c tmpf.gz |", false, &script);
84  KALDI_ASSERT(!ans);
85  }
86  unlink("tmpf.gz");
87 #endif
88 }
89 
90 
92  {
93  std::string a = "b,ark:|foo";
94  std::string ark = "x", scp = "y";
95  WspecifierOptions opts;
96  WspecifierType ans = ClassifyWspecifier(a, &ark, &scp, &opts);
97  KALDI_ASSERT(ans == kArchiveWspecifier && ark == "|foo" && scp == "" &&
98  opts.binary == true);
99  }
100 
101  {
102  std::string a = "t,ark:|foo";
103  std::string ark = "x", scp = "y";
104  WspecifierOptions opts;
105  WspecifierType ans = ClassifyWspecifier(a, &ark, &scp, &opts);
106  KALDI_ASSERT(ans == kArchiveWspecifier && ark == "|foo" && scp == "" &&
107  opts.binary == false);
108  }
109 
110  {
111  std::string a = "t,scp:a b c d";
112  std::string ark = "x", scp = "y";
113  WspecifierOptions opts;
114  WspecifierType ans = ClassifyWspecifier(a, &ark, &scp, &opts);
115  KALDI_ASSERT(ans == kScriptWspecifier && ark == "" && scp == "a b c d" &&
116  opts.binary == false);
117  }
118 
119  {
120  std::string a = "t,ark,scp:a b,c,d";
121  std::string ark = "x", scp = "y";
122  WspecifierOptions opts;
123  WspecifierType ans = ClassifyWspecifier(a, &ark, &scp, &opts);
124  KALDI_ASSERT(ans == kBothWspecifier && ark == "a b" && scp == "c,d" &&
125  opts.binary == false);
126  }
127 
128  {
129  std::string a = "";
130  std::string ark = "x", scp = "y";
131  WspecifierOptions opts;
132  WspecifierType ans = ClassifyWspecifier(a, &ark, &scp, &opts);
133  KALDI_ASSERT(ans == kNoWspecifier);
134  }
135 
136  {
137  std::string a = " t,ark:boo"; // leading space not allowed.
138  WspecifierType ans = ClassifyWspecifier(a, NULL, NULL, NULL);
139  KALDI_ASSERT(ans == kNoWspecifier);
140  }
141 
142  {
143  std::string a = "t,ark:boo "; // trailing space not allowed.
144  WspecifierType ans = ClassifyWspecifier(a, NULL, NULL, NULL);
145  KALDI_ASSERT(ans == kNoWspecifier);
146  }
147 
148  {
149  std::string a = "b,ark,scp:,"; // empty ark, scp fnames valid.
150  std::string ark = "x", scp = "y";
151  WspecifierOptions opts;
152  WspecifierType ans = ClassifyWspecifier(a, &ark, &scp, &opts);
153  KALDI_ASSERT(ans == kBothWspecifier && ark == "" && scp == "" &&
154  opts.binary == true);
155  }
156 
157  {
158  std::string a = "f,b,ark,scp:,"; // empty ark, scp fnames valid.
159  std::string ark = "x", scp = "y";
160  WspecifierOptions opts;
161  WspecifierType ans = ClassifyWspecifier(a, &ark, &scp, &opts);
162  KALDI_ASSERT(ans == kBothWspecifier && ark == "" && scp == "" &&
163  opts.binary == true && opts.flush == true);
164  }
165 
166  {
167  std::string a = "nf,b,ark,scp:,"; // empty ark, scp fnames valid.
168  std::string ark = "x", scp = "y";
169  WspecifierOptions opts;
170  WspecifierType ans = ClassifyWspecifier(a, &ark, &scp, &opts);
171  KALDI_ASSERT(ans == kBothWspecifier && ark == "" && scp == "" &&
172  opts.binary == true && opts.flush == false);
173  }
174 }
175 
176 
178  {
179  std::string a = "ark:foo|";
180  std::string fname = "x";
181  RspecifierOptions opts;
182  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
183  KALDI_ASSERT(ans == kArchiveRspecifier && fname == "foo|");
184  }
185 
186 
187  {
188  std::string a = "b,ark:foo|"; // b, is ignored.
189  std::string fname = "x";
190  RspecifierOptions opts;
191  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
192  KALDI_ASSERT(ans == kArchiveRspecifier && fname == "foo|");
193  }
194 
195  {
196  std::string a = "ark,b:foo|"; // , b is ignored.
197  std::string fname = "x";
198  RspecifierOptions opts;
199  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
200  KALDI_ASSERT(ans == kArchiveRspecifier && fname == "foo|");
201  }
202 
203 
204  {
205  std::string a = "scp,b:foo|";
206  std::string fname = "x";
207  RspecifierOptions opts;
208  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
209  KALDI_ASSERT(ans == kScriptRspecifier && fname == "foo|");
210  }
211 
212  {
213  std::string a = "scp,scp,b:foo|"; // invalid as repeated.
214  std::string fname = "x";
215  RspecifierOptions opts;
216  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
217  KALDI_ASSERT(ans == kNoRspecifier && fname == "");
218  }
219 
220  {
221  std::string a = "ark,scp,b:foo|"; // invalid as combined.
222  std::string fname = "x";
223  RspecifierOptions opts;
224  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
225  KALDI_ASSERT(ans == kNoRspecifier && fname == "");
226  }
227 
228  {
229  std::string a = "scp,o:foo|";
230  std::string fname = "x";
231  RspecifierOptions opts;
232  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
233  KALDI_ASSERT(ans == kScriptRspecifier && fname == "foo|");
234  KALDI_ASSERT(opts.once);
235  }
236 
237  {
238  std::string a = "scp,no:foo|";
239  std::string fname = "x";
240  RspecifierOptions opts;
241  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
242  KALDI_ASSERT(ans == kScriptRspecifier && fname == "foo|");
243  KALDI_ASSERT(!opts.once);
244  }
245 
246  {
247  std::string a = "s,scp,no:foo|";
248  std::string fname = "x";
249  RspecifierOptions opts;
250  RspecifierType ans = ClassifyRspecifier(a, &fname, &opts);
251  KALDI_ASSERT(ans == kScriptRspecifier && fname == "foo|");
252  KALDI_ASSERT(!opts.once && opts.sorted);
253  }
254 
255 
256  {
257  std::string a = "scp:foo|";
258  std::string fname = "x";
259  RspecifierType ans = ClassifyRspecifier(a, &fname, NULL);
260  KALDI_ASSERT(ans == kScriptRspecifier && fname == "foo|");
261  }
262 
263  {
264  std::string a = "scp:"; // empty fname valid.
265  std::string fname = "x";
266  RspecifierType ans = ClassifyRspecifier(a, &fname, NULL);
267  KALDI_ASSERT(ans == kScriptRspecifier && fname == "");
268  }
269 
270  {
271  std::string a = "scp:"; // empty fname valid.
272  RspecifierType ans = ClassifyRspecifier(a, NULL, NULL);
274  }
275 
276  {
277  std::string a = "";
278  RspecifierType ans = ClassifyRspecifier(a, NULL, NULL);
279  KALDI_ASSERT(ans == kNoRspecifier);
280  }
281 
282  {
283  std::string a = "scp";
284  RspecifierType ans = ClassifyRspecifier(a, NULL, NULL);
285  KALDI_ASSERT(ans == kNoRspecifier);
286  }
287 
288  {
289  std::string a = "ark";
290  RspecifierType ans = ClassifyRspecifier(a, NULL, NULL);
291  KALDI_ASSERT(ans == kNoRspecifier);
292  }
293 
294  {
295  std::string a = "ark:foo "; // trailing space not allowed.
296  RspecifierType ans = ClassifyRspecifier(a, NULL, NULL);
297  KALDI_ASSERT(ans == kNoRspecifier);
298  }
299 
300  // Testing it accepts the meaningless t, and b, prefixes.
301  {
302  std::string a = "b,scp:a", b;
303  RspecifierType ans = ClassifyRspecifier(a, &b, NULL);
304  KALDI_ASSERT(ans == kScriptRspecifier && b == "a");
305  }
306  {
307  std::string a = "t,scp:a", b;
308  RspecifierType ans = ClassifyRspecifier(a, &b, NULL);
309  KALDI_ASSERT(ans == kScriptRspecifier && b == "a");
310  }
311  {
312  std::string a = "b,ark:a", b;
313  RspecifierType ans = ClassifyRspecifier(a, &b, NULL);
314  KALDI_ASSERT(ans == kArchiveRspecifier && b == "a");
315  }
316  {
317  std::string a = "t,ark:a", b;
318  RspecifierType ans = ClassifyRspecifier(a, &b, NULL);
319  KALDI_ASSERT(ans == kArchiveRspecifier && b == "a");
320  }
321 }
322 
323 void UnitTestTableSequentialInt32(bool binary) {
324  int32 sz = Rand() % 10;
325  std::vector<std::string> k;
326  std::vector<int32> v;
327 
328  for (int32 i = 0; i < sz; i++) {
329  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
330  // some single quotes too but it doesn't really matter.
331  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
332  // different lengths.
333  v.push_back(Rand());
334  }
335 
336  bool ans;
337  Int32Writer bw(binary ? "b,ark:tmpf" : "t,ark:tmpf");
338  for (int32 i = 0; i < sz; i++) {
339  bw.Write(k[i], v[i]);
340  }
341  ans = bw.Close();
342  KALDI_ASSERT(ans);
343 
344  SequentialInt32Reader sbr(RandInt(0, 1) == 0 ? "ark:tmpf" : "ark,bg:tmpf");
345  std::vector<std::string> k2;
346  std::vector<int32> v2;
347  for (; !sbr.Done(); sbr.Next()) {
348  k2.push_back(sbr.Key());
349  v2.push_back(sbr.Value());
350  }
351  KALDI_ASSERT(sbr.Close());
352  KALDI_ASSERT(k2 == k);
353  KALDI_ASSERT(v2 == v);
354 }
355 
356 void UnitTestTableSequentialBool(bool binary) {
357  int32 sz = Rand() % 10;
358  std::vector<std::string> k;
359  std::vector<bool> v;
360 
361  for (int32 i = 0; i < sz; i++) {
362  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
363  // some single quotes too but it doesn't really matter.
364  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
365  // different lengths.
366  v.push_back((Rand()%2 == 0));
367  }
368 
369  bool ans;
370  BoolWriter bw(binary ? "b,ark:tmpf" : "t,ark:tmpf");
371  for (int32 i = 0; i < sz; i++) {
372  bw.Write(k[i], v[i]);
373  }
374  ans = bw.Close();
375  KALDI_ASSERT(ans);
376 
377  SequentialBoolReader sbr(RandInt(0, 1) == 0 ? "ark:tmpf" : "ark,bg:tmpf");
378  std::vector<std::string> k2;
379  std::vector<bool> v2;
380  for (; !sbr.Done(); sbr.Next()) {
381  k2.push_back(sbr.Key());
382  v2.push_back(sbr.Value());
383  }
384  KALDI_ASSERT(sbr.Close());
385  KALDI_ASSERT(k2 == k);
386  KALDI_ASSERT(v2 == v);
387 }
388 
389 
390 void UnitTestTableSequentialDouble(bool binary) {
391  int32 sz = Rand() % 10;
392  std::vector<std::string> k;
393  std::vector<double> v;
394 
395  for (int32 i = 0; i < sz; i++) {
396  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
397  // some single quotes too but it doesn't really matter.
398  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
399  // different lengths.
400  v.push_back((Rand() / static_cast<double>(Rand())));
401  }
402 
403  bool ans;
404  DoubleWriter bw(binary ? "b,ark:tmpf" : "t,ark:tmpf");
405  for (int32 i = 0; i < sz; i++) {
406  bw.Write(k[i], v[i]);
407  }
408  ans = bw.Close();
409  KALDI_ASSERT(ans);
410 
411  SequentialDoubleReader sbr(RandInt(0, 1) == 0 ? "ark:tmpf" : "ark,bg:tmpf");
412  std::vector<std::string> k2;
413  std::vector<double> v2;
414  for (; !sbr.Done(); sbr.Next()) {
415  k2.push_back(sbr.Key());
416  v2.push_back(sbr.Value());
417  }
418  KALDI_ASSERT(sbr.Close());
419  KALDI_ASSERT(k2 == k);
420  if (binary) {
421  KALDI_ASSERT(v2 == v);
422  } else {
423  KALDI_ASSERT(v2.size() == v.size());
424  for (size_t i = 0; i < v2.size(); i++)
425  KALDI_ASSERT(ApproxEqual(v[i], v2[i]));
426  }
427 }
428 
429 
430 // Writing as both and reading as archive.
431 void UnitTestTableSequentialDoubleBoth(bool binary, bool read_scp) {
432  int32 sz = Rand() % 10;
433  std::vector<std::string> k;
434  std::vector<double> v;
435 
436  for (int32 i = 0; i < sz; i++) {
437  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
438  // some single quotes too but it doesn't really matter.
439  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
440  // different lengths.
441  v.push_back((Rand() / static_cast<double>(Rand())));
442  }
443 
444  bool ans;
445  DoubleWriter bw(binary ? "b,ark,scp:tmpf,tmpf.scp" :
446  "t,ark,scp:tmpf,tmpf.scp");
447  for (int32 i = 0; i < sz; i++) {
448  bw.Write(k[i], v[i]);
449  }
450  ans = bw.Close();
451  KALDI_ASSERT(ans);
452 
453  SequentialDoubleReader sbr(RandInt(0, 1) == 0 ?
454  (read_scp ? "scp:tmpf.scp" : "ark:tmpf") :
455  (read_scp ? "scp,bg:tmpf.scp" : "ark,bg:tmpf"));
456  std::vector<std::string> k2;
457  std::vector<double> v2;
458  for (; !sbr.Done(); sbr.Next()) {
459  k2.push_back(sbr.Key());
460  v2.push_back(sbr.Value());
461  }
462  KALDI_ASSERT(sbr.Close());
463  KALDI_ASSERT(k2 == k);
464  if (binary) {
465  KALDI_ASSERT(v2 == v);
466  } else {
467  KALDI_ASSERT(v2.size() == v.size());
468  for (size_t i = 0; i < v2.size(); i++)
469  KALDI_ASSERT(ApproxEqual(v[i], v2[i]));
470  }
471  unlink("tmpf.scp");
472  unlink("tmpf");
473 }
474 
475 
476 // Writing as both and reading as archive.
477 void UnitTestTableSequentialInt32VectorBoth(bool binary, bool read_scp) {
478  int32 sz = Rand() % 10;
479  std::vector<std::string> k;
480  std::vector<std::vector<int32> > v;
481 
482  for (int32 i = 0; i < sz; i++) {
483  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
484  // some single quotes too but it doesn't really matter.
485  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
486  // different lengths.
487  v.push_back(std::vector<int32>());
488  int32 sz2 = Rand() % 5;
489  for (int32 j = 0; j < sz2; j++)
490  v.back().push_back(Rand() % 100);
491  }
492 
493  bool ans;
494  Int32VectorWriter bw(binary ? "b,ark,scp:tmpf,tmpf.scp" :
495  "t,ark,scp:tmpf,tmpf.scp");
496  for (int32 i = 0; i < sz; i++) {
497  bw.Write(k[i], v[i]);
498  }
499  ans = bw.Close();
500  KALDI_ASSERT(ans);
501 
502  SequentialInt32VectorReader sbr(RandInt(0, 1) == 0 ?
503  (read_scp ? "scp:tmpf.scp" : "ark:tmpf") :
504  (read_scp ? "scp,bg:tmpf.scp" : "ark,bg:tmpf"));
505  std::vector<std::string> k2;
506  std::vector<std::vector<int32> > v2;
507  for (; !sbr.Done(); sbr.Next()) {
508  k2.push_back(sbr.Key());
509  v2.push_back(sbr.Value());
510  }
511  KALDI_ASSERT(sbr.Close());
512  KALDI_ASSERT(k2 == k);
513  KALDI_ASSERT(v2 == v);
514 }
515 
516 
517 // Writing as both and reading as archive.
518 void UnitTestTableSequentialInt32PairVectorBoth(bool binary, bool read_scp) {
519  int32 sz = Rand() % 10;
520  std::vector<std::string> k(sz);
521  std::vector<std::vector<std::pair<int32, int32> > > v(sz);
522 
523  for (int32 i = 0; i < sz; i++) {
524  k[i] = CharToString('a' + static_cast<char>(i)); // This gives us
525  // some single quotes too but it doesn't really matter.
526  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
527  // different lengths.
528  int32 sz2 = Rand() % 5;
529  for (int32 j = 0; j < sz2; j++)
530  v[i].push_back(std::pair<int32, int32>(Rand() % 10, Rand() % 10));
531  }
532 
533  bool ans;
534  Int32PairVectorWriter bw(binary ? "b,ark,scp:tmpf,tmpf.scp" :
535  "t,ark,scp:tmpf,tmpf.scp");
536  for (int32 i = 0; i < sz; i++) {
537  bw.Write(k[i], v[i]);
538  }
539  ans = bw.Close();
540  KALDI_ASSERT(ans);
541 
543  (read_scp ? "scp:tmpf.scp" : "ark:tmpf") :
544  (read_scp ? "scp,bg:tmpf.scp" : "ark,bg:tmpf"));
545  std::vector<std::string> k2;
546  std::vector<std::vector<std::pair<int32, int32> > > v2;
547  for (; !sbr.Done(); sbr.Next()) {
548  k2.push_back(sbr.Key());
549  v2.push_back(sbr.Value());
550  }
551  KALDI_ASSERT(sbr.Close());
552  KALDI_ASSERT(k2 == k);
553  KALDI_ASSERT(v2 == v);
554 }
555 
556 
557 // Writing as both and reading as archive.
558 void UnitTestTableSequentialInt32VectorVectorBoth(bool binary, bool read_scp) {
559  int32 sz = Rand() % 10;
560  std::vector<std::string> k;
561  std::vector<std::vector<std::vector<int32> > > v;
562 
563  for (int32 i = 0; i < sz; i++) {
564  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
565  // some single quotes too but it doesn't really matter.
566  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
567  // different lengths.
568  v.push_back(std::vector<std::vector<int32> >());
569  int32 sz2 = Rand() % 5;
570  for (int32 j = 0; j < sz2; j++) {
571  v.back().push_back(std::vector<int32>());
572  int32 sz3 = Rand() % 2;
573  for (int32 k = 0; k < sz3; k++)
574  v.back().back().push_back(Rand() % 100);
575  }
576  }
577 
578  bool ans;
579  Int32VectorVectorWriter bw(binary ? "b,ark,scp:tmpf,tmpf.scp" :
580  "t,ark,scp:tmpf,tmpf.scp");
581  for (int32 i = 0; i < sz; i++) {
582  bw.Write(k[i], v[i]);
583  }
584  ans = bw.Close();
585  KALDI_ASSERT(ans);
586 
587  SequentialInt32VectorVectorReader sbr(read_scp ? "scp:tmpf.scp" : "ark:tmpf");
588  std::vector<std::string> k2;
589  std::vector<std::vector<std::vector<int32> > > v2;
590  for (; !sbr.Done(); sbr.Next()) {
591  k2.push_back(sbr.Key());
592  v2.push_back(sbr.Value());
593  }
594  KALDI_ASSERT(sbr.Close());
595  KALDI_ASSERT(k2 == k);
596  KALDI_ASSERT(v2 == v);
597 }
598 
599 
601  int32 sz = Rand() % 10;
602  std::vector<std::pair<std::string, std::string> > script;
603  std::vector<std::string> k;
604  std::vector<int32> v;
605 
606  for (int32 i = 0; i < sz; i++) {
607  char buf[3];
608  buf[0] = 'a' + static_cast<char>(i);
609  buf[1] = (i%2 == 0 ? 'b'+static_cast<char>(i) : '\0');
610  buf[2] = '\0';
611  k.push_back(std::string(buf));
612  script.push_back(std::make_pair(std::string(buf),
613  std::string(buf) + ".tmp"));
614  v.push_back(Rand());
615  }
616 
617  WriteScriptFile("tmp.scp", script);
618  {
619  std::vector<std::pair<std::string, std::string> > script2;
620  ReadScriptFile("tmp.scp", true, &script2);
621  KALDI_ASSERT(script2 == script); // This tests WriteScriptFile and
622  // ReadScriptFile.
623  }
624 
625  bool ans;
626  Int32Writer bw(binary ? "b,scp:tmp.scp" : "t,scp:tmp.scp");
627  for (int32 i = 0; i < sz; i++) {
628  bw.Write(k[i], v[i]);
629  }
630  ans = bw.Close();
631  KALDI_ASSERT(ans);
632 
633  SequentialInt32Reader sbr(RandInt(0, 1) == 0 ?
634  "scp:tmp.scp" : "scp,bg:tmp.scp");
635  std::vector<std::string> k2;
636  std::vector<int32> v2;
637  for (; !sbr.Done(); sbr.Next()) {
638  k2.push_back(sbr.Key());
639  v2.push_back(sbr.Value());
640  }
641  KALDI_ASSERT(sbr.Close());
642 
643  unlink("tmp.scp");
644  for (size_t i = 0; i < script.size(); i++) {
645  unlink(script[i].second.c_str());
646  }
647  KALDI_ASSERT(k2 == k);
648  KALDI_ASSERT(v2 == v);
649 }
650 
651 // Writing as both and reading as archive.
652 void UnitTestTableSequentialDoubleMatrixBoth(bool binary, bool read_scp) {
653  int32 sz = Rand() % 10;
654  std::vector<std::string> k;
655  std::vector<Matrix<double>*> v;
656 
657  for (int32 i = 0; i < sz; i++) {
658  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
659  // some single quotes too but it doesn't really matter.
660  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
661  // different lengths.
662  v.push_back(new Matrix<double>(1 + Rand()%4, 1 + Rand() % 4));
663  for (int32 i = 0; i < v.back()->NumRows(); i++)
664  for (int32 j = 0; j < v.back()->NumCols(); j++)
665  (*(v.back()))(i, j) = RandGauss();
666  }
667 
668  bool ans;
669  DoubleMatrixWriter bw(binary ? "b,ark,scp:tmpf,tmpf.scp" :
670  "t,ark,scp:tmpf,tmpf.scp");
671  for (int32 i = 0; i < sz; i++) {
672  bw.Write(k[i], *(v[i]));
673  }
674  ans = bw.Close();
675  KALDI_ASSERT(ans);
676 
677  SequentialDoubleMatrixReader sbr(read_scp ? "scp:tmpf.scp" : "ark:tmpf");
678  std::vector<std::string> k2;
679  std::vector<Matrix<double>* > v2;
680  for (; !sbr.Done(); sbr.Next()) {
681  k2.push_back(sbr.Key());
682  v2.push_back(new Matrix<double>(sbr.Value()));
683  }
684  KALDI_ASSERT(sbr.Close());
685  KALDI_ASSERT(k2 == k);
686  if (binary) {
687  for (size_t i = 0; i < v2.size(); i++)
688  KALDI_ASSERT(v2[i]->ApproxEqual(*(v[i]), 1.0e-10));
689  } else {
690  KALDI_ASSERT(v2.size() == v.size());
691  for (size_t i = 0; i < v2.size(); i++)
692  KALDI_ASSERT(v2[i]->ApproxEqual(*(v[i])));
693  }
694  for (int32 i = 0; i < sz; i++) {
695  delete v[i];
696  delete v2[i];
697  }
698  unlink("tmpf");
699  unlink("tmpf.scp");
700 }
701 
702 
703 // Writing as both and reading as archive.
704 void UnitTestTableSequentialBaseFloatVectorBoth(bool binary, bool read_scp) {
705  int32 sz = Rand() % 10;
706  std::vector<std::string> k;
707  std::vector<Vector<BaseFloat>*> v;
708 
709  for (int32 i = 0; i < sz; i++) {
710  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
711  // some single quotes too but it doesn't really matter.
712  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
713  // different lengths.
714  v.push_back(new Vector<BaseFloat>(1 + Rand()%4));
715  for (int32 i = 0; i < v.back()->Dim(); i++)
716  (*(v.back()))(i) = RandGauss();
717  }
718 
719  bool ans;
720  BaseFloatVectorWriter bw(binary ? "b,ark,scp:tmpf,tmpf.scp" :
721  "t,ark,scp:tmpf,tmpf.scp");
722  for (int32 i = 0; i < sz; i++) {
723  bw.Write(k[i], *(v[i]));
724  }
725  ans = bw.Close();
726  KALDI_ASSERT(ans);
727 
729  RandInt(0, 1) == 0 ?
730  (read_scp ? "scp:tmpf.scp" : "ark:tmpf") :
731  (read_scp ? "scp,bg:tmpf.scp" : "ark,bg:tmpf"));
732  std::vector<std::string> k2;
733  std::vector<Vector<BaseFloat>* > v2;
734  for (; !sbr.Done(); sbr.Next()) {
735  k2.push_back(sbr.Key());
736  v2.push_back(new Vector<BaseFloat>(sbr.Value()));
737  }
738  KALDI_ASSERT(sbr.Close());
739  KALDI_ASSERT(k2 == k);
740  if (binary) {
741  for (size_t i = 0; i < v2.size(); i++)
742  KALDI_ASSERT(v2[i]->ApproxEqual(*(v[i]), 1.0e-10));
743  } else {
744  KALDI_ASSERT(v2.size() == v.size());
745  for (size_t i = 0; i < v2.size(); i++)
746  KALDI_ASSERT(v2[i]->ApproxEqual(*(v[i])));
747  }
748  for (int32 i = 0; i < sz; i++) {
749  delete v[i];
750  delete v2[i];
751  }
752 }
753 
754 template<class T> void RandomizeVector(std::vector<T> *v) {
755  if (v->size() > 1) {
756  for (size_t i = 0; i < 10; i++) {
757  size_t j = Rand() % v->size(),
758  k = Rand() % v->size();
759  if (j != k)
760  std::swap((*v)[j], (*v)[k]);
761  }
762  }
763 }
764 
765 
766 // Writing as both scp and archive, with random access.
767 
768 void UnitTestTableRandomBothDouble(bool binary, bool read_scp,
769  bool sorted, bool called_sorted,
770  bool once) {
771  int32 sz = Rand() % 10;
772  std::vector<std::string> k;
773  std::vector<double> v;
774 
775  for (int32 i = 0; i < sz; i++) {
776  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
777  // some single quotes too but it doesn't really matter.
778  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
779  // different lengths.
780  v.push_back((Rand() / static_cast<double>(Rand())));
781  }
782 
783  if (!sorted)
784  RandomizeVector(&k);
785 
786 
787  bool ans;
788  DoubleWriter bw(binary ? "b,f,ark,scp:tmpf,tmpf.scp" :
789  "t,f,ark,scp:tmpf,tmpf.scp"); // Putting the "flush" option
790  // in too, just for good measure..
791  for (int32 i = 0; i < sz; i++) {
792  bw.Write(k[i], v[i]);
793  }
794  ans = bw.Close();
795  KALDI_ASSERT(ans);
796 
797 
798  std::string name;
799  if (sorted) name += "s,";
800  else if (Rand()%2 == 0) name += "ns,";
801  if (called_sorted) name += "cs,";
802  else if (Rand()%2 == 0) name += "ncs,";
803  if (once) name += "o,";
804  else if (Rand()%2 == 0) name += "no,";
805  name += std::string(read_scp ? "scp:tmpf.scp" : "ark:tmpf");
806 
807  RandomAccessDoubleReader sbr(name);
808 
809  if (sz != 0) {
810  std::vector<std::string> read_keys;
811  int32 read_sz = Rand() % 5;
812  for (int32 i = 0; i < read_sz; i++)
813  read_keys.push_back(k[Rand() % k.size()]);
814  std::sort(read_keys.begin(), read_keys.end());
815  if (once) Uniq(&read_keys);
816  if (!called_sorted)
817  RandomizeVector(&read_keys);
818 
819  for (size_t i = 0; i < read_keys.size(); i++) {
820  std::cout << "Looking up key " << read_keys[i] << std::endl;
821  std::string cur_key = read_keys[i];
822 
823  auto it = std::find(k.begin(), k.end(), cur_key);
824  KALDI_ASSERT(it != k.end());
825  size_t idx = std::distance(k.begin(), it);
826  double value = v[idx];
827  if (Rand() % 2 == 0) {
828  bool ans = sbr.HasKey(cur_key);
829  KALDI_ASSERT(ans == true);
830  }
831  if (binary) {
832  KALDI_ASSERT(value == sbr.Value(cur_key));
833  } else {
834  KALDI_ASSERT(ApproxEqual(value, sbr.Value(cur_key)));
835  }
836  }
837  }
838 }
839 
840 
841 
842 void UnitTestRangesMatrix(bool binary) {
843  int32 archive_size = RandInt(1, 10);
844  std::vector<std::pair<std::string, Matrix<BaseFloat> > > archive_contents(
845  archive_size);
846  for (int32 i = 0; i < archive_size; i++) {
847  char key_buf[2];
848  key_buf[0] = 'A' + i;
849  key_buf[1] = '\0';
850  std::string key(key_buf);
851  archive_contents[i].first = key;
852  archive_contents[i].second.Resize(RandInt(1, 5), RandInt(1, 5));
853  archive_contents[i].second.SetRandn();
854  }
855  if (RandInt(0, 1) == 0)
856  std::random_shuffle(archive_contents.begin(), archive_contents.end());
857 
858  std::ostringstream writer_name;
859  writer_name << "ark,scp";
860  if (binary) writer_name << ",b";
861  else writer_name << ",t";
862  writer_name << ":tmpf,tmpf.scp";
863 
864  {
865  BaseFloatMatrixWriter writer(writer_name.str());
866  for (int32 i = 0; i < archive_size; i++)
867  writer.Write(archive_contents[i].first, archive_contents[i].second);
868  }
869 
870  std::vector<std::string> scp_lines;
871  {
872  bool binary;
873  Input scp_input("tmpf.scp", &binary);
874  KALDI_ASSERT(!binary);
875  std::string line;
876  while (getline(scp_input.Stream(), line)) {
877  Trim(&line); // remove trailing and beginning whitespace.
878  scp_lines.push_back(line);
879  }
880  KALDI_ASSERT(scp_lines.size() == archive_contents.size());
881  }
882 
883  int32 scp_length = RandInt(0, 10);
884  std::vector<std::pair<std::string, Matrix<BaseFloat> > >
885  scp_intended_contents(scp_length);
886 
887  {
888  Output output("tmpf_ranges.scp", false);
889 
890  for (int32 i = 0; i < scp_length; i++) {
891  int32 src_i = RandInt(0, archive_size - 1);
892  std::string scp_line_str = scp_lines[src_i]; // a line like "A tmpf:1043", without newline.
893  scp_line_str[0] = 'a' + i; // now scp_line_str looks like "a tmpf:1043".
894  std::string key("x");
895  key[0] = 'a' + i;
896  scp_intended_contents[i].first = key;
897  output.Stream() << scp_line_str;
898  const Matrix<BaseFloat> &src_mat = archive_contents[src_i].second;
899  if (RandInt(0, 1) == 0) { // Use a range.
900  int32 tot_rows = src_mat.NumRows(), tot_cols = src_mat.NumCols();
901  int32 row_offset = RandInt(0, tot_rows - 1),
902  num_rows = RandInt(1, tot_rows - row_offset),
903  col_offset = RandInt(0, tot_cols - 1),
904  num_cols = RandInt(1, tot_cols - col_offset);
905  SubMatrix<BaseFloat> sub_mat(src_mat, row_offset, num_rows,
906  col_offset, num_cols);
907  scp_intended_contents[i].second = sub_mat;
908  output.Stream() << "[";
909  if (row_offset != 0 || num_rows != tot_rows)
910  output.Stream() << row_offset << ":"
911  << (row_offset + num_rows - 1);
912  else
913  output.Stream() << ":";
914  if (col_offset != 0 || num_cols != tot_cols) {
915  output.Stream() << "," << col_offset
916  << ":" << (col_offset + num_cols - 1);
917  } else {
918  if (RandInt(0, 1) == 0) {
919  output.Stream() << ",:";
920  }
921  }
922  output.Stream() << "]";
923  } else { // no range.
924  scp_intended_contents[i].second = src_mat;
925  }
926  output.Stream() << "\n";
927  }
928  }
929 
930  { // test random-access reading.
931  bool permissive = (RandInt(0, 1) == 0);
932  RandomAccessDoubleMatrixReader reader(permissive ?
933  "scp,p:tmpf_ranges.scp" :
934  "scp:tmpf_ranges.scp");
935 
936  int32 num_queries = RandInt(0, 10);
937  for (int32 n = 0; n < num_queries; n++) {
938  int32 i = RandInt(0, scp_length);
939  if (i == scp_length) { // fake "bad" query.
940  KALDI_ASSERT(!reader.HasKey("foobar"));
941  } else {
942  std::string key = scp_intended_contents[i].first;
943  if (RandInt(0, 1) == 0)
944  KALDI_ASSERT(reader.HasKey(key));
945  Matrix<BaseFloat> value (reader.Value(key));
946  KALDI_ASSERT(value.ApproxEqual(scp_intended_contents[i].second));
947  }
948  }
949  }
950 
951 
952  { // test sequential reading.
953  bool permissive = (RandInt(0, 1) == 0);
954  SequentialBaseFloatMatrixReader reader(permissive ?
955  "scp,p:tmpf_ranges.scp" :
956  "scp:tmpf_ranges.scp");
957 
958  int32 i = 0;
959  for (; !reader.Done(); reader.Next(), i++) {
960  KALDI_ASSERT(reader.Key() == scp_intended_contents[i].first);
961  KALDI_ASSERT(reader.Value().ApproxEqual(scp_intended_contents[i].second));
962  }
963  KALDI_ASSERT(i == scp_length);
964  }
965 
966 
967  unlink("tmpf");
968  unlink("tmpf.scp");
969  unlink("tmpf_ranges.scp");
970 }
971 
972 void UnitTestTableRandomBothDoubleMatrix(bool binary, bool read_scp,
973  bool sorted, bool called_sorted,
974  bool once) {
975  int32 sz = Rand() % 10;
976  std::vector<std::string> k;
977  std::vector<Matrix<double> > v;
978 
979  for (int32 i = 0; i < sz; i++) {
980  k.push_back(CharToString('a' + static_cast<char>(i))); // This gives us
981  // some single quotes too but it doesn't really matter.
982  if (i%2 == 0) k.back() = k.back() + CharToString('a' + i); // make them
983  // different lengths.
984  v.resize(v.size()+1);
985  v.back().Resize(1 + Rand()%3, 1 + Rand()%3);
986  for (int32 j = 0; j < v.back().NumRows(); j++)
987  for (int32 k = 0; k < v.back().NumCols(); k++)
988  v.back()(j, k) = (Rand() % 100);
989  }
990 
991  if (!sorted)
992  RandomizeVector(&k);
993 
994 
995  bool ans;
996  DoubleMatrixWriter bw(binary ? "b,f,ark,scp:tmpf,tmpf.scp" :
997  "t,f,ark,scp:tmpf,tmpf.scp"); // Putting the "flush"
998  // option in too, just for good measure..
999  for (int32 i = 0; i < sz; i++) {
1000  bw.Write(k[i], v[i]);
1001  }
1002  ans = bw.Close();
1003  KALDI_ASSERT(ans);
1004 
1005 
1006  std::string name;
1007  if (sorted) name += "s,";
1008  else if (Rand()%2 == 0) name += "ns,";
1009  if (called_sorted) name += "cs,";
1010  else if (Rand()%2 == 0) name += "ncs,";
1011  if (once) name += "o,";
1012  else if (Rand()%2 == 0) name += "no,";
1013  name += std::string(read_scp ? "scp:tmpf.scp" : "ark:tmpf");
1015 
1016  if (sz != 0) {
1017  std::vector<std::string> read_keys;
1018  int32 read_sz = Rand() % 5;
1019  for (int32 i = 0; i < read_sz; i++)
1020  read_keys.push_back(k[Rand() % k.size()]);
1021  std::sort(read_keys.begin(), read_keys.end());
1022  if (once) Uniq(&read_keys);
1023  if (!called_sorted)
1024  RandomizeVector(&read_keys);
1025 
1026  for (size_t i = 0; i < read_keys.size(); i++) {
1027  std::cout << "Looking up key " << read_keys[i] << std::endl;
1028  std::string cur_key = read_keys[i];
1029  Matrix<double> *value_ptr = NULL;
1030  for (size_t i = 0; i < k.size(); i++)
1031  if (cur_key == k[i]) value_ptr = &(v[i]);
1032  if (Rand() % 2 == 0) {
1033  bool ans = sbr.HasKey(cur_key);
1034  KALDI_ASSERT(ans == true);
1035  }
1036  if (binary) {
1037  KALDI_ASSERT(value_ptr->ApproxEqual(sbr.Value(cur_key), 1.0e-10));
1038  } else {
1039  KALDI_ASSERT(value_ptr->ApproxEqual(sbr.Value(cur_key), 0.01));
1040  }
1041  }
1042  }
1043  unlink("tmpf");
1044  unlink("tmpf.scp");
1045 }
1046 
1047 
1048 
1049 } // end namespace kaldi.
1050 
1051 int main() {
1052  using namespace kaldi;
1056  for (int i = 0; i < 10; i++) {
1057  bool b = (i == 0);
1063  for (int j = 0; j < 2; j++) {
1064  bool c = (j == 0);
1071  for (int k = 0; k < 2; k++) {
1072  bool d = (k == 0);
1073  for (int l = 0; l < 2; l++) {
1074  bool e = (l == 0);
1075  for (int m = 0; m < 2; m++) {
1076  bool f = (m == 0);
1077  UnitTestTableRandomBothDouble(b, c, d, e, f);
1078  UnitTestTableRandomBothDoubleMatrix(b, c, d, e, f);
1079  }
1080  }
1081  }
1082  }
1083  }
1084  std::cout << "Test OK.\n";
1085  return 0;
1086 }
1087 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void UnitTestTableSequentialInt32(bool binary)
void UnitTestTableRandomBothDouble(bool binary, bool read_scp, bool sorted, bool called_sorted, bool once)
void UnitTestTableSequentialInt32PairVectorBoth(bool binary, bool read_scp)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void Uniq(std::vector< T > *vec)
Removes duplicate elements from a sorted list.
Definition: stl-utils.h:78
void UnitTestTableSequentialInt32VectorBoth(bool binary, bool read_scp)
bool ApproxEqual(const MatrixBase< Real > &other, float tol=0.01) const
Returns true if ((*this)-other).FrobeniusNorm() <= tol * (*this).FrobeniusNorm(). ...
void Sleep(float seconds)
Definition: kaldi-utils.cc:45
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void UnitTestTableSequentialDoubleMatrixBoth(bool binary, bool read_scp)
void UnitTestTableSequentialBaseFloatVectorBoth(bool binary, bool read_scp)
void UnitTestTableRandomBothDoubleMatrix(bool binary, bool read_scp, bool sorted, bool called_sorted, bool once)
void UnitTestTableSequentialBool(bool binary)
void UnitTestTableSequentialInt32VectorVectorBoth(bool binary, bool read_scp)
void Write(const std::string &key, const T &value) const
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
std::istream & Stream()
Definition: kaldi-io.cc:826
std::ostream & Stream()
Definition: kaldi-io.cc:701
bool WriteScriptFile(std::ostream &os, const std::vector< std::pair< std::string, std::string > > &script)
Definition: kaldi-table.cc:83
const T & Value(const std::string &key)
void UnitTestTableSequentialDouble(bool binary)
struct rnnlm::@11::@12 n
RspecifierType
Definition: kaldi-table.h:219
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
void UnitTestClassifyRspecifier()
WspecifierType
Definition: kaldi-table.h:106
void Trim(std::string *str)
Removes leading and trailing whitespace from a string.
Definition: text-utils.cc:92
bool HasKey(const std::string &key)
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
void UnitTestTableSequentialInt32Script(bool binary)
WspecifierType ClassifyWspecifier(const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
Definition: kaldi-table.cc:135
void UnitTestTableSequentialDoubleBoth(bool binary, bool read_scp)
std::string CharToString(const char &c)
Definition: kaldi-utils.cc:36
bool ReadScriptFile(const std::string &rxfilename, bool warn, std::vector< std::pair< std::string, std::string > > *script_out)
Definition: kaldi-table.cc:26
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
int main()
void UnitTestRangesMatrix(bool binary)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void RandomizeVector(std::vector< T > *v)
void UnitTestClassifyWspecifier()
Sub-matrix representation.
Definition: kaldi-matrix.h:988
void UnitTestReadScriptFile()
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
Returns true if abs(a - b) <= relative_tolerance * (abs(a) + abs(b)).
Definition: kaldi-math.h:265
bool Close()
Definition: kaldi-io.cc:677
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95