text-utils-test.cc
Go to the documentation of this file.
1 // util/text-utils-test.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2017 Johns Hopkins University (author: Daniel Povey)
5 // 2015 Vimal Manohar (Johns Hopkins University)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 
23 #include "base/kaldi-common.h"
24 #include "util/text-utils.h"
25 
26 namespace kaldi {
27 
28 char GetRandChar() {
29  return static_cast<char>(32 + Rand() % 95); // between ' ' and '~'
30 }
31 
32 const char *ws_delim = " \t\n\r";
33 char GetRandDelim() {
34  if (Rand() % 2 == 0)
35  return static_cast<char>(33 + Rand() % 94); // between '!' and '~';
36  else
37  return ws_delim[Rand() % 4];
38 }
39 
40 
42  // srand((unsigned int)time(NULL));
43  // didn't compile on cygwin.
44 
45  {
46  std::vector<std::string> str_vec;
47  SplitStringToVector("", " ", false, &str_vec);
48  KALDI_ASSERT(str_vec.size() == 1); // If this fails it may just mean
49  // that someone changed the
50  // semantics of SplitStringToVector in a reasonable way.
51  SplitStringToVector("", " ", true, &str_vec);
52  KALDI_ASSERT(str_vec.empty());
53  }
54  for (int j = 0; j < 100; j++) {
55  std::vector<std::string> str_vec;
56  int sz = Rand() % 73;
57  std::string full;
58  for (int i = 0; i < sz-1; i++) {
59  full.push_back((Rand() % 7 == 0)? GetRandDelim() : GetRandChar());
60  }
61  std::string delim;
62  delim.push_back(GetRandDelim());
63  bool omit_empty_strings = (Rand() %2 == 0)? true : false;
64  SplitStringToVector(full, delim.c_str(), omit_empty_strings, &str_vec);
65  std::string new_full;
66  for (size_t i = 0; i < str_vec.size(); i++) {
67  if (omit_empty_strings) KALDI_ASSERT(str_vec[i] != "");
68  new_full.append(str_vec[i]);
69  if (i < str_vec.size() -1) new_full.append(delim);
70  }
71  std::string new_full2;
72  JoinVectorToString(str_vec, delim.c_str(), omit_empty_strings, &new_full2);
73  if (omit_empty_strings) { // sequences of delimiters cannot be matched
74  size_t start = full.find_first_not_of(delim),
75  end = full.find_last_not_of(delim);
76  if (start == std::string::npos) { // only delimiters
77  KALDI_ASSERT(end == std::string::npos);
78  } else {
79  std::string full_test;
80  char last = '\0';
81  for (size_t i = start; i <= end; i++) {
82  if (full[i] != last || last != *delim.c_str())
83  full_test.push_back(full[i]);
84  last = full[i];
85  }
86  if (!full.empty()) {
87  KALDI_ASSERT(new_full.compare(full_test) == 0);
88  KALDI_ASSERT(new_full2.compare(full_test) == 0);
89  }
90  }
91  } else if (!full.empty()) {
92  KALDI_ASSERT(new_full.compare(full) == 0);
93  KALDI_ASSERT(new_full2.compare(full) == 0);
94  }
95  }
96 }
97 
99  {
100  std::vector<int32> v;
101  KALDI_ASSERT(SplitStringToIntegers("-1:2:4", ":", false, &v) == true
102  && v.size() == 3 && v[0] == -1 && v[1] == 2 && v[2] == 4);
103  KALDI_ASSERT(SplitStringToIntegers("-1:2:4:", ":", false, &v) == false);
104  KALDI_ASSERT(SplitStringToIntegers(":-1::2:4:", ":", true, &v) == true
105  && v.size() == 3 && v[0] == -1 && v[1] == 2 && v[2] == 4);
106  KALDI_ASSERT(SplitStringToIntegers("-1\n2\t4", " \n\t\r", false, &v) == true
107  && v.size() == 3 && v[0] == -1 && v[1] == 2 && v[2] == 4);
108  KALDI_ASSERT(SplitStringToIntegers(" ", " \n\t\r", true, &v) == true
109  && v.size() == 0);
110  KALDI_ASSERT(SplitStringToIntegers("", " \n\t\r", false, &v) == true
111  && v.size() == 0);
112  }
113 
114  {
115  std::vector<uint32> v;
116  KALDI_ASSERT(SplitStringToIntegers("-1:2:4", ":", false, &v) == false);
117  // cannot put negative number in uint32.
118  }
119 }
120 
121 
122 
124  {
125  std::vector<float> v;
126  KALDI_ASSERT(SplitStringToFloats("-1:2.5:4", ":", false, &v) == true
127  && v.size() == 3 && v[0] == -1 && v[1] == 2.5 && v[2] == 4);
128  KALDI_ASSERT(SplitStringToFloats("-1:2.5:4:", ":", false, &v) == false);
129  KALDI_ASSERT(SplitStringToFloats(":-1::2:4:", ":", true, &v) == true
130  && v.size() == 3 && v[0] == -1 && v[1] == 2 && v[2] == 4);
131  KALDI_ASSERT(SplitStringToFloats("-1\n2.5\t4", " \n\t\r", false, &v) == true
132  && v.size() == 3 && v[0] == -1 && v[1] == 2.5 && v[2] == 4);
133  KALDI_ASSERT(SplitStringToFloats(" ", " \n\t\r", true, &v) == true
134  && v.size() == 0);
135  KALDI_ASSERT(SplitStringToFloats("", " \n\t\r", false, &v) == true
136  && v.size() == 0);
137  }
138 
139  {
140  std::vector<double> v;
141  KALDI_ASSERT(SplitStringToFloats("-1:2:4", ":", false, &v) == true);
142  }
143 }
144 
146  int32 i;
147  KALDI_ASSERT(ConvertStringToInteger("12345", &i) && i == 12345);
148  KALDI_ASSERT(ConvertStringToInteger("-12345", &i) && i == -12345);
149  char j;
150  KALDI_ASSERT(!ConvertStringToInteger("-12345", &j)); // too big for char.
151 
152  KALDI_ASSERT(ConvertStringToInteger(" -12345 ", &i)); // whitespace accepted
153 
154  KALDI_ASSERT(!ConvertStringToInteger("a ", &i)); // non-integers rejected.
155 
156  KALDI_ASSERT(ConvertStringToInteger("0", &i) && i == 0);
157 
158  uint64 k;
159  KALDI_ASSERT(ConvertStringToInteger("12345", &k) && k == 12345);
160  KALDI_ASSERT(!ConvertStringToInteger("-12345", &k)); // unsigned,
161  // cannot convert.
162 }
163 
// Tests ConvertStringToReal() for the floating-point type Real.  Covers
// ordinary values, surrounding whitespace, trailing garbage, and the textual
// spellings of infinity and NaN (including the "1.#INF" / "1.#QNAN" forms).
// Infinity is detected with "d - d != 0" and NaN with "d != d", so the checks
// do not depend on any particular isinf/isnan helper.
template<class Real>
void TestConvertStringToReal() {
  Real d;
  KALDI_ASSERT(ConvertStringToReal("1", &d) && d == 1.0);
  KALDI_ASSERT(ConvertStringToReal("-1", &d) && d == -1.0);
  KALDI_ASSERT(ConvertStringToReal("-1", &d) && d == -1.0);
  KALDI_ASSERT(ConvertStringToReal(" -1 ", &d) && d == -1.0);
  KALDI_ASSERT(!ConvertStringToReal("-1 x", &d));
  KALDI_ASSERT(!ConvertStringToReal("-1f", &d));
  KALDI_ASSERT(ConvertStringToReal("12345.2", &d) && fabs(d-12345.2) < 1.0);
  KALDI_ASSERT(ConvertStringToReal("1.0e+08", &d) && fabs(d-1.0e+08) < 100.0);

  // it also works for inf or nan.
  KALDI_ASSERT(ConvertStringToReal("inf", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal(" inf", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("inf ", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal(" inf ", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("+inf", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("-inf", &d) && d < 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("Inf", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("INF", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("InF", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("infinity", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("-infinity", &d) && d < 0 && d - d != 0);
  KALDI_ASSERT(!ConvertStringToReal("GARBAGE inf", &d));
  KALDI_ASSERT(!ConvertStringToReal("GARBAGEinf", &d));
  KALDI_ASSERT(!ConvertStringToReal("infGARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("inf_GARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("inf GARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("GARBAGE infinity", &d));
  KALDI_ASSERT(!ConvertStringToReal("GARBAGEinfinity", &d));
  KALDI_ASSERT(!ConvertStringToReal("infinityGARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("infinity_GARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("infinity GARBAGE", &d));
  KALDI_ASSERT(ConvertStringToReal("1.#INF", &d) && d > 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("-1.#INF", &d) && d < 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal("-1.#INF ", &d) && d < 0 && d - d != 0);
  KALDI_ASSERT(ConvertStringToReal(" -1.#INF ", &d) && d < 0 && d - d != 0);
  KALDI_ASSERT(!ConvertStringToReal("GARBAGE 1.#INF", &d));
  KALDI_ASSERT(!ConvertStringToReal("GARBAGE1.#INF", &d));
  KALDI_ASSERT(!ConvertStringToReal("2.#INF", &d));
  KALDI_ASSERT(!ConvertStringToReal("-2.#INF", &d));
  KALDI_ASSERT(!ConvertStringToReal("1.#INFGARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("1.#INF_GARBAGE", &d));

  KALDI_ASSERT(ConvertStringToReal("nan", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("+nan", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("-nan", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("Nan", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("NAN", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("NaN", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal(" NaN", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("NaN ", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal(" NaN ", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("1.#QNAN", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("-1.#QNAN", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal("1.#QNAN ", &d) && d != d);
  KALDI_ASSERT(ConvertStringToReal(" 1.#QNAN ", &d) && d != d);
  KALDI_ASSERT(!ConvertStringToReal("GARBAGE nan", &d));
  KALDI_ASSERT(!ConvertStringToReal("GARBAGEnan", &d));
  KALDI_ASSERT(!ConvertStringToReal("nanGARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("nan_GARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("nan GARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("GARBAGE 1.#QNAN", &d));
  KALDI_ASSERT(!ConvertStringToReal("GARBAGE1.#QNAN", &d));
  KALDI_ASSERT(!ConvertStringToReal("2.#QNAN", &d));
  KALDI_ASSERT(!ConvertStringToReal("-2.#QNAN", &d));
  KALDI_ASSERT(!ConvertStringToReal("-1.#QNAN_GARBAGE", &d));
  KALDI_ASSERT(!ConvertStringToReal("-1.#QNANGARBAGE", &d));
}
234 
// Checks that the text std::to_string() produces for a NaN (obtained here as
// sqrt(-1)) is converted by ConvertStringToReal() into a NaN of type Real.
// NaN is recognised by the fact that it compares unequal to itself.
template<class Real>
void TestNan() {
  const std::string nan_text = std::to_string(sqrt(-1));
  Real parsed;
  const bool converted = ConvertStringToReal(nan_text, &parsed);
  KALDI_ASSERT(converted && parsed != parsed);
}
240 
// Checks that the text std::to_string() produces for +infinity and -infinity
// (obtained here as exp(10000) and its negation) is converted by
// ConvertStringToReal() into an infinity of type Real with the right sign.
// An infinite value is recognised by "x - x != 0".
template<class Real>
void TestInf() {
  Real parsed;

  const std::string pos_inf_text = std::to_string(exp(10000));
  KALDI_ASSERT(ConvertStringToReal(pos_inf_text, &parsed) &&
               parsed > 0 && parsed - parsed != 0);

  const std::string neg_inf_text = std::to_string(-exp(10000));
  KALDI_ASSERT(ConvertStringToReal(neg_inf_text, &parsed) &&
               parsed < 0 && parsed - parsed != 0);
}
249 
250 
251 std::string TrimTmp(std::string s) {
252  Trim(&s);
253  return s;
254 }
255 
256 void TestTrim() {
257  KALDI_ASSERT(TrimTmp(" a ") == "a");
258  KALDI_ASSERT(TrimTmp(" a b c") == "a b c");
259  KALDI_ASSERT(TrimTmp("") == "");
260  KALDI_ASSERT(TrimTmp("X\n") == "X");
261  KALDI_ASSERT(TrimTmp("X\n\t") == "X");
262  KALDI_ASSERT(TrimTmp("\n\tX") == "X");
263 } // end namespace kaldi
264 
265 
267  std::string a, b;
268  SplitStringOnFirstSpace("a b", &a, &b);
269  KALDI_ASSERT(a == "a" && b == "b");
270  SplitStringOnFirstSpace("aa bb", &a, &b);
271  KALDI_ASSERT(a == "aa" && b == "bb");
272  SplitStringOnFirstSpace("aa", &a, &b);
273  KALDI_ASSERT(a == "aa" && b == "");
274  SplitStringOnFirstSpace(" aa \n\t ", &a, &b);
275  KALDI_ASSERT(a == "aa" && b == "");
276  SplitStringOnFirstSpace(" \n\t ", &a, &b);
277  KALDI_ASSERT(a == "" && b == "");
278  SplitStringOnFirstSpace(" aa bb \n\t ", &a, &b);
279  KALDI_ASSERT(a == "aa" && b == "bb");
280  SplitStringOnFirstSpace(" aa bb cc ", &a, &b);
281  KALDI_ASSERT(a == "aa" && b == "bb cc");
282  SplitStringOnFirstSpace(" aa bb cc ", &a, &b);
283  KALDI_ASSERT(a == "aa" && b == "bb cc");
284  SplitStringOnFirstSpace(" aa bb cc", &a, &b);
285  KALDI_ASSERT(a == "aa" && b == "bb cc");
286 }
287 
288 void TestIsToken() {
289  KALDI_ASSERT(IsToken("a"));
290  KALDI_ASSERT(IsToken("ab"));
291  KALDI_ASSERT(!IsToken("ab "));
292  KALDI_ASSERT(!IsToken(" ab"));
293  KALDI_ASSERT(!IsToken("a b"));
294  KALDI_ASSERT(IsToken("\231")); // typical non-ASCII printable character,
295  // something with an accent.
296  KALDI_ASSERT(!IsToken("\377")); // character 255, which is a form of space.
297  KALDI_ASSERT(IsToken("a-b,c,d=ef"));
298  KALDI_ASSERT(!IsToken("a\nb"));
299  KALDI_ASSERT(!IsToken("a\tb"));
300  KALDI_ASSERT(!IsToken("ab\t"));
301  KALDI_ASSERT(!IsToken(""));
302 }
303 
304 void TestIsLine() {
305  KALDI_ASSERT(IsLine("a"));
306  KALDI_ASSERT(IsLine("a b"));
307  KALDI_ASSERT(!IsLine("a\nb"));
308  KALDI_ASSERT(!IsLine("a b "));
309  KALDI_ASSERT(!IsLine(" a b"));
310 }
311 
312 
314  // we must test the test.
315  KALDI_ASSERT(!StringsApproxEqual("a", "b"));
316  KALDI_ASSERT(!StringsApproxEqual("1", "2"));
317  KALDI_ASSERT(StringsApproxEqual("1.234", "1.235", 2));
318  KALDI_ASSERT(!StringsApproxEqual("1.234", "1.235", 3));
319  KALDI_ASSERT(StringsApproxEqual("x 1.234 y", "x 1.2345 y", 3));
320  KALDI_ASSERT(!StringsApproxEqual("x 1.234 y", "x 1.2345 y", 4));
321  KALDI_ASSERT(StringsApproxEqual("x 1.234 y 6.41", "x 1.235 y 6.49", 1));
322  KALDI_ASSERT(!StringsApproxEqual("x 1.234 y 6.41", "x 1.235 y 6.49", 2));
323  KALDI_ASSERT(StringsApproxEqual("x 1.234 y 6.41", "x 1.235 y 6.411", 2));
324  KALDI_ASSERT(StringsApproxEqual("x 1.0 y", "x 1.0001 y", 3));
325  KALDI_ASSERT(!StringsApproxEqual("x 1.0 y", "x 1.0001 y", 4));
326 }
327 
329  std::string str;
330  {
331  ConfigLine cfl;
332  str = "a-b xx=yyy foo=bar baz=123 ba=1:2";
333  bool status = cfl.ParseLine(str);
334  KALDI_ASSERT(status && cfl.FirstToken() == "a-b");
335 
337  std::string str_value;
338  KALDI_ASSERT(cfl.GetValue("xx", &str_value));
339  KALDI_ASSERT(str_value == "yyy");
341  KALDI_ASSERT(cfl.GetValue("foo", &str_value));
342  KALDI_ASSERT(str_value == "bar");
344  KALDI_ASSERT(!cfl.GetValue("xy", &str_value));
345  KALDI_ASSERT(cfl.GetValue("baz", &str_value));
346  KALDI_ASSERT(str_value == "123");
347 
348  std::vector<int32> int_values;
349  KALDI_ASSERT(!cfl.GetValue("xx", &int_values));
350  KALDI_ASSERT(cfl.GetValue("baz", &int_values));
352  KALDI_ASSERT(int_values.size() == 1 && int_values[0] == 123);
353  KALDI_ASSERT(cfl.GetValue("ba", &int_values));
354  KALDI_ASSERT(int_values.size() == 2 && int_values[0] == 1 && int_values[1] == 2);
356  }
357 
358  {
359  ConfigLine cfl;
360  str = "a-b baz=x y z pp = qq ab =cd ac= bd";
361  KALDI_ASSERT(!cfl.ParseLine(str));
362  }
363  {
364  ConfigLine cfl;
365  str = "a-b baz=x y z pp = qq ab=cd ac=bd";
366  KALDI_ASSERT(!cfl.ParseLine(str));
367  }
368  {
369  ConfigLine cfl;
370  str = "foo-bar";
371  KALDI_ASSERT(cfl.ParseLine(str));
372  }
373  {
374  ConfigLine cfl;
375  str = "foo-bar a=b c d f=g";
376  std::string value;
377  KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "foo-bar" &&
378  cfl.GetValue("a", &value) && value == "b c d" &&
379  cfl.GetValue("f", &value) && value == "g" &&
380  !cfl.HasUnusedValues());
381  }
382  {
383  ConfigLine cfl;
384  str = "zzz a=b baz";
385  KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "zzz" &&
386  cfl.UnusedValues() == "a=b baz");
387  }
388  {
389  ConfigLine cfl;
390  str = "xxx a=b baz ";
391  KALDI_ASSERT(cfl.ParseLine(str) && cfl.UnusedValues() == "a=b baz");
392  }
393  {
394  ConfigLine cfl;
395  str = "xxx a=b =c";
396  KALDI_ASSERT(!cfl.ParseLine(str));
397  }
398  {
399  ConfigLine cfl;
400  str = "xxx baz='x y z' pp=qq ab=cd ac=bd";
401  KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "xxx");
402  std::string str_value;
403  KALDI_ASSERT(cfl.GetValue("baz", &str_value));
404  KALDI_ASSERT(str_value == "x y z");
405  KALDI_ASSERT(cfl.GetValue("pp", &str_value));
406  KALDI_ASSERT(str_value == "qq");
407  KALDI_ASSERT(cfl.UnusedValues() == "ab=cd ac=bd");
408  KALDI_ASSERT(cfl.GetValue("ab", &str_value));
409  KALDI_ASSERT(str_value == "cd");
410  KALDI_ASSERT(cfl.UnusedValues() == "ac=bd");
412  KALDI_ASSERT(cfl.GetValue("ac", &str_value));
413  KALDI_ASSERT(str_value == "bd");
415  }
416 
417  {
418  ConfigLine cfl;
419  str = "x baz= pp = qq flag=t ";
420  KALDI_ASSERT(!cfl.ParseLine(str));
421  }
422  {
423  ConfigLine cfl;
424  str = " x baz= pp=qq flag=t ";
425  KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "x");
426 
427  std::string str_value;
428  KALDI_ASSERT(cfl.GetValue("baz", &str_value));
429  KALDI_ASSERT(str_value == "");
430  KALDI_ASSERT(cfl.GetValue("pp", &str_value));
431  KALDI_ASSERT(str_value == "qq");
433  KALDI_ASSERT(cfl.GetValue("flag", &str_value));
434  KALDI_ASSERT(str_value == "t");
436 
437  bool bool_value = false;
438  KALDI_ASSERT(cfl.GetValue("flag", &bool_value));
439  KALDI_ASSERT(bool_value);
440  }
441 
442  {
443  ConfigLine cfl;
444  str = "xx _baz=a -pp=qq";
445  KALDI_ASSERT(!cfl.ParseLine(str));
446  }
447  {
448  ConfigLine cfl;
449  str = "xx 0baz=a pp=qq";
450  KALDI_ASSERT(!cfl.ParseLine(str));
451  }
452  {
453  ConfigLine cfl;
454  str = "xx -baz=a pp=qq";
455  KALDI_ASSERT(!cfl.ParseLine(str));
456  }
457  {
458  ConfigLine cfl;
459  str = "xx _baz'=a pp=qq";
460  KALDI_ASSERT(!cfl.ParseLine(str));
461  }
462  {
463  ConfigLine cfl;
464  str = " baz=g";
465  KALDI_ASSERT(cfl.ParseLine(str) && cfl.FirstToken() == "");
466  bool flag;
467  KALDI_ASSERT(!cfl.GetValue("baz", &flag));
468  }
469  {
470  ConfigLine cfl;
471  str = "xx _baz1=a pp=qq";
472  KALDI_ASSERT(cfl.ParseLine(str));
473 
474  std::string str_value;
475  KALDI_ASSERT(cfl.GetValue("_baz1", &str_value));
476  }
477 }
478 
480  std::string str = "a-b alpha=aa beta=\"b b\"# String test\n"
481  "a-b beta2='b c' beta3=bd # \n"
482  "a-b gamma=1:2:3:4 # Int Vector test\n"
483  " a-b de1ta=f # Bool + Integer in key Comment test delta=t \n"
484  "a-b _epsilon=-1 # Int Vector test _epsilon=1 \n"
485  "a-b zet-_a=0.15 theta=1.1# Float, -, _ test\n"
486  "a-b quoted='a b c' # quoted string\n"
487  "a-b quoted2=\"d e 'a b=c' f\" # string quoted with double quotes";
488 
489  std::istringstream is(str);
490  std::vector<std::string> lines;
491  ReadConfigLines(is, &lines);
492  KALDI_ASSERT(lines.size() == 8);
493 
494  ConfigLine cfl;
495  for (size_t i = 0; i < lines.size(); i++) {
496  KALDI_ASSERT(cfl.ParseLine(lines[i]) && cfl.FirstToken() == "a-b");
497  if (i == 1) {
498  KALDI_ASSERT(cfl.GetValue("beta2", &str) && str == "b c");
499  }
500  if (i == 4) {
501  KALDI_ASSERT(cfl.GetValue("_epsilon", &str) && str == "-1");
502  }
503  if (i == 5) {
504  BaseFloat float_val = 0;
505  KALDI_ASSERT(cfl.GetValue("zet-_a", &float_val) && ApproxEqual(float_val, 0.15));
506  }
507  if (i == 6) {
508  KALDI_ASSERT(cfl.GetValue("quoted", &str) && str == "a b c");
509  }
510  if (i == 7) {
511  KALDI_ASSERT(cfl.GetValue("quoted2", &str) && str == "d e 'a b=c' f");
512  }
513  }
514 }
515 
516 } // end namespace kaldi
517 
518 int main() {
519  using namespace kaldi;
524  TestConvertStringToReal<float>();
525  TestConvertStringToReal<double>();
526  TestTrim();
528  TestIsToken();
529  TestIsLine();
531  TestNan<float>();
532  TestNan<double>();
533  TestInf<float>();
534  TestInf<double>();
537  std::cout << "Test OK\n";
538 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
const std::string & FirstToken() const
Definition: text-utils.h:228
void UnitTestConfigLineParse()
void TestConvertStringToInteger()
bool ParseLine(const std::string &line)
Definition: text-utils.cc:343
bool SplitStringToFloats(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< F > *out)
Definition: text-utils.cc:30
bool IsLine(const std::string &line)
Returns true if "line" is free of newline ("\n") characters and unprintable characters, and does not contain leading or trailing whitespace.
Definition: text-utils.cc:154
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
void JoinVectorToString(const std::vector< std::string > &vec_in, const char *delim, bool omit_empty_strings, std::string *str_out)
Joins the elements of a vector of strings into a single string using "delim" as the delimiter...
Definition: text-utils.cc:77
void TestStringsApproxEqual()
void TestSplitStringToIntegers()
kaldi::int32 int32
void TestSplitStringOnFirstSpace()
char GetRandDelim()
void TestIsLine()
bool IsToken(const std::string &token)
Returns true if "token" is nonempty, and all characters are printable and whitespace-free.
Definition: text-utils.cc:105
void TestSplitStringToFloats()
std::string UnusedValues() const
returns e.g.
Definition: text-utils.cc:518
void TestNan()
bool StringsApproxEqual(const std::string &a, const std::string &b, int32 decimal_places_tolerance)
This function returns true when two text strings are approximately equal, and false when they are not...
Definition: text-utils.cc:335
float BaseFloat
Definition: kaldi-types.h:29
void SplitStringOnFirstSpace(const std::string &str, std::string *first, std::string *rest)
Removes leading and trailing white space from the string, then splits on the first section of whitesp...
Definition: text-utils.cc:120
void TestConvertStringToReal()
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
void UnitTestReadConfig()
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
Definition: text-utils.cc:238
void ReadConfigLines(std::istream &is, std::vector< std::string > *lines)
This function reads in a config file and *appends* its contents to a vector of lines; it is responsib...
Definition: text-utils.cc:564
void Trim(std::string *str)
Removes the beginning and trailing whitespaces from a string.
Definition: text-utils.cc:92
void TestIsToken()
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
char GetRandChar()
void TestSplitStringToVector()
const char * ws_delim
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
bool HasUnusedValues() const
Definition: text-utils.cc:510
bool GetValue(const std::string &key, std::string *value)
Definition: text-utils.cc:427
void TestInf()
std::string TrimTmp(std::string s)
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
void TestTrim()
int main()