"Table types and related functions"

This group is for classes and functions related to Tables; see also "Implementation classes for Table types" and "Specific Table types", and for a description of the Table concept see The Table concept. More...

Classes

struct  WspecifierOptions
 
struct  RspecifierOptions
 
class  RandomAccessTableReader< Holder >
 Allows random access to a collection of objects in an archive or script file; see The Table concept. More...
 
class  SequentialTableReader< Holder >
 A templated class for reading objects sequentially from an archive or script file; see The Table concept. More...
 
class  TableWriter< Holder >
 A templated class for writing objects to an archive or script file; see The Table concept. More...
 
class  RandomAccessTableReaderMapped< Holder >
 This class is for when you are reading something in random access, where the data may actually be stored per-speaker (or something similar) while the keys you're using are per-utterance. More...
 

Typedefs

typedef std::vector< std::string > KeyList
 

Enumerations

enum  WspecifierType { kNoWspecifier, kArchiveWspecifier, kScriptWspecifier, kBothWspecifier }
 
enum  RspecifierType { kNoRspecifier, kArchiveRspecifier, kScriptRspecifier }
 

Functions

WspecifierType ClassifyWspecifier (const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
 
bool ReadScriptFile (const std::string &rxfilename, bool warn, std::vector< std::pair< std::string, std::string > > *script_out)
 
bool ReadScriptFile (std::istream &is, bool warn, std::vector< std::pair< std::string, std::string > > *script_out)
 
bool WriteScriptFile (const std::string &wxfilename, const std::vector< std::pair< std::string, std::string > > &script)
 
bool WriteScriptFile (std::ostream &os, const std::vector< std::pair< std::string, std::string > > &script)
 
RspecifierType ClassifyRspecifier (const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
 

Detailed Description

This group is for classes and functions related to Tables; see also "Implementation classes for Table types" and "Specific Table types", and for a description of the Table concept see The Table concept.

Typedef Documentation

◆ KeyList

typedef std::vector<std::string> KeyList

Definition at line 54 of file kaldi-table.h.

Enumeration Type Documentation

◆ RspecifierType

Enumerator
kNoRspecifier 
kArchiveRspecifier 
kScriptRspecifier 

Definition at line 219 of file kaldi-table.h.

◆ WspecifierType

Enumerator
kNoWspecifier 
kArchiveWspecifier 
kScriptWspecifier 
kBothWspecifier 

Definition at line 106 of file kaldi-table.h.

Function Documentation

◆ ClassifyRspecifier()

RspecifierType ClassifyRspecifier ( const std::string &  rspecifier,
std::string *  rxfilename,
RspecifierOptions *  opts 
)

Definition at line 225 of file kaldi-table.cc.

References RspecifierOptions::background, RspecifierOptions::called_sorted, rnnlm::i, kaldi::kArchiveRspecifier, kaldi::kNoRspecifier, kaldi::kScriptRspecifier, RspecifierOptions::once, RspecifierOptions::permissive, RspecifierOptions::sorted, and kaldi::SplitStringToVector().

Referenced by kaldi::ClassifyRxfilename(), kaldi::ClassifyWxfilename(), main(), SequentialTableReaderScriptImpl< Holder >::Open(), RandomAccessTableReader< kaldi::TokenHolder >::Open(), SequentialTableReader< Holder >::Open(), SequentialTableReaderArchiveImpl< Holder >::Open(), RandomAccessTableReaderScriptImpl< Holder >::Open(), RandomAccessTableReaderArchiveImplBase< Holder >::Open(), kaldi::SplitArgOnEquals(), kaldi::TypeThreeUsage(), kaldi::TypeTwoUsage(), and kaldi::UnitTestClassifyRspecifier().

227  {
228  // Examples
229  // ark:rxfilename -> kArchiveRspecifier
230  // scp:rxfilename -> kScriptRspecifier
231  //
232  // We also allow the meaningless prefixes b, and t,
233  // plus the options o (once), no (not-once),
234  // s (sorted) and ns (not-sorted), p (permissive)
235  // and np (not-permissive).
236  // so the following would be valid:
237  //
238  // f, o, b, np, ark:rxfilename -> kArchiveRspecifier
239  //
240  // Examples:
241  //
242  // b, ark:rxfilename -> kArchiveRspecifier
243  // t, ark:rxfilename -> kArchiveRspecifier
244  // b, scp:rxfilename -> kScriptRspecifier
245  // t, no, s, scp:rxfilename -> kScriptRspecifier
246  // t, ns, scp:rxfilename -> kScriptRspecifier
247 
248  // Improperly formed Rspecifiers will be classified as kNoRspecifier.
249 
250  if (rxfilename) rxfilename->clear();
251 
252  if (opts != NULL)
253  *opts = RspecifierOptions(); // Make sure all the defaults are as in the
254  // default constructor of the options class.
255 
256  size_t pos = rspecifier.find(':');
257  if (pos == std::string::npos) return kNoRspecifier;
258 
259  if (isspace(*(rspecifier.rbegin()))) return kNoRspecifier; // Trailing space
260  // disallowed.
261 
262  std::string before_colon(rspecifier, 0, pos),
263  after_colon(rspecifier, pos+1);
264 
265  std::vector<std::string> split_first_part; // Split part before ':' on ', '.
266  SplitStringToVector(before_colon, ", ", false, &split_first_part); // false==
267  // don't omit empty strings between commas.
268 
269  RspecifierType rs = kNoRspecifier;
270 
271  for (size_t i = 0; i < split_first_part.size(); i++) {
272  const std::string &str = split_first_part[i]; // e.g. "b", "t", "f", "ark",
273  // "scp".
274  const char *c = str.c_str();
275  if (!strcmp(c, "b")); // Ignore this option. It's so we can use the same
276  // specifiers for rspecifiers and wspecifiers.
277  else if (!strcmp(c, "t")); // Ignore this option too.
278  else if (!strcmp(c, "o")) {
279  if (opts) opts->once = true;
280  } else if (!strcmp(c, "no")) {
281  if (opts) opts->once = false;
282  } else if (!strcmp(c, "p")) {
283  if (opts) opts->permissive = true;
284  } else if (!strcmp(c, "np")) {
285  if (opts) opts->permissive = false;
286  } else if (!strcmp(c, "s")) {
287  if (opts) opts->sorted = true;
288  } else if (!strcmp(c, "ns")) {
289  if (opts) opts->sorted = false;
290  } else if (!strcmp(c, "cs")) {
291  if (opts) opts->called_sorted = true;
292  } else if (!strcmp(c, "ncs")) {
293  if (opts) opts->called_sorted = false;
294  } else if (!strcmp(c, "bg")) {
295  if (opts) opts->background = true;
296  } else if (!strcmp(c, "ark")) {
297  if (rs == kNoRspecifier) rs = kArchiveRspecifier;
298  else
299  return kNoRspecifier; // Repeated or combined ark and scp options
300  // invalid.
301  } else if (!strcmp(c, "scp")) {
302  if (rs == kNoRspecifier) rs = kScriptRspecifier;
303  else
304  return kNoRspecifier; // Repeated or combined ark and scp options
305  // invalid.
306  } else {
307  return kNoRspecifier; // Could not interpret this option.
308  }
309  }
310  if ((rs == kArchiveRspecifier || rs == kScriptRspecifier)
311  && rxfilename != NULL)
312  *rxfilename = after_colon;
313  return rs;
314 }
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
RspecifierType
Definition: kaldi-table.h:219

◆ ClassifyWspecifier()

WspecifierType ClassifyWspecifier ( const std::string &  wspecifier,
std::string *  archive_wxfilename,
std::string *  script_wxfilename,
WspecifierOptions *  opts 
)

Definition at line 135 of file kaldi-table.cc.

References WspecifierOptions::binary, WspecifierOptions::flush, rnnlm::i, kaldi::kArchiveWspecifier, kaldi::kBothWspecifier, kaldi::kNoWspecifier, kaldi::kScriptWspecifier, WspecifierOptions::permissive, and kaldi::SplitStringToVector().

Referenced by kaldi::ClassifyRxfilename(), kaldi::ClassifyWxfilename(), main(), TableWriter< Holder >::Open(), TableWriterArchiveImpl< Holder >::Open(), TableWriterScriptImpl< Holder >::Open(), TableWriterBothImpl< Holder >::Open(), kaldi::TypeThreeUsage(), kaldi::TypeTwoUsage(), kaldi::UnitTestClassifyWspecifier(), and WspecifierOptions::WspecifierOptions().

138  {
139  // Examples:
140  // ark,t:wxfilename -> kArchiveWspecifier
141  // ark,b:wxfilename -> kArchiveWspecifier
142  // scp,t:rxfilename -> kScriptWspecifier
143  // scp,t:rxfilename -> kScriptWspecifier
144  // ark,scp,t:filename, wxfilename -> kBothWspecifier
145  // ark,scp:filename, wxfilename -> kBothWspecifier
146  // Note we can include the flush option (f) or no-flush (nf)
147  // anywhere: e.g.
148  // ark,scp,f:filename, wxfilename -> kBothWspecifier
149  // or:
150  // scp,t,nf:rxfilename -> kScriptWspecifier
151 
152  if (archive_wxfilename) archive_wxfilename->clear();
153  if (script_wxfilename) script_wxfilename->clear();
154 
155  size_t pos = wspecifier.find(':');
156  if (pos == std::string::npos) return kNoWspecifier;
157  if (isspace(*(wspecifier.rbegin()))) return kNoWspecifier; // Trailing space
158  // disallowed.
159 
160  std::string before_colon(wspecifier, 0, pos), after_colon(wspecifier, pos+1);
161 
162  std::vector<std::string> split_first_part; // Split part before ':' on ', '.
163  SplitStringToVector(before_colon, ", ", false, &split_first_part); // false==
164  // don't omit empty strings between commas.
165 
166  WspecifierType ws = kNoWspecifier;
167 
168  if (opts != NULL)
169  *opts = WspecifierOptions(); // Make sure all the defaults are as in the
170  // default constructor of the options class.
171 
172  for (size_t i = 0; i < split_first_part.size(); i++) {
173  const std::string &str = split_first_part[i]; // e.g. "b", "t", "f", "ark",
174  // "scp".
175  const char *c = str.c_str();
176  if (!strcmp(c, "b")) {
177  if (opts) opts->binary = true;
178  } else if (!strcmp(c, "f")) {
179  if (opts) opts->flush = true;
180  } else if (!strcmp(c, "nf")) {
181  if (opts) opts->flush = false;
182  } else if (!strcmp(c, "t")) {
183  if (opts) opts->binary = false;
184  } else if (!strcmp(c, "p")) {
185  if (opts) opts->permissive = true;
186  } else if (!strcmp(c, "ark")) {
187  if (ws == kNoWspecifier) ws = kArchiveWspecifier;
188  else
189  return kNoWspecifier; // We do not allow "scp, ark", only "ark,
190  // scp".
191  } else if (!strcmp(c, "scp")) {
192  if (ws == kNoWspecifier) ws = kScriptWspecifier;
193  else if (ws == kArchiveWspecifier) ws = kBothWspecifier;
194  else
195  return kNoWspecifier; // repeated "scp" option: invalid.
196  } else {
197  return kNoWspecifier; // Could not interpret this option.
198  }
199  }
200 
201  switch (ws) {
202  case kArchiveWspecifier:
203  if (archive_wxfilename)
204  *archive_wxfilename = after_colon;
205  break;
206  case kScriptWspecifier:
207  if (script_wxfilename)
208  *script_wxfilename = after_colon;
209  break;
210  case kBothWspecifier:
211  pos = after_colon.find(','); // first comma.
212  if (pos == std::string::npos) return kNoWspecifier;
213  if (archive_wxfilename)
214  *archive_wxfilename = std::string(after_colon, 0, pos);
215  if (script_wxfilename)
216  *script_wxfilename = std::string(after_colon, pos+1);
217  break;
218  case kNoWspecifier: default: break;
219  }
220  return ws;
221 }
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
WspecifierType
Definition: kaldi-table.h:106

◆ ReadScriptFile() [1/2]

bool ReadScriptFile ( const std::string &  rxfilename,
bool  warn,
std::vector< std::pair< std::string, std::string > > *  script_out 
)

Definition at line 26 of file kaldi-table.cc.

References KALDI_WARN, Input::Open(), kaldi::PrintableRxfilename(), and Input::Stream().

Referenced by TableWriterScriptImpl< Holder >::Open(), RandomAccessTableReaderScriptImpl< Holder >::Open(), kaldi::UnitTestReadScriptFile(), kaldi::UnitTestTableSequentialInt32Script(), and WspecifierOptions::WspecifierOptions().

29  {
30  bool is_binary;
31  Input input;
32 
33  if (!input.Open(rxfilename, &is_binary)) {
34  if (warn) KALDI_WARN << "Error opening script file: " <<
35  PrintableRxfilename(rxfilename);
36  return false;
37  }
38  if (is_binary) {
39  if (warn) KALDI_WARN << "Error: script file appears to be binary: " <<
40  PrintableRxfilename(rxfilename);
41  return false;
42  }
43 
44  bool ans = ReadScriptFile(input.Stream(), warn, script_out);
45  if (warn && !ans)
46  KALDI_WARN << "[script file was: " << PrintableRxfilename(rxfilename) <<
47  "]";
48  return ans;
49 }
bool ReadScriptFile(std::istream &is, bool warn, std::vector< std::pair< std::string, std::string > > *script_out)
Definition: kaldi-table.cc:51
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:61

◆ ReadScriptFile() [2/2]

bool ReadScriptFile ( std::istream &  is,
bool  warn,
std::vector< std::pair< std::string, std::string > > *  script_out 
)

Definition at line 51 of file kaldi-table.cc.

References KALDI_ASSERT, KALDI_WARN, line_number, and kaldi::SplitStringOnFirstSpace().

54  {
55  KALDI_ASSERT(script_out != NULL);
56  std::string line;
57  int line_number = 0;
58  while (getline(is, line)) {
59  line_number++;
60  const char *c = line.c_str();
61  if (*c == '\0') {
62  if (warn)
63  KALDI_WARN << "Empty " << line_number << "'th line in script file";
64  return false; // Empty line so invalid scp file format..
65  }
66 
67  std::string key, rest;
68  SplitStringOnFirstSpace(line, &key, &rest);
69 
70  if (key.empty() || rest.empty()) {
71  if (warn)
72  KALDI_WARN << "Invalid " << line_number << "'th line in script file"
73  <<":\"" << line << '"';
74  return false;
75  }
76  script_out->resize(script_out->size()+1);
77  script_out->back().first = key;
78  script_out->back().second = rest;
79  }
80  return true;
81 }
void SplitStringOnFirstSpace(const std::string &str, std::string *first, std::string *rest)
Removes leading and trailing white space from the string, then splits on the first section of whitespace...
Definition: text-utils.cc:120
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
int32 line_number

◆ WriteScriptFile() [1/2]

bool WriteScriptFile ( std::ostream &  os,
const std::vector< std::pair< std::string, std::string > > &  script 
)

Definition at line 83 of file kaldi-table.cc.

References kaldi::IsToken(), and KALDI_WARN.

Referenced by kaldi::UnitTestTableSequentialInt32Script(), kaldi::WriteScriptFile(), and WspecifierOptions::WspecifierOptions().

85  {
86  if (!os.good()) {
87  KALDI_WARN << "WriteScriptFile: attempting to write to invalid stream.";
88  return false;
89  }
90  std::vector<std::pair<std::string, std::string> >::const_iterator iter;
91  for (iter = script.begin(); iter != script.end(); ++iter) {
92  if (!IsToken(iter->first)) {
93  KALDI_WARN << "WriteScriptFile: using invalid token \"" << iter->first <<
94  '"';
95  return false;
96  }
97  if (iter->second.find('\n') != std::string::npos ||
98  (iter->second.length() != 0 &&
99  (isspace(iter->second[0]) ||
100  isspace(iter->second[iter->second.length()-1])))) {
101  // second part contains newline or leading or trailing space.
102  KALDI_WARN << "WriteScriptFile: attempting to write invalid line \"" <<
103  iter->second << '"';
104  return false;
105  }
106  os << iter->first << ' ' << iter->second << '\n';
107  }
108  if (!os.good()) {
109  KALDI_WARN << "WriteScriptFile: stream in error state.";
110  return false;
111  }
112  return true;
113 }
bool IsToken(const std::string &token)
Returns true if "token" is nonempty, and all characters are printable and whitespace-free.
Definition: text-utils.cc:105
#define KALDI_WARN
Definition: kaldi-error.h:150

◆ WriteScriptFile() [2/2]

bool WriteScriptFile ( const std::string &  wxfilename,
const std::vector< std::pair< std::string, std::string > > &  script 
)

Definition at line 115 of file kaldi-table.cc.

References KALDI_ERR, Output::Open(), kaldi::PrintableWxfilename(), Output::Stream(), and kaldi::WriteScriptFile().

117  {
118  Output output;
119  if (!output.Open(wxfilename, false, false)) { // false, false means not
120  // binary, no binary-mode header.
121  KALDI_ERR << "Error opening output stream for script file: "
122  << PrintableWxfilename(wxfilename);
123  return false;
124  }
125  if (!WriteScriptFile(output.Stream(), script)) {
126  KALDI_ERR << "Error writing script file to stream "
127  << PrintableWxfilename(wxfilename);
128  return false;
129  }
130  return true;
131 }
bool WriteScriptFile(const std::string &wxfilename, const std::vector< std::pair< std::string, std::string > > &script)
Definition: kaldi-table.cc:115
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73