28 std::vector<std::pair<std::string, std::string> >
33 if (!input.
Open(rxfilename, &is_binary)) {
34 if (warn)
KALDI_WARN <<
"Error opening script file: " <<
39 if (warn)
KALDI_WARN <<
"Error: script file appears to be binary: " <<
53 std::vector<std::pair<std::string, std::string> >
58 while (getline(is, line)) {
60 const char *c = line.c_str();
63 KALDI_WARN <<
"Empty " << line_number <<
"'th line in script file";
67 std::string key, rest;
70 if (key.empty() || rest.empty()) {
72 KALDI_WARN <<
"Invalid " << line_number <<
"'th line in script file" 73 <<
":\"" << line <<
'"';
76 script_out->resize(script_out->size()+1);
77 script_out->back().first = key;
78 script_out->back().second = rest;
84 const std::vector<std::pair<std::string, std::string> >
87 KALDI_WARN <<
"WriteScriptFile: attempting to write to invalid stream.";
90 std::vector<std::pair<std::string, std::string> >::const_iterator iter;
91 for (iter = script.begin(); iter != script.end(); ++iter) {
93 KALDI_WARN <<
"WriteScriptFile: using invalid token \"" << iter->first <<
97 if (iter->second.find(
'\n') != std::string::npos ||
98 (iter->second.length() != 0 &&
99 (isspace(iter->second[0]) ||
100 isspace(iter->second[iter->second.length()-1])))) {
102 KALDI_WARN <<
"WriteScriptFile: attempting to write invalid line \"" <<
106 os << iter->first <<
' ' << iter->second <<
'\n';
109 KALDI_WARN <<
"WriteScriptFile: stream in error state.";
116 const std::vector<std::pair<std::string, std::string> >
119 if (!output.
Open(wxfilename,
false,
false)) {
121 KALDI_ERR <<
"Error opening output stream for script file: " 126 KALDI_ERR <<
"Error writing script file to stream " 136 std::string *archive_wxfilename,
137 std::string *script_wxfilename,
152 if (archive_wxfilename) archive_wxfilename->clear();
153 if (script_wxfilename) script_wxfilename->clear();
155 size_t pos = wspecifier.find(
':');
160 std::string before_colon(wspecifier, 0, pos), after_colon(wspecifier, pos+1);
162 std::vector<std::string> split_first_part;
172 for (
size_t i = 0;
i < split_first_part.size();
i++) {
173 const std::string &str = split_first_part[
i];
175 const char *c = str.c_str();
176 if (!strcmp(c,
"b")) {
177 if (opts) opts->
binary =
true;
178 }
else if (!strcmp(c,
"f")) {
179 if (opts) opts->
flush =
true;
180 }
else if (!strcmp(c,
"nf")) {
181 if (opts) opts->
flush =
false;
182 }
else if (!strcmp(c,
"t")) {
183 if (opts) opts->
binary =
false;
184 }
else if (!strcmp(c,
"p")) {
186 }
else if (!strcmp(c,
"ark")) {
191 }
else if (!strcmp(c,
"scp")) {
203 if (archive_wxfilename)
204 *archive_wxfilename = after_colon;
207 if (script_wxfilename)
208 *script_wxfilename = after_colon;
211 pos = after_colon.find(
',');
213 if (archive_wxfilename)
214 *archive_wxfilename = std::string(after_colon, 0, pos);
215 if (script_wxfilename)
216 *script_wxfilename = std::string(after_colon, pos+1);
226 std::string *rxfilename,
250 if (rxfilename) rxfilename->clear();
256 size_t pos = rspecifier.find(
':');
262 std::string before_colon(rspecifier, 0, pos),
263 after_colon(rspecifier, pos+1);
265 std::vector<std::string> split_first_part;
271 for (
size_t i = 0;
i < split_first_part.size();
i++) {
272 const std::string &str = split_first_part[
i];
274 const char *c = str.c_str();
275 if (!strcmp(c,
"b"));
277 else if (!strcmp(c,
"t"));
278 else if (!strcmp(c,
"o")) {
279 if (opts) opts->
once =
true;
280 }
else if (!strcmp(c,
"no")) {
281 if (opts) opts->
once =
false;
282 }
else if (!strcmp(c,
"p")) {
284 }
else if (!strcmp(c,
"np")) {
286 }
else if (!strcmp(c,
"s")) {
287 if (opts) opts->
sorted =
true;
288 }
else if (!strcmp(c,
"ns")) {
289 if (opts) opts->
sorted =
false;
290 }
else if (!strcmp(c,
"cs")) {
292 }
else if (!strcmp(c,
"ncs")) {
294 }
else if (!strcmp(c,
"bg")) {
296 }
else if (!strcmp(c,
"ark")) {
301 }
else if (!strcmp(c,
"scp")) {
311 && rxfilename != NULL)
312 *rxfilename = after_colon;
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
bool IsToken(const std::string &token)
Returns true if "token" is nonempty, and all characters are printable and whitespace-free.
bool WriteScriptFile(std::ostream &os, const std::vector< std::pair< std::string, std::string > > &script)
void SplitStringOnFirstSpace(const std::string &str, std::string *first, std::string *rest)
Removes leading and trailing white space from the string, then splits on the first section of whitespace found (if present), putting the part before the whitespace in "first" and the rest in "rest".
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
WspecifierType ClassifyWspecifier(const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
bool Open(const std::string &wxfilename, bool binary, bool write_header)
This opens the stream, with the given mode (binary or text).
bool ReadScriptFile(const std::string &rxfilename, bool warn, std::vector< std::pair< std::string, std::string > > *script_out)
#define KALDI_ASSERT(cond)
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting, i.e. it does quoting and escaping and replaces "" or "-" with "standard input".
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting, i.e. it does quoting and escaping and replaces "" or "-" with "standard output".