31 bool include =
true,
bool ignore_missing =
false,
32 bool sorted =
false) {
33 unordered_set<std::string, StringHasher> subset;
34 std::set<std::string> subset_list;
37 Input ki(filename, &binary);
40 while (std::getline(ki.
Stream(), line)) {
41 std::vector<std::string> split_line;
43 if(split_line.empty()) {
44 KALDI_ERR <<
"Unable to parse line \"" << line <<
"\" encountered in input in " << filename;
46 subset.insert(split_line[0]);
47 subset_list.insert(split_line[0]);
51 size_t num_success = 0;
52 for (; !lattice_reader->
Done(); lattice_reader->
Next(), num_total++) {
53 if (include && sorted && subset_list.size() > 0
54 && lattice_reader->
Key() > *(subset_list.rbegin())) {
57 <<
"the last key in the include list. Not reading further.";
58 KALDI_LOG <<
"Wrote " << num_success <<
" utterances";
62 if (include && subset.count(lattice_reader->
Key()) > 0) {
63 lattice_writer->
Write(lattice_reader->
Key(), lattice_reader->
Value());
65 }
else if (!include && subset.count(lattice_reader->
Key()) == 0) {
66 lattice_writer->
Write(lattice_reader->
Key(), lattice_reader->
Value());
71 KALDI_LOG <<
"Wrote " << num_success <<
" out of " << num_total
74 if (ignore_missing)
return 0;
76 return (num_success != 0 ? 0 : 1);
82 bool include =
true,
bool ignore_missing =
false,
83 bool sorted =
false) {
84 unordered_set<std::string, StringHasher> subset;
85 std::set<std::string> subset_list;
88 Input ki(filename, &binary);
91 while (std::getline(ki.
Stream(), line)) {
92 std::vector<std::string> split_line;
94 if(split_line.empty()) {
95 KALDI_ERR <<
"Unable to parse line \"" << line <<
"\" encountered in input in " << filename;
97 subset.insert(split_line[0]);
98 subset_list.insert(split_line[0]);
102 size_t num_success = 0;
103 for (; !lattice_reader->
Done(); lattice_reader->
Next(), num_total++) {
104 if (include && sorted && subset_list.size() > 0
105 && lattice_reader->
Key() > *(subset_list.rbegin())) {
107 <<
" is larger than " 108 <<
"the last key in the include list. Not reading further.";
109 KALDI_LOG <<
"Wrote " << num_success <<
" utterances";
113 if (include && subset.count(lattice_reader->
Key()) > 0) {
114 lattice_writer->
Write(lattice_reader->
Key(), lattice_reader->
Value());
116 }
else if (!include && subset.count(lattice_reader->
Key()) == 0) {
117 lattice_writer->
Write(lattice_reader->
Key(), lattice_reader->
Value());
122 KALDI_LOG <<
" Wrote " << num_success <<
" out of " << num_total
125 if (ignore_missing)
return 0;
127 return (num_success != 0 ? 0 : 1);
131 int main(
int argc,
char *argv[]) {
133 using namespace kaldi;
135 typedef kaldi::int64 int64;
136 using fst::SymbolTable;
137 using fst::VectorFst;
141 "Copy lattices (e.g. useful for changing to text mode or changing\n" 142 "format to standard from compact lattice.)\n" 143 "The --include and --exclude options can be used to copy only a subset " 144 "of lattices, where are the --include option specifies the " 145 "whitelisted utterances that would be copied and --exclude option " 146 "specifies the blacklisted utterances that would not be copied.\n" 147 "Only one of --include and --exclude can be supplied.\n" 148 "Usage: lattice-copy [options] lattice-rspecifier lattice-wspecifier\n" 149 " e.g.: lattice-copy --write-compact=false ark:1.lats ark,t:text.lats\n" 150 "See also: lattice-scale, lattice-to-fst, and\n" 151 " the script egs/wsj/s5/utils/convert_slf.pl\n";
154 bool write_compact =
true, ignore_missing =
false;
155 std::string include_rxfilename;
156 std::string exclude_rxfilename;
158 po.
Register(
"write-compact", &write_compact,
"If true, write in normal (compact) form.");
159 po.
Register(
"include", &include_rxfilename,
160 "Text file, the first field of each " 161 "line being interpreted as the " 162 "utterance-id whose lattices will be included");
163 po.
Register(
"exclude", &exclude_rxfilename,
164 "Text file, the first field of each " 165 "line being interpreted as an utterance-id " 166 "whose lattices will be excluded");
167 po.
Register(
"ignore-missing", &ignore_missing,
168 "Exit with status 0 even if no lattices are copied");
177 std::string lats_rspecifier = po.
GetArg(1),
178 lats_wspecifier = po.
GetArg(2);
182 bool sorted = opts.
sorted;
190 if (include_rxfilename !=
"") {
191 if (exclude_rxfilename !=
"") {
192 KALDI_ERR <<
"should not have both --exclude and --include option!";
195 &lattice_reader, &lattice_writer,
196 true, ignore_missing, sorted);
197 }
else if (exclude_rxfilename !=
"") {
199 &lattice_reader, &lattice_writer,
200 false, ignore_missing);
203 for (; !lattice_reader.
Done(); lattice_reader.
Next(), n_done++)
204 lattice_writer.
Write(lattice_reader.
Key(), lattice_reader.
Value());
209 if (include_rxfilename !=
"") {
210 if (exclude_rxfilename !=
"") {
211 KALDI_ERR <<
"should not have both --exclude and --include option!";
214 &lattice_reader, &lattice_writer,
215 true, ignore_missing, sorted);
216 }
else if (exclude_rxfilename !=
"") {
218 &lattice_reader, &lattice_writer,
219 true, ignore_missing);
222 for (; !lattice_reader.
Done(); lattice_reader.
Next(), n_done++)
223 lattice_writer.
Write(lattice_reader.
Key(), lattice_reader.
Value());
225 KALDI_LOG <<
"Done copying " << n_done <<
" lattices.";
227 if (ignore_missing)
return 0;
229 return (n_done != 0 ? 0 : 1);
230 }
catch(
const std::exception &e) {
231 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 CopySubsetLattices(std::string filename, SequentialLatticeReader *lattice_reader, LatticeWriter *lattice_writer, bool include=true, bool ignore_missing=false, bool sorted=false)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
int main(int argc, char *argv[])
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)