// Entry point of the nnet3-latgen-faster tool; per its usage text it generates
// lattices using an nnet3 neural net model.
// NOTE(review): this listing is incomplete -- the numeric prefixes are listing
// line numbers and they skip, so several statements below appear without
// their beginning or end. Comments only describe what is visible here.
33 int main(
int argc,
char *argv[]) {
38 using namespace kaldi;
41 using fst::SymbolTable;
46 "Generate lattices using nnet3 neural net model.\n" 47 "Usage: nnet3-latgen-faster [options] <nnet-in> <fst-in|fsts-rspecifier> <features-rspecifier>" 48 " <lattice-wspecifier> [ <words-wspecifier> [<alignments-wspecifier>] ]\n" 49 "See also: nnet3-latgen-faster-parallel, nnet3-latgen-faster-batch\n";
// Variables that receive command-line option values through the
// po.Register() calls below.
52 bool allow_partial =
false;
56 std::string word_syms_filename;
57 std::string ivector_rspecifier,
58 online_ivector_rspecifier,
60 int32 online_ivector_period = 0;
// Register each named option together with its help text.
63 po.
Register(
"word-symbol-table", &word_syms_filename,
64 "Symbol table for words [for debug output]");
65 po.
Register(
"allow-partial", &allow_partial,
66 "If true, produce output even if end state was not reached.");
67 po.
Register(
"ivectors", &ivector_rspecifier,
"Rspecifier for " 68 "iVectors as vectors (i.e. not estimated online); per utterance " 69 "by default, or per speaker if you provide the --utt2spk option.");
70 po.
Register(
"utt2spk", &utt2spk_rspecifier,
"Rspecifier for " 71 "utt2spk option used to get ivectors per speaker");
72 po.
Register(
"online-ivectors", &online_ivector_rspecifier,
"Rspecifier for " 73 "iVectors estimated online, as matrices. If you supply this," 74 " you must set the --online-ivector-period option.");
75 po.
Register(
"online-ivector-period", &online_ivector_period,
"Number of frames " 76 "between iVectors in matrices supplied to the --online-ivectors " 86 std::string model_in_filename = po.
GetArg(1),
88 feature_rspecifier = po.
GetArg(3),
89 lattice_wspecifier = po.
GetArg(4),
// Open the model file named by positional argument 1.
97 Input ki(model_in_filename, &binary);
// Open whichever lattice writer matches the `determinize` flag; failing to
// open the output table is reported through KALDI_ERR.
108 if (! (determinize ? compact_lattice_writer.
Open(lattice_wspecifier)
109 : lattice_writer.
Open(lattice_wspecifier)))
110 KALDI_ERR <<
"Could not open table for writing lattices: " 111 << lattice_wspecifier;
114 online_ivector_rspecifier);
116 ivector_rspecifier, utt2spk_rspecifier);
// The word symbol table is optional: word_syms stays NULL unless a filename
// was supplied, and a failed read is reported through KALDI_ERR.
121 fst::SymbolTable *word_syms = NULL;
122 if (word_syms_filename !=
"")
123 if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
124 KALDI_ERR <<
"Could not read symbol table from file " 125 << word_syms_filename;
// Running totals that feed the summary log messages at the end.
127 double tot_like = 0.0;
128 kaldi::int64 frame_count = 0;
129 int num_success = 0, num_fail = 0;
// First decoding loop: iterate over every entry of feature_reader, using the
// entry's key as the utterance id.
145 for (; !feature_reader.
Done(); feature_reader.
Next()) {
146 std::string utt = feature_reader.
Key();
// Warn about utterances whose feature matrix has no rows.
// NOTE(review): the statement that follows the warning is not visible here.
148 if (features.NumRows() == 0) {
149 KALDI_WARN <<
"Zero-length utterance: " << utt;
// When an --ivectors rspecifier was given, warn if this utterance has no
// entry; otherwise take a pointer to the stored value.
155 if (!ivector_rspecifier.empty()) {
156 if (!ivector_reader.
HasKey(utt)) {
157 KALDI_WARN <<
"No iVector available for utterance " << utt;
161 ivector = &ivector_reader.
Value(utt);
// Same lookup for the --online-ivectors input.
164 if (!online_ivector_rspecifier.empty()) {
165 if (!online_ivector_reader.
HasKey(utt)) {
166 KALDI_WARN <<
"No online iVector available for utterance " << utt;
170 online_ivectors = &online_ivector_reader.
Value(utt);
175 decodable_opts, trans_model, am_nnet,
176 features, ivector, online_ivectors,
177 online_ivector_period, &compiler);
181 decoder, nnet_decodable, trans_model, word_syms, utt,
183 &alignment_writer, &words_writer, &compact_lattice_writer,
// Second decoding loop: iterate over fst_reader entries instead, and look the
// features up by the same key.
196 for (; !fst_reader.
Done(); fst_reader.
Next()) {
197 std::string utt = fst_reader.
Key();
// An FST entry without matching features only produces a warning.
198 if (!feature_reader.
HasKey(utt)) {
199 KALDI_WARN <<
"Not decoding utterance " << utt
200 <<
" because no features available.";
206 KALDI_WARN <<
"Zero-length utterance: " << utt;
// iVector lookups, mirroring the ones in the first loop.
215 if (!ivector_rspecifier.empty()) {
216 if (!ivector_reader.
HasKey(utt)) {
217 KALDI_WARN <<
"No iVector available for utterance " << utt;
221 ivector = &ivector_reader.
Value(utt);
224 if (!online_ivector_rspecifier.empty()) {
225 if (!online_ivector_reader.
HasKey(utt)) {
226 KALDI_WARN <<
"No online iVector available for utterance " << utt;
230 online_ivectors = &online_ivector_reader.
Value(utt);
235 decodable_opts, trans_model, am_nnet,
236 features, ivector, online_ivectors,
237 online_ivector_period, &compiler);
241 decoder, nnet_decodable, trans_model, word_syms, utt,
243 &alignment_writer, &words_writer, &compact_lattice_writer,
244 &lattice_writer, &like)) {
// Summary statistics: the real-time factor is elapsed * 100 / input_frame_count
// and the per-frame log-likelihood is tot_like / frame_count.
// NOTE(review): no guard against a zero frame count is visible in this
// listing -- confirm against the full file.
252 kaldi::int64 input_frame_count =
255 double elapsed = timer.
Elapsed();
257 <<
"s: real-time factor assuming 100 frames/sec is " 258 << (elapsed * 100.0 / input_frame_count);
259 KALDI_LOG <<
"Done " << num_success <<
" utterances, failed for " 261 KALDI_LOG <<
"Overall log-likelihood per frame is " 262 << (tot_like / frame_count) <<
" over " 263 << frame_count <<
" frames.";
// Exit status 0 when at least one utterance succeeded; the alternative
// return is not visible in this listing.
266 if (num_success != 0)
return 0;
268 }
// Any std::exception reaching this point has its message written to std::cerr.
catch(
const std::exception &e) {
269 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void CollapseModel(const CollapseModelConfig &config, Nnet *nnet)
This function modifies the neural net for efficiency, in a way that is suitable to be done in test time...
bool Open(const std::string &wspecifier)
Fst< StdArc > * ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
This class is for when you are reading something in random access, but it may actually be stored per-...
This class enables you to do the compilation and optimization in one call, and also ensures that if t...
void SetBatchnormTestMode(bool test_mode, Nnet *nnet)
This function affects only components of type BatchNormComponent.
int main(int argc, char *argv[])
A templated class for writing objects to an archive or script file; see The Table concept...
bool DecodeUtteranceLatticeFaster(LatticeFasterDecoderTpl< FST > &decoder, DecodableInterface &decodable, const TransitionModel &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, bool determinize, bool allow_partial, Int32VectorWriter *alignment_writer, Int32VectorWriter *words_writer, CompactLatticeWriter *compact_lattice_writer, LatticeWriter *lattice_writer, double *like_ptr)
This function DecodeUtteranceLatticeFaster is used in several decoders, and we have moved it here...
const Nnet & GetNnet() const
void Read(std::istream &is, bool binary)
void Register(const std::string &name, bool *ptr, const std::string &doc)
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
This file contains some miscellaneous functions dealing with class Nnet.
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void SetDropoutTestMode(bool test_mode, Nnet *nnet)
This function affects components of child-classes of RandomComponent.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const T & Value(const std::string &key)
void Read(std::istream &is, bool binary)
void Register(OptionsItf *opts)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
This is the "normal" lattice-generating decoder.
int NumArgs() const
Number of positional parameters (cf. argc-1).
A class representing a vector.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
const T & Value(const std::string &key)
NnetOptimizeOptions optimize_config
void Register(OptionsItf *opts)
double Elapsed() const
Returns time in seconds.
int32 frame_subsampling_factor
std::string GetOptArg(int param) const
Config class for the CollapseModel function.