36 using fst::SymbolTable;
75 decoder->
Decode(decodable);
76 KALDI_LOG <<
"Length of file is " << num_frames;;
78 VectorFst<LatticeArc> decoded;
82 KALDI_WARN <<
"Decoder did not reach end-state, outputting partial " 85 vector<kaldi::int32> alignment,
words;
93 std::ostringstream ss;
95 for (
size_t i = 0;
i < words.size();
i++) {
98 KALDI_ERR <<
"Word-id " << words[
i] <<
" not in symbol table.";
105 BaseFloat like = -weight.Value1() -weight.Value2();
106 KALDI_LOG <<
"Log-like per frame = " << (like/num_frames);
107 (*total_like) += like;
110 KALDI_WARN <<
"Did not successfully decode utterance, length = " 116 int main(
int argc,
char *argv[]) {
118 using namespace kaldi;
121 const char *usage =
"Decode features using GMM-based model.\n" 122 "Usage: gmm-decode-faster-regtree-mllr [options] model-in fst-in " 123 "regtree-in features-rspecifier transforms-rspecifier " 124 "words-wspecifier [alignments-wspecifier]\n";
130 std::string word_syms_filename, utt2spk_rspecifier;
133 po.
Register(
"utt2spk", &utt2spk_rspecifier,
"rspecifier for utterance to " 135 po.
Register(
"binary", &binary,
"Write output in binary mode");
136 po.
Register(
"acoustic-scale", &acoustic_scale,
137 "Scaling factor for acoustic likelihoods");
138 po.
Register(
"word-symbol-table", &word_syms_filename,
139 "Symbol table for words [for debug output]");
140 po.
Register(
"allow-partial", &allow_partial,
141 "Produce output even when final state was not reached");
149 std::string model_in_filename = po.
GetArg(1),
150 fst_in_filename = po.
GetArg(2),
151 regtree_filename = po.
GetArg(3),
152 feature_rspecifier = po.
GetArg(4),
153 xforms_rspecifier = po.
GetArg(5),
154 words_wspecifier = po.
GetArg(6),
161 Input ki(model_in_filename, &binary_read);
171 Input in(regtree_filename, &binary_read);
172 regtree.
Read(in.
Stream(), binary_read, am_gmm);
183 if (word_syms_filename !=
"") {
184 word_syms = fst::SymbolTable::ReadText(word_syms_filename);
186 KALDI_ERR <<
"Could not read symbol table from file " 187 << word_syms_filename;
192 kaldi::int64 frame_count = 0;
193 int num_success = 0, num_fail = 0;
198 DecodeInfo decode_info(am_gmm, trans_model, &decoder, acoustic_scale,
199 allow_partial, words_writer, alignment_writer,
203 for (; !feature_reader.
Done(); feature_reader.
Next()) {
204 string utt = feature_reader.
Key();
208 if (features.NumRows() == 0) {
209 KALDI_WARN <<
"Zero-length utterance: " << utt;
214 if (!mllr_reader.
HasKey(utt)) {
215 KALDI_WARN <<
"No MLLR transform for key " << utt <<
216 ", decoding without MLLR.";
221 utt, features.NumRows(), &tot_like)) {
237 utt, features.NumRows(), &tot_like)) {
245 double elapsed = timer.
Elapsed();
246 KALDI_LOG <<
"Time taken [excluding initialization] " << elapsed
247 <<
"s: real-time factor assuming 100 frames/sec is " 248 << (elapsed * 100.0 / frame_count);
249 KALDI_LOG <<
"Done " << num_success <<
" utterances, failed for " 251 KALDI_LOG <<
"Overall log-likelihood per frame is " 252 << (tot_like / frame_count) <<
" over " << frame_count
256 if (num_success != 0)
261 catch(
const std::exception &e) {
262 std::cerr << e.what();
void Read(std::istream &in, bool binary, const AmDiagGmm &am)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
fst::ArcTpl< LatticeWeight > LatticeArc
An MLLR mean transformation is an affine transformation of Gaussian means.
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
const kaldi::Int32VectorWriter & alignment_writer
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
This class is for when you are reading something in random access, but it may actually be stored per-...
bool DecodeUtterance(kaldi::FasterDecoder *decoder, kaldi::DecodableInterface *decodable, DecodeInfo *info, const string &uttid, int32 num_frames, BaseFloat *total_like)
const kaldi::TransitionModel & trans_model
void Decode(DecodableInterface *decodable)
A templated class for writing objects to an archive or script file; see The Table concept...
DecodeInfo(const kaldi::AmDiagGmm &am, const kaldi::TransitionModel &tm, kaldi::FasterDecoder *decoder, BaseFloat scale, bool allow_partial, const kaldi::Int32VectorWriter &wwriter, const kaldi::Int32VectorWriter &awriter, fst::SymbolTable *wsyms)
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
KALDI_DISALLOW_COPY_AND_ASSIGN(DecodeInfo)
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
fst::LatticeWeightTpl< BaseFloat > LatticeWeight
bool GetBestPath(fst::MutableFst< LatticeArc > *fst_out, bool use_final_probs=true)
GetBestPath gets the decoding traceback.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A regression tree is a clustering of Gaussian densities in an acoustic model, such that the group of ...
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void Register(OptionsItf *opts, bool full)
bool HasKey(const std::string &key)
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
fst::SymbolTable * word_syms
kaldi::FasterDecoder * decoder
const kaldi::AmDiagGmm & acoustic_model
int NumArgs() const
Number of positional parameters (cf. argc-1).
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
LatticeWeightTpl< BaseFloat > LatticeWeight
const T & Value(const std::string &key)
double Elapsed() const
Returns time in seconds.
void Read(std::istream &in_stream, bool binary)
int main(int argc, char *argv[])
bool ReachedFinal() const
Returns true if a final state was active on the last frame.
const kaldi::Int32VectorWriter & words_writer
std::string GetOptArg(int param) const