37 using fst::SymbolTable;
76 decoder->
Decode(decodable);
77 KALDI_LOG <<
"Length of file is " << num_frames;
79 VectorFst<LatticeArc> decoded;
83 KALDI_WARN <<
"Decoder did not reach end-state, outputting partial " 86 vector<kaldi::int32> alignment,
words;
94 std::ostringstream ss;
96 for (
size_t i = 0;
i < words.size();
i++) {
99 KALDI_ERR <<
"Word-id " << words[
i] <<
" not in symbol table.";
106 BaseFloat like = -weight.Value1() -weight.Value2();
107 KALDI_LOG <<
"Log-like per frame = " << (like/num_frames);
108 (*total_like) += like;
111 KALDI_WARN <<
"Did not successfully decode utterance, length = " 117 int main(
int argc,
char *argv[]) {
119 using namespace kaldi;
122 const char *usage =
"Decode features using GMM-based model.\n" 123 "Usage: gmm-decode-faster-regtree-fmllr [options] model-in fst-in " 124 "regtree-in features-rspecifier transforms-rspecifier " 125 "words-wspecifier [alignments-wspecifier]\n";
131 std::string word_syms_filename, utt2spk_rspecifier;
134 po.
Register(
"utt2spk", &utt2spk_rspecifier,
"rspecifier for utterance to " 136 po.
Register(
"binary", &binary,
"Write output in binary mode");
137 po.
Register(
"acoustic-scale", &acoustic_scale,
138 "Scaling factor for acoustic likelihoods");
139 po.
Register(
"word-symbol-table", &word_syms_filename,
140 "Symbol table for words [for debug output]");
141 po.
Register(
"allow-partial", &allow_partial,
142 "Produce output even when final state was not reached");
150 std::string model_in_filename = po.
GetArg(1),
151 fst_in_filename = po.
GetArg(2),
152 regtree_filename = po.
GetArg(3),
153 feature_rspecifier = po.
GetArg(4),
154 xforms_rspecifier = po.
GetArg(5),
155 words_wspecifier = po.
GetArg(6),
162 Input ki(model_in_filename, &binary_read);
172 Input in(regtree_filename, &binary_read);
173 regtree.
Read(in.
Stream(), binary_read, am_gmm);
184 if (word_syms_filename !=
"") {
185 word_syms = fst::SymbolTable::ReadText(word_syms_filename);
187 KALDI_ERR <<
"Could not read symbol table from file " 188 << word_syms_filename;
193 kaldi::int64 frame_count = 0;
194 int num_success = 0, num_fail = 0;
199 DecodeInfo decode_info(am_gmm, trans_model, &decoder, acoustic_scale,
200 allow_partial, words_writer, alignment_writer,
204 for (; !feature_reader.
Done(); feature_reader.
Next()) {
205 string utt = feature_reader.
Key();
209 if (features.NumRows() == 0) {
210 KALDI_WARN <<
"Zero-length utterance: " << utt;
215 if (!fmllr_reader.
HasKey(utt)) {
216 KALDI_WARN <<
"No FMLLR transform for key " << utt <<
217 ", decoding without fMLLR.";
222 utt, features.NumRows(), &tot_like)) {
233 if (fmllr.NumRegClasses() == 1) {
236 fmllr.GetXformMatrix(0, &fmllr_matrix);
237 for (int32
i = 0;
i < xformed_features.
NumRows();
i++) {
246 utt, xformed_features.
NumRows(), &tot_like)) {
258 utt, features.NumRows(), &tot_like)) {
267 KALDI_LOG <<
"Average log-likelihood per frame is " << (tot_like
268 / frame_count) <<
" over " << frame_count <<
" frames.";
270 double elapsed = timer.
Elapsed();
271 KALDI_LOG <<
"Time taken [excluding initialization] " << elapsed
272 <<
"s: real-time factor assuming 100 frames/sec is " 273 << (elapsed * 100.0 / frame_count);
274 KALDI_LOG <<
"Done " << num_success <<
" utterances, failed for " 279 if (num_success != 0)
284 catch(
const std::exception &e) {
285 std::cerr << e.what();
void Read(std::istream &in, bool binary, const AmDiagGmm &am)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
fst::ArcTpl< LatticeWeight > LatticeArc
DecodableInterface provides a link between the (acoustic-modeling and feature-processing) code and th...
const kaldi::Int32VectorWriter & alignment_writer
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
This class is for when you are reading something in random access, but it may actually be stored per-...
const kaldi::TransitionModel & trans_model
void Decode(DecodableInterface *decodable)
A templated class for writing objects to an archive or script file; see The Table concept...
DecodeInfo(const kaldi::AmDiagGmm &am, const kaldi::TransitionModel &tm, kaldi::FasterDecoder *decoder, BaseFloat scale, bool allow_partial, const kaldi::Int32VectorWriter &wwriter, const kaldi::Int32VectorWriter &awriter, fst::SymbolTable *wsyms)
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
KALDI_DISALLOW_COPY_AND_ASSIGN(DecodeInfo)
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
fst::LatticeWeightTpl< BaseFloat > LatticeWeight
bool GetBestPath(fst::MutableFst< LatticeArc > *fst_out, bool use_final_probs=true)
GetBestPath gets the decoding traceback.
An FMLLR (feature-space MLLR) transformation, also called CMLLR (constrained MLLR) is an affine trans...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A regression tree is a clustering of Gaussian densities in an acoustic model, such that the group of ...
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void Register(OptionsItf *opts, bool full)
bool HasKey(const std::string &key)
fst::SymbolTable * word_syms
bool DecodeUtterance(kaldi::FasterDecoder *decoder, kaldi::DecodableInterface *decodable, DecodeInfo *info, const string &uttid, int32 num_frames, BaseFloat *total_like)
kaldi::FasterDecoder * decoder
const kaldi::AmDiagGmm & acoustic_model
int NumArgs() const
Number of positional parameters (c.f. argc-1).
int main(int argc, char *argv[])
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
LatticeWeightTpl< BaseFloat > LatticeWeight
const T & Value(const std::string &key)
void ApplyAffineTransform(const MatrixBase< BaseFloat > &xform, VectorBase< BaseFloat > *vec)
Applies the affine transform 'xform' to the vector 'vec' and overwrites the contents of 'vec'...
double Elapsed() const
Returns time in seconds.
void Read(std::istream &in_stream, bool binary)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
bool ReachedFinal() const
Returns true if a final state was active on the last frame.
const kaldi::Int32VectorWriter & words_writer
std::string GetOptArg(int param) const