33     using namespace kaldi;
    40     const int32 kDeltaOrder = 2;
    43         "Reads in wav file(s) and simulates online decoding.\n"    44         "Writes integerized-text and .ali files for WER computation. Utterance "    45         "segmentation is done on-the-fly.\n"    46         "Feature splicing/LDA transform is used, if the optional(last) argument "    48         "Otherwise delta/delta-delta(i.e. 2-nd order) features are produced.\n"    49         "Caution: the last few frames of the wav file may not be decoded properly.\n"    50         "Hence, don't use one wav file per utterance, but "    51         "rather use one wav file per show.\n\n"    52         "Usage: online-wav-gmm-decode-faster [options] wav-rspecifier model-in"    53         "fst-in word-symbol-table silence-phones transcript-wspecifier "    54         "alignments-wspecifier [lda-matrix-in]\n\n"    55         "Example: ./online-wav-gmm-decode-faster --rt-min=0.3 --rt-max=0.5 "    56         "--max-active=4000 --beam=12.0 --acoustic-scale=0.0769 "    57         "scp:wav.scp model HCLG.fst words.txt '1:2:3:4:5' ark,t:trans.txt ark,t:ali.txt";
    60     int32 cmn_window = 600,
    63     int32 right_context = 4, left_context = 4;
    70     po.Register(
"left-context", &left_context, 
"Number of frames of left context");
    71     po.Register(
"right-context", &right_context, 
"Number of frames of right context");
    72     po.Register(
"acoustic-scale", &acoustic_scale,
    73                 "Scaling factor for acoustic likelihoods");
    74     po.Register(
"cmn-window", &cmn_window,
    75         "Number of feat. vectors used in the running average CMN calculation");
    76     po.Register(
"min-cmn-window", &min_cmn_window,
    77                 "Minumum CMN window used at start of decoding (adds "    78                 "latency only at start)");
    79     po.Register(
"channel", &channel,
    80         "Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)");
    82     if (po.NumArgs() != 7 && po.NumArgs() != 8) {
    87     std::string wav_rspecifier = po.GetArg(1),
    88         model_rspecifier = po.GetArg(2),
    89         fst_rspecifier = po.GetArg(3),
    90         word_syms_filename = po.GetArg(4),
    91         silence_phones_str = po.GetArg(5),
    92         words_wspecifier = po.GetArg(6),
    93         alignment_wspecifier = po.GetArg(7),
    94         lda_mat_rspecifier = po.GetOptArg(8);
    96     std::vector<int32> silence_phones;
    98         KALDI_ERR << 
"Invalid silence-phones string " << silence_phones_str;
    99     if (silence_phones.empty())
   106     if (lda_mat_rspecifier != 
"") {
   108       Input ki(lda_mat_rspecifier, &binary_in);
   109       lda_transform.
Read(ki.Stream(), binary_in);
   116         Input ki(model_rspecifier, &binary);
   117         trans_model.
Read(ki.Stream(), binary);
   118         am_gmm.
Read(ki.Stream(), binary);
   121     fst::SymbolTable *word_syms = NULL;
   122     if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
   123         KALDI_ERR << 
"Could not read symbol table from file "   124                     << word_syms_filename;
   136     int32 window_size = right_context + left_context + 1;
   140                                 silence_phones, trans_model);
   142     VectorFst<LatticeArc> out_fst;
   143     for (; !reader.Done(); reader.Next()) {
   144       std::string wav_key = reader.Key();
   145       std::cerr << 
"File: " << wav_key << std::endl;
   146       const WaveData &wav_data = reader.Value();
   148         KALDI_ERR << 
"Sampling rates other than 16kHz are not supported!";
   149       int32 num_chan = wav_data.
Data().
NumRows(), this_chan = channel;
   156             KALDI_WARN << 
"Channel not specified but you have data with "   157                        << num_chan  << 
" channels; defaulting to zero";
   159           if (this_chan >= num_chan) {
   160             KALDI_WARN << 
"File with id " << wav_key << 
" has "   161                        << num_chan << 
" channels but you specified channel "   162                        << channel << 
", producing no output.";
   168       Mfcc mfcc(mfcc_opts);
   169       FeInput fe_input(&au_src, &mfcc,
   170                        frame_length*(wav_data.
SampFreq()/1000),
   171                        frame_shift*(wav_data.
SampFreq()/1000));
   174       if (lda_mat_rspecifier != 
"") {
   176             &cmn_input, lda_transform,
   177             left_context, right_context);
   180         opts.
order = kDeltaOrder;
   190       int32 start_frame = 0;
   191       bool partial_res = 
false;
   192       decoder.InitDecoding();
   195         if (dstate & (decoder.kEndFeats | decoder.kEndUtt)) {
   196           std::vector<int32> word_ids;
   197           decoder.FinishTraceBack(&out_fst);
   199                                        static_cast<vector<int32> *
>(0),
   201                                        static_cast<LatticeArc::Weight*>(0));
   205           decoder.GetBestPath(&out_fst);
   206           std::vector<int32> tids;
   210                                        static_cast<LatticeArc::Weight*>(0));
   211           std::stringstream res_key;
   212           res_key << wav_key << 
'_' << start_frame << 
'-' << decoder.frame();
   213           if (!word_ids.empty())
   214             words_writer.Write(res_key.str(), word_ids);
   215           alignment_writer.Write(res_key.str(), tids);
   216           if (dstate == decoder.kEndFeats)
   218           start_frame = decoder.frame();
   220           std::vector<int32> word_ids;
   221           if (decoder.PartialTraceback(&out_fst)) {
   223                                         static_cast<vector<int32> *
>(0),
   225                                         static_cast<LatticeArc::Weight*>(0));
   228               partial_res = (word_ids.size() > 0);
   232       delete feat_transform;
   237   } 
catch(
const std::exception& e) {
   238     std::cerr << e.what();
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
 
void Register(OptionsItf *opts, bool full)
 
MfccOptions contains basic options for computing MFCC features. 
 
For an extended explanation of the framework of which grammar-fsts are a part, please see "Support for grammars and graphs with on-the-fly parts".
 
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
 
A templated class for writing objects to an archive or script file; see "The Table concept".
 
BaseFloat SampFreq() const
 
const Matrix< BaseFloat > & Data() const
 
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST. 
 
void PrintPartialResult(const std::vector< int32 > &words, const fst::SymbolTable *word_syms, bool line_break)
 
void Read(std::istream &in, bool binary, bool add=false)
read from stream. 
 
The class ParseOptions is for parsing command-line options; see "Parsing command-line options" for more documentation.
 
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const]. 
 
FrameExtractionOptions frame_opts
 
void Read(std::istream &is, bool binary)
 
A templated class for reading objects sequentially from an archive or script file; see "The Table concept".
 
This class's purpose is to read in Wave files. 
 
fst::Fst< fst::StdArc > * ReadDecodeGraph(const std::string &filename)
 
#define KALDI_ASSERT(cond)
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
void Register(OptionsItf *opts)
 
This templated class is intended for offline feature extraction, i.e. where you have access to the entire signal at the start.
 
void Read(std::istream &in_stream, bool binary)