76     using namespace kaldi;
    80     typedef kaldi::int64 int64;
    83         "Reads in wav file(s) and simulates online decoding, including\n"    84         "basis-fMLLR adaptation and endpointing.  Writes lattices.\n"    85         "Models are specified via options.\n"    87         "Usage: online2-wav-gmm-latgen-faster [options] <fst-in> "    88         "<spk2utt-rspecifier> <wav-rspecifier> <lattice-wspecifier>\n"    89         "Run egs/rm/s5/local/run_online_decoding.sh for example\n";
    93     std::string word_syms_rxfilename;
   100     bool do_endpointing = 
false;
   101     std::string use_gpu = 
"no";
   103     po.Register(
"chunk-length", &chunk_length_secs,
   104                 "Length of chunk size in seconds, that we process.");
   105     po.Register(
"word-symbol-table", &word_syms_rxfilename,
   106                 "Symbol table for words [for debug output]");
   107     po.Register(
"do-endpointing", &do_endpointing,
   108                 "If true, apply endpoint detection");
   110     feature_cmdline_config.
Register(&po);
   116     if (po.NumArgs() != 4) {
   121     std::string fst_rxfilename = po.GetArg(1),
   122         spk2utt_rspecifier = po.GetArg(2),
   123         wav_rspecifier = po.GetArg(3),
   124         clat_wspecifier = po.GetArg(4);
   134     fst::SymbolTable *word_syms = NULL;
   135     if (word_syms_rxfilename != 
"")
   136       if (!(word_syms = fst::SymbolTable::ReadText(word_syms_rxfilename)))
   137         KALDI_ERR << 
"Could not read symbol table from file "   138                   << word_syms_rxfilename;
   140     int32 num_done = 0, num_err = 0;
   141     double tot_like = 0.0;
   142     int64 num_frames = 0;
   150     for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
   151       std::string spk = spk2utt_reader.Key();
   152       const std::vector<std::string> &uttlist = spk2utt_reader.Value();
   154       for (
size_t i = 0; 
i < uttlist.size(); 
i++) {
   155         std::string utt = uttlist[
i];
   156         if (!wav_reader.HasKey(utt)) {
   157           KALDI_WARN << 
"Did not find audio for utterance " << utt;
   161         const WaveData &wave_data = wav_reader.Value(utt);
   175         int32 chunk_length = 
int32(samp_freq * chunk_length_secs);
   176         if (chunk_length == 0) chunk_length = 1;
   178         int32 samp_offset = 0;
   179         while (samp_offset < data.Dim()) {
   180           int32 samp_remaining = data.Dim() - samp_offset;
   181           int32 num_samp = chunk_length < samp_remaining ? chunk_length
   185           decoder.FeaturePipeline().AcceptWaveform(samp_freq, wave_part);
   187           samp_offset += num_samp;
   188           decoding_timer.WaitUntil(samp_offset / samp_freq);
   189           if (samp_offset == data.Dim()) {
   191             decoder.FeaturePipeline().InputFinished();
   193           decoder.AdvanceDecoding();
   195           if (do_endpointing && decoder.EndpointDetected(endpoint_config))
   198         decoder.FinalizeDecoding();
   200         bool end_of_utterance = 
true;
   201         decoder.EstimateFmllr(end_of_utterance);
   203         bool rescore_if_needed = 
true;
   204         decoder.GetLattice(rescore_if_needed, end_of_utterance, &clat);
   207                                      &num_frames, &tot_like);
   209         decoding_timer.OutputStats(&timing_stats);
   213         decoder.GetAdaptationState(&adaptation_state);
   220         clat_writer.Write(utt, clat);
   221         KALDI_LOG << 
"Decoded utterance " << utt;
   225     timing_stats.
Print();    
   226     KALDI_LOG << 
"Decoded " << num_done << 
" utterances, "   227               << num_err << 
" with errors.";
   228     KALDI_LOG << 
"Overall likelihood per frame was " << (tot_like / num_frames)
   229               << 
" per frame over " << num_frames << 
" frames.";
   232     return (num_done != 0 ? 0 : 1);
   233   } 
catch(
const std::exception& e) {
   234     std::cerr << e.what();
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
 
class OnlineTimer is used to test real-time decoding algorithms and evaluate how long the decoding of...
 
Fst< StdArc > * ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err)
 
This class is used to read, store and give access to the models used for 3 phases of decoding (first-...
 
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
 
void Register(OptionsItf *opts)
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
BaseFloat SampFreq() const
 
const Matrix< BaseFloat > & Data() const
 
void Register(OptionsItf *opts)
 
void GetDiagnosticsAndPrintOutput(const std::string &utt, const fst::SymbolTable *word_syms, const CompactLattice &clat, int64 *tot_num_frames, double *tot_like)
 
This configuration class is to set up OnlineFeaturePipelineConfig, which in turn is the configuration...
 
OnlineFeaturePipeline is a class that's responsible for putting together the various stages of the fe...
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
You will instantiate this class when you want to decode a single utterance using the online-decoding ...
 
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
void Print(bool online=true)
Here, if "online == false", we take into account that the setup was used in not-really-online mode whe...
 
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
void Register(OptionsItf *opts)
 
fst::VectorFst< CompactLatticeArc > CompactLattice
 
This class's purpose is to read in Wave files. 
 
This configuration class is responsible for storing the configuration options for OnlineFeaturePipeli...
 
class OnlineTimingStats stores statistics from timing of online decoding, which will enable the Print...
 
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...