76 using namespace kaldi;
80 typedef kaldi::int64 int64;
83 "Reads in wav file(s) and simulates online decoding, including\n" 84 "basis-fMLLR adaptation and endpointing. Writes lattices.\n" 85 "Models are specified via options.\n" 87 "Usage: online2-wav-gmm-latgen-faster [options] <fst-in> " 88 "<spk2utt-rspecifier> <wav-rspecifier> <lattice-wspecifier>\n" 89 "Run egs/rm/s5/local/run_online_decoding.sh for example\n";
93 std::string word_syms_rxfilename;
100 bool do_endpointing =
false;
101 std::string use_gpu =
"no";
103 po.Register(
"chunk-length", &chunk_length_secs,
104 "Length of chunk size in seconds, that we process.");
105 po.Register(
"word-symbol-table", &word_syms_rxfilename,
106 "Symbol table for words [for debug output]");
107 po.Register(
"do-endpointing", &do_endpointing,
108 "If true, apply endpoint detection");
110 feature_cmdline_config.
Register(&po);
116 if (po.NumArgs() != 4) {
121 std::string fst_rxfilename = po.GetArg(1),
122 spk2utt_rspecifier = po.GetArg(2),
123 wav_rspecifier = po.GetArg(3),
124 clat_wspecifier = po.GetArg(4);
134 fst::SymbolTable *word_syms = NULL;
135 if (word_syms_rxfilename !=
"")
136 if (!(word_syms = fst::SymbolTable::ReadText(word_syms_rxfilename)))
137 KALDI_ERR <<
"Could not read symbol table from file " 138 << word_syms_rxfilename;
140 int32 num_done = 0, num_err = 0;
141 double tot_like = 0.0;
142 int64 num_frames = 0;
150 for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
151 std::string spk = spk2utt_reader.Key();
152 const std::vector<std::string> &uttlist = spk2utt_reader.Value();
154 for (
size_t i = 0;
i < uttlist.size();
i++) {
155 std::string utt = uttlist[
i];
156 if (!wav_reader.HasKey(utt)) {
157 KALDI_WARN <<
"Did not find audio for utterance " << utt;
161 const WaveData &wave_data = wav_reader.Value(utt);
175 int32 chunk_length =
int32(samp_freq * chunk_length_secs);
176 if (chunk_length == 0) chunk_length = 1;
178 int32 samp_offset = 0;
179 while (samp_offset < data.Dim()) {
180 int32 samp_remaining = data.Dim() - samp_offset;
181 int32 num_samp = chunk_length < samp_remaining ? chunk_length
185 decoder.FeaturePipeline().AcceptWaveform(samp_freq, wave_part);
187 samp_offset += num_samp;
188 decoding_timer.WaitUntil(samp_offset / samp_freq);
189 if (samp_offset == data.Dim()) {
191 decoder.FeaturePipeline().InputFinished();
193 decoder.AdvanceDecoding();
195 if (do_endpointing && decoder.EndpointDetected(endpoint_config))
198 decoder.FinalizeDecoding();
200 bool end_of_utterance =
true;
201 decoder.EstimateFmllr(end_of_utterance);
203 bool rescore_if_needed =
true;
204 decoder.GetLattice(rescore_if_needed, end_of_utterance, &clat);
207 &num_frames, &tot_like);
209 decoding_timer.OutputStats(&timing_stats);
213 decoder.GetAdaptationState(&adaptation_state);
220 clat_writer.Write(utt, clat);
221 KALDI_LOG <<
"Decoded utterance " << utt;
225 timing_stats.
Print();
226 KALDI_LOG <<
"Decoded " << num_done <<
" utterances, " 227 << num_err <<
" with errors.";
228 KALDI_LOG <<
"Overall likelihood per frame was " << (tot_like / num_frames)
229 <<
" per frame over " << num_frames <<
" frames.";
232 return (num_done != 0 ? 0 : 1);
233 }
catch(
const std::exception& e) {
234 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
class OnlineTimer is used to test real-time decoding algorithms and evaluate how long the decoding of...
Fst< StdArc > * ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err)
This class is used to read, store and give access to the models used for 3 phases of decoding (first-...
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
void Register(OptionsItf *opts)
A templated class for writing objects to an archive or script file; see The Table concept...
BaseFloat SampFreq() const
const Matrix< BaseFloat > & Data() const
void Register(OptionsItf *opts)
void GetDiagnosticsAndPrintOutput(const std::string &utt, const fst::SymbolTable *word_syms, const CompactLattice &clat, int64 *tot_num_frames, double *tot_like)
This configuration class is to set up OnlineFeaturePipelineConfig, which in turn is the configuration...
OnlineFeaturePipeline is a class that's responsible for putting together the various stages of the fe...
Allows random access to a collection of objects in an archive or script file; see The Table concept...
You will instantiate this class when you want to decode a single utterance using the online-decoding ...
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Print(bool online=true)
Here, if "online == false", we take into account that the setup was used in a not-really-online mode whe...
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
void Register(OptionsItf *opts)
fst::VectorFst< CompactLatticeArc > CompactLattice
This class's purpose is to read in Wave files.
This configuration class is responsible for storing the configuration options for OnlineFeaturePipeli...
class OnlineTimingStats stores statistics from timing of online decoding, which will enable the Print...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...