32 const fst::SymbolTable *word_syms,
34 int64 *tot_num_frames,
36 if (clat.NumStates() == 0) {
49 std::vector<int32> alignment;
50 std::vector<int32>
words;
52 num_frames = alignment.size();
54 *tot_num_frames += num_frames;
55 *tot_like += likelihood;
56 KALDI_VLOG(2) <<
"Likelihood per frame for utterance " << utt <<
" is " 57 << (likelihood / num_frames) <<
" over " << num_frames
60 if (word_syms != NULL) {
61 std::cerr << utt <<
' ';
62 for (
size_t i = 0;
i < words.size();
i++) {
63 std::string s = word_syms->Find(words[
i]);
65 KALDI_ERR <<
"Word-id " << words[
i] <<
" not in symbol table.";
66 std::cerr << s <<
' ';
68 std::cerr << std::endl;
74 int main(
int argc,
char *argv[]) {
76 using namespace kaldi;
80 typedef kaldi::int64 int64;
83 "Reads in wav file(s) and simulates online decoding, including\n" 84 "basis-fMLLR adaptation and endpointing. Writes lattices.\n" 85 "Models are specified via options.\n" 87 "Usage: online2-wav-gmm-latgen-faster [options] <fst-in> " 88 "<spk2utt-rspecifier> <wav-rspecifier> <lattice-wspecifier>\n" 89 "Run egs/rm/s5/local/run_online_decoding.sh for example\n";
93 std::string word_syms_rxfilename;
100 bool do_endpointing =
false;
101 std::string use_gpu =
"no";
103 po.
Register(
"chunk-length", &chunk_length_secs,
104 "Length of chunk size in seconds, that we process.");
105 po.
Register(
"word-symbol-table", &word_syms_rxfilename,
106 "Symbol table for words [for debug output]");
107 po.
Register(
"do-endpointing", &do_endpointing,
108 "If true, apply endpoint detection");
110 feature_cmdline_config.
Register(&po);
121 std::string fst_rxfilename = po.
GetArg(1),
122 spk2utt_rspecifier = po.
GetArg(2),
123 wav_rspecifier = po.
GetArg(3),
124 clat_wspecifier = po.
GetArg(4);
134 fst::SymbolTable *word_syms = NULL;
135 if (word_syms_rxfilename !=
"")
136 if (!(word_syms = fst::SymbolTable::ReadText(word_syms_rxfilename)))
137 KALDI_ERR <<
"Could not read symbol table from file " 138 << word_syms_rxfilename;
140 int32 num_done = 0, num_err = 0;
141 double tot_like = 0.0;
142 int64 num_frames = 0;
150 for (; !spk2utt_reader.
Done(); spk2utt_reader.
Next()) {
151 std::string spk = spk2utt_reader.
Key();
152 const std::vector<std::string> &uttlist = spk2utt_reader.
Value();
154 for (
size_t i = 0;
i < uttlist.size();
i++) {
155 std::string utt = uttlist[
i];
156 if (!wav_reader.
HasKey(utt)) {
157 KALDI_WARN <<
"Did not find audio for utterance " << utt;
175 int32 chunk_length =
int32(samp_freq * chunk_length_secs);
176 if (chunk_length == 0) chunk_length = 1;
178 int32 samp_offset = 0;
179 while (samp_offset < data.Dim()) {
180 int32 samp_remaining = data.Dim() - samp_offset;
181 int32 num_samp = chunk_length < samp_remaining ? chunk_length
185 decoder.FeaturePipeline().AcceptWaveform(samp_freq, wave_part);
187 samp_offset += num_samp;
188 decoding_timer.
WaitUntil(samp_offset / samp_freq);
189 if (samp_offset == data.Dim()) {
191 decoder.FeaturePipeline().InputFinished();
193 decoder.AdvanceDecoding();
195 if (do_endpointing && decoder.EndpointDetected(endpoint_config))
198 decoder.FinalizeDecoding();
200 bool end_of_utterance =
true;
201 decoder.EstimateFmllr(end_of_utterance);
203 bool rescore_if_needed =
true;
204 decoder.GetLattice(rescore_if_needed, end_of_utterance, &clat);
207 &num_frames, &tot_like);
213 decoder.GetAdaptationState(&adaptation_state);
220 clat_writer.
Write(utt, clat);
221 KALDI_LOG <<
"Decoded utterance " << utt;
225 timing_stats.
Print();
226 KALDI_LOG <<
"Decoded " << num_done <<
" utterances, " 227 << num_err <<
" with errors.";
228 KALDI_LOG <<
"Overall likelihood per frame was " << (tot_like / num_frames)
229 <<
" per frame over " << num_frames <<
" frames.";
232 return (num_done != 0 ? 0 : 1);
233 }
catch(
const std::exception& e) {
234 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
class OnlineTimer is used to test real-time decoding algorithms and evaluate how long the decoding of...
Fst< StdArc > * ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err)
This class is used to read, store and give access to the models used for 3 phases of decoding (first-...
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
void Register(OptionsItf *opts)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void OutputStats(OnlineTimingStats *stats)
This call, which should be made after decoding is done, writes the stats to the object that accumulat...
This file contains a class OnlineFeaturePipeline for online feature extraction, which puts together v...
A templated class for writing objects to an archive or script file; see The Table concept...
BaseFloat SampFreq() const
const Matrix< BaseFloat > & Data() const
void Register(OptionsItf *opts)
void GetDiagnosticsAndPrintOutput(const std::string &utt, const fst::SymbolTable *word_syms, const CompactLattice &clat, int64 *tot_num_frames, double *tot_like)
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
This configuration class is to set up OnlineFeaturePipelineConfig, which in turn is the configuration...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
OnlineFeaturePipeline is a class that's responsible for putting together the various stages of the fe...
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void CompactLatticeShortestPath(const CompactLattice &clat, CompactLattice *shortest_path)
A form of the shortest-path/best-path algorithm that's specially coded for CompactLattice.
You will instantiate this class when you want to decode a single utterance using the online-decoding ...
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Print(bool online=true)
Here, if "online == false" we take into account that the setup was used in not-really-online mode whe...
const T & Value(const std::string &key)
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
fst::VectorFst< LatticeArc > Lattice
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
int main(int argc, char *argv[])
void Register(OptionsItf *opts)
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
fst::VectorFst< CompactLatticeArc > CompactLattice
This class's purpose is to read in Wave files.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
This configuration class is responsible for storing the configuration options for OnlineFeaturePipeli...
class OnlineTimingStats stores statistics from timing of online decoding, which will enable the Print...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
void WaitUntil(double cur_utterance_length)
The call to WaitUntil(t) simulates the effect of sleeping until cur_utterance_length seconds after th...