35                                   const fst::SymbolTable *word_syms,
    37                                   int64 *tot_num_frames,
    39   if (clat.NumStates() == 0) {
    52   std::vector<int32> alignment;
    53   std::vector<int32> 
words;
    55   num_frames = alignment.size();
    56   likelihood = -(weight.Value1() + weight.Value2());
    57   *tot_num_frames += num_frames;
    58   *tot_like += likelihood;
    59   KALDI_VLOG(2) << 
"Likelihood per frame for utterance " << utt << 
" is "    60                 << (likelihood / num_frames) << 
" over " << num_frames
    61                 << 
" frames, = " << (-weight.Value1() / num_frames)
    62                 << 
',' << (weight.Value2() / num_frames);
    64   if (word_syms != NULL) {
    65     std::cerr << utt << 
' ';
    66     for (
size_t i = 0; 
i < words.size(); 
i++) {
    67       std::string s = word_syms->Find(words[
i]);
    69         KALDI_ERR << 
"Word-id " << words[
i] << 
" not in symbol table.";
    70       std::cerr << s << 
' ';
    72     std::cerr << std::endl;
    78 int main(
int argc, 
char *argv[]) {
    80     using namespace kaldi;
    84     typedef kaldi::int64 int64;
    87         "Reads in wav file(s) and simulates online decoding with neural nets\n"    88         "(nnet3 setup), with optional iVector-based speaker adaptation and\n"    89         "optional endpointing.  Note: some configuration values and inputs are\n"    90         "set via config files whose filenames are passed as options\n"    92         "Usage: online2-wav-nnet3-latgen-faster [options] <nnet3-in> <fst-in> "    93         "<spk2utt-rspecifier> <wav-rspecifier> <lattice-wspecifier>\n"    94         "The spk2utt-rspecifier can just be <utterance-id> <utterance-id> if\n"    95         "you want to decode utterance by utterance.\n";
    99     std::string word_syms_rxfilename;
   109     bool do_endpointing = 
false;
   112     po.
Register(
"chunk-length", &chunk_length_secs,
   113                 "Length of chunk size in seconds, that we process.  Set to <= 0 "   114                 "to use all input in one chunk.");
   115     po.
Register(
"word-symbol-table", &word_syms_rxfilename,
   116                 "Symbol table for words [for debug output]");
   117     po.
Register(
"do-endpointing", &do_endpointing,
   118                 "If true, apply endpoint detection");
   120                 "You can set this to false to disable online iVector estimation "   121                 "and have all the data for each utterance used, even at "   122                 "utterance start.  This is useful where you just want the best "   123                 "results and don't care about online operation.  Setting this to "   124                 "false has the same effect as setting "   125                 "--use-most-recent-ivector=true and --greedy-ivector-extractor=true "   126                 "in the file given to --ivector-extraction-config, and "   127                 "--chunk-length=-1.");
   129                 "Number of threads used when initializing iVector extractor.");
   144     std::string nnet3_rxfilename = po.
GetArg(1),
   145         fst_rxfilename = po.
GetArg(2),
   146         spk2utt_rspecifier = po.
GetArg(3),
   147         wav_rspecifier = po.
GetArg(4),
   148         clat_wspecifier = po.
GetArg(5);
   154       chunk_length_secs = -1.0;
   166       Input ki(nnet3_rxfilename, &binary);
   183     fst::SymbolTable *word_syms = NULL;
   184     if (word_syms_rxfilename != 
"")
   185       if (!(word_syms = fst::SymbolTable::ReadText(word_syms_rxfilename)))
   186         KALDI_ERR << 
"Could not read symbol table from file "   187                   << word_syms_rxfilename;
   189     int32 num_done = 0, num_err = 0;
   190     double tot_like = 0.0;
   191     int64 num_frames = 0;
   199     for (; !spk2utt_reader.
Done(); spk2utt_reader.
Next()) {
   200       std::string spk = spk2utt_reader.
Key();
   201       const std::vector<std::string> &uttlist = spk2utt_reader.
Value();
   207       for (
size_t i = 0; 
i < uttlist.size(); 
i++) {
   208         std::string utt = uttlist[
i];
   209         if (!wav_reader.
HasKey(utt)) {
   210           KALDI_WARN << 
"Did not find audio for utterance " << utt;
   220         feature_pipeline.SetAdaptationState(adaptation_state);
   221         feature_pipeline.SetCmvnState(cmvn_state);
   230                                             *decode_fst, &feature_pipeline);
   235         if (chunk_length_secs > 0) {
   236           chunk_length = 
int32(samp_freq * chunk_length_secs);
   237           if (chunk_length == 0) chunk_length = 1;
   239           chunk_length = std::numeric_limits<int32>::max();
   242         int32 samp_offset = 0;
   243         std::vector<std::pair<int32, BaseFloat> > delta_weights;
   245         while (samp_offset < data.Dim()) {
   246           int32 samp_remaining = data.Dim() - samp_offset;
   247           int32 num_samp = chunk_length < samp_remaining ? chunk_length
   251           feature_pipeline.AcceptWaveform(samp_freq, wave_part);
   253           samp_offset += num_samp;
   254           decoding_timer.
WaitUntil(samp_offset / samp_freq);
   255           if (samp_offset == data.Dim()) {
   257             feature_pipeline.InputFinished();
   260           if (silence_weighting.
Active() &&
   261               feature_pipeline.IvectorFeature() != NULL) {
   265             feature_pipeline.IvectorFeature()->UpdateFrameWeights(delta_weights);
   277         bool end_of_utterance = 
true;
   281                                      &num_frames, &tot_like);
   287         feature_pipeline.GetAdaptationState(&adaptation_state);
   288         feature_pipeline.GetCmvnState(&cmvn_state);
   295         clat_writer.
Write(utt, clat);
   296         KALDI_LOG << 
"Decoded utterance " << utt;
   300     timing_stats.
Print(online);
   302     KALDI_LOG << 
"Decoded " << num_done << 
" utterances, "   303               << num_err << 
" with errors.";
   304     KALDI_LOG << 
"Overall likelihood per frame was " << (tot_like / num_frames)
   305               << 
" per frame over " << num_frames << 
" frames.";
   308     return (num_done != 0 ? 0 : 1);
   309   } 
catch(
const std::exception& e) {
   310     std::cerr << e.what();
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
void CollapseModel(const CollapseModelConfig &config, Nnet *nnet)
This function modifies the neural net for efficiency, in a way that is suitable to be done at test time...
 
class OnlineTimer is used to test real-time decoding algorithms and evaluate how long the decoding of...
 
This configuration class is to set up OnlineNnet2FeaturePipelineInfo, which in turn is the configurat...
 
int32 frame_subsampling_factor
 
Fst< StdArc > * ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err)
 
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
 
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor]. 
 
void OutputStats(OnlineTimingStats *stats)
This call, which should be made after decoding is done, writes the stats to the object that accumulat...
 
void SetBatchnormTestMode(bool test_mode, Nnet *nnet)
This function affects only components of type BatchNormComponent. 
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
BaseFloat SampFreq() const
 
const Matrix< BaseFloat > & Data() const
 
const Nnet & GetNnet() const
 
void Register(OptionsItf *opts)
 
void GetDiagnosticsAndPrintOutput(const std::string &utt, const fst::SymbolTable *word_syms, const CompactLattice &clat, int64 *tot_num_frames, double *tot_like)
 
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST. 
 
This file contains a different version of the feature-extraction pipeline in online-feature-pipeline...
 
void Write(const std::string &key, const T &value) const
 
This class is responsible for storing configuration variables, objects and options for OnlineNnet2Fea...
 
void Read(std::istream &is, bool binary)
 
void Register(const std::string &name, bool *ptr, const std::string &doc)
 
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
 
This file contains some miscellaneous functions dealing with class Nnet. 
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
fst::LatticeWeightTpl< BaseFloat > LatticeWeight
 
void CompactLatticeShortestPath(const CompactLattice &clat, CompactLattice *shortest_path)
A form of the shortest-path/best-path algorithm that's specially coded for CompactLattice. 
 
void SetDropoutTestMode(bool test_mode, Nnet *nnet)
This function affects components of child-classes of RandomComponent. 
 
std::vector< std::vector< double > > AcousticLatticeScale(double acwt)
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
void Print(bool online=true)
Here, if "online == false" we take into account that the setup was used in not-really-online mode whe...
 
void ComputeCurrentTraceback(const LatticeFasterOnlineDecoderTpl< FST > &decoder)
 
const T & Value(const std::string &key)
 
void ScaleLattice(const std::vector< std::vector< ScaleFloat > > &scale, MutableFst< ArcTpl< Weight > > *fst)
Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by viewing the pair (a...
 
void Read(std::istream &is, bool binary)
 
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
 
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST. 
 
void GetLattice(bool end_of_utterance, CompactLattice *clat) const
Gets the lattice. 
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
bool EndpointDetected(const OnlineEndpointConfig &config)
This function calls EndpointDetected from online-endpoint.h, with the required arguments. 
 
You will instantiate this class when you want to decode a single utterance using the online-decoding ...
 
fst::VectorFst< LatticeArc > Lattice
 
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables. 
 
void Register(OptionsItf *opts)
 
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility. 
 
bool HasKey(const std::string &key)
 
fst::VectorFst< CompactLatticeArc > CompactLattice
 
This class's purpose is to read in Wave files. 
 
int NumArgs() const
Number of positional parameters (c.f. argc-1). 
 
std::string global_cmvn_stats_rxfilename
Options for online cmvn, read from config file. 
 
OnlineNnet2FeaturePipeline is a class that's responsible for putting together the various parts of th...
 
OnlineSilenceWeightingConfig silence_weighting_config
Config for weighting silence in iVector adaptation. 
 
void AdvanceDecoding()
Advances the decoding as far as we can. 
 
int main(int argc, char *argv[])
 
class OnlineTimingStats stores statistics from timing of online decoding, which will enable the Print...
 
void Register(OptionsItf *opts)
 
When you instantiate class DecodableNnetSimpleLooped, you should give it a const reference to this cl...
 
void Register(OptionsItf *opts)
 
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
 
OnlineIvectorExtractionInfo ivector_extractor_info
 
const LatticeFasterOnlineDecoderTpl< FST > & Decoder() const
 
void WaitUntil(double cur_utterance_length)
The call to WaitUntil(t) simulates the effect of sleeping until cur_utterance_length seconds after th...
 
void FinalizeDecoding()
Finalizes the decoding. 
 
Config class for the CollapseModel function. 
 
void GetDeltaWeights(int32 num_frames_ready, int32 first_decoder_frame, std::vector< std::pair< int32, BaseFloat > > *delta_weights)