69   using namespace kaldi;
    76     signal(SIGPIPE, SIG_IGN);
    79     const int32 kDeltaOrder = 2;
    82         "Starts a TCP server that receives RAW audio and outputs aligned words.\n"    83             "A sample client can be found in: onlinebin/online-audio-client\n\n"    84             "Usage: online-audio-server-decode-faster [options] model-in "    85             "fst-in word-symbol-table silence-phones word_boundary_file tcp-port [lda-matrix-in]\n\n"    86             "example: online-audio-server-decode-faster --verbose=1 --rt-min=0.5 --rt-max=3.0 --max-active=6000\n"    87             "--beam=72.0 --acoustic-scale=0.0769 final.mdl graph/HCLG.fst graph/words.txt '1:2:3:4:5'\n"    88             "graph/word_boundary.int 5000 final.mat\n\n";
    92     int32 cmn_window = 600, min_cmn_window = 100;  
    93     int32 right_context = 4, left_context = 4;
   101     po.Register(
"left-context", &left_context,
   102                 "Number of frames of left context");
   103     po.Register(
"right-context", &right_context,
   104                 "Number of frames of right context");
   105     po.Register(
"acoustic-scale", &acoustic_scale,
   106                 "Scaling factor for acoustic likelihoods");
   108         "cmn-window", &cmn_window,
   109         "Number of feat. vectors used in the running average CMN calculation");
   110     po.Register(
"min-cmn-window", &min_cmn_window,
   111                 "Minumum CMN window used at start of decoding (adds "   112                 "latency only at start)");
   113     po.Register(
"frame-shift", &frame_shift,
   114                 "Time in seconds between frames.\n");
   120     if (po.NumArgs() < 6 || po.NumArgs() > 7) {
   125     std::string model_rspecifier = po.GetArg(1), fst_rspecifier = po.GetArg(2),
   126         word_syms_filename = po.GetArg(3), silence_phones_str = po.GetArg(4),
   127         word_boundary_file = po.GetArg(5), lda_mat_rspecifier = 
"";
   129     if (po.NumArgs() == 7)
   130       lda_mat_rspecifier = po.GetOptArg(7);
   132     int32 port = strtol(po.GetArg(6).c_str(), 0, 10);
   134     std::vector<int32> silence_phones;
   136       KALDI_ERR << 
"Invalid silence-phones string " << silence_phones_str;
   137     if (silence_phones.empty())
   140     if (!tcp_server.
Listen(port))
   143     std::cout << 
"Reading LDA matrix: " << lda_mat_rspecifier << 
"..."   146     if (lda_mat_rspecifier != 
"") {
   148       Input ki(lda_mat_rspecifier, &binary_in);
   149       lda_transform.
Read(ki.Stream(), binary_in);
   152     std::cout << 
"Reading acoustic model: " << model_rspecifier << 
"..."   158       Input ki(model_rspecifier, &binary);
   159       trans_model.
Read(ki.Stream(), binary);
   160       am_gmm.
Read(ki.Stream(), binary);
   163     std::cout << 
"Reading word list: " << word_syms_filename << 
"..."   165     fst::SymbolTable *word_syms = NULL;
   166     if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
   167       KALDI_ERR << 
"Could not read symbol table from file "   168           << word_syms_filename;
   170     std::cout << 
"Reading word boundary file: " << word_boundary_file << 
"..."   174     std::cout << 
"Reading FST: " << fst_rspecifier << 
"..." << std::endl;
   175     fst::Fst < fst::StdArc > *decode_fst = 
ReadDecodeGraph(fst_rspecifier);
   185     int32 window_size = right_context + left_context + 1;
   192     VectorFst < LatticeArc > out_fst;
   196     int32 client_socket = -1;
   201           std::cout << 
"Client disconnected!" << std::endl;
   204         client_socket = tcp_server.
Accept();
   212       Mfcc mfcc(mfcc_opts);
   213       FeInput fe_input(au_src, &mfcc, frame_length * (16000 / 1000),
   214                        mfcc_frame_shift * (16000 / 1000));  
   217       if (lda_mat_rspecifier != 
"") {
   219                                             left_context, right_context);
   222         opts.
order = kDeltaOrder;
   230                                              acoustic_scale, &feature_matrix);
   232       clock_t start = clock();
   233       int32 decoder_offset = 0;
   245         if (dstate & (decoder.kEndFeats | decoder.kEndUtt)) {
   246           std::vector<int32> word_ids, times, lengths;
   248           decoder.FinishTraceBack(&out_fst);
   249           decoder.GetBestPath(&out_fst);
   266           for (
size_t i = 0; 
i < word_ids.size(); 
i++)
   267             if (word_ids[
i] != 0)
   272             float dur = (clock() - start) / (
float) CLOCKS_PER_SEC;
   278             std::stringstream sstr;
   279             sstr << 
"RESULT:NUM=" << words_num << 
",FORMAT=WSE,RECO-DUR=" << dur
   280                 << 
",INPUT-DUR=" << input_dur;
   284             for (
size_t i = 0; 
i < word_ids.size(); 
i++) {
   285               if (word_ids[
i] == 0)
   288               std::string word = word_syms->Find(word_ids[
i]);
   295               std::stringstream wstr;
   296               wstr << word << 
"," << start << 
"," << (start + len);
   302           if (dstate == decoder.kEndFeats) {
   307           decoder_offset = decoder.frame();
   309           std::vector<int32> word_ids;
   310           if (decoder.PartialTraceback(&out_fst)) {
   313                                     static_cast<LatticeArc::Weight*>(0));
   314             for (
size_t i = 0; i < word_ids.size(); i++) {
   315               if (word_ids[i] != 0) {
   317                           "PARTIAL:" + word_syms->Find(word_ids[i]));
   323       delete feat_transform;
   326     std::cout << 
"Deinitizalizing..." << std::endl;
   332   } 
catch (
const std::exception& e) {
   333     std::cerr << e.what();
 size_t SamplesProcessed()
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
void Register(OptionsItf *opts, bool full)
 
MfccOptions contains basic options for computing MFCC features. 
 
bool DeterminizeLatticePruned(const ExpandedFst< ArcTpl< Weight > > &ifst, double beam, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, IntType > > > *ofst, DeterminizeLatticePrunedOptions opts)
 
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
 
const float kFramesPerSecond
 
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
 
bool WordAlignLattice(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, int32 max_states, CompactLattice *lat_out)
Align lattice so that each arc has the transition-ids on it that correspond to the word that is on that arc.
 
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST. 
 
void Read(std::istream &in, bool binary, bool add=false)
read from stream. 
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
FrameExtractionOptions frame_opts
 
void Read(std::istream &is, bool binary)
 
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST. 
 
void Register(OptionsItf *opts)
 
fst::VectorFst< LatticeArc > Lattice
 
fst::VectorFst< CompactLatticeArc > CompactLattice
 
bool WriteLine(int32 socket, std::string line)
 
fst::Fst< fst::StdArc > * ReadDecodeGraph(const std::string &filename)
 
void Register(OptionsItf *opts)
 
This templated class is intended for offline feature extraction, i.e. where you have access to the entire signal at the start.
 
void Read(std::istream &in_stream, bool binary)
 
bool CompactLatticeToWordAlignment(const CompactLattice &clat, std::vector< int32 > *words, std::vector< int32 > *begin_times, std::vector< int32 > *lengths)
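This function takes a CompactLattice that should only contain a single linear sequence (e.g. derived from lattice-1best), and outputs, for each word in sequence, the word label, its begin time and its length.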