37 #include <sys/socket.h> 38 #include <sys/types.h> 69 using namespace kaldi;
76 signal(SIGPIPE, SIG_IGN);
79 const int32 kDeltaOrder = 2;
82 "Starts a TCP server that receives RAW audio and outputs aligned words.\n" 83 "A sample client can be found in: onlinebin/online-audio-client\n\n" 84 "Usage: online-audio-server-decode-faster [options] model-in " 85 "fst-in word-symbol-table silence-phones word_boundary_file tcp-port [lda-matrix-in]\n\n" 86 "example: online-audio-server-decode-faster --verbose=1 --rt-min=0.5 --rt-max=3.0 --max-active=6000\n" 87 "--beam=72.0 --acoustic-scale=0.0769 final.mdl graph/HCLG.fst graph/words.txt '1:2:3:4:5'\n" 88 "graph/word_boundary.int 5000 final.mat\n\n";
92 int32 cmn_window = 600, min_cmn_window = 100;
93 int32 right_context = 4, left_context = 4;
101 po.
Register(
"left-context", &left_context,
102 "Number of frames of left context");
103 po.
Register(
"right-context", &right_context,
104 "Number of frames of right context");
105 po.
Register(
"acoustic-scale", &acoustic_scale,
106 "Scaling factor for acoustic likelihoods");
108 "cmn-window", &cmn_window,
109 "Number of feat. vectors used in the running average CMN calculation");
110 po.
Register(
"min-cmn-window", &min_cmn_window,
111 "Minumum CMN window used at start of decoding (adds " 112 "latency only at start)");
113 po.
Register(
"frame-shift", &frame_shift,
114 "Time in seconds between frames.\n");
125 std::string model_rspecifier = po.
GetArg(1), fst_rspecifier = po.
GetArg(2),
126 word_syms_filename = po.
GetArg(3), silence_phones_str = po.
GetArg(4),
127 word_boundary_file = po.
GetArg(5), lda_mat_rspecifier =
"";
132 int32 port = strtol(po.
GetArg(6).c_str(), 0, 10);
134 std::vector<int32> silence_phones;
136 KALDI_ERR <<
"Invalid silence-phones string " << silence_phones_str;
137 if (silence_phones.empty())
140 if (!tcp_server.
Listen(port))
143 std::cout <<
"Reading LDA matrix: " << lda_mat_rspecifier <<
"..." 146 if (lda_mat_rspecifier !=
"") {
148 Input ki(lda_mat_rspecifier, &binary_in);
152 std::cout <<
"Reading acoustic model: " << model_rspecifier <<
"..." 158 Input ki(model_rspecifier, &binary);
163 std::cout <<
"Reading word list: " << word_syms_filename <<
"..." 165 fst::SymbolTable *word_syms = NULL;
166 if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
167 KALDI_ERR <<
"Could not read symbol table from file " 168 << word_syms_filename;
170 std::cout <<
"Reading word boundary file: " << word_boundary_file <<
"..." 174 std::cout <<
"Reading FST: " << fst_rspecifier <<
"..." << std::endl;
175 fst::Fst < fst::StdArc > *decode_fst =
ReadDecodeGraph(fst_rspecifier);
185 int32 window_size = right_context + left_context + 1;
192 VectorFst < LatticeArc > out_fst;
196 int32 client_socket = -1;
201 std::cout <<
"Client disconnected!" << std::endl;
204 client_socket = tcp_server.
Accept();
212 Mfcc mfcc(mfcc_opts);
213 FeInput fe_input(au_src, &mfcc, frame_length * (16000 / 1000),
214 mfcc_frame_shift * (16000 / 1000));
217 if (lda_mat_rspecifier !=
"") {
219 left_context, right_context);
222 opts.
order = kDeltaOrder;
230 acoustic_scale, &feature_matrix);
232 clock_t start = clock();
233 int32 decoder_offset = 0;
246 std::vector<int32> word_ids, times, lengths;
266 for (
size_t i = 0;
i < word_ids.size();
i++)
267 if (word_ids[
i] != 0)
272 float dur = (clock() - start) / (
float) CLOCKS_PER_SEC;
278 std::stringstream sstr;
279 sstr <<
"RESULT:NUM=" << words_num <<
",FORMAT=WSE,RECO-DUR=" << dur
280 <<
",INPUT-DUR=" << input_dur;
284 for (
size_t i = 0;
i < word_ids.size();
i++) {
285 if (word_ids[
i] == 0)
288 std::string word = word_syms->Find(word_ids[
i]);
295 std::stringstream wstr;
296 wstr << word <<
"," << start <<
"," << (start + len);
307 decoder_offset = decoder.
frame();
309 std::vector<int32> word_ids;
313 static_cast<LatticeArc::Weight*>(0));
314 for (
size_t i = 0;
i < word_ids.size();
i++) {
315 if (word_ids[
i] != 0) {
317 "PARTIAL:" + word_syms->Find(word_ids[
i]));
323 delete feat_transform;
326 std::cout <<
"Deinitizalizing..." << std::endl;
332 }
catch (
const std::exception& e) {
333 std::cerr << e.what();
345 h_addr_.sin_addr.s_addr = INADDR_ANY;
346 h_addr_.sin_port = htons(port);
352 KALDI_ERR <<
"Cannot create TCP socket!";
358 if( setsockopt(
server_desc_, SOL_SOCKET, SO_REUSEADDR, &flag, len) == -1){
359 KALDI_ERR <<
"Cannot set socket options!\n";
364 KALDI_ERR <<
"Cannot bind to port: " << port <<
" (is it taken?)";
373 std::cout <<
"TcpServer: Listening on port: " << port << std::endl;
385 std::cout <<
"Waiting for client..." << std::endl;
389 len =
sizeof(
struct sockaddr);
392 struct sockaddr_storage addr;
396 getpeername(client_desc, (
struct sockaddr*) &addr, &len);
398 struct sockaddr_in *s = (
struct sockaddr_in *) &addr;
399 inet_ntop(AF_INET, &s->sin_addr, ipstr,
sizeof ipstr);
401 std::cout <<
"TcpServer: Accepted connection from: " << ipstr << std::endl;
409 const char* p = line.c_str();
410 int32 to_write = line.size();
412 while (to_write > 0) {
413 int32 ret = write(socket, p + wrote, to_write);
size_t SamplesProcessed()
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool PartialTraceback(fst::MutableFst< LatticeArc > *out_fst)
void Register(OptionsItf *opts, bool full)
MfccOptions contains basic options for computing MFCC features.
bool DeterminizeLatticePruned(const ExpandedFst< ArcTpl< Weight > > &ifst, double beam, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, IntType > > > *ofst, DeterminizeLatticePrunedOptions opts)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
const float kFramesPerSecond
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
struct sockaddr_in h_addr_
DecodeState Decode(DecodableInterface *decodable)
bool WordAlignLattice(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, int32 max_states, CompactLattice *lat_out)
Align lattice so that each arc has the transition-ids on it that correspond to the word that is on th...
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
void Register(const std::string &name, bool *ptr, const std::string &doc)
bool GetBestPath(fst::MutableFst< LatticeArc > *fst_out, bool use_final_probs=true)
GetBestPath gets the decoding traceback.
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void FinishTraceBack(fst::MutableFst< LatticeArc > *fst_out)
FrameExtractionOptions frame_opts
void Read(std::istream &is, bool binary)
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
void Register(OptionsItf *opts)
int32 main(int argc, char *argv[])
fst::VectorFst< LatticeArc > Lattice
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
fst::VectorFst< CompactLatticeArc > CompactLattice
int NumArgs() const
Number of positional parameters (c.f. argc-1).
bool WriteLine(int32 socket, std::string line)
fst::Fst< fst::StdArc > * ReadDecodeGraph(const std::string &filename)
void Register(OptionsItf *opts)
This templated class is intended for offline feature extraction, i.e.
void Read(std::istream &in_stream, bool binary)
bool CompactLatticeToWordAlignment(const CompactLattice &clat, std::vector< int32 > *words, std::vector< int32 > *begin_times, std::vector< int32 > *lengths)
This function takes a CompactLattice that should only contain a single linear sequence (e...
std::string GetOptArg(int param) const