online-net-client.cc
Go to the documentation of this file.
1 // onlinebin/online-net-client.cc
2 
3 // Copyright 2012 Cisco Systems (author: Matthias Paulik)
4 
5 // Modifications to the original contribution by Cisco Systems made by:
6 // Vassil Panayotov
7 
8 // See ../../COPYING for clarification regarding multiple authors
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
18 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
19 // MERCHANTABILITY OR NON-INFRINGEMENT.
20 // See the Apache 2 License for the specific language governing permissions and
21 // limitations under the License.
22 
23 #include <netdb.h>
24 #include <fcntl.h>
25 
26 #include "feat/feature-mfcc.h"
29 
30 
// Captures audio from a microphone through PortAudio, extracts MFCC features
// in batches and sends each batch as a single UDP datagram to a speech
// recognition server. Any data the server sends back is echoed to stdout.
//
// Command line: online-net-client [--batch-size=N] server-address server-port
//
// Returns 0 once fe_input.Compute() reports that no more features are
// available, 1 on a usage error, and -1 if a std::exception is caught.
// NOTE(review): like the original code, this relies on KALDI_ERR not
// returning control to the statement that follows it.
int main(int argc, char *argv[]) {
  try {
#ifndef KALDI_NO_PORTAUDIO
    using namespace kaldi;

    typedef kaldi::int32 int32;
    typedef OnlineFeInput<Mfcc> FeInput;

    // Time out interval for the PortAudio source
    const int32 kTimeout = 500;  // half second
    // PortAudio sampling rate
    const int32 kSampleFreq = 16000;
    // PortAudio's internal ring buffer size in bytes
    const int32 kPaRingSize = 32768;
    // Report interval for PortAudio buffer overflows in number of feat. batches
    const int32 kPaReportInt = 4;

    const char *usage =
        "Takes input using a microphone(PortAudio), extracts features and sends them\n"
        "to a speech recognition server over a network connection\n\n"
        "Usage: online-net-client server-address server-port\n\n";
    ParseOptions po(usage);
    int32 batch_size = 27;
    po.Register("batch-size", &batch_size,
                "The number of feature vectors to be extracted and sent in one go");
    po.Read(argc, argv);
    if (po.NumArgs() != 2) {
      po.PrintUsage();
      return 1;
    }
    // A non-positive batch would ask for a matrix with no rows but a non-zero
    // number of columns, which Matrix::Resize does not accept.
    if (batch_size <= 0)
      KALDI_ERR << "--batch-size must be a positive number!";

    std::string server_addr_str = po.GetArg(1);
    std::string server_port_str = po.GetArg(2);

    // Resolve the server address (IPv4, UDP). The hints structure must be
    // zero-initialised: getaddrinfo() requires every member other than the
    // four set below to be zero or a null pointer.
    addrinfo *server_addr = NULL;
    addrinfo hints = {};
    hints.ai_family = AF_INET;
    hints.ai_protocol = IPPROTO_UDP;
    hints.ai_socktype = SOCK_DGRAM;
    hints.ai_flags = AI_ADDRCONFIG;
    if (getaddrinfo(server_addr_str.c_str(), server_port_str.c_str(),
                    &hints, &server_addr) != 0)
      KALDI_ERR << "getaddrinfo() call failed!";

    int32 sock_desc = socket(server_addr->ai_family,
                             server_addr->ai_socktype,
                             server_addr->ai_protocol);
    if (sock_desc == -1)
      KALDI_ERR << "socket() call failed!";

    // Switch the socket to non-blocking mode so that polling for the server's
    // replies never stalls the feature extraction loop.
    int32 flags = fcntl(sock_desc, F_GETFL);
    if (flags == -1)
      KALDI_ERR << "fcntl() failed to read the socket flags!";
    flags |= O_NONBLOCK;
    if (fcntl(sock_desc, F_SETFL, flags) == -1)
      KALDI_ERR << "fcntl() failed to put the socket in non-blocking mode!";

    // We are not properly registering/exposing MFCC and frame extraction
    // options, because there are parts of the online decoding code where some
    // of these options are hardwired (TODO: we should fix this at some point).
    MfccOptions mfcc_opts;
    mfcc_opts.use_energy = false;
    int32 frame_length = mfcc_opts.frame_opts.frame_length_ms = 25;
    int32 frame_shift = mfcc_opts.frame_opts.frame_shift_ms = 10;
    OnlinePaSource au_src(kTimeout, kSampleFreq, kPaRingSize, kPaReportInt);
    Mfcc mfcc(mfcc_opts);
    // Frame length and shift are converted from milliseconds to samples.
    FeInput fe_input(&au_src, &mfcc,
                     frame_length * (kSampleFreq / 1000),
                     frame_shift * (kSampleFreq / 1000));
    std::cerr << std::endl << "Sending features to " << server_addr_str
              << ':' << server_port_str << " ... " << std::endl;

    char buf[65535];
    Matrix<BaseFloat> feats;
    while (1) {
      feats.Resize(batch_size, mfcc_opts.num_ceps, kUndefined);
      bool more_feats = fe_input.Compute(&feats);
      if (feats.NumRows() > 0) {
        std::stringstream ss;
        feats.Write(ss, true);  // serialize features as binary data
        // Copy the serialized bytes out of the stream exactly once.
        const std::string payload = ss.str();
        ssize_t sent = sendto(sock_desc,
                              payload.data(),
                              payload.size(), 0,
                              server_addr->ai_addr,
                              server_addr->ai_addrlen);
        if (sent == -1)
          KALDI_ERR << "sendto() call failed!";

        // Poll for a reply. One byte of the buffer is reserved for the
        // terminating NUL written below. The sender's address is deliberately
        // discarded so that an incoming datagram cannot overwrite the
        // destination address used by the next sendto().
        ssize_t rcvd = recvfrom(sock_desc, buf, sizeof(buf) - 1, 0,
                                NULL, NULL);
        if (rcvd == -1 && errno != EWOULDBLOCK && errno != EAGAIN) {
          KALDI_ERR << "recvfrom() failed unexpectedly!";
        } else if (rcvd > 0) {
          buf[rcvd] = 0;
          std::cout << buf;
          std::cout.flush();
        }
      }
      if (!more_feats) break;
    }
    freeaddrinfo(server_addr);
    return 0;
#else
    throw std::runtime_error("kaldi is compiled with KALDI_NO_PORTAUDIO");
#endif
  } catch(const std::exception& e) {
    std::cerr << e.what();
    return -1;
  }
}  // main()
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Write(std::ostream &out, bool binary) const
write to stream.
MfccOptions contains basic options for computing MFCC features.
Definition: feature-mfcc.h:38
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
kaldi::int32 int32
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
FrameExtractionOptions frame_opts
Definition: feature-mfcc.h:39
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
This templated class is intended for offline feature extraction, i.e.
int main(int argc, char *argv[])