online-net-client.cc File Reference
#include <fcntl.h>
#include <netdb.h>
#include <unistd.h>
#include "feat/feature-mfcc.h"
#include "online/online-audio-source.h"
#include "online/online-feat-input.h"
Include dependency graph for online-net-client.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 31 of file online-net-client.cc.

References FrameExtractionOptions::frame_length_ms, MfccOptions::frame_opts, FrameExtractionOptions::frame_shift_ms, ParseOptions::GetArg(), KALDI_ERR, kaldi::kUndefined, MfccOptions::num_ceps, ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), Matrix< Real >::Resize(), MfccOptions::use_energy, and MatrixBase< Real >::Write().

// Reads audio from a microphone through PortAudio, extracts MFCC features in
// batches and sends every batch as one UDP datagram to a recognition server.
// Whatever the server sends back is echoed verbatim to stdout.
//
// Usage: online-net-client [--batch-size=N] server-address server-port
//
// Returns 0 when the feature source is exhausted, 1 when the wrong number of
// positional arguments is given, and -1 when a std::exception is caught
// (failures are reported through KALDI_ERR).
int main(int argc, char *argv[]) {
  try {
#ifndef KALDI_NO_PORTAUDIO
    using namespace kaldi;

    typedef kaldi::int32 int32;
    typedef OnlineFeInput<Mfcc> FeInput;

    // Time out interval for the PortAudio source
    const int32 kTimeout = 500; // half second
    // PortAudio sampling rate
    const int32 kSampleFreq = 16000;
    // PortAudio's internal ring buffer size in bytes
    const int32 kPaRingSize = 32768;
    // Report interval for PortAudio buffer overflows in number of feat. batches
    const int32 kPaReportInt = 4;

    const char *usage =
        "Takes input using a microphone(PortAudio), extracts features and sends them\n"
        "to a speech recognition server over a network connection\n\n"
        "Usage: online-net-client server-address server-port\n\n";
    ParseOptions po(usage);
    int32 batch_size = 27;
    po.Register("batch-size", &batch_size,
                "The number of feature vectors to be extracted and sent in one go");
    po.Read(argc, argv);
    if (po.NumArgs() != 2) {
      po.PrintUsage();
      return 1;
    }

    std::string server_addr_str = po.GetArg(1);
    std::string server_port_str = po.GetArg(2);

    // getaddrinfo() requires every hint field that is not explicitly set to be
    // zero/null, so value-initialize the structure before filling it in.
    addrinfo hints = addrinfo();
    hints.ai_family = AF_INET;
    hints.ai_protocol = IPPROTO_UDP;
    hints.ai_socktype = SOCK_DGRAM;
    hints.ai_flags = AI_ADDRCONFIG;
    addrinfo *server_addr = NULL;
    if (getaddrinfo(server_addr_str.c_str(), server_port_str.c_str(),
                    &hints, &server_addr) != 0)
      KALDI_ERR << "getaddrinfo() call failed!";
    int32 sock_desc = socket(server_addr->ai_family,
                             server_addr->ai_socktype,
                             server_addr->ai_protocol);
    if (sock_desc == -1)
      KALDI_ERR << "socket() call failed!";
    // Non-blocking mode lets the send loop poll for replies without stalling
    // feature extraction. Both fcntl() calls are checked: a failed F_GETFL
    // returns -1, which must not be OR-ed and written back as a flag set.
    int32 flags = fcntl(sock_desc, F_GETFL);
    if (flags == -1 || fcntl(sock_desc, F_SETFL, flags | O_NONBLOCK) == -1)
      KALDI_ERR << "fcntl() failed to put the socket in non-blocking mode!";

    // We are not properly registering/exposing MFCC and frame extraction options,
    // because there are parts of the online decoding code, where some of these
    // options are hardwired(ToDo: we should fix this at some point)
    MfccOptions mfcc_opts;
    mfcc_opts.use_energy = false;
    int32 frame_length = mfcc_opts.frame_opts.frame_length_ms = 25;
    int32 frame_shift = mfcc_opts.frame_opts.frame_shift_ms = 10;
    OnlinePaSource au_src(kTimeout, kSampleFreq, kPaRingSize, kPaReportInt);
    Mfcc mfcc(mfcc_opts);
    // Frame length/shift are converted from milliseconds to samples.
    FeInput fe_input(&au_src, &mfcc,
                     frame_length * (kSampleFreq / 1000),
                     frame_shift * (kSampleFreq / 1000));
    std::cerr << std::endl << "Sending features to " << server_addr_str
              << ':' << server_port_str << " ... " << std::endl;
    char buf[65535];
    Matrix<BaseFloat> feats;
    while (1) {
      feats.Resize(batch_size, mfcc_opts.num_ceps, kUndefined);
      bool more_feats = fe_input.Compute(&feats);
      if (feats.NumRows() > 0) {
        std::stringstream ss;
        feats.Write(ss, true); // serialize features as binary data
        // Copy the serialized bytes once; the payload is binary, so its
        // length comes from size(), not from a terminating NUL.
        const std::string payload = ss.str();
        ssize_t sent = sendto(sock_desc,
                              payload.data(),
                              payload.size(), 0,
                              server_addr->ai_addr,
                              server_addr->ai_addrlen);
        if (sent == -1)
          KALDI_ERR << "sendto() call failed!";
        // Leave one byte free for the terminating NUL written below, and do
        // not ask for the sender address: storing it in server_addr would
        // overwrite the destination used by the next sendto().
        ssize_t rcvd = recvfrom(sock_desc, buf, sizeof(buf) - 1, 0,
                                NULL, NULL);
        if (rcvd == -1 && errno != EWOULDBLOCK && errno != EAGAIN) {
          KALDI_ERR << "recvfrom() failed unexpectedly!";
        } else if (rcvd > 0) {
          buf[rcvd] = 0;
          std::cout << buf;
          std::cout.flush();
        }
      }
      if (!more_feats) break;
    }
    freeaddrinfo(server_addr);
    close(sock_desc);
    return 0;
#else
    throw std::runtime_error("kaldi is compiled with KALDI_NO_PORTAUDIO");
#endif
  } catch(const std::exception& e) {
    std::cerr << e.what();
    return -1;
  }
} // main()
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Write(std::ostream &out, bool binary) const
write to stream.
MfccOptions contains basic options for computing MFCC features.
Definition: feature-mfcc.h:38
kaldi::int32 int32
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
FrameExtractionOptions frame_opts
Definition: feature-mfcc.h:39
#define KALDI_ERR
Definition: kaldi-error.h:147
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
This templated class is intended for offline feature extraction, i.e.