// Entry point of the online-net-client tool. Per the usage string below it
// takes audio from a microphone via PortAudio, extracts features and sends
// them to a speech recognition server over a network connection.
// Positional arguments: server-address, server-port.
//
// NOTE(review): this is a listing excerpt. The embedded source line numbers
// skip, so several statements (for example the construction of `po`, the
// opening of the try block, the send/receive loop header and the end of the
// function) are not visible here. Comments below describe only what is shown.
31 int main(
int argc,
char *argv[]) {
// Everything that needs audio capture is guarded by KALDI_NO_PORTAUDIO; the
// std::runtime_error thrown near the end of the function is presumably the
// branch taken when PortAudio support is compiled out - confirm against the
// full file.
33 #ifndef KALDI_NO_PORTAUDIO 34 using namespace kaldi;
// The four constants below are handed, in this order, to the OnlinePaSource
// constructor further down. kSampleFreq is also divided by 1000 to scale
// frame_length / frame_shift for FeInput (presumably Hz and milliseconds -
// TODO confirm units).
40 const int32 kTimeout = 500;
42 const int32 kSampleFreq = 16000;
44 const int32 kPaRingSize = 32768;
46 const int32 kPaReportInt = 4;
// Usage text (the declaration that owns these literals is not in the excerpt).
49 "Takes input using a microphone(PortAudio), extracts features and sends them\n" 50 "to a speech recognition server over a network connection\n\n" 51 "Usage: online-net-client server-address server-port\n\n";
// Number of feature vectors sent per packet; defaults to 27 and can be
// overridden with the "batch-size" command-line option.
53 int32 batch_size = 27;
54 po.
Register(
"batch-size", &batch_size,
55 "The number of feature vectors to be extracted and sent in one go");
// Positional arguments are 1-based: 1 = server address, 2 = server port.
62 std::string server_addr_str = po.
GetArg(1);
63 std::string server_port_str = po.
GetArg(2);
// Resolve the server as an IPv4 / UDP / datagram endpoint.
// NOTE(review): `hints` is declared without an initializer and only four of
// its members are assigned before getaddrinfo() is called. POSIX requires the
// remaining members to be zero / null, so this looks like it needs
// zero-initialisation - confirm and fix in the real source.
65 addrinfo *server_addr, hints;
66 hints.ai_family = AF_INET;
67 hints.ai_protocol = IPPROTO_UDP;
68 hints.ai_socktype = SOCK_DGRAM;
69 hints.ai_flags = AI_ADDRCONFIG;
70 if (getaddrinfo(server_addr_str.c_str(), server_port_str.c_str(),
71 &hints, &server_addr) != 0)
72 KALDI_ERR <<
"getaddrinfo() call failed!";
// Create the socket using the family / type / protocol that getaddrinfo()
// returned for the first result.
73 int32 sock_desc = socket(server_addr->ai_family,
74 server_addr->ai_socktype,
75 server_addr->ai_protocol);
// Read the current file status flags and write them back. Judging by the
// error message the intent is to switch the socket to non-blocking mode; the
// statement that would add the non-blocking flag (source line 79) is not
// part of this excerpt - verify it exists in the full file.
78 int32 flags = fcntl(sock_desc, F_GETFL);
80 if (fcntl(sock_desc, F_SETFL, flags) == -1)
81 KALDI_ERR <<
"fcntl() failed to put the socket in non-blocking mode!";
// Audio source feeding the feature-extraction front end. `mfcc`,
// `frame_length` and `frame_shift` are defined in lines not shown here.
90 OnlinePaSource au_src(kTimeout, kSampleFreq, kPaRingSize, kPaReportInt);
92 FeInput fe_input(&au_src, &mfcc,
93 frame_length * (kSampleFreq / 1000),
94 frame_shift * (kSampleFreq / 1000));
// Progress message goes to stderr.
95 std::cerr << std::endl <<
"Sending features to " << server_addr_str
96 <<
':' << server_port_str <<
" ... " << std::endl;
// Compute a batch of features; the result is stored in more_feats and is
// used below to decide when to stop.
101 bool more_feats = fe_input.Compute(&feats);
// Serialise the feature matrix into an in-memory stream; the second argument
// of Write() is the `binary` flag, so this is binary mode.
103 std::stringstream ss;
104 feats.
Write(ss,
true);
// Send the serialised batch to the resolved server address (the buffer,
// length and flags arguments are on lines missing from the excerpt).
105 ssize_t sent = sendto(sock_desc,
108 server_addr->ai_addr,
109 server_addr->ai_addrlen);
// Try to read a reply. EWOULDBLOCK / EAGAIN are not treated as errors here;
// any other recvfrom() failure is reported through KALDI_ERR.
112 ssize_t rcvd = recvfrom(sock_desc, buf,
sizeof(buf), 0,
113 server_addr->ai_addr, &server_addr->ai_addrlen);
114 if (rcvd == -1 && errno != EWOULDBLOCK && errno != EAGAIN) {
115 KALDI_ERR <<
"recvfrom() failed unexpectedly!";
116 }
else if (rcvd > 0) {
// Leave the loop once Compute() returned false.
122 if (!more_feats)
break;
// Release the address list allocated by getaddrinfo().
124 freeaddrinfo(server_addr);
127 throw std::runtime_error(
"kaldi is compiled with KALDI_NO_PORTAUDIO");
129 }
// Any std::exception reaching this point has its message printed to stderr.
catch(
const std::exception& e) {
130 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void Write(std::ostream &out, bool binary) const
Write to stream.
MfccOptions contains basic options for computing MFCC features.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
FrameExtractionOptions frame_opts
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (cf. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
This templated class is intended for offline feature extraction, i.e.
int main(int argc, char *argv[])