online-audio-source.h
Go to the documentation of this file.
1 // online/online-audio-source.h
2 
3 // Copyright 2012 Cisco Systems (author: Matthias Paulik)
4 
5 // Modifications to the original contribution by Cisco Systems made by:
6 // Vassil Panayotov
7 
8 // See ../../COPYING for clarification regarding multiple authors
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
18 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
19 // MERCHANTABILITY OR NON-INFRINGEMENT.
20 // See the Apache 2 License for the specific language governing permissions and
21 // limitations under the License.
22 
23 #ifndef KALDI_ONLINE_ONLINE_AUDIO_SOURCE_H_
24 #define KALDI_ONLINE_ONLINE_AUDIO_SOURCE_H_
25 
26 #ifndef KALDI_NO_PORTAUDIO
27 
28 #include <portaudio.h>
29 #include <pa_ringbuffer.h>
30 
31 #endif //KALDI_NO_PORTAUDIO
32 
33 #include "matrix/kaldi-vector.h"
34 
35 namespace kaldi {
36 
38  public:
39  // Reads from the audio source, and writes the samples converted to BaseFloat
40  // into the vector pointed to by "data".
41  // The user sets data->Dim() as a way of requesting that many samples.
42  // The function returns true if there may be more data, and false if it
43  // knows we are at the end of the stream.
44  // In case an unexpected and unrecoverable error occurs the function throws
45  // an exception of type KaldiFatalError (by using KALDI_ERR macro).
46  //
47  // NOTE: The older version of this interface had a second parameter - "timeout".
48  // We decided to remove it, because we don't envision usage scenarios,
49  // where "timeout" will need to be changed dynamically from call to call.
50  // If the particular audio source can experience timeouts for some reason
51  // (e.g. the samples are received over a network connection)
52  // we encourage the implementors to configure timeout using a
53  // constructor parameter.
54  // The suggested semantics are: if timeout is used and is greater than 0,
55  // this method has to wait no longer than "timeout" milliseconds before
56  // returning data-- by that time, it will return as much data as it has.
57  virtual bool Read(Vector<BaseFloat> *data) = 0;
58 
59  virtual ~OnlineAudioSourceItf() { }
60 };
61 
62 #ifndef KALDI_NO_PORTAUDIO
63 
64 // OnlineAudioSourceItf implementation using PortAudio to read samples in real-time
65 // from a sound card/microphone.
67  public:
68  typedef int16 SampleType; // hardcoded 16-bit audio
69  typedef ring_buffer_size_t rbs_t;
70 
71  // PortAudio is initialized here, so it may throw an exception on error
72  // "timeout": if > 0, and the acquisition takes more than this number of
73  // milliseconds, Read() will return the data it has so far.
74  // If no data was received until timeout expired, Read() returns
75  // false (assumes sensible timeout).
76  // "sample_rate": the input rate to request from PortAudio
77  // "rb_size": requested size of PA's ring buffer - will be rounded up to
78  // a power of 2
79  // "report_interval": if not 0, PA ring buffer overflow will be reported
80  // at every report_interval-th call to Read().
81  // Putting 0 into this argument disables the reporting.
82  OnlinePaSource(const uint32 timeout,
83  const uint32 sample_rate,
84  const uint32 rb_size,
85  const uint32 report_interval);
86 
87  // Implementation of the OnlineAudioSourceItf
88  bool Read(Vector<BaseFloat> *data);
89 
90  // Making friends with the callback so it will be able to access a private
91  // member function to delegate the processing
92  friend int PaCallback(const void *input, void *output,
93  long unsigned frame_count,
94  const PaStreamCallbackTimeInfo *time_info,
95  PaStreamCallbackFlags status_flags,
96  void *user_data);
97 
98  // Returns true if the last call to Read() failed to read the requested
99  // number of samples due to timeout.
100  bool TimedOut() { return timed_out_; }
101 
102  ~OnlinePaSource();
103 
104  private:
105  // The real PortAudio callback delegates to this one
106  int Callback(const void *input, void *output,
107  ring_buffer_size_t frame_count,
108  const PaStreamCallbackTimeInfo *time_info,
109  PaStreamCallbackFlags status_flags);
110 
111  uint32 timeout_; // timeout in milliseconds. if > 0, after this many ms. we
112  // give up trying to read data from PortAudio
113  bool timed_out_; // True if the last call to Read() failed to obtain the requested
114  // number of samples, because of timeout
115  uint32 sample_rate_; // the sampling rate of the input audio
117  char *ring_buffer_; // points to the actual buffer used by PA to store samples
118  PaUtilRingBuffer pa_ringbuf_; // a data structure used to wrap the ring buffer
119  PaStream *pa_stream_;
120  bool pa_started_; // becomes "true" after "pa_stream_" is started
121  uint32 report_interval_; // interval (in Read() calls) to report PA rb overflows
122  uint32 nread_calls_; // number of Read() calls so far
123  uint32 noverflows_; // number of the ringbuf overflows since the last report
124  uint32 samples_lost_; // samples lost, due to PA ring buffer overflow
126 };
127 
128 // The actual PortAudio callback - delegates to OnlinePaSource->Callback()
129 int PaCallback(const void *input, void *output,
130  long unsigned frame_count,
131  const PaStreamCallbackTimeInfo *time_info,
132  PaStreamCallbackFlags status_flags,
133  void *user_data);
134 #endif //KALDI_NO_PORTAUDIO
135 
136 // Simulates audio input, by returning data from a Vector.
137 // This class is mostly meant to be used for online decoder testing using
138 // pre-recorded audio
140  public:
142  : src_(input), pos_(0) {}
143 
144  // Implementation of the OnlineAudioSourceItf
145  bool Read(Vector<BaseFloat> *data);
146 
147  private:
149  uint32 pos_; // the index of the first element, not yet consumed
151 };
152 
153 } // namespace kaldi
154 
155 #endif // KALDI_ONLINE_ONLINE_AUDIO_SOURCE_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual bool Read(Vector< BaseFloat > *data)=0
PaUtilRingBuffer pa_ringbuf_
kaldi::int32 int32
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121
OnlineVectorSource(const VectorBase< BaseFloat > &input)
int PaCallback(const void *input, void *output, long unsigned frame_count, const PaStreamCallbackTimeInfo *time_info, PaStreamCallbackFlags status_flags, void *user_data)
A class representing a vector.
Definition: kaldi-vector.h:406
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
ring_buffer_size_t rbs_t