online-speex-wrapper.cc
Go to the documentation of this file.
1 // online2/online-speex-wrapper.cc
2 
3 // Copyright 2014 IMSL, PKU-HKUST (author: Wei Shi)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <cstring>
22 
23 namespace kaldi {
24 
26  input_finished_(false) {
29  sample_rate_ = config.sample_rate;
30 
31  if(sizeof(speex_bits_) == 1) {
32  KALDI_ERR << "OnlineSpeexEncoder called but Speex not installed."
33  << "You should run tools/extras/install_speex.sh first, then "
34  << "re-run configure in src/ and then make Kaldi again.\n";
35  }
36 
37 #ifdef HAVE_SPEEX
38  speex_state_ = speex_encoder_init(&speex_wb_mode); // init speex with wideband mode
39  int32 tmp = config.speex_quality;
40  speex_encoder_ctl(speex_state_, SPEEX_SET_QUALITY, &tmp);
41  tmp = (int)(sample_rate_);
42  speex_encoder_ctl(speex_state_, SPEEX_SET_SAMPLING_RATE, &tmp);
43  speex_bits_init(&speex_bits_);
44 #endif
45 }
46 
48 #ifdef HAVE_SPEEX
49  speex_bits_destroy(&speex_bits_);
50  speex_encoder_destroy(speex_state_);
51 #endif
52 }
53 
55  int32 sample_rate, const VectorBase<BaseFloat> &waveform) {
56  if (waveform.Dim() == 0) {
57  return; // Nothing to do.
58  }
59  if (input_finished_) {
60  KALDI_ERR << "AcceptWaveform called after InputFinished() was called.";
61  }
62  if (sample_rate != sample_rate_) {
63  KALDI_ERR << "Sampling frequency mismatch, expected "
64  << sample_rate_ << ", got " << sample_rate;
65  }
66 
67  Vector<BaseFloat> appended_wave;
68  const VectorBase<BaseFloat> &wave_to_use = (waveform_remainder_.Dim() != 0 ?
69  appended_wave : waveform);
70  if (waveform_remainder_.Dim() != 0) {
71  appended_wave.Resize(waveform_remainder_.Dim() +
72  waveform.Dim());
73  appended_wave.Range(0, waveform_remainder_.Dim()).CopyFromVec(
75  appended_wave.Range(waveform_remainder_.Dim(),
76  waveform.Dim()).CopyFromVec(waveform);
77  }
78  waveform_remainder_.Resize(0);
79 
80  std::vector<char> spx_bits;
81  Encode(wave_to_use, &spx_bits);
82 
83  if (spx_bits.size() > 0) {
85  spx_bits.begin(), spx_bits.end());
86  }
87 }
88 
89 // Deal with the last frame, pad zeros
91  input_finished_ = true;
92 
93  int32 dim = waveform_remainder_.Dim();
94  if (dim != 0) {
97  std::vector<char> spx_bits;
98  wave_last.Range(0, dim).CopyFromVec(waveform_remainder_);
99  Encode(wave_last, &spx_bits);
100 
102  spx_bits.begin(), spx_bits.end());
103  }
104 }
105 
107  std::vector<char> *speex_encoder_bits) {
108  if (wave.Dim() == 0) {
109  return;
110  }
111 
112  int32 to_encode = wave.Dim();
113  int32 has_encode = 0;
114  char cbits[200];
115  std::vector<char> encoded_bits;
116  while (to_encode > speex_encoded_frame_size_) {
117  SubVector<BaseFloat> wave_frame(wave, has_encode,
119  int32 nbytes = 0;
120 #ifdef HAVE_SPEEX
121  speex_bits_reset(&speex_bits_);
122  speex_encode(speex_state_, wave_frame.Data(), &speex_bits_);
123  nbytes = speex_bits_nbytes(&speex_bits_);
124  if (nbytes != speex_frame_size_) {
125  KALDI_ERR << "The number of bytes of Speex encoded frame mismatch,"
126  << "expected " << speex_frame_size_ << ", got " << nbytes;
127  }
128  nbytes = speex_bits_write(&speex_bits_, cbits, 200);
129 #endif
130 
131  int32 encoded_bits_len = encoded_bits.size();
132  encoded_bits.resize(encoded_bits_len + nbytes);
133  for (int32 i = 0; i < nbytes; i++) {
134  encoded_bits[i+encoded_bits_len] = cbits[i];
135  }
136 
137  has_encode += speex_encoded_frame_size_;
138  to_encode -= speex_encoded_frame_size_;
139  }
140 
141  if (to_encode > 0) {
142  SubVector<BaseFloat> wave_left(wave, has_encode, to_encode);
143  int32 dim = waveform_remainder_.Dim();
144  if (dim != 0) {
145  waveform_remainder_.Resize(dim + to_encode, kCopyData);
146  waveform_remainder_.Range(dim, to_encode).CopyFromVec(wave_left);
147  } else {
148  waveform_remainder_ = wave_left;
149  }
150  }
151 
152  *speex_encoder_bits = encoded_bits;
153 }
154 
155 
158  speex_decoded_frame_size_ = config.speex_wave_frame_size;
159 
160  if(sizeof(speex_bits_) == 1) {
161  KALDI_ERR << "OnlineSpeexEncoder called but Speex not installed."
162  << "You should run tools/extras/install_speex.sh first, then "
163  << "re-run configure in src/ and then make Kaldi again.\n";
164  }
165 
166 #ifdef HAVE_SPEEX
167  speex_state_ = speex_decoder_init(&speex_wb_mode); // init speex with wideband mode
168  int32 tmp = config.speex_quality;
169  speex_decoder_ctl(speex_state_, SPEEX_SET_QUALITY, &tmp);
170  tmp = (int)config.sample_rate;
171  speex_decoder_ctl(speex_state_, SPEEX_SET_SAMPLING_RATE, &tmp);
172  speex_bits_init(&speex_bits_);
173 #endif
174 }
175 
177 #ifdef HAVE_SPEEX
178  speex_decoder_destroy(speex_state_);
179  speex_bits_destroy(&speex_bits_);
180 #endif
181 }
182 
183 void OnlineSpeexDecoder::AcceptSpeexBits(const std::vector<char> &spx_enc_bits) {
184  if (spx_enc_bits.size() == 0) {
185  return; // Nothing to do
186  }
187 
188  std::vector<char> appended_bits;
189  const std::vector<char> &bits_to_use = (speex_bits_remainder_.size() != 0 ?
190  appended_bits : spx_enc_bits);
191  if (speex_bits_remainder_.size() != 0) {
192  appended_bits.insert(appended_bits.end(), speex_bits_remainder_.begin(),
193  speex_bits_remainder_.end());
194  appended_bits.insert(appended_bits.end(), spx_enc_bits.begin(),
195  spx_enc_bits.end());
196  }
197  speex_bits_remainder_.clear();
198 
199  Vector<BaseFloat> waveform;
200  Decode(bits_to_use, &waveform);
201  if (waveform.Dim() == 0) {
202  // Got nothing, maybe the decode has failed
203  return;
204  }
205  int32 last_wav_size = waveform_.Dim();
206  waveform_.Resize(last_wav_size + waveform.Dim(), kCopyData);
207  waveform_.Range(last_wav_size, waveform.Dim()).CopyFromVec(waveform);
208 }
209 
210 void OnlineSpeexDecoder::Decode(const std::vector<char> &speex_char_bits,
211  Vector<BaseFloat> *decoded_wav) {
212  if (speex_char_bits.size() < speex_frame_size_) {
213  return; // Nothing to do, should never reach this
214  }
215  decoded_wav->Resize(0);
216 
217  char *cbits = new char[speex_frame_size_ + 10]();
218  BaseFloat *wav = new BaseFloat[speex_decoded_frame_size_]();
219  int32 to_decode = speex_char_bits.size();
220  int32 has_decode = 0;
221 
222  while(to_decode > speex_frame_size_){
223  memcpy(cbits, &speex_char_bits[has_decode], speex_frame_size_);
224 #ifdef HAVE_SPEEX
225  speex_bits_read_from(&speex_bits_, cbits, speex_frame_size_);
226  speex_decode(speex_state_, &speex_bits_, wav);
227 #endif
228 
229  int32 dim = decoded_wav->Dim(); // expanding decoded_wav each frame
230  decoded_wav->Resize(dim + speex_decoded_frame_size_, kCopyData);
231  // Cannot use CopyFromPtr at this moment
232  // decoded_wav->Range(dim, speex_decoded_frame_size_).
233  // CopyFromPtr(wav, speex_decoded_frame_size_);
234  for (int32 i = 0; i < speex_decoded_frame_size_; i++) {
235  (*decoded_wav)(i+dim) = wav[i];
236  }
237 
238  has_decode += speex_frame_size_;
239  to_decode -= speex_frame_size_;
240  }
241 
242  if (to_decode > 0) {
243  speex_bits_remainder_.insert(speex_bits_remainder_.end(),
244  speex_char_bits.begin() + has_decode, speex_char_bits.end());
245  }
246 
247  delete []cbits;
248  delete []wav;
249 }
250 
251 }
252 // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
std::vector< char > speex_encoded_char_bits_
Vector< BaseFloat > waveform_remainder_
void AcceptWaveform(int32 sample_rate, const VectorBase< BaseFloat > &waveform)
kaldi::int32 int32
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void AcceptSpeexBits(const std::vector< char > &spx_enc_bits)
int32 speex_wave_frame_size
In samples.
#define KALDI_ERR
Definition: kaldi-error.h:147
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: kaldi-vector.h:70
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
BaseFloat sample_rate
The sample frequency of the waveform, it decides which Speex mode to use.
A class representing a vector.
Definition: kaldi-vector.h:406
void Decode(const std::vector< char > &speex_char_bits, Vector< BaseFloat > *decoded_wav)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Encode(const VectorBase< BaseFloat > &wave, std::vector< char > *speex_encoder_bits)
int32 speex_quality
Ranges from 0 to 10, the higher the quality is better.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
OnlineSpeexDecoder(const SpeexOptions &config)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94
int32 speex_bits_frame_size
In bytes.
OnlineSpeexEncoder(const SpeexOptions &config)