online-speex-wrapper.h
Go to the documentation of this file.
1 // online2/online-speex-wrapper.h
2 
3 // Copyright 2014 IMSL, PKU-HKUST (author: Wei Shi)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_ONLINE2_ONLINE_SPEEX_WRAPPER_H_
22 #define KALDI_ONLINE2_ONLINE_SPEEX_WRAPPER_H_
23 
24 #ifdef HAVE_SPEEX
25  #include <speex/speex.h>
26  typedef SpeexBits SPEEXBITS;
27 #else
28  typedef char SPEEXBITS;
29 #endif
30 
31 #include "matrix/kaldi-vector.h"
32 #include "itf/options-itf.h"
33 
34 namespace kaldi {
35 
// Configuration options for the Speex wrappers; both OnlineSpeexEncoder and
// OnlineSpeexDecoder take a const SpeexOptions& in their constructors.
// NOTE(review): the declarations of sample_rate, speex_quality,
// speex_bits_frame_size and speex_wave_frame_size sit on listing lines that
// are blank in this rendering; they are used by the constructor and by
// Register() below.
36 struct SpeexOptions {
41 
46 
54 
58 
// Default configuration: sample_rate = 16000.0, speex_quality = 10,
// speex_bits_frame_size = 106 and speex_wave_frame_size = 320.
59  SpeexOptions(): sample_rate(16000.0),
60  speex_quality(10),
61  speex_bits_frame_size(106),
62  speex_wave_frame_size(320) { }
63 
// Registers the four options with the given options interface under the names
// "sample-rate", "speex-quality", "speex-bits-frame-size" (documented as a
// number of bytes per compressed frame) and "speex-wave-frame-size"
// (documented as a number of samples per waveform frame).
64  void Register(OptionsItf *opts) {
65  opts->Register("sample-rate", &sample_rate, "Sample frequency of the waveform.");
66  opts->Register("speex-quality", &speex_quality, "Speex speech quality.");
67  opts->Register("speex-bits-frame-size", &speex_bits_frame_size,
68  "#bytes of each Speex compressed frame.");
69  opts->Register("speex-wave-frame-size", &speex_wave_frame_size,
70  "#samples of each waveform frame.");
71  }
72 };
73 
75  public:
76  OnlineSpeexEncoder(const SpeexOptions &config);
78 
// Feeds a chunk of waveform samples, with its sample rate, to the encoder.
// NOTE(review): only the declaration is visible here; presumably the encoded
// output is accumulated for GetSpeexBits() and any samples not yet processed
// are kept in waveform_remainder_ -- confirm against the .cc file.
79  void AcceptWaveform(int32 sample_rate,
80  const VectorBase<BaseFloat> &waveform);
81 
82  void InputFinished();
83 
// Copies the encoded bytes buffered in speex_encoded_char_bits_ into
// *spx_bits (replacing its previous contents), then empties the internal
// buffer so the same bytes are not handed out again.
// spx_bits must be a valid, non-null pointer; it is dereferenced unchecked.
84  void GetSpeexBits(std::vector<char> *spx_bits) { // call it after AcceptWaveform
85  *spx_bits = speex_encoded_char_bits_;
86  speex_encoded_char_bits_.clear();
87  }
88  private:
89  int32 speex_frame_size_; // in bytes, will be different according to the quality
90  int32 speex_encoded_frame_size_; // in samples, typically 320 in wideband mode, 16kHz
91 #ifdef HAVE_SPEEX
92  void *speex_state_; // Holds the state of the speex encoder
93 #endif
95 
96  Vector<BaseFloat> waveform_remainder_; // Holds the waveform samples that have not been processed yet
97 
98  // Holds the Speex-encoded char bits; they are copied out and then cleared
99  // by GetSpeexBits(). We use a vector container rather than a char-type
100  // pointer because it's a little easier to expand.
101  std::vector<char> speex_encoded_char_bits_;
102 
105 
// Private helper taking a waveform and an output byte vector.
// NOTE(review): definition not visible in this header; from the signature it
// presumably Speex-encodes `wave` into *speex_encoder_bits -- confirm in the
// .cc file.
106  void Encode(const VectorBase<BaseFloat> &wave,
107  std::vector<char> *speex_encoder_bits) ;
108 };
109 
111  public:
112  OnlineSpeexDecoder(const SpeexOptions &config);
114 
// Feeds a chunk of Speex-encoded bytes to the decoder.
// NOTE(review): only the declaration is visible here; presumably the decoded
// samples are accumulated in waveform_ for GetWaveform() and any incomplete
// trailing bytes are kept in speex_bits_remainder_ -- confirm in the .cc file.
115  void AcceptSpeexBits(const std::vector<char> &spx_enc_bits);
116 
// Copies the decoded samples held in waveform_ into *waveform, then resizes
// waveform_ to length 0 so the same samples are not returned again.
// waveform must be a valid, non-null pointer; it is dereferenced unchecked.
117  void GetWaveform(Vector<BaseFloat> *waveform) { // call it after AcceptSpeexBits
118  *waveform = waveform_;
119  waveform_.Resize(0);
120  }
121  private:
122  int32 speex_frame_size_; // in bytes, will be different according to the quality
123  int32 speex_decoded_frame_size_; // in samples, typically 320 in wideband mode, 16kHz
124 
125 #ifdef HAVE_SPEEX
126  void *speex_state_; // Holds the state of the speex decoder
127 #endif
129 
130 
131  Vector<BaseFloat> waveform_; // Holds the waveform decoded from speex bits
132  std::vector<char> speex_bits_remainder_;
133 
// Private helper taking Speex-encoded bytes and an output waveform vector.
// NOTE(review): definition not visible in this header; from the signature it
// presumably decodes `speex_char_bits` into *decoded_wav -- confirm in the
// .cc file.
134  void Decode(const std::vector<char> &speex_char_bits,
135  Vector<BaseFloat> *decoded_wav) ;
136 };
137 
138 } // namespace kaldi
139 
140 #endif // KALDI_ONLINE2_ONLINE_SPEEX_WRAPPER_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void GetWaveform(Vector< BaseFloat > *waveform)
std::vector< char > speex_encoded_char_bits_
Vector< BaseFloat > waveform_remainder_
kaldi::int32 int32
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
char SPEEXBITS
int32 speex_wave_frame_size
In samples.
std::vector< char > speex_bits_remainder_
BaseFloat sample_rate
The sample frequency of the waveform; it determines which Speex mode to use.
A class representing a vector.
Definition: kaldi-vector.h:406
void Register(OptionsItf *opts)
int32 speex_quality
Ranges from 0 to 10; a higher value gives better quality.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void GetSpeexBits(std::vector< char > *spx_bits)
int32 speex_bits_frame_size
In bytes.