online-feature-test.cc
Go to the documentation of this file.
1 // feat/online-feature-test.cc
2 
3 // Copyright 2014 IMSL, PKU-HKUST (author: Wei Shi)
4 // Copyright 2014 Yanqing Sun, Junjie Wang,
5 // Daniel Povey, Korbinian Riedhammer
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #include "feat/online-feature.h"
23 #include "feat/wave-reader.h"
24 #include "matrix/kaldi-matrix.h"
26 
27 namespace kaldi {
28 
30  Matrix<BaseFloat> *output) {
31  int32 dim = a->Dim();
32  int32 frame_num = 0;
33  OnlineCacheFeature cache(a);
34 
35  std::vector<Vector<BaseFloat>* > cached_frames;
36  while (true) {
37  Vector<BaseFloat> garbage(dim);
38  cache.GetFrame(frame_num, &garbage);
39  cached_frames.push_back(new Vector<BaseFloat>(garbage));
40  if (cache.IsLastFrame(frame_num))
41  break;
42  frame_num++;
43  }
44 
45  KALDI_ASSERT(cached_frames.size() == a->NumFramesReady());
46 
47  output->Resize(cached_frames.size(), dim);
48  for (int32 i = 0; i < cached_frames.size(); i++) {
49  output->CopyRowFromVec(*(cached_frames[i]), i);
50  delete cached_frames[i];
51  }
52  cached_frames.clear();
53  cache.ClearCache();
54 }
55 
56 // Only generate random length for each piece
57 bool RandomSplit(int32 wav_dim,
58  std::vector<int32> *piece_dim,
59  int32 num_pieces,
60  int32 trials = 5) {
61  piece_dim->clear();
62  piece_dim->resize(num_pieces, 0);
63 
64  int32 dim_mean = wav_dim / (num_pieces * 2);
65  int32 cnt = 0;
66  while (true) {
67  int32 dim_total = 0;
68  for (int32 i = 0; i < num_pieces - 1; i++) {
69  (*piece_dim)[i] = dim_mean + rand() % dim_mean;
70  dim_total += (*piece_dim)[i];
71  }
72  (*piece_dim)[num_pieces - 1] = wav_dim - dim_total;
73 
74  if (dim_total > 0 && dim_total < wav_dim)
75  break;
76  if (++cnt > trials)
77  return false;
78  }
79  return true;
80 }
81 
82 // test the OnlineMatrixFeature and OnlineCacheFeature classes.
84  int32 dim = 2 + rand() % 5; // dimension of features.
85  int32 num_frames = 100 + rand() % 100;
86 
87  Matrix<BaseFloat> input_feats(num_frames, dim);
88  input_feats.SetRandn();
89 
90  OnlineMatrixFeature matrix_feats(input_feats);
91 
92  Matrix<BaseFloat> output_feats;
93  GetOutput(&matrix_feats, &output_feats);
94  AssertEqual(input_feats, output_feats);
95 }
96 
98  int32 dim = 2 + rand() % 5; // dimension of features.
99  int32 num_frames = 100 + rand() % 100;
101  opts.order = rand() % 3;
102  opts.window = 1 + rand() % 3;
103 
104  int32 output_dim = dim * (1 + opts.order);
105 
106  Matrix<BaseFloat> input_feats(num_frames, dim);
107  input_feats.SetRandn();
108 
109  OnlineMatrixFeature matrix_feats(input_feats);
110  OnlineDeltaFeature delta_feats(opts, &matrix_feats);
111 
112  Matrix<BaseFloat> output_feats1;
113  GetOutput(&delta_feats, &output_feats1);
114 
115  Matrix<BaseFloat> output_feats2(num_frames, output_dim);
116  ComputeDeltas(opts, input_feats, &output_feats2);
117 
118  KALDI_ASSERT(output_feats1.ApproxEqual(output_feats2));
119 }
120 
122  int32 dim = 2 + rand() % 5; // dimension of features.
123  int32 num_frames = 100 + rand() % 100;
124  OnlineSpliceOptions opts;
125  opts.left_context = 1 + rand() % 4;
126  opts.right_context = 1 + rand() % 4;
127 
128  int32 output_dim = dim * (1 + opts.left_context + opts.right_context);
129 
130  Matrix<BaseFloat> input_feats(num_frames, dim);
131  input_feats.SetRandn();
132 
133  OnlineMatrixFeature matrix_feats(input_feats);
134  OnlineSpliceFrames splice_frame(opts, &matrix_feats);
135 
136  Matrix<BaseFloat> output_feats1;
137  GetOutput(&splice_frame, &output_feats1);
138 
139  Matrix<BaseFloat> output_feats2(num_frames, output_dim);
140  SpliceFrames(input_feats, opts.left_context, opts.right_context,
141  &output_feats2);
142 
143  KALDI_ASSERT(output_feats1.ApproxEqual(output_feats2));
144 }
145 
147  std::ifstream is("../feat/test_data/test.wav", std::ios_base::binary);
148  WaveData wave;
149  wave.Read(is);
150  KALDI_ASSERT(wave.Data().NumRows() == 1);
151  SubVector<BaseFloat> waveform(wave.Data(), 0);
152 
153  // the parametrization object
154  MfccOptions op;
155  op.frame_opts.dither = 0.0;
156  op.frame_opts.preemph_coeff = 0.0;
157  op.frame_opts.window_type = "hamming";
158  op.frame_opts.remove_dc_offset = false;
159  op.frame_opts.round_to_power_of_two = true;
160  op.frame_opts.samp_freq = wave.SampFreq();
161  op.mel_opts.low_freq = 0.0;
162  op.htk_compat = false;
163  op.use_energy = false; // C0 not energy.
164  if (RandInt(0, 1) == 0)
165  op.frame_opts.snip_edges = false;
166  Mfcc mfcc(op);
167 
168  // compute mfcc offline
169  Matrix<BaseFloat> mfcc_feats;
170  mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported
171 
172  // compare
173  // The test waveform is about 1.44s long, so
174  // we try to break it into from 5 pieces to 9(not essential to do so)
175  for (int32 num_piece = 5; num_piece < 10; num_piece++) {
176  OnlineMfcc online_mfcc(op);
177  std::vector<int32> piece_length(num_piece, 0);
178 
179  bool ret = RandomSplit(waveform.Dim(), &piece_length, num_piece);
180  KALDI_ASSERT(ret);
181 
182  int32 offset_start = 0;
183  for (int32 i = 0; i < num_piece; i++) {
184  Vector<BaseFloat> wave_piece(
185  waveform.Range(offset_start, piece_length[i]));
186  online_mfcc.AcceptWaveform(wave.SampFreq(), wave_piece);
187  offset_start += piece_length[i];
188  }
189  online_mfcc.InputFinished();
190 
191  Matrix<BaseFloat> online_mfcc_feats;
192  GetOutput(&online_mfcc, &online_mfcc_feats);
193 
194  AssertEqual(mfcc_feats, online_mfcc_feats);
195  }
196 }
197 
199  std::ifstream is("../feat/test_data/test.wav", std::ios_base::binary);
200  WaveData wave;
201  wave.Read(is);
202  KALDI_ASSERT(wave.Data().NumRows() == 1);
203  SubVector<BaseFloat> waveform(wave.Data(), 0);
204 
205  // the parametrization object
206  PlpOptions op;
207  op.frame_opts.dither = 0.0;
208  op.frame_opts.preemph_coeff = 0.0;
209  op.frame_opts.window_type = "hamming";
210  op.frame_opts.remove_dc_offset = false;
211  op.frame_opts.round_to_power_of_two = true;
212  op.frame_opts.samp_freq = wave.SampFreq();
213  op.mel_opts.low_freq = 0.0;
214  op.htk_compat = false;
215  op.use_energy = false; // C0 not energy.
216  Plp plp(op);
217 
218  // compute plp offline
219  Matrix<BaseFloat> plp_feats;
220  plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported
221 
222  // compare
223  // The test waveform is about 1.44s long, so
224  // we try to break it into from 5 pieces to 9(not essential to do so)
225  for (int32 num_piece = 5; num_piece < 10; num_piece++) {
226  OnlinePlp online_plp(op);
227  std::vector<int32> piece_length(num_piece);
228  bool ret = RandomSplit(waveform.Dim(), &piece_length, num_piece);
229  KALDI_ASSERT(ret);
230 
231  int32 offset_start = 0;
232  for (int32 i = 0; i < num_piece; i++) {
233  Vector<BaseFloat> wave_piece(
234  waveform.Range(offset_start, piece_length[i]));
235  online_plp.AcceptWaveform(wave.SampFreq(), wave_piece);
236  offset_start += piece_length[i];
237  }
238  online_plp.InputFinished();
239 
240  Matrix<BaseFloat> online_plp_feats;
241  GetOutput(&online_plp, &online_plp_feats);
242 
243  AssertEqual(plp_feats, online_plp_feats);
244  }
245 }
246 
248  std::ifstream is("../feat/test_data/test.wav", std::ios_base::binary);
249  WaveData wave;
250  wave.Read(is);
251  KALDI_ASSERT(wave.Data().NumRows() == 1);
252  SubVector<BaseFloat> waveform(wave.Data(), 0);
253 
254  // build online feature interface, take OnlineMfcc as an example
255  MfccOptions op;
256  op.frame_opts.dither = 0.0;
257  op.frame_opts.preemph_coeff = 0.0;
258  op.frame_opts.window_type = "hamming";
259  op.frame_opts.remove_dc_offset = false;
260  op.frame_opts.round_to_power_of_two = true;
261  op.frame_opts.samp_freq = wave.SampFreq();
262  op.mel_opts.low_freq = 0.0;
263  op.htk_compat = false;
264  op.use_energy = false; // C0 not energy.
265  OnlineMfcc online_mfcc(op);
266 
267  online_mfcc.AcceptWaveform(wave.SampFreq(), waveform);
268  online_mfcc.InputFinished();
269  Matrix<BaseFloat> mfcc_feats;
270  GetOutput(&online_mfcc, &mfcc_feats);
271 
272  // Affine transform
273  Matrix<BaseFloat> trans(online_mfcc.Dim(), online_mfcc.Dim() + 1);
274  trans.SetRandn();
275  OnlineTransform online_trans(trans, &online_mfcc);
276 
277  Matrix<BaseFloat> trans_feats;
278  GetOutput(&online_trans, &trans_feats);
279 
280  Matrix<BaseFloat> output_feats(mfcc_feats.NumRows(), mfcc_feats.NumCols());
281  for (int32 i = 0; i < mfcc_feats.NumRows(); i++) {
282  Vector<BaseFloat> vec_tmp(mfcc_feats.Row(i));
283  ApplyAffineTransform(trans, &vec_tmp);
284  output_feats.CopyRowFromVec(vec_tmp, i);
285  }
286 
287  AssertEqual(trans_feats, output_feats);
288 }
289 
291  std::ifstream is("../feat/test_data/test.wav", std::ios_base::binary);
292  WaveData wave;
293  wave.Read(is);
294  KALDI_ASSERT(wave.Data().NumRows() == 1);
295  SubVector<BaseFloat> waveform(wave.Data(), 0);
296 
297  // the parametrization object for 1st stream mfcc feature
298  MfccOptions mfcc_op;
299  mfcc_op.frame_opts.dither = 0.0;
300  mfcc_op.frame_opts.preemph_coeff = 0.0;
301  mfcc_op.frame_opts.window_type = "hamming";
302  mfcc_op.frame_opts.remove_dc_offset = false;
303  mfcc_op.frame_opts.round_to_power_of_two = true;
304  mfcc_op.frame_opts.samp_freq = wave.SampFreq();
305  mfcc_op.mel_opts.low_freq = 0.0;
306  mfcc_op.htk_compat = false;
307  mfcc_op.use_energy = false; // C0 not energy.
308  Mfcc mfcc(mfcc_op);
309 
310  // compute mfcc offline
311  Matrix<BaseFloat> mfcc_feats;
312  mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported
313 
314  // the parametrization object for 2nd stream plp feature
315  PlpOptions plp_op;
316  plp_op.frame_opts.dither = 0.0;
317  plp_op.frame_opts.preemph_coeff = 0.0;
318  plp_op.frame_opts.window_type = "hamming";
319  plp_op.frame_opts.remove_dc_offset = false;
320  plp_op.frame_opts.round_to_power_of_two = true;
321  plp_op.frame_opts.samp_freq = wave.SampFreq();
322  plp_op.mel_opts.low_freq = 0.0;
323  plp_op.htk_compat = false;
324  plp_op.use_energy = false; // C0 not energy.
325  Plp plp(plp_op);
326 
327  // compute plp offline
328  Matrix<BaseFloat> plp_feats;
329  plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported
330 
331  // compare
332  // The test waveform is about 1.44s long, so
333  // we try to break it into from 5 pieces to 9(not essential to do so)
334  for (int32 num_piece = 5; num_piece < 10; num_piece++) {
335  OnlineMfcc online_mfcc(mfcc_op);
336  OnlinePlp online_plp(plp_op);
337  OnlineAppendFeature online_mfcc_plp(&online_mfcc, &online_plp);
338 
339  std::vector<int32> piece_length(num_piece);
340  bool ret = RandomSplit(waveform.Dim(), &piece_length, num_piece);
341  KALDI_ASSERT(ret);
342  int32 offset_start = 0;
343  for (int32 i = 0; i < num_piece; i++) {
344  Vector<BaseFloat> wave_piece(
345  waveform.Range(offset_start, piece_length[i]));
346  online_mfcc.AcceptWaveform(wave.SampFreq(), wave_piece);
347  online_plp.AcceptWaveform(wave.SampFreq(), wave_piece);
348  offset_start += piece_length[i];
349  }
350  online_mfcc.InputFinished();
351  online_plp.InputFinished();
352 
353  Matrix<BaseFloat> online_mfcc_plp_feats;
354  GetOutput(&online_mfcc_plp, &online_mfcc_plp_feats);
355 
356  // compare mfcc_feats & plp_features with online_mfcc_plp_feats
357  KALDI_ASSERT(mfcc_feats.NumRows() == online_mfcc_plp_feats.NumRows()
358  && plp_feats.NumRows() == online_mfcc_plp_feats.NumRows()
359  && mfcc_feats.NumCols() + plp_feats.NumCols()
360  == online_mfcc_plp_feats.NumCols());
361  for (MatrixIndexT i = 0; i < online_mfcc_plp_feats.NumRows(); i++) {
362  for (MatrixIndexT j = 0; j < mfcc_feats.NumCols(); j++) {
363  KALDI_ASSERT(std::abs(mfcc_feats(i, j) - online_mfcc_plp_feats(i, j))
364  < 0.0001*std::max(1.0, static_cast<double>(std::abs(mfcc_feats(i, j))
365  + std::abs(online_mfcc_plp_feats(i, j)))));
366  }
367  for (MatrixIndexT k = 0; k < plp_feats.NumCols(); k++) {
368  KALDI_ASSERT(
369  std::abs(plp_feats(i, k) -
370  online_mfcc_plp_feats(i, mfcc_feats.NumCols() + k))
371  < 0.0001*std::max(1.0, static_cast<double>(std::abs(plp_feats(i, k))
372  +std::abs(online_mfcc_plp_feats(i, mfcc_feats.NumCols() + k)))));
373  }
374  }
375  }
376 }
377 
379  RecyclingVector full_vec;
380  RecyclingVector shrinking_vec(10);
381  for (int i = 0; i != 100; ++i) {
382  Vector <BaseFloat> data(1);
383  data.Set(i);
384  full_vec.PushBack(new Vector<BaseFloat>(data));
385  shrinking_vec.PushBack(new Vector<BaseFloat>(data));
386  }
387  KALDI_ASSERT(full_vec.Size() == 100);
388  KALDI_ASSERT(shrinking_vec.Size() == 100);
389 
390  // full_vec should contain everything
391  for (int i = 0; i != 100; ++i) {
392  Vector <BaseFloat> *data = full_vec.At(i);
393  KALDI_ASSERT(data != nullptr);
394  KALDI_ASSERT((*data)(0) == static_cast<BaseFloat>(i));
395  }
396 
397  // shrinking_vec may throw an exception for the first 90 elements
398  int caught_exceptions = 0;
399  for (int i = 0; i != 90; ++i) {
400  try {
401  shrinking_vec.At(i);
402  } catch (const std::runtime_error &) {
403  ++caught_exceptions;
404  }
405  }
406  // it may actually store a bit more elements for performance efficiency considerations
407  KALDI_ASSERT(caught_exceptions >= 80);
408 
409  // shrinking_vec should contain the last 10 elements
410  for (int i = 90; i != 100; ++i) {
411  Vector <BaseFloat> *data = shrinking_vec.At(i);
412  KALDI_ASSERT(data != nullptr);
413  KALDI_ASSERT((*data)(0) == static_cast<BaseFloat>(i));
414  }
415 }
416 
417 } // end namespace kaldi
418 
// Test driver. Each of the 10 loop iterations calls TestOnlineMfcc() and
// TestOnlinePlp(); "Test OK." is printed once the loop has completed.
// NOTE(review): the listing numbers embedded in this extract jump from 421 to
// 425 and from 426 to 430, so calls to further tests may be missing here;
// confirm against the original file.
419 int main() {
420  using namespace kaldi;
421  for (int i = 0; i < 10; i++) {
425  TestOnlineMfcc();
426  TestOnlinePlp();
430  }
431  std::cout << "Test OK.\n";
432 }
void Read(std::istream &is)
Read() will throw on error.
Definition: wave-reader.cc:272
This class takes a Matrix<BaseFloat> and wraps it as an OnlineFeatureInterface: this can be useful wh...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
void Compute(const VectorBase< BaseFloat > &wave, BaseFloat vtln_warp, Matrix< BaseFloat > *output)
void TestOnlinePlp()
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
MfccOptions contains basic options for computing MFCC features.
Definition: feature-mfcc.h:38
void GetOutput(OnlineFeatureInterface *a, Matrix< BaseFloat > *output)
bool ApproxEqual(const MatrixBase< Real > &other, float tol=0.01) const
Returns true if ((*this)-other).FrobeniusNorm() <= tol * (*this).FrobeniusNorm(). ...
void TestOnlineMatrixCacheFeature()
kaldi::int32 int32
BaseFloat SampFreq() const
Definition: wave-reader.h:126
MelBanksOptions mel_opts
Definition: feature-plp.h:44
const Matrix< BaseFloat > & Data() const
Definition: wave-reader.h:124
void PushBack(Vector< BaseFloat > *item)
The ownership of the item is passed to this collection - do not delete the item.
int Size() const
This method returns the size as if no "recycling" had happened, i.e.
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
This would be called from the application, when you get more wave data.
int32 MatrixIndexT
Definition: matrix-common.h:98
This online-feature class implements combination of two feature streams (such as pitch, plp) into one stream.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
FrameExtractionOptions frame_opts
Definition: feature-plp.h:43
FrameExtractionOptions frame_opts
Definition: feature-mfcc.h:39
void TestOnlineSpliceFrames()
int main()
void TestOnlineDeltaFeature()
void SetRandn()
Sets to random values of a normal distribution.
This online-feature class implements any affine or linear transform.
bool RandomSplit(int32 wav_dim, std::vector< int32 > *piece_dim, int32 num_pieces, int32 trials=5)
This class serves as a storage for feature vectors with an option to limit the memory usage by removi...
void TestOnlineMfcc()
virtual void InputFinished()
InputFinished() tells the class you won't be providing any more waveform.
This class's purpose is to read in Wave files.
Definition: wave-reader.h:106
A class representing a vector.
Definition: kaldi-vector.h:406
void TestOnlineAppendFeature()
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void ComputeDeltas(const DeltaFeaturesOptions &delta_opts, const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)
void Set(Real f)
Set all members of a vector to a specified value.
void SpliceFrames(const MatrixBase< BaseFloat > &input_features, int32 left_context, int32 right_context, Matrix< BaseFloat > *output_features)
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Definition: kaldi-math.h:276
void CopyRowFromVec(const VectorBase< Real > &v, const MatrixIndexT row)
Copy vector into specific row of matrix.
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
PlpOptions contains basic options for computing PLP features.
Definition: feature-plp.h:42
virtual int32 Dim() const
This templated class is intended for offline feature extraction, i.e.
void ApplyAffineTransform(const MatrixBase< BaseFloat > &xform, VectorBase< BaseFloat > *vec)
Applies the affine transform 'xform' to the vector 'vec' and overwrites the contents of 'vec'...
This is a templated class for online feature extraction; it's templated on a class like MfccComputer ...
Vector< BaseFloat > * At(int index) const
The ownership is being retained by this collection - do not delete the item.
void TestRecyclingVector()
This feature type can be used to cache its input, to avoid repetition of computation in a multi-pass ...
void TestOnlineTransform()
virtual int32 NumFramesReady() const =0
Returns the number of frames that are currently available.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
virtual int32 Dim() const =0
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95