feature-fbank-test.cc
Go to the documentation of this file.
1 // feat/feature-fbank-test.cc
2 
3 // Copyright 2009-2011 Karel Vesely; Petr Motlicek
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include <iostream>
22 
23 #include "feat/feature-fbank.h"
24 #include "base/kaldi-math.h"
26 #include "feat/wave-reader.h"
27 
28 using namespace kaldi;
29 
30 
31 
32 static void UnitTestReadWave() {
33 
34  std::cout << "=== UnitTestReadWave() ===\n";
35 
36  Vector<BaseFloat> v, v2;
37 
38  std::cout << "<<<=== Reading waveform\n";
39 
40  {
41  std::ifstream is("test_data/test.wav", std::ios_base::binary);
42  WaveData wave;
43  wave.Read(is);
44  const Matrix<BaseFloat> data(wave.Data());
45  KALDI_ASSERT(data.NumRows() == 1);
46  v.Resize(data.NumCols());
47  v.CopyFromVec(data.Row(0));
48  }
49 
50  std::cout << "<<<=== Reading Vector<BaseFloat> waveform, prepared by matlab\n";
51  std::ifstream input(
52  "test_data/test_matlab.ascii"
53  );
54  KALDI_ASSERT(input.good());
55  v2.Read(input, false);
56  input.close();
57 
58  std::cout << "<<<=== Comparing freshly read waveform to 'libsndfile' waveform\n";
59  KALDI_ASSERT(v.Dim() == v2.Dim());
60  for (int32 i = 0; i < v.Dim(); i++) {
61  KALDI_ASSERT(v(i) == v2(i));
62  }
63  std::cout << "<<<=== Comparing done\n";
64 
65  // std::cout << "== The Waveform Samples == \n";
66  // std::cout << v;
67 
68  std::cout << "Test passed :)\n\n";
69 
70 }
71 
72 
73 
76 static void UnitTestSimple() {
77  std::cout << "=== UnitTestSimple() ===\n";
78 
79  Vector<BaseFloat> v(100000);
81 
82  // init with noise
83  for (int32 i = 0; i < v.Dim(); i++) {
84  v(i) = (abs( i * 433024253 ) % 65535) - (65535 / 2);
85  }
86 
87  std::cout << "<<<=== Just make sure it runs... Nothing is compared\n";
88  // the parametrization object
89  FbankOptions op;
90  // trying to have same opts as baseline.
91  op.frame_opts.dither = 0.0;
92  op.frame_opts.preemph_coeff = 0.0;
93  op.frame_opts.window_type = "rectangular";
94  op.frame_opts.remove_dc_offset = false;
96  op.mel_opts.low_freq = 0.0;
97  op.htk_compat = true;
98  op.use_energy = true;
99 
100  Fbank fbank(op);
101  // use default parameters
102 
103  // compute fbanks.
104  fbank.Compute(v, 1.0, &m);
105 
106  // possibly dump
107  // std::cout << "== Output features == \n" << m;
108  std::cout << "Test passed :)\n\n";
109 }
110 
111 
112 static void UnitTestHTKCompare1() {
113  std::cout << "=== UnitTestHTKCompare1() ===\n";
114 
115  std::ifstream is("test_data/test.wav", std::ios_base::binary);
116  WaveData wave;
117  wave.Read(is);
118  KALDI_ASSERT(wave.Data().NumRows() == 1);
119  SubVector<BaseFloat> waveform(wave.Data(), 0);
120 
121  // read the HTK features
122  Matrix<BaseFloat> htk_features;
123  {
124  std::ifstream is("test_data/test.wav.fbank_htk.1",
125  std::ios::in | std::ios_base::binary);
126  bool ans = ReadHtk(is, &htk_features, 0);
127  KALDI_ASSERT(ans);
128  }
129 
130  // use fbank with default configuration...
131  FbankOptions op;
132  op.frame_opts.dither = 0.0;
133  op.frame_opts.preemph_coeff = 0.0;
134  op.frame_opts.window_type = "hamming";
135  op.frame_opts.remove_dc_offset = false;
137  op.mel_opts.low_freq = 0.0;
138  op.htk_compat = true;
139  op.mel_opts.htk_mode = true;
140  op.use_energy = false; // C0 not energy.
141 
142  Fbank fbank(op);
143 
144  // calculate kaldi features
145  Matrix<BaseFloat> kaldi_features;
146  fbank.Compute(waveform, 1.0, &kaldi_features);
147 
148 
149  std::cout << "<<<=== Compare with HTK features...\n";
150  // compare the results
151  bool passed = true;
152  int32 i_old = -1;
153  KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
154  KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
155  // Ignore ends-- we make slightly different choices than
156  // HTK about how to treat the deltas at the ends.
157  for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) {
158  for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
159  BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
161  if ((std::abs(b - a)) > 0.001) { //<< TOLERANCE TO DIFFERENCES!!!!!
162  // print the non-matching data only once per-line
163  if (i_old != i) {
164  std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n";
165  std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n";
166  i_old = i;
167  }
168  // print indices of non-matching cells
169  std::cout << "[" << i << ", " << j << "]";
170  passed = false;
171  }}}
172  if (!passed) KALDI_ERR << "Test failed";
173 
174  // write the htk features for later inspection
175  HtkHeader header = {
176  kaldi_features.NumRows(),
177  100000, // 10ms
178  static_cast<int16>(sizeof(float)*kaldi_features.NumCols()),
179  000007 // FBANK
180  };
181  {
182  std::ofstream os("tmp.test.wav.fbank_kaldi.1",
183  std::ios::out|std::ios::binary);
184  WriteHtk(os, kaldi_features, header);
185  }
186 
187  std::cout << "Test passed :)\n\n";
188 
189  unlink("tmp.test.wav.fbank_kaldi.1");
190 }
191 
192 
193 static void UnitTestHTKCompare2() {
194  std::cout << "=== UnitTestHTKCompare2() ===\n";
195 
196  std::ifstream is("test_data/test.wav", std::ios_base::binary);
197  WaveData wave;
198  wave.Read(is);
199  KALDI_ASSERT(wave.Data().NumRows() == 1);
200  SubVector<BaseFloat> waveform(wave.Data(), 0);
201 
202  // read the HTK features
203  Matrix<BaseFloat> htk_features;
204  {
205  std::ifstream is("test_data/test.wav.fbank_htk.2",
206  std::ios::in | std::ios_base::binary);
207  bool ans = ReadHtk(is, &htk_features, 0);
208  KALDI_ASSERT(ans);
209  }
210 
211  // use fbank with default configuration...
212  FbankOptions op;
213  op.frame_opts.dither = 0.0;
214  op.frame_opts.preemph_coeff = 0.0;
215  op.frame_opts.window_type = "hamming";
216  op.frame_opts.remove_dc_offset = false;
218  op.mel_opts.low_freq = 25.0;
219  op.htk_compat = true;
220  op.mel_opts.htk_mode = true;
221  op.use_energy = false; // C0 not energy.
222 
223  Fbank fbank(op);
224 
225  // calculate kaldi features
226  Matrix<BaseFloat> kaldi_features;
227  fbank.Compute(waveform, 1.0, &kaldi_features);
228 
229 
230  std::cout << "<<<=== Compare with HTK features...\n";
231  // compare the results
232  bool passed = true;
233  int32 i_old = -1;
234  KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
235  KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
236  // Ignore ends-- we make slightly different choices than
237  // HTK about how to treat the deltas at the ends.
238  for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) {
239  for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
240  BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
242  if ((std::abs(b - a)) > 0.001) { //<< TOLERANCE TO DIFFERENCES!!!!!
243  // print the non-matching data only once per-line
244  if (i_old != i) {
245  std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n";
246  std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n";
247  i_old = i;
248  }
249  // print indices of non-matching cells
250  std::cout << "[" << i << ", " << j << "]";
251  passed = false;
252  }}}
253  if (!passed) KALDI_ERR << "Test failed";
254 
255  // write the htk features for later inspection
256  HtkHeader header = {
257  kaldi_features.NumRows(),
258  100000, // 10ms
259  static_cast<int16>(sizeof(float)*kaldi_features.NumCols()),
260  000007 // FBANK
261  };
262  {
263  std::ofstream os("tmp.test.wav.fbank_kaldi.1",
264  std::ios::out|std::ios::binary);
265  WriteHtk(os, kaldi_features, header);
266  }
267 
268  std::cout << "Test passed :)\n\n";
269 
270  unlink("tmp.test.wav.fbank_kaldi.1");
271 }
272 
273 static void UnitTestHTKCompare3() {
274  std::cout << "=== UnitTestHTKCompare3() ===\n";
275 
276  std::ifstream is("test_data/test.wav", std::ios_base::binary);
277  WaveData wave;
278  wave.Read(is);
279  KALDI_ASSERT(wave.Data().NumRows() == 1);
280  SubVector<BaseFloat> waveform(wave.Data(), 0);
281 
282  // read the HTK features
283  Matrix<BaseFloat> htk_features;
284  {
285  std::ifstream is("test_data/test.wav.fbank_htk.3",
286  std::ios::in | std::ios_base::binary);
287  bool ans = ReadHtk(is, &htk_features, 0);
288  KALDI_ASSERT(ans);
289  }
290 
291  // use fbank with default configuration...
292  FbankOptions op;
293  op.frame_opts.dither = 0.0;
294  op.frame_opts.preemph_coeff = 0.0;
295  op.frame_opts.window_type = "hamming";
296  op.frame_opts.remove_dc_offset = false;
298  op.mel_opts.low_freq = 25.0;
299  op.htk_compat = true;
300  op.mel_opts.htk_mode = true;
301  op.use_energy = false; // C0 not energy.
302 
303  op.mel_opts.vtln_low = 100.0;
304  op.mel_opts.vtln_high = 7500.0;
305  BaseFloat vtln_warp = 0.9;
306 
307  Fbank fbank(op);
308 
309  // calculate kaldi features
310  Matrix<BaseFloat> kaldi_features;
311  fbank.Compute(waveform, vtln_warp, &kaldi_features);
312 
313 
314  std::cout << "<<<=== Compare with HTK features...\n";
315  // compare the results
316  bool passed = true;
317  int32 i_old = -1;
318  KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
319  KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
320  // Ignore ends-- we make slightly different choices than
321  // HTK about how to treat the deltas at the ends.
322  for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) {
323  for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
324  BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
326  if ((std::abs(b - a)) > 0.001) { //<< TOLERANCE TO DIFFERENCES!!!!!
327  // print the non-matching data only once per-line
328  if (i_old != i) {
329  std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n";
330  std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n";
331  i_old = i;
332  }
333  // print indices of non-matching cells
334  std::cout << "[" << i << ", " << j << "]";
335  if (j < 20) passed = false; // We know the last couple of filterbanks differ. We let this slide.
336  else KALDI_WARN << "Ignoring difference in last fbanks, we know the algorithms differ.";
337  }}}
338  if (!passed) KALDI_ERR << "Test failed";
339 
340  // write the htk features for later inspection
341  HtkHeader header = {
342  kaldi_features.NumRows(),
343  100000, // 10ms
344  static_cast<int16>(sizeof(float)*kaldi_features.NumCols()),
345  000007 // FBANK
346  };
347  {
348  std::ofstream os("tmp.test.wav.fbank_kaldi.1",
349  std::ios::out|std::ios::binary);
350  WriteHtk(os, kaldi_features, header);
351  }
352 
353  std::cout << "Test passed :)\n\n";
354 
355  unlink("tmp.test.wav.fbank_kaldi.1");
356 }
357 
358 
359 static void UnitTestHTKCompare4() {
360  std::cout << "=== UnitTestHTKCompare4() ===\n";
361 
362  std::ifstream is("test_data/test.wav", std::ios_base::binary);
363  WaveData wave;
364  wave.Read(is);
365  KALDI_ASSERT(wave.Data().NumRows() == 1);
366  SubVector<BaseFloat> waveform(wave.Data(), 0);
367 
368  // read the HTK features
369  Matrix<BaseFloat> htk_features;
370  {
371  std::ifstream is("test_data/test.wav.fbank_htk.4",
372  std::ios::in | std::ios_base::binary);
373  bool ans = ReadHtk(is, &htk_features, 0);
374  KALDI_ASSERT(ans);
375  }
376 
377  // use fbank with default configuration...
378  FbankOptions op;
379  op.frame_opts.dither = 0.0;
380  op.frame_opts.preemph_coeff = 0.0;
381  op.frame_opts.window_type = "hamming";
382  op.frame_opts.remove_dc_offset = false;
384  op.mel_opts.low_freq = 25.0;
385  op.htk_compat = true;
386  op.mel_opts.htk_mode = true;
387  op.use_energy = false; // C0 not energy.
388 
389  op.mel_opts.vtln_low = 100.0;
390  op.mel_opts.vtln_high = 7500.0;
391  BaseFloat vtln_warp = 1.1;
392 
393  Fbank fbank(op);
394 
395  // calculate kaldi features
396  Matrix<BaseFloat> kaldi_features;
397  fbank.Compute(waveform, vtln_warp, &kaldi_features);
398 
399 
400  std::cout << "<<<=== Compare with HTK features...\n";
401  // compare the results
402  bool passed = true;
403  int32 i_old = -1;
404  KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
405  KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
406  // Ignore ends-- we make slightly different choices than
407  // HTK about how to treat the deltas at the ends.
408  for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) {
409  for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
410  BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
412  if ((std::abs(b - a)) > 0.01) { //<< TOLERANCE TO DIFFERENCES!!!!!
413  // print the non-matching data only once per-line
414  if (i_old != i) {
415  std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n";
416  std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n";
417  i_old = i;
418  }
419  // print indices of non-matching cells
420  std::cout << "[" << i << ", " << j << "]";
421  passed = false;
422  }}}
423  if (!passed) KALDI_ERR << "Test failed";
424 
425  // write the htk features for later inspection
426  HtkHeader header = {
427  kaldi_features.NumRows(),
428  100000, // 10ms
429  static_cast<int16>(sizeof(float)*kaldi_features.NumCols()),
430  000007 // FBANK
431  };
432  {
433  std::ofstream os("tmp.test.wav.fbank_kaldi.1",
434  std::ios::out|std::ios::binary);
435  WriteHtk(os, kaldi_features, header);
436  }
437 
438  std::cout << "Test passed :)\n\n";
439 
440  unlink("tmp.test.wav.fbank_kaldi.1");
441 }
442 
443 
444 
445 
446 static void UnitTestFeat() {
448  UnitTestSimple();
453 }
454 
455 
456 
457 
458 int main() {
459  try {
460  for (int i = 0; i < 5; i++)
461  UnitTestFeat();
462  std::cout << "Tests succeeded.\n";
463  return 0;
464  } catch (const std::exception &e) {
465  std::cerr << e.what();
466  return 1;
467  }
468 }
469 
470 
void Read(std::istream &is)
Read() will throw on error.
Definition: wave-reader.cc:272
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
static void UnitTestHTKCompare1()
static void UnitTestReadWave()
void Compute(const VectorBase< BaseFloat > &wave, BaseFloat vtln_warp, Matrix< BaseFloat > *output)
static void UnitTestHTKCompare4()
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
static void UnitTestHTKCompare3()
static void UnitTestFeat()
bool WriteHtk(std::ostream &os, const MatrixBase< Real > &M, HtkHeader htk_hdr)
static void UnitTestSimple()
kaldi::int32 int32
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
const Matrix< BaseFloat > & Data() const
Definition: wave-reader.h:124
int main()
FrameExtractionOptions frame_opts
Definition: feature-fbank.h:42
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
float BaseFloat
Definition: kaldi-types.h:29
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
This class's purpose is to read in Wave files.
Definition: wave-reader.h:106
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
MelBanksOptions mel_opts
Definition: feature-fbank.h:43
This templated class is intended for offline feature extraction, i.e.
FbankOptions contains basic options for computing filterbank features.
Definition: feature-fbank.h:41
bool ReadHtk(std::istream &is, Matrix< Real > *M_ptr, HtkHeader *header_ptr)
Extension of the HTK header.
A structure containing the HTK header.
Definition: kaldi-matrix.h:955
static void UnitTestHTKCompare2()
void Read(std::istream &in, bool binary, bool add=false)
Read function using C++ streams.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501