pitch-functions-test.cc
Go to the documentation of this file.
1 // feat/pitch-functions-test.cc
2 
3 // Copyright 2013 Pegah Ghahremani
4 // 2014 IMSL, PKU-HKUST (author: Wei Shi)
5 // 2014 Yanqing Sun, Junjie Wang,
6 // Daniel Povey, Korbinian Riedhammer
7 // Xin Lei
8 
9 // See ../../COPYING for clarification regarding multiple authors
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
19 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
20 // MERCHANTABLITY OR NON-INFRINGEMENT.
21 // See the Apache 2 License for the specific language governing permissions and
22 // limitations under the License.
23 
24 
25 #include <iostream>
26 
27 #include "base/kaldi-math.h"
28 #include "feat/feature-plp.h"
29 #include "feat/pitch-functions.h"
30 #include "feat/wave-reader.h"
31 #include "sys/stat.h"
32 #include "sys/types.h"
33 #include "base/timer.h"
34 
35 
36 namespace kaldi {
37 
// Returns the decimal text representation of "number" (e.g. 42 -> "42").
std::string ConvertIntToString(const int &number) {
  std::ostringstream formatter;  // output-only stream is all we need here
  formatter << number;
  return formatter.str();
}
43 bool DirExist(const std::string &dirname) {
44  struct stat st;
45  if (stat(dirname.c_str(), &st) != 0) {
46  KALDI_LOG << " directory " << dirname << " does not exist!";
47  return false;
48  }
49  return true;
50 }
51 
52 static void UnitTestSimple() {
53  KALDI_LOG << "=== UnitTestSimple() ===";
54  Vector<BaseFloat> v(1000);
55  Matrix<BaseFloat> m1, m2;
56  // init with noise
57  for (int32 i = 0; i < v.Dim(); i++) {
58  v(i) = (abs(i * 433024253) % 65535) - (65535 / 2);
59  }
60  KALDI_LOG << "<<<=== Just make sure it runs... Nothing is compared";
61  // trying to compute and process pitch with same opts as baseline.
64  ComputeAndProcessKaldiPitch(op1, op2, v, &m1);
65  KALDI_LOG << "Test passed :)";
66 }
67 
68 // Make sure the snip edges options works as expected, i.e.
69 // disabling the option should introduce a delay equivalent to
70 // half the window length
71 static void UnitTestSnipEdges() {
72  KALDI_LOG << "=== UnitTestSnipEdges() ===\n";
73  PitchExtractionOptions op_SnipEdges, op_NoSnipEdges;
74  Matrix<BaseFloat> m1, m2;
76  int nbad = 0;
77 
78  // Load test wave file
79  WaveData wave;
80  {
81  std::ifstream is("test_data/test.wav");
82  wave.Read(is);
83  }
84  KALDI_ASSERT(wave.Data().NumRows() == 1);
85  SubVector<BaseFloat> waveform(wave.Data(), 0);
86 
87  // Process files with snip edge enabled or disabled, on various
88  // frame shifts and frame lengths
89  for (int fs = 4; fs <= 10; fs += 2) {
90  for (int wl = 20; wl <= 100; wl += 20) {
91  // Rather dirty way to round, but works fine
92  int32 ms_fs = (int32)(wave.SampFreq() * 0.001 * fs + 0.5);
93  int32 ms_wl = (int32)(wave.SampFreq() * 0.001 * wl + 0.5);
94  op_SnipEdges.snip_edges = true;
95  op_SnipEdges.frame_shift_ms = fs;
96  op_SnipEdges.frame_length_ms = wl;
97  op_NoSnipEdges.snip_edges = false;
98  op_NoSnipEdges.frame_shift_ms = fs;
99  op_NoSnipEdges.frame_length_ms = wl;
100  ComputeAndProcessKaldiPitch(op_SnipEdges, opp, waveform, &m1);
101  ComputeAndProcessKaldiPitch(op_NoSnipEdges, opp, waveform, &m2);
102 
103  // Check the output differ in a predictable manner:
104  // 1. The length of the output should only depend on the window size & window shift
105  KALDI_LOG << "Output: " << m1.NumRows() << " ; " << m2.NumRows();
106  // - with snip edges disabled, depends on file size and frame shift only */
107  AssertEqual(m2.NumRows(), ((int)(wave.Data().NumCols() + ms_fs / 2)) / ms_fs);
108  // - with snip edges disabled, depend on file size, frame shift, frame length */
109  AssertEqual(m1.NumRows(), ((int)(wave.Data().NumCols() - ms_wl + ms_fs)) / ms_fs);
110  // 2. The signal should be delayed in a predictable manner
111  Vector<BaseFloat> f0_1(m1.NumRows());
112  f0_1.CopyColFromMat(m1, 1);
113  Vector<BaseFloat> f0_2(m2.NumRows());
114  f0_2.CopyColFromMat(m2, 1);
115 
116  BaseFloat bcorr = -1;
117  int32 blag = -1;
118  int32 max_lag = wl / fs * 2;
119  int num_frames_f0 = m1.NumRows() - max_lag;
120 
121  /* Looks for the best correlation between the output signals,
122  identify the lag, compares it with theoretical value */
123  SubVector<BaseFloat> sub_vec1(f0_1, 0, num_frames_f0);
124  for (int32 lag = 0; lag < max_lag + 1; lag++) {
125  SubVector<BaseFloat> sub_vec2(f0_2, lag, num_frames_f0);
126  BaseFloat corr = VecVec(sub_vec1, sub_vec2);
127  if (corr > bcorr) {
128  bcorr = corr;
129  blag = lag;
130  }
131  }
132  KALDI_LOG << "Best lag: " << blag * fs << "ms with value: " << bcorr <<
133  "; expected lag: " << wl / 2 + 10 - fs / 2 << " ± " << fs;
134  // BP: the lag should in theory be equal to wl / 2 - fs / 2, but it seems
135  // to be: wl / 2 + 10 - fs / 2! It appears the 10 ms comes from the nccf_lag which
136  // is 82 samples with the default settings => nccf_lag / resample_freq / 2 => 10.25ms
137  // We should really be using the full_frame_length of the algorithm for accurate results,
138  // but there is no method to obtain it (and it is potentially variable), so that makes
139  // the pitch value *with snip edge* particularly unreliable.
140  if (!ApproxEqual(blag * fs, (BaseFloat)(wl / 2 + 10 - fs / 2), (BaseFloat)fs / wl)) {
141  KALDI_WARN << "Bad lag for window size " << wl << " and frame shift " << fs;
142  nbad++;
143  }
144  /*AssertEqual(blag * fs, (BaseFloat)(wl / 2 + 10 - fs / 2), (BaseFloat)fs / wl);*/
145  }
146  }
147  /* If more than 10% of tests fail, crash */
148  if (nbad > 9) KALDI_ERR << "Too many bad lags: " << nbad;
149 
150 }
151 
152 // Make sure that doing a calculation on the whole waveform gives
153 // the same results as doing on the waveform broken into pieces.
154 static void UnitTestPieces() {
155  KALDI_LOG << "=== UnitTestPieces() ===\n";
156  for (int32 n = 0; n < 10; n++) {
157  // the parametrization object
160  op2.delta_pitch_noise_stddev = 0.0; // to avoid mismatch of delta_log_pitch
161  // brought by rand noise.
162  op1.nccf_ballast_online = true; // this is necessary for the computation
163  // to be identical regardless how many pieces we break the signal into.
164 
165  int32 size = 10000 + rand() % 50000;
166 
167  Vector<BaseFloat> v(size);
168  // init with noise plus a sine-wave whose frequency is changing randomly.
169 
170  double cur_freq = 200.0, normalized_time = 0.0;
171 
172  for (int32 i = 0; i < size; i++) {
173  v(i) = RandGauss() + cos(normalized_time * M_2PI);
174  cur_freq += RandGauss(); // let the frequency wander a little.
175  if (cur_freq < 100.0) cur_freq = 100.0;
176  if (cur_freq > 300.0) cur_freq = 300.0;
177  normalized_time += cur_freq / op1.samp_freq;
178  }
179 
180  Matrix<BaseFloat> m1, m1p;
181 
182  // trying to have same opts as baseline.
183  ComputeKaldiPitch(op1, v, &m1);
184  ProcessPitch(op2, m1, &m1p);
185 
186  Matrix<BaseFloat> m2, m2p;
187 
188  { // compute it online with multiple pieces.
189  OnlinePitchFeature pitch_extractor(op1);
190  OnlineProcessPitch process_pitch(op2, &pitch_extractor);
191  int32 start_samp = 0;
192  while (start_samp < v.Dim()) {
193  int32 num_samp = rand() % (v.Dim() + 1 - start_samp);
194  SubVector<BaseFloat> v_part(v, start_samp, num_samp);
195  pitch_extractor.AcceptWaveform(op1.samp_freq, v_part);
196  start_samp += num_samp;
197  }
198  pitch_extractor.InputFinished();
199  int32 num_frames = pitch_extractor.NumFramesReady();
200  m2.Resize(num_frames, 2);
201  m2p.Resize(num_frames, process_pitch.Dim());
202  for (int32 frame = 0; frame < num_frames; frame++) {
203  SubVector<BaseFloat> row(m2, frame);
204  pitch_extractor.GetFrame(frame, &row);
205  SubVector<BaseFloat> rowp(m2p, frame);
206  process_pitch.GetFrame(frame, &rowp);
207  }
208  }
209  AssertEqual(m1, m2);
210  if (!ApproxEqual(m1p, m2p)) {
211  KALDI_ERR << "Post-processed pitch differs: " << m1p << " vs. " << m2p;
212  }
213  KALDI_LOG << "Test passed :)\n";
214  }
215 }
216 
217 // Make sure that the delayed output matches the non-delayed
218 // version in the online scenario.
219 static void UnitTestDelay() {
220  KALDI_LOG << "=== UnitTestDelay() ===\n";
221  for (int32 n = 0; n < 10; n++) {
222  // the parametrization object
223  PitchExtractionOptions ext_opt;
224  ProcessPitchOptions pro_opt1, pro_opt2;
225  pro_opt1.delta_pitch_noise_stddev = 0.0; // to avoid mismatch of delta_log_pitch
226  // brought by rand noise.
227  pro_opt2.delta_pitch_noise_stddev = 0.0; // to avoid mismatch of delta_log_pitch
228  // brought by rand noise.
229  pro_opt2.delay = rand() % 50;
230  ext_opt.nccf_ballast_online = true; // this is necessary for the computation
231  // to be identical regardless how many pieces we break the signal into.
232 
233  int32 size = 1000 + rand() % 5000;
234 
235  Vector<BaseFloat> v(size);
236  // init with noise plus a sine-wave whose frequency is changing randomly.
237 
238  double cur_freq = 200.0, normalized_time = 0.0;
239 
240  for (int32 i = 0; i < size; i++) {
241  v(i) = RandGauss() + cos(normalized_time * M_2PI);
242  cur_freq += RandGauss(); // let the frequency wander a little.
243  if (cur_freq < 100.0) cur_freq = 100.0;
244  if (cur_freq > 300.0) cur_freq = 300.0;
245  normalized_time += cur_freq / ext_opt.samp_freq;
246  }
247 
248  Matrix<BaseFloat> m1, m2;
249  // compute it online with multiple pieces.
250  OnlinePitchFeature pitch_extractor(ext_opt);
251  OnlineProcessPitch pitch_processor(pro_opt1, &pitch_extractor);
252  OnlineProcessPitch pitch_processor_delayed(pro_opt2, &pitch_extractor);
253  int32 start_samp = 0;
254  while (start_samp < v.Dim()) {
255  int32 num_samp = rand() % (v.Dim() + 1 - start_samp);
256  SubVector<BaseFloat> v_part(v, start_samp, num_samp);
257  pitch_extractor.AcceptWaveform(ext_opt.samp_freq, v_part);
258  start_samp += num_samp;
259  }
260  pitch_extractor.InputFinished();
261 
262  int32 num_frames = pitch_processor.NumFramesReady();
263  m1.Resize(num_frames, pitch_processor.Dim());
264  for (int32 frame = 0; frame < num_frames; frame++) {
265  SubVector<BaseFloat> rowp(m1, frame);
266  pitch_processor.GetFrame(frame, &rowp);
267  }
268 
269  int32 num_frames_delayed = pitch_processor_delayed.NumFramesReady();
270  m2.Resize(num_frames_delayed, pitch_processor_delayed.Dim());
271  for (int32 frame = 0; frame < num_frames_delayed; frame++) {
272  SubVector<BaseFloat> rowp(m2, frame);
273  pitch_processor_delayed.GetFrame(frame, &rowp);
274  }
275 
276  KALDI_ASSERT(num_frames_delayed == num_frames + pro_opt2.delay);
277  SubMatrix<BaseFloat> m3(m2, pro_opt2.delay, num_frames, 0, m2.NumCols());
278  if (!ApproxEqual(m1, m3)) {
279  KALDI_ERR << "Post-processed pitch differs: " << m1 << " vs. " << m3;
280  }
281  KALDI_LOG << "Test passed :)\n";
282  }
283 }
284 
285 extern bool pitch_use_naive_search; // was declared in pitch-functions.cc
286 
287 // Make sure that doing a calculation on the whole waveform gives
288 // the same results as doing on the waveform broken into pieces.
289 static void UnitTestSearch() {
290  KALDI_LOG << "=== UnitTestSearch() ===\n";
291  for (int32 n = 0; n < 3; n++) {
292  // the parametrization object
294  op.nccf_ballast_online = true; // this is necessary for the computation
295  // to be identical regardless how many pieces we break the signal into.
296 
297  int32 size = 1000 + rand() % 1000;
298 
299  Vector<BaseFloat> v(size);
300  // init with noise plus a sine-wave whose frequency is changing randomly.
301 
302  double cur_freq = 200.0, normalized_time = 0.0;
303 
304  for (int32 i = 0; i < size; i++) {
305  v(i) = RandGauss() + cos(normalized_time * M_2PI);
306  cur_freq += RandGauss(); // let the frequency wander a little.
307  if (cur_freq < 100.0) cur_freq = 100.0;
308  if (cur_freq > 300.0) cur_freq = 300.0;
309  normalized_time += cur_freq / op.samp_freq;
310  }
311 
313  ComputeKaldiPitch(op, v, &m1);
314 
315  pitch_use_naive_search = true;
316 
318  ComputeKaldiPitch(op, v, &m2);
319 
320  pitch_use_naive_search = false;
321 
322  AssertEqual(m1, m2, 1.0e-08); // should be identical.
323  }
324  KALDI_LOG << "Test passed :)\n";
325 }
326 
327 static void UnitTestComputeGPE() {
328  KALDI_LOG << "=== UnitTestComputeGPE ===\n";
329  int32 wrong_pitch = 0, tot_voiced = 0, tot_unvoiced = 0, num_frames = 0;
330  BaseFloat tol = 0.1, avg_d_kpitch = 0, real_pitch = 0;
331  for (int32 i = 1; i < 11; i++) {
332  std::string wavefile;
333  std::string num;
334  if (i < 6) {
335  num = "f" + ConvertIntToString(i) + "nw0000";
336  } else {
337  num = "m" + ConvertIntToString(i-5) + "nw0000";
338  }
339  Matrix<BaseFloat> gross_pitch;
340  std::string pitchfile = "keele/keele-true-lags/"+num+".txt";
341  std::ifstream pitch(pitchfile.c_str());
342  gross_pitch.Read(pitch, false);
343  Matrix<BaseFloat> kaldi_pitch;
344  std::string kfile = "keele/tmp/+"+num+"-kaldi.txt";
345  std::ifstream kpitch(kfile.c_str());
346  kaldi_pitch.Read(kpitch, false);
347  num_frames = std::min(kaldi_pitch.NumRows(),gross_pitch.NumRows());
348  for (int32 j = 1; j < num_frames; j++) {
349  if (gross_pitch(j,0) > 0.0) {
350  tot_voiced++;
351  real_pitch = 20000.0/gross_pitch(j,0);
352  if (fabs((real_pitch - kaldi_pitch(j,1))/real_pitch) > tol)
353  wrong_pitch++;
354  } else if (gross_pitch(j,0) == 0.0 && gross_pitch(j-1,0) == 0.0) {
355  tot_unvoiced++;
356  avg_d_kpitch += fabs(kaldi_pitch(j,1) - kaldi_pitch(j-1,1));
357  }
358  }
359  }
360  BaseFloat GPE = 1.0 * wrong_pitch / tot_voiced;
361  KALDI_LOG << " Gross Pitch Error with Rel.Error " << tol << " is " << GPE;
362  KALDI_LOG << "Average Kaldi delta_pitch for unvoiced regions " << avg_d_kpitch/tot_unvoiced;
363 }
364 
365 // Compare pitch using Kaldi pitch tracker on KEELE corpora
366 static void UnitTestKeele() {
367  KALDI_LOG << "=== UnitTestKeele() ===";
368  for (int32 i = 1; i < 11; i++) {
369  std::string wavefile;
370  std::string num;
371  if (i < 6) {
372  num = "f" + ConvertIntToString(i) + "nw0000";
373  wavefile = "keele/16kHz/"+num+".wav";
374  } else {
375  num = "m" + ConvertIntToString(i-5) + "nw0000";
376  wavefile = "keele/16kHz/"+num+".wav";
377  }
378  KALDI_LOG << "--- " << wavefile << " ---";
379  std::ifstream is(wavefile.c_str(), std::ios_base::binary);
380  WaveData wave;
381  wave.Read(is);
382  KALDI_ASSERT(wave.Data().NumRows() == 1);
383  SubVector<BaseFloat> waveform(wave.Data(), 0);
384  // use pitch code with default configuration..
386  op.nccf_ballast = 1;
387  op.penalty_factor = 5;
388  // compute pitch.
390  ComputeKaldiPitch(op, waveform, &m);
391  std::string outfile = "keele/tmp/+"+num+"-kaldi.txt";
392  std::ofstream os(outfile.c_str());
393  m.Write(os, false);
394  }
395 }
396 /* change freq_weight to investigate the results */
397 static void UnitTestPenaltyFactor() {
398  KALDI_LOG << "=== UnitTestPenaltyFactor() ===";
399  for (int32 k = 1; k < 5; k++) {
400  for (int32 i = 1; i < 4; i++) {
401  std::string wavefile;
402  std::string num;
403  if (i < 6) {
404  num = "f"+ConvertIntToString(i)+"nw0000";
405  wavefile = "keele/16kHz/"+num+".wav";
406  } else {
407  num = "m"+ConvertIntToString(i-5)+"nw0000";
408  wavefile = "keele/16kHz/"+num+".wav";
409  }
410  KALDI_LOG << "--- " << wavefile << " ---";
411  std::ifstream is(wavefile.c_str(), std::ios_base::binary);
412  WaveData wave;
413  wave.Read(is);
414  KALDI_ASSERT(wave.Data().NumRows() == 1);
415  SubVector<BaseFloat> waveform(wave.Data(), 0);
416  // use pitch code with default configuration..
418  op.penalty_factor = k * 0.05;
419  op.nccf_ballast = 0.1;
420  // compute pitch.
422  ComputeKaldiPitch(op, waveform, &m);
423  std::string penaltyfactor = ConvertIntToString(k);
424  std::string outfile = "keele/tmp/+"+num+"-kaldi-penalty-"+penaltyfactor+".txt";
425  std::ofstream os(outfile.c_str());
426  m.Write(os, false);
427  }
428  }
429 }
431  KALDI_LOG << "=== UnitTestKeeleNccfBallast() ===";
432  for (int32 k = 1; k < 10; k++) {
433  for (int32 i = 1; i < 2; i++) {
434  std::string wavefile;
435  std::string num;
436  if (i < 6) {
437  num = "f"+ConvertIntToString(i)+"nw0000";
438  wavefile = "keele/16kHz/"+num+".wav";
439  } else {
440  num = "m"+ConvertIntToString(i-5)+"nw0000";
441  wavefile = "keele/16kHz/"+num+".wav";
442  }
443  KALDI_LOG << "--- " << wavefile << " ---";
444  std::ifstream is(wavefile.c_str(), std::ios_base::binary);
445  WaveData wave;
446  wave.Read(is);
447  KALDI_ASSERT(wave.Data().NumRows() == 1);
448  SubVector<BaseFloat> waveform(wave.Data(), 0);
449  // use pitch code with default configuration..
451  op.nccf_ballast = 0.05 * k;
452  KALDI_LOG << " nccf_ballast " << op.nccf_ballast;
453  // compute pitch.
455  ComputeKaldiPitch(op, waveform, &m);
456  std::string nccfballast = ConvertIntToString(op.nccf_ballast);
457  std::string outfile = "keele/tmp/+"+num
458  +"-kaldi-nccf-ballast-"+nccfballast+".txt";
459  std::ofstream os(outfile.c_str());
460  m.Write(os, false);
461  }
462  }
463 }
464 
466  KALDI_LOG << "=== UnitTestPitchExtractionSpeed() ===";
467  // use pitch code with default configuration..
469  op.nccf_ballast = 0.1;
470  op.lowpass_cutoff = 1000;
471  for (int32 i = 1; i < 2; i++) {
472  std::string wavefile;
473  std::string num;
474  if (i < 6) {
475  num = "f"+ConvertIntToString(i)+"nw0000";
476  wavefile = "keele/16kHz/"+num+".wav";
477  } else {
478  num = "m"+ConvertIntToString(i-5)+"nw0000";
479  wavefile = "keele/16kHz/"+num+".wav";
480  }
481  KALDI_LOG << "--- " << wavefile << " ---";
482  std::ifstream is(wavefile.c_str(), std::ios_base::binary);
483  WaveData wave;
484  wave.Read(is);
485  KALDI_ASSERT(wave.Data().NumRows() == 1);
486  SubVector<BaseFloat> waveform(wave.Data(), 0);
487  // compute pitch.
488  int test_num = 10;
490  Timer timer;
491  for (int32 t = 0; t < test_num; t++)
492  ComputeKaldiPitch(op, waveform, &m);
493  double tot_time = timer.Elapsed(),
494  speech_time = test_num * waveform.Dim() / wave.SampFreq();
495  KALDI_LOG << " Pitch extraction time per second of speech is "
496  << (tot_time / speech_time) << " seconds.";
497  }
498 }
500  KALDI_LOG << "=== UnitTestPitchExtractorCompareKeele() ===";
501  // use pitch code with default configuration..
503  op.nccf_ballast = 0.1;
504  for (int32 i = 1; i < 11; i++) {
505  std::string wavefile;
506  std::string num;
507  if (i < 6) {
508  num = "f"+ConvertIntToString(i)+"nw0000";
509  wavefile = "keele/16kHz/"+num+".wav";
510  } else {
511  num = "m"+ConvertIntToString(i-5)+"nw0000";
512  wavefile = "keele/16kHz/"+num+".wav";
513  }
514  KALDI_LOG << "--- " << wavefile << " ---";
515  std::ifstream is(wavefile.c_str(), std::ios_base::binary);
516  WaveData wave;
517  wave.Read(is);
518  KALDI_ASSERT(wave.Data().NumRows() == 1);
519  SubVector<BaseFloat> waveform(wave.Data(), 0);
520  // compute pitch.
522  ComputeKaldiPitch(op, waveform, &m);
523  std::string outfile = "keele/tmp/+"+num+"-speedup-kaldi1.txt";
524  std::ofstream os(outfile.c_str());
525  m.Write(os, false);
526  }
527 }
529  // you need to use sox to change sampling rate
530  // e.g. sox -r 10k input.wav output.wav
531  // put them in keele/(samp_rate in kHz)+"kHz" e.g. keele/10kHz
532  int sample_rate = 16000;
534  op.samp_freq = static_cast<double>(sample_rate);
535  op.lowpass_cutoff = 1000;
536  op.max_f0 = 400;
537  std::string samp_rate = ConvertIntToString(sample_rate/1000);
538  for (int32 i = 1; i < 11; i++) {
539  std::string wavefile;
540  std::string num;
541  if (i < 6) {
542  num = "f"+ConvertIntToString(i)+"nw0000";
543  wavefile = "keele/"+samp_rate+"kHz/"+num+".wav";
544  } else {
545  num = "m"+ConvertIntToString(i-5)+"nw0000";
546  wavefile = "keele/"+samp_rate+"kHz/"+num+".wav";
547  }
548  KALDI_LOG << "--- " << wavefile << " ---";
549  std::ifstream is(wavefile.c_str(), std::ios_base::binary);
550  WaveData wave;
551  wave.Read(is);
552  KALDI_ASSERT(wave.Data().NumRows() == 1);
553  SubVector<BaseFloat> waveform(wave.Data(), 0);
555  ComputeKaldiPitch(op, waveform, &m);
556  std::string outfile = "keele/tmp/+"+num+"-kaldi-samp-freq-"+samp_rate+"kHz.txt";
557  std::ofstream os(outfile.c_str());
558  m.Write(os, false);
559  }
560 }
562  for (int32 i = 1; i < 11; i++) {
563  std::string wavefile;
564  std::string num;
565  if (i < 6) {
566  num = "f"+ConvertIntToString(i)+"nw0000";
567  wavefile = "keele/16kHz/"+num+".wav";
568  } else {
569  num = "m"+ConvertIntToString(i-5)+"nw0000";
570  wavefile = "keele/16kHz/"+num+".wav";
571  }
572  KALDI_LOG << "--- " << wavefile << " ---";
573  std::ifstream is(wavefile.c_str(), std::ios_base::binary);
574  WaveData wave;
575  wave.Read(is);
576  KALDI_ASSERT(wave.Data().NumRows() == 1);
577  SubVector<BaseFloat> waveform(wave.Data(), 0);
579  op.lowpass_cutoff = 1000;
580  op.nccf_ballast = 0.1;
581  op.max_f0 = 400;
582  Matrix<BaseFloat> m, m2;
583  ComputeKaldiPitch(op, waveform, &m);
584  ProcessPitchOptions postprop_op;
585  // postprop_op.pov_nonlinearity = 2;
586  // Use zero noise, or the features won't be identical.
587  postprop_op.delta_pitch_noise_stddev = 0.0;
588  ProcessPitch(postprop_op, m, &m2);
589 
590  std::string outfile = "keele/tmp/+"+num+"-processed-kaldi.txt";
591  std::ofstream os(outfile.c_str());
592  m2.Write(os, false);
593  }
594 }
595 
596 static void UnitTestFeatNoKeele() {
597  UnitTestSimple();
598  UnitTestPieces();
600  UnitTestDelay();
601  UnitTestSearch();
602 }
603 
604 static void UnitTestFeatWithKeele() {
605  UnitTestProcess();
606  UnitTestKeele();
613 }
614 
615 } // namespace kaldi
616 
617 int main() {
618  using namespace kaldi;
619 
620  SetVerboseLevel(3);
621  try {
623  if (DirExist("keele/16kHz")) {
625  } else {
626  KALDI_LOG
627  << "Not running tests that require the Keele database, "
628  << "please ask g.meyer@liverpool.ac.uk for the database if you need it.\n"
629  << "Once you have the keele/ subdirectory, containing *.{pel,pet,pev,raw,wav}, do this:\n"
630  << "cd keele; mkdir -p 16kHz; mkdir -p tmp; for x in *.wav; do \n"
631  << "sox $x -r 16000 16kHz/$x; done \n"
632  << "mkdir -p keele-true-lags; for f in *.pev; do \n"
633  << "out_f=keele-true-lags/$(echo $f | sed s:pev:txt:); ( echo ' ['; len=`cat $f | wc -l`; \n"
634  << "head -n $(($len-1)) $f | tail -n $(($len-14)) ; echo -n ']') >$out_f; done \n"
635  << "\n"
636  << "Note: the GPE reported in paper is computed using pseudo-ground-truth pitch obtained\n"
637  << "by voting among the pitch trackers mentioned in the paper.\n";
638  }
639  KALDI_LOG << "Tests succeeded.";
640  return 0;
641  } catch(const std::exception &e) {
642  KALDI_ERR << e.what();
643  return 1;
644  }
645 }
virtual int32 NumFramesReady() const
returns the feature dimension.
void Read(std::istream &is)
Read() will throw on error.
Definition: wave-reader.cc:272
static void UnitTestKeeleNccfBallast()
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Write(std::ostream &out, bool binary) const
write to stream.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
std::string ConvertIntToString(const int &number)
static void UnitTestComputeGPE()
static void UnitTestSnipEdges()
virtual int32 Dim() const
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
virtual void InputFinished()
InputFinished() tells the class you won&#39;t be providing any more waveform.
bool DirExist(const std::string &dirname)
void ComputeKaldiPitch(const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function extracts (pitch, NCCF) per frame, using the pitch extraction method described in "A Pit...
static void UnitTestSimple()
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
This would be called from the application, when you get more wave data.
This online-feature class implements post processing of pitch features.
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
kaldi::int32 int32
BaseFloat SampFreq() const
Definition: wave-reader.h:126
const Matrix< BaseFloat > & Data() const
Definition: wave-reader.h:124
static void UnitTestPieces()
void SetVerboseLevel(int32 i)
This should be rarely used, except by programs using Kaldi as library; command-line programs set the ...
Definition: kaldi-error.h:64
static void UnitTestPitchExtractorCompareKeele()
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
int main()
virtual int32 NumFramesReady() const
returns the feature dimension.
static void UnitTestPitchExtractionSpeed()
struct rnnlm::@11::@12 n
void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts, const ProcessPitchOptions &process_opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function combines ComputeKaldiPitch and ProcessPitch.
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
bool pitch_use_naive_search
This class&#39;s purpose is to read in Wave files.
Definition: wave-reader.h:106
void CopyColFromMat(const MatrixBase< OtherReal > &M, MatrixIndexT col)
Extracts a column of the matrix M.
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
static void UnitTestPenaltyFactor()
static void UnitTestKeele()
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Definition: kaldi-math.h:276
#define M_2PI
Definition: kaldi-math.h:52
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
static void UnitTestFeatWithKeele()
void ProcessPitch(const ProcessPitchOptions &opts, const MatrixBase< BaseFloat > &input, Matrix< BaseFloat > *output)
This function processes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch, and processes them into features.
static void UnitTestDelay()
static void UnitTestFeatNoKeele()
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Outputs the two-dimensional feature consisting of (pitch, NCCF).
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void UnitTestDiffSampleRate()
double Elapsed() const
Returns time in seconds.
Definition: timer.h:74
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
static void UnitTestSearch()
void UnitTestProcess()