32 #include "sys/types.h" 45 if (stat(dirname.c_str(), &st) != 0) {
46 KALDI_LOG <<
" directory " << dirname <<
" does not exist!";
58 v(
i) = (abs(
i * 433024253) % 65535) - (65535 / 2);
60 KALDI_LOG <<
"<<<=== Just make sure it runs... Nothing is compared";
72 KALDI_LOG <<
"=== UnitTestSnipEdges() ===\n";
81 std::ifstream is(
"test_data/test.wav");
89 for (
int fs = 4; fs <= 10; fs += 2) {
90 for (
int wl = 20; wl <= 100; wl += 20) {
118 int32 max_lag = wl / fs * 2;
119 int num_frames_f0 = m1.
NumRows() - max_lag;
124 for (
int32 lag = 0; lag < max_lag + 1; lag++) {
132 KALDI_LOG <<
"Best lag: " << blag * fs <<
"ms with value: " << bcorr <<
133 "; expected lag: " << wl / 2 + 10 - fs / 2 <<
" ± " << fs;
141 KALDI_WARN <<
"Bad lag for window size " << wl <<
" and frame shift " << fs;
148 if (nbad > 9)
KALDI_ERR <<
"Too many bad lags: " << nbad;
155 KALDI_LOG <<
"=== UnitTestPieces() ===\n";
165 int32 size = 10000 + rand() % 50000;
170 double cur_freq = 200.0, normalized_time = 0.0;
175 if (cur_freq < 100.0) cur_freq = 100.0;
176 if (cur_freq > 300.0) cur_freq = 300.0;
177 normalized_time += cur_freq / op1.
samp_freq;
191 int32 start_samp = 0;
192 while (start_samp < v.
Dim()) {
193 int32 num_samp = rand() % (v.
Dim() + 1 - start_samp);
196 start_samp += num_samp;
201 m2p.
Resize(num_frames, process_pitch.
Dim());
202 for (
int32 frame = 0; frame < num_frames; frame++) {
204 pitch_extractor.
GetFrame(frame, &row);
206 process_pitch.
GetFrame(frame, &rowp);
211 KALDI_ERR <<
"Post-processed pitch differs: " << m1p <<
" vs. " << m2p;
220 KALDI_LOG <<
"=== UnitTestDelay() ===\n";
229 pro_opt2.
delay = rand() % 50;
233 int32 size = 1000 + rand() % 5000;
238 double cur_freq = 200.0, normalized_time = 0.0;
243 if (cur_freq < 100.0) cur_freq = 100.0;
244 if (cur_freq > 300.0) cur_freq = 300.0;
245 normalized_time += cur_freq / ext_opt.
samp_freq;
253 int32 start_samp = 0;
254 while (start_samp < v.
Dim()) {
255 int32 num_samp = rand() % (v.
Dim() + 1 - start_samp);
258 start_samp += num_samp;
263 m1.
Resize(num_frames, pitch_processor.
Dim());
264 for (
int32 frame = 0; frame < num_frames; frame++) {
266 pitch_processor.
GetFrame(frame, &rowp);
270 m2.
Resize(num_frames_delayed, pitch_processor_delayed.
Dim());
271 for (
int32 frame = 0; frame < num_frames_delayed; frame++) {
273 pitch_processor_delayed.
GetFrame(frame, &rowp);
279 KALDI_ERR <<
"Post-processed pitch differs: " << m1 <<
" vs. " << m3;
290 KALDI_LOG <<
"=== UnitTestSearch() ===\n";
297 int32 size = 1000 + rand() % 1000;
302 double cur_freq = 200.0, normalized_time = 0.0;
307 if (cur_freq < 100.0) cur_freq = 100.0;
308 if (cur_freq > 300.0) cur_freq = 300.0;
309 normalized_time += cur_freq / op.
samp_freq;
315 pitch_use_naive_search =
true;
320 pitch_use_naive_search =
false;
328 KALDI_LOG <<
"=== UnitTestComputeGPE ===\n";
329 int32 wrong_pitch = 0, tot_voiced = 0, tot_unvoiced = 0, num_frames = 0;
330 BaseFloat tol = 0.1, avg_d_kpitch = 0, real_pitch = 0;
332 std::string wavefile;
340 std::string pitchfile =
"keele/keele-true-lags/"+num+
".txt";
341 std::ifstream pitch(pitchfile.c_str());
342 gross_pitch.
Read(pitch,
false);
344 std::string kfile =
"keele/tmp/+"+num+
"-kaldi.txt";
345 std::ifstream kpitch(kfile.c_str());
346 kaldi_pitch.
Read(kpitch,
false);
347 num_frames = std::min(kaldi_pitch.
NumRows(),gross_pitch.
NumRows());
348 for (
int32 j = 1;
j < num_frames;
j++) {
349 if (gross_pitch(
j,0) > 0.0) {
351 real_pitch = 20000.0/gross_pitch(
j,0);
352 if (fabs((real_pitch - kaldi_pitch(
j,1))/real_pitch) > tol)
354 }
else if (gross_pitch(
j,0) == 0.0 && gross_pitch(
j-1,0) == 0.0) {
356 avg_d_kpitch += fabs(kaldi_pitch(
j,1) - kaldi_pitch(
j-1,1));
360 BaseFloat GPE = 1.0 * wrong_pitch / tot_voiced;
361 KALDI_LOG <<
" Gross Pitch Error with Rel.Error " << tol <<
" is " << GPE;
362 KALDI_LOG <<
"Average Kaldi delta_pitch for unvoiced regions " << avg_d_kpitch/tot_unvoiced;
369 std::string wavefile;
373 wavefile =
"keele/16kHz/"+num+
".wav";
376 wavefile =
"keele/16kHz/"+num+
".wav";
378 KALDI_LOG <<
"--- " << wavefile <<
" ---";
379 std::ifstream is(wavefile.c_str(), std::ios_base::binary);
387 op.penalty_factor = 5;
391 std::string outfile =
"keele/tmp/+"+num+
"-kaldi.txt";
392 std::ofstream os(outfile.c_str());
398 KALDI_LOG <<
"=== UnitTestPenaltyFactor() ===";
399 for (
int32 k = 1; k < 5; k++) {
401 std::string wavefile;
405 wavefile =
"keele/16kHz/"+num+
".wav";
408 wavefile =
"keele/16kHz/"+num+
".wav";
410 KALDI_LOG <<
"--- " << wavefile <<
" ---";
411 std::ifstream is(wavefile.c_str(), std::ios_base::binary);
419 op.nccf_ballast = 0.1;
424 std::string outfile =
"keele/tmp/+"+num+
"-kaldi-penalty-"+penaltyfactor+
".txt";
425 std::ofstream os(outfile.c_str());
431 KALDI_LOG <<
"=== UnitTestKeeleNccfBallast() ===";
432 for (
int32 k = 1; k < 10; k++) {
434 std::string wavefile;
438 wavefile =
"keele/16kHz/"+num+
".wav";
441 wavefile =
"keele/16kHz/"+num+
".wav";
443 KALDI_LOG <<
"--- " << wavefile <<
" ---";
444 std::ifstream is(wavefile.c_str(), std::ios_base::binary);
452 KALDI_LOG <<
" nccf_ballast " << op.nccf_ballast;
457 std::string outfile =
"keele/tmp/+"+num
458 +
"-kaldi-nccf-ballast-"+nccfballast+
".txt";
459 std::ofstream os(outfile.c_str());
466 KALDI_LOG <<
"=== UnitTestPitchExtractionSpeed() ===";
472 std::string wavefile;
476 wavefile =
"keele/16kHz/"+num+
".wav";
479 wavefile =
"keele/16kHz/"+num+
".wav";
481 KALDI_LOG <<
"--- " << wavefile <<
" ---";
482 std::ifstream is(wavefile.c_str(), std::ios_base::binary);
491 for (
int32 t = 0; t < test_num; t++)
493 double tot_time = timer.
Elapsed(),
494 speech_time = test_num * waveform.Dim() / wave.SampFreq();
495 KALDI_LOG <<
" Pitch extraction time per second of speech is " 496 << (tot_time / speech_time) <<
" seconds.";
500 KALDI_LOG <<
"=== UnitTestPitchExtractorCompareKeele() ===";
505 std::string wavefile;
509 wavefile =
"keele/16kHz/"+num+
".wav";
512 wavefile =
"keele/16kHz/"+num+
".wav";
514 KALDI_LOG <<
"--- " << wavefile <<
" ---";
515 std::ifstream is(wavefile.c_str(), std::ios_base::binary);
523 std::string outfile =
"keele/tmp/+"+num+
"-speedup-kaldi1.txt";
524 std::ofstream os(outfile.c_str());
532 int sample_rate = 16000;
534 op.
samp_freq =
static_cast<double>(sample_rate);
539 std::string wavefile;
543 wavefile =
"keele/"+samp_rate+
"kHz/"+num+
".wav";
546 wavefile =
"keele/"+samp_rate+
"kHz/"+num+
".wav";
548 KALDI_LOG <<
"--- " << wavefile <<
" ---";
549 std::ifstream is(wavefile.c_str(), std::ios_base::binary);
556 std::string outfile =
"keele/tmp/+"+num+
"-kaldi-samp-freq-"+samp_rate+
"kHz.txt";
557 std::ofstream os(outfile.c_str());
563 std::string wavefile;
567 wavefile =
"keele/16kHz/"+num+
".wav";
570 wavefile =
"keele/16kHz/"+num+
".wav";
572 KALDI_LOG <<
"--- " << wavefile <<
" ---";
573 std::ifstream is(wavefile.c_str(), std::ios_base::binary);
580 op.nccf_ballast = 0.1;
590 std::string outfile =
"keele/tmp/+"+num+
"-processed-kaldi.txt";
591 std::ofstream os(outfile.c_str());
618 using namespace kaldi;
627 <<
"Not running tests that require the Keele database, " 628 <<
"please ask g.meyer@liverpool.ac.uk for the database if you need it.\n" 629 <<
"Once you have the keele/ subdirectory, containing *.{pel,pet,pev,raw,wav}, do this:\n" 630 <<
"cd keele; mkdir -p 16kHz; mkdir -p tmp; for x in *.wav; do \n" 631 <<
"sox $x -r 16000 16kHz/$x; done \n" 632 <<
"mkdir -p keele-true-lags; for f in *.pev; do \n" 633 <<
"out_f=keele-true-lags/$(echo $f | sed s:pev:txt:); ( echo ' ['; len=`cat $f | wc -l`; \n" 634 <<
"head -n $(($len-1)) $f | tail -n $(($len-14)) ; echo -n ']') >$out_f; done \n" 636 <<
"Note: the GPE reported in paper is computed using pseudo-ground-truth pitch obtained\n" 637 <<
"by voting among the pitch trackers mentioned in the paper.\n";
641 }
catch(
const std::exception &e) {
virtual int32 NumFramesReady() const
returns the feature dimension.
void Read(std::istream &is)
Read() will throw on error.
static void UnitTestKeeleNccfBallast()
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void Write(std::ostream &out, bool binary) const
write to stream.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
std::string ConvertIntToString(const int &number)
static void UnitTestComputeGPE()
static void UnitTestSnipEdges()
virtual int32 Dim() const
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
virtual void InputFinished()
InputFinished() tells the class you won't be providing any more waveform.
bool DirExist(const std::string &dirname)
void ComputeKaldiPitch(const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function extracts (pitch, NCCF) per frame, using the pitch extraction method described in "A Pit...
static void UnitTestSimple()
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
This would be called from the application, when you get more wave data.
This online-feature class implements post processing of pitch features.
float RandGauss(struct RandomState *state=NULL)
BaseFloat SampFreq() const
const Matrix< BaseFloat > & Data() const
static void UnitTestPieces()
void SetVerboseLevel(int32 i)
This should be rarely used, except by programs using Kaldi as library; command-line programs set the ...
static void UnitTestPitchExtractorCompareKeele()
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
virtual int32 NumFramesReady() const
returns the feature dimension.
BaseFloat delta_pitch_noise_stddev
static void UnitTestPitchExtractionSpeed()
void ComputeAndProcessKaldiPitch(const PitchExtractionOptions &pitch_opts, const ProcessPitchOptions &process_opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
This function combines ComputeKaldiPitch and ProcessPitch.
MatrixIndexT Dim() const
Returns the dimension of the vector.
bool pitch_use_naive_search
This class's purpose is to read in Wave files.
void CopyColFromMat(const MatrixBase< OtherReal > &M, MatrixIndexT col)
Extracts a column of the matrix M.
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
static void UnitTestPenaltyFactor()
static void UnitTestKeele()
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
Asserts that abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
static void UnitTestFeatWithKeele()
void ProcessPitch(const ProcessPitchOptions &opts, const MatrixBase< BaseFloat > &input, Matrix< BaseFloat > *output)
This function takes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch and processes them into features.
static void UnitTestDelay()
static void UnitTestFeatNoKeele()
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Outputs the two-dimensional feature consisting of (pitch, NCCF).
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns the dot product of a and b.
void UnitTestDiffSampleRate()
double Elapsed() const
Returns time in seconds.
Sub-matrix representation.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
Returns true if abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
static void UnitTestSearch()