Files
file	resample.h

Classes
struct	ExampleFeatureComputerOptions
	This class is only added for documentation, it is not intended to ever be used. More...

class	ExampleFeatureComputer
	This class is only added for documentation, it is not intended to ever be used. More...

class	OfflineFeatureTpl< F >
	This templated class is intended for offline feature extraction. More...

struct	FbankOptions
	FbankOptions contains basic options for computing filterbank features. More...

class	FbankComputer
	Class for computing mel-filterbank features; see Computing MFCC features for more information. More...

struct	DeltaFeaturesOptions

class	DeltaFeatures

struct	ShiftedDeltaFeaturesOptions

class	ShiftedDeltaFeatures

struct	SlidingWindowCmnOptions

struct	MfccOptions
	MfccOptions contains basic options for computing MFCC features. More...

class	MfccComputer

struct	PlpOptions
	PlpOptions contains basic options for computing PLP features. More...

class	PlpComputer
	This is the new-style interface to the PLP computation. More...

struct	SpectrogramOptions
	SpectrogramOptions contains basic options for computing spectrogram features. More...

class	SpectrogramComputer
	Class for computing spectrogram features. More...

struct	FrameExtractionOptions

struct	FeatureWindowFunction

struct	MelBanksOptions

class	MelBanks

struct	PitchExtractionOptions

struct	ProcessPitchOptions

class	OnlinePitchFeature

class	OnlineProcessPitch
	This online-feature class implements post processing of pitch features. More...

class	ArbitraryResample
	Class ArbitraryResample allows you to resample a signal (assumed zero outside the sample region, not periodic) at arbitrary specified time values, which don't have to be linearly spaced. More...

class	LinearResample
	LinearResample is a special case of ArbitraryResample, where we want to resample a signal at linearly spaced intervals (this means we want to upsample or downsample the signal). More...

Typedefs
typedef OfflineFeatureTpl< FbankComputer >	Fbank

typedef OfflineFeatureTpl< MfccComputer >	Mfcc

typedef OfflineFeatureTpl< PlpComputer >	Plp

typedef OfflineFeatureTpl< SpectrogramComputer >	Spectrogram

Functions
void	ComputePowerSpectrum (VectorBase< BaseFloat > *waveform)

void	ComputeDeltas (const DeltaFeaturesOptions &delta_opts, const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)

void	ComputeShiftedDeltas (const ShiftedDeltaFeaturesOptions &delta_opts, const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)

void	SpliceFrames (const MatrixBase< BaseFloat > &input_features, int32 left_context, int32 right_context, Matrix< BaseFloat > *output_features)

void	ReverseFrames (const MatrixBase< BaseFloat > &input_features, Matrix< BaseFloat > *output_features)

void	InitIdftBases (int32 n_bases, int32 dimension, Matrix< BaseFloat > *mat_out)

void	SlidingWindowCmn (const SlidingWindowCmnOptions &opts, const MatrixBase< BaseFloat > &input, MatrixBase< BaseFloat > *output)
	Applies sliding-window cepstral mean and/or variance normalization. More...

int32	NumFrames (int64 num_samples, const FrameExtractionOptions &opts, bool flush=true)
	This function returns the number of frames that we can extract from a wave file with the given number of samples in it (assumed to have the same sampling rate as specified in 'opts'). More...

int64	FirstSampleOfFrame (int32 frame, const FrameExtractionOptions &opts)

void	Dither (VectorBase< BaseFloat > *waveform, BaseFloat dither_value)

void	Preemphasize (VectorBase< BaseFloat > *waveform, BaseFloat preemph_coeff)

void	ProcessWindow (const FrameExtractionOptions &opts, const FeatureWindowFunction &window_function, VectorBase< BaseFloat > *window, BaseFloat *log_energy_pre_window=NULL)
	This function does all the windowing steps after actually extracting the windowed signal: depending on the configuration, it does dithering, dc offset removal, preemphasis, and multiplication by the windowing function. More...

void	ExtractWindow (int64 sample_offset, const VectorBase< BaseFloat > &wave, int32 f, const FrameExtractionOptions &opts, const FeatureWindowFunction &window_function, Vector< BaseFloat > *window, BaseFloat *log_energy_pre_window)

void	ComputeLifterCoeffs (BaseFloat Q, VectorBase< BaseFloat > *coeffs)

BaseFloat	Durbin (int n, const BaseFloat *pAC, BaseFloat *pLP, BaseFloat *pTmp)

BaseFloat	ComputeLpc (const VectorBase< BaseFloat > &autocorr_in, Vector< BaseFloat > *lpc_out)

void	Lpc2Cepstrum (int n, const BaseFloat *pLPC, BaseFloat *pCepst)

void	GetEqualLoudnessVector (const MelBanks &mel_banks, Vector< BaseFloat > *ans)

void	ComputeKaldiPitch (const PitchExtractionOptions &opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
	This function extracts (NCCF, pitch) per frame, using the pitch extraction method described in "A Pitch Extraction Algorithm Tuned for Automatic Speech Recognition", Pegah Ghahremani, Bagher BabaAli, Daniel Povey, Korbinian Riedhammer, Jan Trmal and Sanjeev Khudanpur, ICASSP 2014. More...

void	ProcessPitch (const ProcessPitchOptions &opts, const MatrixBase< BaseFloat > &input, Matrix< BaseFloat > *output)
	This function processes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch, and processes them into features. More...

void	ComputeAndProcessKaldiPitch (const PitchExtractionOptions &pitch_opts, const ProcessPitchOptions &process_opts, const VectorBase< BaseFloat > &wave, Matrix< BaseFloat > *output)
	This function combines ComputeKaldiPitch and ProcessPitch. More...

void	ResampleWaveform (BaseFloat orig_freq, const VectorBase< BaseFloat > &wave, BaseFloat new_freq, Vector< BaseFloat > *new_wave)
	Downsample or upsample a waveform. More...

void	DownsampleWaveForm (BaseFloat orig_freq, const VectorBase< BaseFloat > &wave, BaseFloat new_freq, Vector< BaseFloat > *new_wave)
	This function is deprecated. More...

Detailed Description

Typedef Documentation

◆ Fbank

typedef OfflineFeatureTpl<FbankComputer> Fbank

Definition at line 143 of file feature-fbank.h.

◆ Mfcc

typedef OfflineFeatureTpl<MfccComputer> Mfcc

Definition at line 147 of file feature-mfcc.h.

◆ Plp

typedef OfflineFeatureTpl<PlpComputer> Plp

Definition at line 169 of file feature-plp.h.

◆ Spectrogram

typedef OfflineFeatureTpl<SpectrogramComputer> Spectrogram

Definition at line 122 of file feature-spectrogram.h.

Function Documentation

◆ ComputeAndProcessKaldiPitch()

void ComputeAndProcessKaldiPitch	(	const PitchExtractionOptions &	pitch_opts,
		const ProcessPitchOptions &	process_opts,
		const VectorBase< BaseFloat > &	wave,
		Matrix< BaseFloat > *	output
	)

This function combines ComputeKaldiPitch and ProcessPitch.

The reason why we need a separate function to do this is in order to be able to accurately simulate the online pitch-processing, for testing and for training models matched to the "first-pass" features. It is sensitive to the variables in pitch_opts that relate to online processing, i.e. max_frames_latency, frames_per_chunk, simulate_first_pass_online, recompute_frame.

Definition at line 1597 of file pitch-functions.cc.

References OnlinePitchFeature::AcceptWaveform(), VectorBase< Real >::Dim(), OnlineProcessPitch::Dim(), PitchExtractionOptions::frame_shift_ms, PitchExtractionOptions::frames_per_chunk, OnlineProcessPitch::GetFrame(), OnlinePitchFeature::InputFinished(), KALDI_ASSERT, KALDI_WARN, kaldi::kCopyData, OnlineProcessPitch::NumFramesReady(), Matrix< Real >::Resize(), MatrixBase< Real >::RowRange(), PitchExtractionOptions::samp_freq, and PitchExtractionOptions::simulate_first_pass_online.

Referenced by main(), kaldi::UnitTestSimple(), and kaldi::UnitTestSnipEdges().

                                {
 
   OnlinePitchFeature pitch_extractor(pitch_opts);
 
   if (pitch_opts.simulate_first_pass_online) {
     KALDI_ASSERT(pitch_opts.frames_per_chunk > 0 &&
                  "--simulate-first-pass-online option does not make sense "
                  "unless you specify --frames-per-chunk");
   }
 
   OnlineProcessPitch post_process(process_opts, &pitch_extractor);
 
   int32 cur_rows = 100;
   Matrix<BaseFloat> feats(cur_rows, post_process.Dim());
 
   int32 cur_offset = 0, cur_frame = 0,
       samp_per_chunk = pitch_opts.frames_per_chunk *
       pitch_opts.samp_freq * pitch_opts.frame_shift_ms / 1000.0f;
 
   // We request the first-pass features as soon as they are available,
   // regardless of whether opts.simulate_first_pass_online == true.  If
   // opts.simulate_first_pass_online == true this should
   // not affect the features generated, but it helps us to test the code
   // in a way that's closer to what online decoding would see.
 
   while (cur_offset < wave.Dim()) {
     int32 num_samp;
     if (samp_per_chunk > 0)
       num_samp = std::min(samp_per_chunk, wave.Dim() - cur_offset);
     else  // user left opts.frames_per_chunk at zero.
       num_samp = wave.Dim();
     SubVector<BaseFloat> wave_chunk(wave, cur_offset, num_samp);
     pitch_extractor.AcceptWaveform(pitch_opts.samp_freq, wave_chunk);
     cur_offset += num_samp;
     if (cur_offset == wave.Dim())
       pitch_extractor.InputFinished();
 
     // Get each frame as soon as it is ready.
     for (; cur_frame < post_process.NumFramesReady(); cur_frame++) {
       if (cur_frame >= cur_rows) {
         cur_rows *= 2;
         feats.Resize(cur_rows, post_process.Dim(), kCopyData);
       }
       SubVector<BaseFloat> row(feats, cur_frame);
       post_process.GetFrame(cur_frame, &row);
     }
   }
 
   if (pitch_opts.simulate_first_pass_online) {
     if (cur_frame == 0) {
       KALDI_WARN << "No features output since wave file too short";
       output->Resize(0, 0);
     } else {
       *output = feats.RowRange(0, cur_frame);
     }
   } else {
     // want the "final" features for second pass, so get them again.
     output->Resize(post_process.NumFramesReady(), post_process.Dim());
     for (int32 frame = 0; frame < post_process.NumFramesReady(); frame++) {
       SubVector<BaseFloat> row(*output, frame);
       post_process.GetFrame(frame, &row);
     }
   }
 }

◆ ComputeDeltas()

void ComputeDeltas	(	const DeltaFeaturesOptions &	delta_opts,
		const MatrixBase< BaseFloat > &	input_features,
		Matrix< BaseFloat > *	output_features
	)

Definition at line 160 of file feature-functions.cc.

References MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), DeltaFeaturesOptions::order, DeltaFeatures::Process(), and Matrix< Real >::Resize().

Referenced by OnlineProcessPitch::GetDeltaPitchFeature(), main(), kaldi::TestOnlineDeltaFeature(), kaldi::TestOnlineDeltaInput(), UnitTestCompareWithDeltaFeatures(), UnitTestHTKCompare1(), UnitTestHTKCompare2(), UnitTestHTKCompare3(), UnitTestHTKCompare4(), UnitTestHTKCompare5(), and UnitTestHTKCompare6().

                                                        {
   output_features->Resize(input_features.NumRows(),
                           input_features.NumCols()
                           *(delta_opts.order + 1));
   DeltaFeatures delta(delta_opts);
   for (int32 r = 0; r < static_cast<int32>(input_features.NumRows()); r++) {
     SubVector<BaseFloat> row(*output_features, r);
     delta.Process(input_features, r, &row);
   }
 }

◆ ComputeKaldiPitch()

void ComputeKaldiPitch	(	const PitchExtractionOptions &	opts,
		const VectorBase< BaseFloat > &	wave,
		Matrix< BaseFloat > *	output
	)

This function extracts (NCCF, pitch) per frame, using the pitch extraction method described in "A Pitch Extraction Algorithm Tuned for Automatic Speech Recognition", Pegah Ghahremani, Bagher BabaAli, Daniel Povey, Korbinian Riedhammer, Jan Trmal and Sanjeev Khudanpur, ICASSP 2014.

The output will have as many rows as there are frames, and two columns corresponding to (NCCF, pitch).

Definition at line 1291 of file pitch-functions.cc.

References OnlinePitchFeature::AcceptWaveform(), kaldi::ComputeKaldiPitchFirstPass(), VectorBase< Real >::Dim(), PitchExtractionOptions::frame_shift_ms, PitchExtractionOptions::frames_per_chunk, OnlinePitchFeature::GetFrame(), OnlinePitchFeature::InputFinished(), KALDI_ASSERT, KALDI_WARN, OnlinePitchFeature::NumFramesReady(), Matrix< Real >::Resize(), PitchExtractionOptions::samp_freq, and PitchExtractionOptions::simulate_first_pass_online.

Referenced by main(), kaldi::UnitTestDiffSampleRate(), kaldi::UnitTestKeele(), kaldi::UnitTestKeeleNccfBallast(), kaldi::UnitTestPenaltyFactor(), kaldi::UnitTestPieces(), kaldi::UnitTestPitchExtractionSpeed(), kaldi::UnitTestPitchExtractorCompareKeele(), kaldi::UnitTestProcess(), and kaldi::UnitTestSearch().

                                                   {
   if (opts.simulate_first_pass_online) {
     ComputeKaldiPitchFirstPass(opts, wave, output);
     return;
   }
   OnlinePitchFeature pitch_extractor(opts);
 
   if (opts.frames_per_chunk == 0) {
     pitch_extractor.AcceptWaveform(opts.samp_freq, wave);
   } else {
     // the user may set opts.frames_per_chunk for better compatibility with
     // online operation.
     KALDI_ASSERT(opts.frames_per_chunk > 0);
     int32 cur_offset = 0, samp_per_chunk =
         opts.frames_per_chunk * opts.samp_freq * opts.frame_shift_ms / 1000.0f;
     while (cur_offset < wave.Dim()) {
       int32 num_samp = std::min(samp_per_chunk, wave.Dim() - cur_offset);
       SubVector<BaseFloat> wave_chunk(wave, cur_offset, num_samp);
       pitch_extractor.AcceptWaveform(opts.samp_freq, wave_chunk);
       cur_offset += num_samp;
     }
   }
   pitch_extractor.InputFinished();
   int32 num_frames = pitch_extractor.NumFramesReady();
   if (num_frames == 0) {
     KALDI_WARN << "No frames output in pitch extraction";
     output->Resize(0, 0);
     return;
   }
   output->Resize(num_frames, 2);
   for (int32 frame = 0; frame < num_frames; frame++) {
     SubVector<BaseFloat> row(*output, frame);
     pitch_extractor.GetFrame(frame, &row);
   }
 }

◆ ComputeLifterCoeffs()

void ComputeLifterCoeffs	(	BaseFloat	Q,
		VectorBase< BaseFloat > *	coeffs
	)

Definition at line 253 of file mel-computations.cc.

References VectorBase< Real >::Dim(), rnnlm::i, and M_PI.

Referenced by MfccComputer::MfccComputer(), and PlpComputer::PlpComputer().

                                                                      {
   // Compute liftering coefficients (scaling on cepstral coeffs)
   // coeffs are numbered slightly differently from HTK: the zeroth
   // index is C0, which is not affected.
   for (int32 i = 0; i < coeffs->Dim(); i++)
     (*coeffs)(i) = 1.0 + 0.5 * Q * sin (M_PI * i / Q);
 }

◆ ComputeLpc()

BaseFloat ComputeLpc	(	const VectorBase< BaseFloat > &	autocorr_in,
		Vector< BaseFloat > *	lpc_out
	)

Definition at line 326 of file mel-computations.cc.

References VectorBase< Real >::Data(), VectorBase< Real >::Dim(), kaldi::Durbin(), KALDI_ASSERT, KALDI_WARN, kaldi::Log(), and rnnlm::n.

Referenced by PlpComputer::Compute().

                                                  {
   int32 n = autocorr_in.Dim() - 1;
   KALDI_ASSERT(lpc_out->Dim() == n);
   Vector<BaseFloat> tmp(n);
   BaseFloat ans = Durbin(n, autocorr_in.Data(),
                          lpc_out->Data(),
                          tmp.Data());
   if (ans <= 0.0)
     KALDI_WARN << "Zero energy in LPC computation";
   return -Log(1.0 / ans);  // forms the C0 value
 }

◆ ComputePowerSpectrum()

void ComputePowerSpectrum ( VectorBase< BaseFloat > * waveform )

Definition at line 29 of file feature-functions.cc.

References VectorBase< Real >::Dim(), and rnnlm::i.

Referenced by SpectrogramComputer::Compute(), MfccComputer::Compute(), FbankComputer::Compute(), and PlpComputer::Compute().

                                                            {
   int32 dim = waveform->Dim();
 
   // no, letting it be non-power-of-two for now.
   // KALDI_ASSERT(dim > 0 && (dim & (dim-1) == 0));  // make sure a power of two.. actually my FFT code
   // does not require this (dan) but this is better in case we use different code [dan].
 
   // RealFft(waveform, true);  // true == forward (not inverse) FFT; makes no difference here,
   // as we just want power spectrum.
 
   // now we have in waveform, first half of complex spectrum
   // it's stored as [real0, realN/2, real1, im1, real2, im2, ...]
   int32 half_dim = dim/2;
   BaseFloat first_energy = (*waveform)(0) * (*waveform)(0),
       last_energy = (*waveform)(1) * (*waveform)(1);  // handle this special case
   for (int32 i = 1; i < half_dim; i++) {
     BaseFloat real = (*waveform)(i*2), im = (*waveform)(i*2 + 1);
     (*waveform)(i) = real*real + im*im;
   }
   (*waveform)(0) = first_energy;
   (*waveform)(half_dim) = last_energy;  // Will actually never be used, and anyway
   // if the signal has been bandlimited sensibly this should be zero.
 }

◆ ComputeShiftedDeltas()

void ComputeShiftedDeltas	(	const ShiftedDeltaFeaturesOptions &	delta_opts,
		const MatrixBase< BaseFloat > &	input_features,
		Matrix< BaseFloat > *	output_features
	)

Definition at line 173 of file feature-functions.cc.

References ShiftedDeltaFeaturesOptions::num_blocks, MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), ShiftedDeltaFeatures::Process(), and Matrix< Real >::Resize().

Referenced by main(), UnitTestCompareWithDeltaFeatures(), UnitTestEndEffects(), and UnitTestParams().

                                                        {
   output_features->Resize(input_features.NumRows(),
                           input_features.NumCols()
                           * (delta_opts.num_blocks + 1));
   ShiftedDeltaFeatures delta(delta_opts);
 
   for (int32 r = 0; r < static_cast<int32>(input_features.NumRows()); r++) {
     SubVector<BaseFloat> row(*output_features, r);
     delta.Process(input_features, r, &row);
   }
 }

◆ Dither()

void Dither	(	VectorBase< BaseFloat > *	waveform,
		BaseFloat	dither_value
	)

Definition at line 90 of file feature-window.cc.

References VectorBase< Real >::Data(), VectorBase< Real >::Dim(), rnnlm::i, and kaldi::RandGauss().

Referenced by kaldi::ProcessWindow().

                                                                      {
   if (dither_value == 0.0)
     return;
   int32 dim = waveform->Dim();
   BaseFloat *data = waveform->Data();
   RandomState rstate;
   for (int32 i = 0; i < dim; i++)
     data[i] += RandGauss(&rstate) * dither_value;
 }

◆ DownsampleWaveForm()

void kaldi::DownsampleWaveForm	(	BaseFloat	orig_freq,
		const VectorBase< BaseFloat > &	wave,
		BaseFloat	new_freq,
		Vector< BaseFloat > *	new_wave
	)

inline

This function is deprecated.

It is provided for backward compatibility, to avoid breaking older code.

Definition at line 279 of file resample.h.

References kaldi::ResampleWaveform().

                                                                                 {
   ResampleWaveform(orig_freq, wave, new_freq, new_wave);
 }

◆ Durbin()

BaseFloat Durbin	(	int	n,
		const BaseFloat *	pAC,
		BaseFloat *	pLP,
		BaseFloat *	pTmp
	)

Definition at line 267 of file mel-computations.cc.

References rnnlm::i, rnnlm::j, and rnnlm::n.

Referenced by kaldi::ComputeLpc().

                                                                                {
   BaseFloat ki;                // reflection coefficient
   int i;
   int j;
 
   BaseFloat E = pAC[0];
 
   for (i = 0; i < n; i++) {
     // next reflection coefficient
     ki = pAC[i + 1];
     for (j = 0; j < i; j++)
       ki += pLP[j] * pAC[i - j];
     ki = ki / E;
 
     // new error
     BaseFloat c = 1 - ki * ki;
     if (c < 1.0e-5) // remove NaNs for constan signal
       c = 1.0e-5;
     E *= c;
 
     // new LP coefficients
     pTmp[i] = -ki;
     for (j = 0; j < i; j++)
       pTmp[j] = pLP[j] - ki * pLP[i - j - 1];
 
     for (j = 0; j <= i; j++)
       pLP[j] = pTmp[j];
   }
 
   return E;
 }

◆ ExtractWindow()

void ExtractWindow	(	int64	sample_offset,
		const VectorBase< BaseFloat > &	wave,
		int32	f,
		const FrameExtractionOptions &	opts,
		const FeatureWindowFunction &	window_function,
		Vector< BaseFloat > *	window,
		BaseFloat *	log_energy_pre_window
	)

Definition at line 166 of file feature-window.cc.

References VectorBase< Real >::Dim(), kaldi::FirstSampleOfFrame(), KALDI_ASSERT, kaldi::kUndefined, FrameExtractionOptions::PaddedWindowSize(), kaldi::ProcessWindow(), VectorBase< Real >::Range(), Vector< Real >::Resize(), FrameExtractionOptions::snip_edges, and FrameExtractionOptions::WindowSize().

Referenced by OfflineFeatureTpl< F >::Compute(), and OnlineGenericBaseFeature< C >::ComputeFeatures().

                                                      {
   KALDI_ASSERT(sample_offset >= 0 && wave.Dim() != 0);
   int32 frame_length = opts.WindowSize(),
       frame_length_padded = opts.PaddedWindowSize();
   int64 num_samples = sample_offset + wave.Dim(),
       start_sample = FirstSampleOfFrame(f, opts),
       end_sample = start_sample + frame_length;
 
   if (opts.snip_edges) {
     KALDI_ASSERT(start_sample >= sample_offset &&
                  end_sample <= num_samples);
   } else {
     KALDI_ASSERT(sample_offset == 0 || start_sample >= sample_offset);
   }
 
   if (window->Dim() != frame_length_padded)
     window->Resize(frame_length_padded, kUndefined);
 
   // wave_start and wave_end are start and end indexes into 'wave', for the
   // piece of wave that we're trying to extract.
   int32 wave_start = int32(start_sample - sample_offset),
       wave_end = wave_start + frame_length;
   if (wave_start >= 0 && wave_end <= wave.Dim()) {
     // the normal case-- no edge effects to consider.
     window->Range(0, frame_length).CopyFromVec(
         wave.Range(wave_start, frame_length));
   } else {
     // Deal with any end effects by reflection, if needed.  This code will only
     // be reached for about two frames per utterance, so we don't concern
     // ourselves excessively with efficiency.
     int32 wave_dim = wave.Dim();
     for (int32 s = 0; s < frame_length; s++) {
       int32 s_in_wave = s + wave_start;
       while (s_in_wave < 0 || s_in_wave >= wave_dim) {
         // reflect around the beginning or end of the wave.
         // e.g. -1 -> 0, -2 -> 1.
         // dim -> dim - 1, dim + 1 -> dim - 2.
         // the code supports repeated reflections, although this
         // would only be needed in pathological cases.
         if (s_in_wave < 0) s_in_wave = - s_in_wave - 1;
         else s_in_wave = 2 * wave_dim - 1 - s_in_wave;
       }
       (*window)(s) = wave(s_in_wave);
     }
   }
 
   if (frame_length_padded > frame_length)
     window->Range(frame_length, frame_length_padded - frame_length).SetZero();
 
   SubVector<BaseFloat> frame(*window, 0, frame_length);
 
   ProcessWindow(opts, window_function, &frame, log_energy_pre_window);
 }

◆ FirstSampleOfFrame()

int64 FirstSampleOfFrame	(	int32	frame,
		const FrameExtractionOptions &	opts
	)

Definition at line 30 of file feature-window.cc.

References FrameExtractionOptions::snip_edges, FrameExtractionOptions::WindowShift(), and FrameExtractionOptions::WindowSize().

Referenced by OnlineGenericBaseFeature< C >::ComputeFeatures(), kaldi::ExtractWindow(), and kaldi::NumFrames().

                                                              {
   int64 frame_shift = opts.WindowShift();
   if (opts.snip_edges) {
     return frame * frame_shift;
   } else {
     int64 midpoint_of_frame = frame_shift * frame  +  frame_shift / 2,
         beginning_of_frame = midpoint_of_frame  -  opts.WindowSize() / 2;
     return beginning_of_frame;
   }
 }

◆ GetEqualLoudnessVector()

void GetEqualLoudnessVector	(	const MelBanks &	mel_banks,
		Vector< BaseFloat > *	ans
	)

Definition at line 311 of file mel-computations.cc.

References MelBanks::GetCenterFreqs(), rnnlm::i, rnnlm::n, MelBanks::NumBins(), and Vector< Real >::Resize().

Referenced by PlpComputer::GetEqualLoudness().

                                                     {
   int32 n = mel_banks.NumBins();
   // Central frequency of each mel bin.
   const Vector<BaseFloat> &f0 = mel_banks.GetCenterFreqs();
   ans->Resize(n);
   for (int32 i = 0; i < n; i++) {
     BaseFloat fsq = f0(i) * f0(i);
     BaseFloat fsub = fsq / (fsq + 1.6e5);
     (*ans)(i) = fsub * fsub * ((fsq + 1.44e6) / (fsq + 9.61e6));
   }
 }

◆ InitIdftBases()

void InitIdftBases	(	int32	n_bases,
		int32	dimension,
		Matrix< BaseFloat > *	mat_out
	)

Definition at line 188 of file feature-functions.cc.

References rnnlm::i, rnnlm::j, M_PI, and Matrix< Real >::Resize().

Referenced by PlpComputer::PlpComputer().

                                                                                {
   BaseFloat angle = M_PI / static_cast<BaseFloat>(dimension - 1);
   BaseFloat scale = 1.0f / (2.0 * static_cast<BaseFloat>(dimension - 1));
   mat_out->Resize(n_bases, dimension);
   for (int32 i = 0; i < n_bases; i++) {
     (*mat_out)(i, 0) = 1.0 * scale;
     BaseFloat i_fl = static_cast<BaseFloat>(i);
     for (int32 j = 1; j < dimension - 1; j++) {
       BaseFloat j_fl = static_cast<BaseFloat>(j);
       (*mat_out)(i, j) = 2.0 * scale * cos(angle * i_fl * j_fl);
     }
 
     (*mat_out)(i, dimension -1)
         = scale * cos(angle * i_fl * static_cast<BaseFloat>(dimension-1));
   }
 }

◆ Lpc2Cepstrum()

void Lpc2Cepstrum	(	int	n,
		const BaseFloat *	pLPC,
		BaseFloat *	pCepst
	)

Definition at line 300 of file mel-computations.cc.

References rnnlm::i, rnnlm::j, and rnnlm::n.

Referenced by PlpComputer::Compute().

                                                                    {
   for (int32 i = 0; i < n; i++) {
     double sum = 0.0;
     int j;
     for (j = 0; j < i; j++) {
       sum += static_cast<BaseFloat>(i - j) * pLPC[j] * pCepst[i - j - 1];
     }
     pCepst[i] = -pLPC[i] - sum / static_cast<BaseFloat>(i + 1);
   }
 }

◆ NumFrames()

int32 NumFrames	(	int64	num_samples,
		const FrameExtractionOptions &	opts,
		bool	flush = `true`
	)

This function returns the number of frames that we can extract from a wave file with the given number of samples in it (assumed to have the same sampling rate as specified in 'opts').

Parameters

[in]	num_samples	The number of samples in the wave file.
[in]	opts	The frame-extraction options class
[in]	flush	True if we are asserting that this number of samples is 'all there is', false if we are expecting more data to possibly come in. This only makes a difference to the answer if opts.snip_edges == false. For offline feature extraction you always want flush == true. In an online-decoding context, once you know (or decide) that no more data is coming in, you'd call it with flush == true at the end to flush out any remaining data.

Definition at line 42 of file feature-window.cc.

References kaldi::FirstSampleOfFrame(), FrameExtractionOptions::snip_edges, FrameExtractionOptions::WindowShift(), and FrameExtractionOptions::WindowSize().

Referenced by OfflineFeatureTpl< F >::Compute(), OnlineFeInput< E >::Compute(), OnlineGenericBaseFeature< C >::ComputeFeatures(), and OnlineIvectorFeature::PrintDiagnostics().

                             {
   int64 frame_shift = opts.WindowShift();
   int64 frame_length = opts.WindowSize();
   if (opts.snip_edges) {
     // with --snip-edges=true (the default), we use a HTK-like approach to
     // determining the number of frames-- all frames have to fit completely into
     // the waveform, and the first frame begins at sample zero.
     if (num_samples < frame_length)
       return 0;
     else
       return (1 + ((num_samples - frame_length) / frame_shift));
     // You can understand the expression above as follows: 'num_samples -
     // frame_length' is how much room we have to shift the frame within the
     // waveform; 'frame_shift' is how much we shift it each time; and the ratio
     // is how many times we can shift it (integer arithmetic rounds down).
   } else {
     // if --snip-edges=false, the number of frames is determined by rounding the
     // (file-length / frame-shift) to the nearest integer.  The point of this
     // formula is to make the number of frames an obvious and predictable
     // function of the frame shift and signal length, which makes many
     // segmentation-related questions simpler.
     //
     // Because integer division in C++ rounds toward zero, we add (half the
     // frame-shift minus epsilon) before dividing, to have the effect of
     // rounding towards the closest integer.
     int32 num_frames = (num_samples + (frame_shift / 2)) / frame_shift;
 
     if (flush)
       return num_frames;
 
     // note: 'end' always means the last plus one, i.e. one past the last.
     int64 end_sample_of_last_frame = FirstSampleOfFrame(num_frames - 1, opts)
         + frame_length;
 
     // the following code is optimized more for clarity than efficiency.
     // If flush == false, we can't output frames that extend past the end
     // of the signal.
     while (num_frames > 0 && end_sample_of_last_frame > num_samples) {
       num_frames--;
       end_sample_of_last_frame -= frame_shift;
     }
     return num_frames;
   }
 }

◆ Preemphasize()

void Preemphasize	(	VectorBase< BaseFloat > *	waveform,
		BaseFloat	preemph_coeff
	)

Definition at line 101 of file feature-window.cc.

References VectorBase< Real >::Dim(), rnnlm::i, and KALDI_ASSERT.

Referenced by kaldi::ProcessWindow().

                                                                             {
   if (preemph_coeff == 0.0) return;
   KALDI_ASSERT(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);
   for (int32 i = waveform->Dim()-1; i > 0; i--)
     (*waveform)(i) -= preemph_coeff * (*waveform)(i-1);
   (*waveform)(0) -= preemph_coeff * (*waveform)(0);
 }

◆ ProcessPitch()

void ProcessPitch	(	const ProcessPitchOptions &	opts,
		const MatrixBase< BaseFloat > &	input,
		Matrix< BaseFloat > *	output
	)

This function processes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch, and processes them into features.

By default it will output three-dimensional features, (POV-feature, mean-subtracted-log-pitch, delta-of-raw-pitch), but this is configurable in the options. The number of rows of "output" will be the number of frames (rows) in "input", and the number of columns will be the number of different types of features requested (by default, 3; 4 is the max). The four config variables --add-pov-feature, --add-normalized-log-pitch, --add-delta-pitch, --add-raw-log-pitch determine which features we create; by default we create the first three.

Definition at line 1581 of file pitch-functions.cc.

References OnlineProcessPitch::Dim(), OnlineProcessPitch::GetFrame(), OnlineProcessPitch::NumFramesReady(), and Matrix< Real >::Resize().

Referenced by main(), kaldi::UnitTestPieces(), and kaldi::UnitTestProcess().

                                              {
   OnlineMatrixFeature pitch_feat(input);
 
   OnlineProcessPitch online_process_pitch(opts, &pitch_feat);
 
   output->Resize(online_process_pitch.NumFramesReady(),
                  online_process_pitch.Dim());
   for (int32 t = 0; t < online_process_pitch.NumFramesReady(); t++) {
     SubVector<BaseFloat> row(*output, t);
     online_process_pitch.GetFrame(t, &row);
   }
 }

◆ ProcessWindow()

void ProcessWindow	(	const FrameExtractionOptions &	opts,
		const FeatureWindowFunction &	window_function,
		VectorBase< BaseFloat > *	window,
		BaseFloat *	log_energy_pre_window = `NULL`
	)

This function does all the windowing steps after actually extracting the windowed signal: depending on the configuration, it does dithering, dc offset removal, preemphasis, and multiplication by the windowing function.

Parameters

[in]	opts	The options class to be used
[in]	window_function	The windowing function; it should have been initialized using 'opts'.
[in,out]	window	A vector of size opts.WindowSize(). Note: it will typically be a sub-vector of a larger vector of size opts.PaddedWindowSize(), with the remaining samples zero, as the FFT code is more efficient if it operates on data with power-of-two size.
[out]	log_energy_pre_window	If non-NULL, then after dithering and DC offset removal, this function will write to this pointer the log of the total energy (i.e. sum-squared) of the frame.

Definition at line 137 of file feature-window.cc.

References VectorBase< Real >::Add(), VectorBase< Real >::Dim(), FrameExtractionOptions::dither, kaldi::Dither(), KALDI_ASSERT, kaldi::Log(), VectorBase< Real >::MulElements(), FrameExtractionOptions::preemph_coeff, kaldi::Preemphasize(), FrameExtractionOptions::remove_dc_offset, VectorBase< Real >::Sum(), kaldi::VecVec(), FeatureWindowFunction::window, and FrameExtractionOptions::WindowSize().

Referenced by kaldi::ExtractWindow().

                                                      {
   // Applies, in order: optional dither, optional DC-offset removal,
   // optional log-energy measurement, optional preemphasis, and finally
   // multiplication by the window function. 'window' is modified in place.
   int32 frame_length = opts.WindowSize();
   KALDI_ASSERT(window->Dim() == frame_length);
 
   if (opts.dither != 0.0)
     Dither(window, opts.dither);
 
   // Subtract the mean of the frame from every sample.
   if (opts.remove_dc_offset)
     window->Add(-window->Sum() / frame_length);
 
   // The energy is taken here, i.e. after dithering and DC removal but
   // before preemphasis and windowing. It is floored at float epsilon so
   // that Log() is never applied to zero.
   if (log_energy_pre_window != NULL) {
     BaseFloat energy = std::max<BaseFloat>(VecVec(*window, *window),
                                 std::numeric_limits<float>::epsilon());
     *log_energy_pre_window = Log(energy);
   }
 
   if (opts.preemph_coeff != 0.0)
     Preemphasize(window, opts.preemph_coeff);
 
   // Element-wise multiply by the precomputed window coefficients.
   window->MulElements(window_function.window);
 }

◆ ResampleWaveform()

void ResampleWaveform	(	BaseFloat	orig_freq,
		const VectorBase< BaseFloat > &	wave,
		BaseFloat	new_freq,
		Vector< BaseFloat > *	new_wave
	)

Downsample or upsample a waveform.

This is a convenience wrapper for the class 'LinearResample'. The low-pass filter cutoff used in 'LinearResample' is 0.99 of the Nyquist, where the Nyquist is half of the minimum of (orig_freq, new_freq). The resampling is done with a symmetric FIR filter with N_z (number of zeros) as 6.

We compared the downsampling results with those from the sox resampling toolkit. Sox's design is inspired by Laurent De Soras' paper, https://ccrma.stanford.edu/~jos/resample/Implementation.html

Note: we expect that while orig_freq and new_freq are of type BaseFloat, they are actually required to have exact integer values (like 16000 or 8000) with a ratio between them that can be expressed as a rational number with reasonably small integer factors.

Definition at line 368 of file resample.cc.

References LinearResample::Resample().

Referenced by OfflineFeatureTpl< F >::ComputeFeatures(), and kaldi::DownsampleWaveForm().

                                                                        {
   // One-shot convenience wrapper around LinearResample.
   // The cutoff is 0.99 of the Nyquist frequency (half the sample rate) of
   // whichever of the two rates is lower.
   BaseFloat min_freq = std::min(orig_freq, new_freq);
   BaseFloat lowpass_cutoff = 0.99 * 0.5 * min_freq;
   // Filter width passed to LinearResample (documented above as N_z = 6).
   int32 lowpass_filter_width = 6;
   LinearResample resampler(orig_freq, new_freq,
                            lowpass_cutoff, lowpass_filter_width);
   // NOTE(review): the 'true' argument is presumably a "flush" flag, since
   // the entire signal is supplied in this single call -- confirm against
   // LinearResample::Resample().
   resampler.Resample(wave, true, new_wave);
 }

◆ ReverseFrames()

void ReverseFrames	(	const MatrixBase< BaseFloat > &	input_features,
		Matrix< BaseFloat > *	output_features
	)

Definition at line 228 of file feature-functions.cc.

References VectorBase< Real >::CopyFromVec(), KALDI_ERR, MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), and Matrix< Real >::Resize().

                                                        {
   // Writes the rows of 'input_features' into 'output_features' in
   // reverse order. An input with zero rows or zero columns is reported
   // via KALDI_ERR.
   int32 T = input_features.NumRows(), D = input_features.NumCols();
   if (T == 0 || D == 0)
     KALDI_ERR << "ReverseFrames: empty input";
   output_features->Resize(T, D);
   for (int32 t = 0; t < T; t++) {
     // Output row t is a copy of input row T-1-t.
     SubVector<BaseFloat> dst_row(*output_features, t);
     SubVector<BaseFloat> src_row(input_features, T-1-t);
     dst_row.CopyFromVec(src_row);
   }
 }

◆ SlidingWindowCmn()

void SlidingWindowCmn	(	const SlidingWindowCmnOptions &	opts,
		const MatrixBase< BaseFloat > &	input,
		MatrixBase< BaseFloat > *	output
	)

Applies sliding-window cepstral mean and/or variance normalization.

See the strings registering the options in the options class for information on how this works and what the options are. input and output must have the same dimension.

Definition at line 350 of file feature-functions.cc.

References MatrixBase< Real >::CopyFromMat(), KALDI_ASSERT, MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), kaldi::SameDim(), and kaldi::SlidingWindowCmnInternal().

Referenced by main(), SlidingWindowCmnOptions::Register(), and kaldi::UnitTestOnlineCmvn().

                                                      {
   // BaseFloat front end for SlidingWindowCmnInternal: the work is done on
   // double-precision copies and the result is copied back to 'output'.
   // 'input' and '*output' must have the same dimensions, and 'input' must
   // have at least one row.
   KALDI_ASSERT(SameDim(input, *output) && input.NumRows() > 0);
   // input_dbl is a double copy of 'input'; output_dbl is a double matrix
   // of the same size that receives the result.
   Matrix<double> input_dbl(input), output_dbl(input.NumRows(), input.NumCols());
   // call double-precision version
   SlidingWindowCmnInternal(opts, input_dbl, &output_dbl);
   output->CopyFromMat(output_dbl);
 }

◆ SpliceFrames()

void SpliceFrames	(	const MatrixBase< BaseFloat > &	input_features,
		int32	left_context,
		int32	right_context,
		Matrix< BaseFloat > *	output_features
	)

Definition at line 205 of file feature-functions.cc.

References rnnlm::j, KALDI_ASSERT, KALDI_ERR, MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), and Matrix< Real >::Resize().

Referenced by OnlineLdaInput::Dim(), main(), kaldi::TestOnlineLdaInput(), and kaldi::TestOnlineSpliceFrames().

                                                       {
   // Builds, for each frame t, one output row that concatenates the
   // frames t-left_context .. t+right_context of 'input_features'.
   // An input with zero rows or zero columns is reported via KALDI_ERR.
   int32 T = input_features.NumRows(), D = input_features.NumCols();
   if (T == 0 || D == 0)
     KALDI_ERR << "SpliceFrames: empty input";
   KALDI_ASSERT(left_context >= 0 && right_context >= 0);
   // N = number of frames spliced into each output row; each output row
   // therefore has D*N columns.
   int32 N = 1 + left_context + right_context;
   output_features->Resize(T, D*N);
   for (int32 t = 0; t < T; t++) {
     SubVector<BaseFloat> dst_row(*output_features, t);
     for (int32 j = 0; j < N; j++) {
       // Source frame index, clamped to [0, T-1]: context that falls
       // outside the input repeats the first or last frame.
       int32 t2 = t + j - left_context;
       if (t2 < 0) t2 = 0;
       if (t2 >= T) t2 = T-1;
       // 'dst' is the j'th D-wide slice of the output row.
       SubVector<BaseFloat> dst(dst_row, j*D, D),
           src(input_features, t2);
       dst.CopyFromVec(src);
     }
   }
 }

Files

Classes

Typedefs

Functions