resample.cc
Go to the documentation of this file.
1 // feat/resample.cc
2 
3 // Copyright 2013 Pegah Ghahremani
4 // 2014 IMSL, PKU-HKUST (author: Wei Shi)
5 // 2014 Yanqing Sun, Junjie Wang
6 // 2014 Johns Hopkins University (author: Daniel Povey)
7 
8 // See ../../COPYING for clarification regarding multiple authors
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
18 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
19 // MERCHANTABLITY OR NON-INFRINGEMENT.
20 // See the Apache 2 License for the specific language governing permissions and
21 // limitations under the License.
22 
23 
24 #include <algorithm>
25 #include <limits>
26 #include "feat/feature-functions.h"
28 #include "feat/resample.h"
29 
30 namespace kaldi {
31 
32 
34  int32 samp_rate_out_hz,
35  BaseFloat filter_cutoff_hz,
36  int32 num_zeros):
37  samp_rate_in_(samp_rate_in_hz),
38  samp_rate_out_(samp_rate_out_hz),
39  filter_cutoff_(filter_cutoff_hz),
40  num_zeros_(num_zeros) {
41  KALDI_ASSERT(samp_rate_in_hz > 0.0 &&
42  samp_rate_out_hz > 0.0 &&
43  filter_cutoff_hz > 0.0 &&
44  filter_cutoff_hz*2 <= samp_rate_in_hz &&
45  filter_cutoff_hz*2 <= samp_rate_out_hz &&
46  num_zeros > 0);
47 
48  // base_freq is the frequency of the repeating unit, which is the gcd
49  // of the input frequencies.
50  int32 base_freq = Gcd(samp_rate_in_, samp_rate_out_);
53 
55  Reset();
56 }
57 
58 int64 LinearResample::GetNumOutputSamples(int64 input_num_samp,
59  bool flush) const {
60  // For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
61  // where tick_freq is the least common multiple of samp_rate_in_ and
62  // samp_rate_out_.
63  int32 tick_freq = Lcm(samp_rate_in_, samp_rate_out_);
64  int32 ticks_per_input_period = tick_freq / samp_rate_in_;
65 
66  // work out the number of ticks in the time interval
67  // [ 0, input_num_samp/samp_rate_in_ ).
68  int64 interval_length_in_ticks = input_num_samp * ticks_per_input_period;
69  if (!flush) {
70  BaseFloat window_width = num_zeros_ / (2.0 * filter_cutoff_);
71  // To count the window-width in ticks we take the floor. This
72  // is because since we're looking for the largest integer num-out-samp
73  // that fits in the interval, which is open on the right, a reduction
74  // in interval length of less than a tick will never make a difference.
75  // For example, the largest integer in the interval [ 0, 2 ) and the
76  // largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one).
77  // So when we're subtracting the window-width we can ignore the fractional
78  // part.
79  int32 window_width_ticks = floor(window_width * tick_freq);
80  // The time-period of the output that we can sample gets reduced
81  // by the window-width (which is actually the distance from the
82  // center to the edge of the windowing function) if we're not
83  // "flushing the output".
84  interval_length_in_ticks -= window_width_ticks;
85  }
86  if (interval_length_in_ticks <= 0)
87  return 0;
88  int32 ticks_per_output_period = tick_freq / samp_rate_out_;
89  // Get the last output-sample in the closed interval, i.e. replacing [ ) with
90  // [ ]. Note: integer division rounds down. See
91  // http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of
92  // the notation.
93  int64 last_output_samp = interval_length_in_ticks / ticks_per_output_period;
94  // We need the last output-sample in the open interval, so if it takes us to
95  // the end of the interval exactly, subtract one.
96  if (last_output_samp * ticks_per_output_period == interval_length_in_ticks)
97  last_output_samp--;
98  // First output-sample index is zero, so the number of output samples
99  // is the last output-sample plus one.
100  int64 num_output_samp = last_output_samp + 1;
101  return num_output_samp;
102 }
103 
107 
108  double window_width = num_zeros_ / (2.0 * filter_cutoff_);
109 
110  for (int32 i = 0; i < output_samples_in_unit_; i++) {
111  double output_t = i / static_cast<double>(samp_rate_out_);
112  double min_t = output_t - window_width, max_t = output_t + window_width;
113  // we do ceil on the min and floor on the max, because if we did it
114  // the other way around we would unnecessarily include indexes just
115  // outside the window, with zero coefficients. It's possible
116  // if the arguments to the ceil and floor expressions are integers
117  // (e.g. if filter_cutoff_ has an exact ratio with the sample rates),
118  // that we unnecessarily include something with a zero coefficient,
119  // but this is only a slight efficiency issue.
120  int32 min_input_index = ceil(min_t * samp_rate_in_),
121  max_input_index = floor(max_t * samp_rate_in_),
122  num_indices = max_input_index - min_input_index + 1;
123  first_index_[i] = min_input_index;
124  weights_[i].Resize(num_indices);
125  for (int32 j = 0; j < num_indices; j++) {
126  int32 input_index = min_input_index + j;
127  double input_t = input_index / static_cast<double>(samp_rate_in_),
128  delta_t = input_t - output_t;
129  // sign of delta_t doesn't matter.
130  weights_[i](j) = FilterFunc(delta_t) / samp_rate_in_;
131  }
132  }
133 }
134 
135 
136 // inline
137 void LinearResample::GetIndexes(int64 samp_out,
138  int64 *first_samp_in,
139  int32 *samp_out_wrapped) const {
140  // A unit is the smallest nonzero amount of time that is an exact
141  // multiple of the input and output sample periods. The unit index
142  // is the answer to "which numbered unit we are in".
143  int64 unit_index = samp_out / output_samples_in_unit_;
144  // samp_out_wrapped is equal to samp_out % output_samples_in_unit_
145  *samp_out_wrapped = static_cast<int32>(samp_out -
146  unit_index * output_samples_in_unit_);
147  *first_samp_in = first_index_[*samp_out_wrapped] +
148  unit_index * input_samples_in_unit_;
149 }
150 
151 
153  bool flush,
154  Vector<BaseFloat> *output) {
155  int32 input_dim = input.Dim();
156  int64 tot_input_samp = input_sample_offset_ + input_dim,
157  tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
158 
159  KALDI_ASSERT(tot_output_samp >= output_sample_offset_);
160 
161  output->Resize(tot_output_samp - output_sample_offset_);
162 
163  // samp_out is the index into the total output signal, not just the part
164  // of it we are producing here.
165  for (int64 samp_out = output_sample_offset_;
166  samp_out < tot_output_samp;
167  samp_out++) {
168  int64 first_samp_in;
169  int32 samp_out_wrapped;
170  GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
171  const Vector<BaseFloat> &weights = weights_[samp_out_wrapped];
172  // first_input_index is the first index into "input" that we have a weight
173  // for.
174  int32 first_input_index = static_cast<int32>(first_samp_in -
176  BaseFloat this_output;
177  if (first_input_index >= 0 &&
178  first_input_index + weights.Dim() <= input_dim) {
179  SubVector<BaseFloat> input_part(input, first_input_index, weights.Dim());
180  this_output = VecVec(input_part, weights);
181  } else { // Handle edge cases.
182  this_output = 0.0;
183  for (int32 i = 0; i < weights.Dim(); i++) {
184  BaseFloat weight = weights(i);
185  int32 input_index = first_input_index + i;
186  if (input_index < 0 && input_remainder_.Dim() + input_index >= 0) {
187  this_output += weight *
188  input_remainder_(input_remainder_.Dim() + input_index);
189  } else if (input_index >= 0 && input_index < input_dim) {
190  this_output += weight * input(input_index);
191  } else if (input_index >= input_dim) {
192  // We're past the end of the input and are adding zero; should only
193  // happen if the user specified flush == true, or else we would not
194  // be trying to output this sample.
195  KALDI_ASSERT(flush);
196  }
197  }
198  }
199  int32 output_index = static_cast<int32>(samp_out - output_sample_offset_);
200  (*output)(output_index) = this_output;
201  }
202 
203  if (flush) {
204  Reset(); // Reset the internal state.
205  } else {
206  SetRemainder(input);
207  input_sample_offset_ = tot_input_samp;
208  output_sample_offset_ = tot_output_samp;
209  }
210 }
211 
213  Vector<BaseFloat> old_remainder(input_remainder_);
214  // max_remainder_needed is the width of the filter from side to side,
215  // measured in input samples. you might think it should be half that,
216  // but you have to consider that you might be wanting to output samples
217  // that are "in the past" relative to the beginning of the latest
218  // input... anyway, storing more remainder than needed is not harmful.
219  int32 max_remainder_needed = ceil(samp_rate_in_ * num_zeros_ /
221  input_remainder_.Resize(max_remainder_needed);
222  for (int32 index = - input_remainder_.Dim(); index < 0; index++) {
223  // we interpret "index" as an offset from the end of "input" and
224  // from the end of input_remainder_.
225  int32 input_index = index + input.Dim();
226  if (input_index >= 0)
227  input_remainder_(index + input_remainder_.Dim()) = input(input_index);
228  else if (input_index + old_remainder.Dim() >= 0)
229  input_remainder_(index + input_remainder_.Dim()) =
230  old_remainder(input_index + old_remainder.Dim());
231  // else leave it at zero.
232  }
233 }
234 
238  input_remainder_.Resize(0);
239 }
240 
247  BaseFloat window, // raised-cosine (Hanning) window of width
248  // num_zeros_/2*filter_cutoff_
249  filter; // sinc filter function
250  if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
251  window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
252  else
253  window = 0.0; // outside support of window function
254  if (t != 0)
255  filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
256  else
257  filter = 2 * filter_cutoff_; // limit of the function at t = 0
258  return filter * window;
259 }
260 
261 
263  int32 num_samples_in, BaseFloat samp_rate_in,
264  BaseFloat filter_cutoff, const Vector<BaseFloat> &sample_points,
265  int32 num_zeros):
266  num_samples_in_(num_samples_in),
267  samp_rate_in_(samp_rate_in),
268  filter_cutoff_(filter_cutoff),
269  num_zeros_(num_zeros) {
270  KALDI_ASSERT(num_samples_in > 0 && samp_rate_in > 0.0 &&
271  filter_cutoff > 0.0 &&
272  filter_cutoff * 2.0 <= samp_rate_in
273  && num_zeros > 0);
274  // set up weights_ and indices_. Please try to keep all functions short and
275  SetIndexes(sample_points);
276  SetWeights(sample_points);
277 }
278 
279 
281  MatrixBase<BaseFloat> *output) const {
282  // each row of "input" corresponds to the data to resample;
283  // the corresponding row of "output" is the resampled data.
284 
285  KALDI_ASSERT(input.NumRows() == output->NumRows() &&
286  input.NumCols() == num_samples_in_ &&
287  output->NumCols() == weights_.size());
288 
289  Vector<BaseFloat> output_col(output->NumRows());
290  for (int32 i = 0; i < NumSamplesOut(); i++) {
291  SubMatrix<BaseFloat> input_part(input, 0, input.NumRows(),
292  first_index_[i],
293  weights_[i].Dim());
294  const Vector<BaseFloat> &weight_vec(weights_[i]);
295  output_col.AddMatVec(1.0, input_part,
296  kNoTrans, weight_vec, 0.0);
297  output->CopyColFromVec(output_col, i);
298  }
299 }
300 
302  VectorBase<BaseFloat> *output) const {
303  KALDI_ASSERT(input.Dim() == num_samples_in_ &&
304  output->Dim() == weights_.size());
305 
306  int32 output_dim = output->Dim();
307  for (int32 i = 0; i < output_dim; i++) {
308  SubVector<BaseFloat> input_part(input, first_index_[i], weights_[i].Dim());
309  (*output)(i) = VecVec(input_part, weights_[i]);
310  }
311 }
312 
314  int32 num_samples = sample_points.Dim();
315  first_index_.resize(num_samples);
316  weights_.resize(num_samples);
317  BaseFloat filter_width = num_zeros_ / (2.0 * filter_cutoff_);
318  for (int32 i = 0; i < num_samples; i++) {
319  // the t values are in seconds.
320  BaseFloat t = sample_points(i),
321  t_min = t - filter_width, t_max = t + filter_width;
322  int32 index_min = ceil(samp_rate_in_ * t_min),
323  index_max = floor(samp_rate_in_ * t_max);
324  // the ceil on index min and the floor on index_max are because there
325  // is no point using indices just outside the window (coeffs would be zero).
326  if (index_min < 0)
327  index_min = 0;
328  if (index_max >= num_samples_in_)
329  index_max = num_samples_in_ - 1;
330  first_index_[i] = index_min;
331  weights_[i].Resize(index_max - index_min + 1);
332  }
333 }
334 
336  int32 num_samples_out = NumSamplesOut();
337  for (int32 i = 0; i < num_samples_out; i++) {
338  for (int32 j = 0 ; j < weights_[i].Dim(); j++) {
339  BaseFloat delta_t = sample_points(i) -
340  (first_index_[i] + j) / samp_rate_in_;
341  // Include at this point the factor of 1.0 / samp_rate_in_ which
342  // appears in the math.
343  weights_[i](j) = FilterFunc(delta_t) / samp_rate_in_;
344  }
345  }
346 }
347 
354  BaseFloat window, // raised-cosine (Hanning) window of width
355  // num_zeros_/2*filter_cutoff_
356  filter; // sinc filter function
357  if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
358  window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
359  else
360  window = 0.0; // outside support of window function
361  if (t != 0.0)
362  filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
363  else
364  filter = 2.0 * filter_cutoff_; // limit of the function at zero.
365  return filter * window;
366 }
367 
369  BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
370  BaseFloat min_freq = std::min(orig_freq, new_freq);
371  BaseFloat lowpass_cutoff = 0.99 * 0.5 * min_freq;
372  int32 lowpass_filter_width = 6;
373  LinearResample resampler(orig_freq, new_freq,
374  lowpass_cutoff, lowpass_filter_width);
375  resampler.Resample(wave, true, new_wave);
376 }
377 } // namespace kaldi
BaseFloat FilterFunc(BaseFloat) const
Here, t is a time in seconds representing an offset from the center of the windowed filter function...
Definition: resample.cc:246
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Resample(const VectorBase< BaseFloat > &input, bool flush, Vector< BaseFloat > *output)
This function does the resampling.
Definition: resample.cc:152
void CopyColFromVec(const VectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
int64 output_sample_offset_
The number of samples we have already output for this signal.
Definition: resample.h:249
void Reset()
Calling the function Reset() resets the state of the object prior to processing a new signal; it is o...
Definition: resample.cc:235
void ResampleWaveform(BaseFloat orig_freq, const VectorBase< BaseFloat > &wave, BaseFloat new_freq, Vector< BaseFloat > *new_wave)
Downsample or upsample a waveform.
Definition: resample.cc:368
void SetIndexesAndWeights()
Definition: resample.cc:104
#define M_PI
Definition: kaldi-math.h:44
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
int32 output_samples_in_unit_
The number of output samples in the smallest repeating unit: num_samp_out_ = samp_rate_out_hz / Gcd(s...
Definition: resample.h:228
void SetRemainder(const VectorBase< BaseFloat > &input)
Definition: resample.cc:212
int32 NumSamplesOut() const
Definition: resample.h:105
void GetIndexes(int64 samp_out, int64 *first_samp_in, int32 *samp_out_wrapped) const
Given an output-sample index, this function outputs to *first_samp_in the first input-sample index th...
Definition: resample.cc:137
I Gcd(I m, I n)
Definition: kaldi-math.h:297
BaseFloat FilterFunc(BaseFloat t) const
Here, t is a time in seconds representing an offset from the center of the windowed filter function...
Definition: resample.cc:353
kaldi::int32 int32
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
int64 input_sample_offset_
The number of input samples we have already received for this signal (including anything in remainder...
Definition: resample.h:246
Vector< BaseFloat > input_remainder_
A small trailing part of the previously seen input signal.
Definition: resample.h:251
I Lcm(I m, I n)
Returns the least common multiple of two integers.
Definition: kaldi-math.h:318
float BaseFloat
Definition: kaldi-types.h:29
std::vector< int32 > first_index_
The first input-sample index that we sum over, for this output-sample index.
Definition: resample.h:238
void Resample(const MatrixBase< BaseFloat > &input, MatrixBase< BaseFloat > *output) const
This function does the resampling.
Definition: resample.cc:280
void SetIndexes(const Vector< BaseFloat > &sample_points)
Definition: resample.cc:313
std::vector< Vector< BaseFloat > > weights_
Definition: resample.h:133
LinearResample(int32 samp_rate_in_hz, int32 samp_rate_out_hz, BaseFloat filter_cutoff_hz, int32 num_zeros)
Constructor.
Definition: resample.cc:33
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
BaseFloat filter_cutoff_
Definition: resample.h:128
void SetWeights(const Vector< BaseFloat > &sample_points)
Definition: resample.cc:335
int64 GetNumOutputSamples(int64 input_num_samp, bool flush) const
This function outputs the number of output samples we will output for a signal with "input_num_samp" ...
Definition: resample.cc:58
A class representing a vector.
Definition: kaldi-vector.h:406
LinearResample is a special case of ArbitraryResample, where we want to resample a signal at linearly...
Definition: resample.h:147
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
BaseFloat filter_cutoff_
Definition: resample.h:221
ArbitraryResample(int32 num_samples_in, BaseFloat samp_rate_hz, BaseFloat filter_cutoff_hz, const Vector< BaseFloat > &sample_points_secs, int32 num_zeros)
Definition: resample.cc:262
#define M_2PI
Definition: kaldi-math.h:52
int32 input_samples_in_unit_
The number of input samples in the smallest repeating unit: num_samp_in_ = samp_rate_in_hz / Gcd(samp...
Definition: resample.h:224
std::vector< Vector< BaseFloat > > weights_
Weights on the input samples, for this output-sample index.
Definition: resample.h:241
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
std::vector< int32 > first_index_
Definition: resample.h:131