matrix-functions.cc
Go to the documentation of this file.
1 // matrix/matrix-functions.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc.; Jan Silovsky
4 // Yanmin Qian; Saarland University; Johns Hopkins University (Author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 //
21 // (*) incorporates, with permission, FFT code from his book
22 // "Signal Processing with Lapped Transforms", Artech, 1992.
23 
25 #include "matrix/sp-matrix.h"
26 
27 namespace kaldi {
28 
29 template<typename Real> void ComplexFt (const VectorBase<Real> &in,
30  VectorBase<Real> *out, bool forward) {
31  int exp_sign = (forward ? -1 : 1);
32  KALDI_ASSERT(out != NULL);
33  KALDI_ASSERT(in.Dim() == out->Dim());
34  KALDI_ASSERT(in.Dim() % 2 == 0);
35  int twoN = in.Dim(), N = twoN / 2;
36  const Real *data_in = in.Data();
37  Real *data_out = out->Data();
38 
39  Real exp1N_re, exp1N_im; // forward -> exp(-2pi / N), backward -> exp(2pi / N).
40  Real fraction = exp_sign * M_2PI / static_cast<Real>(N); // forward -> -2pi/N, backward->-2pi/N
41  ComplexImExp(fraction, &exp1N_re, &exp1N_im);
42 
43  Real expm_re = 1.0, expm_im = 0.0; // forward -> exp(-2pi m / N).
44 
45  for (int two_m = 0; two_m < twoN; two_m+=2) { // For each output component.
46  Real expmn_re = 1.0, expmn_im = 0.0; // forward -> exp(-2pi m n / N).
47  Real sum_re = 0.0, sum_im = 0.0; // complex output for index m (the sum expression)
48  for (int two_n = 0; two_n < twoN; two_n+=2) {
49  ComplexAddProduct(data_in[two_n], data_in[two_n+1],
50  expmn_re, expmn_im,
51  &sum_re, &sum_im);
52  ComplexMul(expm_re, expm_im, &expmn_re, &expmn_im);
53  }
54  data_out[two_m] = sum_re;
55  data_out[two_m + 1] = sum_im;
56 
57 
58  if (two_m % 10 == 0) { // occasionally renew "expm" from scratch to avoid
59  // loss of precision.
60  int nextm = 1 + two_m/2;
61  Real fraction_mult = fraction * nextm;
62  ComplexImExp(fraction_mult, &expm_re, &expm_im);
63  } else {
64  ComplexMul(exp1N_re, exp1N_im, &expm_re, &expm_im);
65  }
66  }
67 }
68 
69 template
70 void ComplexFt (const VectorBase<float> &in,
71  VectorBase<float> *out, bool forward);
72 template
73 void ComplexFt (const VectorBase<double> &in,
74  VectorBase<double> *out, bool forward);
75 
76 
77 #define KALDI_COMPLEXFFT_BLOCKSIZE 8192
78 // This #define affects how we recurse in ComplexFftRecursive.
79 // We assume that memory-caching happens on a scale at
80 // least as small as this.
81 
82 
94 
95 
96 template<typename Real>
97 void ComplexFftRecursive (Real *data, int nffts, int N,
98  const int *factor_begin,
99  const int *factor_end, bool forward,
100  Vector<Real> *tmp_vec) {
101  if (factor_begin == factor_end) {
102  KALDI_ASSERT(N == 1);
103  return;
104  }
105 
106  { // an optimization: compute in smaller blocks.
107  // this block of code could be removed and it would still work.
108  MatrixIndexT size_perblock = N * 2 * sizeof(Real);
109  if (nffts > 1 && size_perblock*nffts > KALDI_COMPLEXFFT_BLOCKSIZE) { // can break it up...
110  // Break up into multiple blocks. This is an optimization. We make
111  // no progress on the FFT when we do this.
112  int block_skip = KALDI_COMPLEXFFT_BLOCKSIZE / size_perblock; // n blocks per call
113  if (block_skip == 0) block_skip = 1;
114  if (block_skip < nffts) {
115  int blocks_left = nffts;
116  while (blocks_left > 0) {
117  int skip_now = std::min(blocks_left, block_skip);
118  ComplexFftRecursive(data, skip_now, N, factor_begin, factor_end, forward, tmp_vec);
119  blocks_left -= skip_now;
120  data += skip_now * N*2;
121  }
122  return;
123  } // else do the actual algorithm.
124  } // else do the actual algorithm.
125  }
126 
127  int P = *factor_begin;
128  KALDI_ASSERT(P > 1);
129  int Q = N / P;
130 
131 
132  if (P > 1 && Q > 1) { // Do the rearrangement. C.f. eq. (8) below. Transform
133  // (a) to (b).
134  Real *data_thisblock = data;
135  if (tmp_vec->Dim() < (MatrixIndexT)N) tmp_vec->Resize(N);
136  Real *data_tmp = tmp_vec->Data();
137  for (int thisfft = 0; thisfft < nffts; thisfft++, data_thisblock+=N*2) {
138  for (int offset = 0; offset < 2; offset++) { // 0 == real, 1 == im.
139  for (int p = 0; p < P; p++) {
140  for (int q = 0; q < Q; q++) {
141  int aidx = q*P + p, bidx = p*Q + q;
142  data_tmp[bidx] = data_thisblock[2*aidx+offset];
143  }
144  }
145  for (int n = 0;n < P*Q;n++) data_thisblock[2*n+offset] = data_tmp[n];
146  }
147  }
148  }
149 
150  { // Recurse.
151  ComplexFftRecursive(data, nffts*P, Q, factor_begin+1, factor_end, forward, tmp_vec);
152  }
153 
154  int exp_sign = (forward ? -1 : 1);
155  Real rootN_re, rootN_im; // Nth root of unity.
156  ComplexImExp(static_cast<Real>(exp_sign * M_2PI / N), &rootN_re, &rootN_im);
157 
158  Real rootP_re, rootP_im; // Pth root of unity.
159  ComplexImExp(static_cast<Real>(exp_sign * M_2PI / P), &rootP_re, &rootP_im);
160 
161  { // Do the multiplication
162  // could avoid a bunch of complex multiplies by moving the loop over data_thisblock
163  // inside.
164  if (tmp_vec->Dim() < (MatrixIndexT)(P*2)) tmp_vec->Resize(P*2);
165  Real *temp_a = tmp_vec->Data();
166 
167  Real *data_thisblock = data, *data_end = data+(N*2*nffts);
168  for (; data_thisblock != data_end; data_thisblock += N*2) { // for each separate fft.
169  Real qd_re = 1.0, qd_im = 0.0; // 1^(q'/N)
170  for (int qd = 0; qd < Q; qd++) {
171  Real pdQ_qd_re = qd_re, pdQ_qd_im = qd_im; // 1^((p'Q+q') / N) == 1^((p'/P) + (q'/N))
172  // Initialize to q'/N, corresponding to p' == 0.
173  for (int pd = 0; pd < P; pd++) { // pd == p'
174  { // This is the p = 0 case of the loop below [an optimization].
175  temp_a[pd*2] = data_thisblock[qd*2];
176  temp_a[pd*2 + 1] = data_thisblock[qd*2 + 1];
177  }
178  { // This is the p = 1 case of the loop below [an optimization]
179  // **** MOST OF THE TIME (>60% I think) gets spent here. ***
180  ComplexAddProduct(pdQ_qd_re, pdQ_qd_im,
181  data_thisblock[(qd+Q)*2], data_thisblock[(qd+Q)*2 + 1],
182  &(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
183  }
184  if (P > 2) {
185  Real p_pdQ_qd_re = pdQ_qd_re, p_pdQ_qd_im = pdQ_qd_im; // 1^(p(p'Q+q')/N)
186  for (int p = 2; p < P; p++) {
187  ComplexMul(pdQ_qd_re, pdQ_qd_im, &p_pdQ_qd_re, &p_pdQ_qd_im); // p_pdQ_qd *= pdQ_qd.
188  int data_idx = p*Q + qd;
189  ComplexAddProduct(p_pdQ_qd_re, p_pdQ_qd_im,
190  data_thisblock[data_idx*2], data_thisblock[data_idx*2 + 1],
191  &(temp_a[pd*2]), &(temp_a[pd*2 + 1]));
192  }
193  }
194  if (pd != P-1)
195  ComplexMul(rootP_re, rootP_im, &pdQ_qd_re, &pdQ_qd_im); // pdQ_qd *= (rootP == 1^{1/P})
196  // (using 1/P == Q/N)
197  }
198  for (int pd = 0; pd < P; pd++) {
199  data_thisblock[(pd*Q + qd)*2] = temp_a[pd*2];
200  data_thisblock[(pd*Q + qd)*2 + 1] = temp_a[pd*2 + 1];
201  }
202  ComplexMul(rootN_re, rootN_im, &qd_re, &qd_im); // qd *= rootN.
203  }
204  }
205  }
206 }
207 
208 /* Equations for ComplexFftRecursive.
209  We consider here one of the "nffts" separate ffts; it's just a question of
210  doing them all in parallel. We also write all equations in terms of
211  complex math (the conversion to real arithmetic is not hard, and anyway
212  takes place inside function calls).
213 
214 
215  Let the input (i.e. "data" at start) be a_n, n = 0..N-1, and
216  the output (Fourier transform) be d_k, k = 0..N-1. We use these letters because
217  there will be two intermediate variables b and c.
218  We want to compute:
219 
220  d_k = \sum_n a_n 1^(kn/N) (1)
221 
222  where we use 1^x as shorthand for exp(-2pi x) for the forward algorithm
223  and exp(2pi x) for the backward one.
224 
225  We factorize N = P Q (P small, Q usually large).
226  With p = 0..P-1 and q = 0..Q-1, and also p'=0..P-1 and q'=0..P-1, we let:
227 
228  k == p'Q + q' (2)
229  n == qP + p (3)
230 
231  That is, we let p, q, p', q' range over these indices and observe that this way we
232  can cover all n, k. Expanding (1) using (2) and (3), we can write:
233 
234  d_k = \sum_{p, q} a_n 1^((p'Q+q')(qP+p)/N)
235  = \sum_{p, q} a_n 1^(p'pQ/N) 1^(q'qP/N) 1^(q'p/N) (4)
236 
237  using 1^(PQ/N) = 1 to get rid of the terms with PQ in them. Rearranging (4),
238 
239  d_k = \sum_p 1^(p'pQ/N) 1^(q'p/N) \sum_q 1^(q'qP/N) a_n (5)
240 
241  The point here is to separate the index q. Now we can expand out the remaining
242  instances of k and n using (2) and (3):
243 
244  d_(p'Q+q') = \sum_p 1^(p'pQ/N) 1^(q'p/N) \sum_q 1^(q'qP/N) a_(qP+p) (6)
245 
246  The expression \sum_q varies with the indices p and q'. Let us define
247 
248  C_{p, q'} = \sum_q 1^(q'qP/N) a_(qP+p) (7)
249 
250  Here, C_{p, q'}, viewed as a sequence in q', is just the DFT of the points
251  a_(qP+p) for q = 1..Q-1. These points are not consecutive in memory though,
252  they jump by P each time. Let us define b as a rearranged version of a,
253  so that
254 
255  b_(pQ+q) = a_(qP+p) (8)
256 
257  How to do this rearrangement in place? In
258 
259  We can rearrange (7) to be written in terms of the b's, using (8), so that
260 
261  C_{p, q'} = \sum_q 1^(q'q (P/N)) b_(pQ+q) (9)
262 
263  Here, the sequence of C_{p, q'} over q'=0..Q-1, is just the DFT of the sequence
264  of b_(pQ) .. b_(p(Q+1)-1). Let's arrange the C_{p, q'} in a single array in
265  memory in the same way as the b's, i.e. we define
266  c_(pQ+q') == C_{p, q'}. (10)
267  Note that we could have written (10) with q in place of q', as there is only
268  one index of type q present, but q' is just a more natural variable name to use
269  since we use q' elsewhere to subscript c and C.
270 
271  Rewriting (9), we have:
272  c_(pQ+q') = \sum_q 1^(q'q (P/N)) b_(pQ+q) (11)
273  which is the DFT computed by the recursive call to this function [after computing
274  the b's by rearranging the a's]. From the c's we want to compute the d's.
275  Taking (6), substituting in the sum (7), and using (10) to write it as an array,
276  we have:
277  d_(p'Q+q') = \sum_p 1^(p'pQ/N) 1^(q'p/N) c_(pQ+q') (12)
278  This sum is independent for different values of q'. Note that d overwrites c
279  in memory. We compute this in a direct way, using a little array of size P to
280  store the computed d values for one value of q' (we reuse the array for each value
281  of q').
282 
283  So the overall picture is this:
284  We get a call to compute DFT on size N.
285 
286  - If N == 1 we return (nothing to do).
287  - We factor N = P Q (typically, P is small).
288  - Using (8), we rearrange the data in memory so that we have b not a in memory
289  (this is the block "do the rearrangement").
290  The pseudocode for this is as follows. For simplicity we use a temporary array.
291 
292  for p = 0..P-1
293  for q = 0..Q-1
294  bidx = pQ + q
295  aidx = qP + p
296  tmp[bidx] = data[aidx].
297  end
298  end
299  data <-- tmp
300  else
301 
302  endif
303 
304 
305  The reason this accomplishes (8) is that we want pQ+q and qP+p to be swapped
306  over for each p, q, and the "if m > n" is a convenient way of ensuring that
307  this swapping happens only once (otherwise it would happen twice, since pQ+q
308  and qP+p both range over the entire set of numbers 0..N-1).
309 
310  - We do the DFT on the smaller block size to compute c from b (this eq eq. (11)).
311  Note that this is actually multiple DFTs, one for each value of p, but this
312  goes to the "nffts" argument of the function call, which we have ignored up to now.
313 
314  -We compute eq. (12) via a loop, as follows
315  allocate temporary array e of size P.
316  For q' = 0..Q-1:
317  for p' = 0..P-1:
318  set sum to zero [this will go in e[p']]
319  for p = p..P-1:
320  sum += 1^(p'pQ/N) 1^(q'p/N) c_(pQ+q')
321  end
322  e[p'] = sum
323  end
324  for p' = 0..P-1:
325  d_(p'Q+q') = e[p']
326  end
327  end
328  delete temporary array e
329 
330 */
331 
332 // This is the outer-layer calling code for ComplexFftRecursive.
333 // It factorizes the dimension and then calls the FFT routine.
334 template<typename Real> void ComplexFft(VectorBase<Real> *v, bool forward, Vector<Real> *tmp_in) {
335  KALDI_ASSERT(v != NULL);
336 
337  if (v->Dim()<=1) return;
338  KALDI_ASSERT(v->Dim() % 2 == 0); // complex input.
339  int N = v->Dim() / 2;
340  std::vector<int> factors;
341  Factorize(N, &factors);
342  int *factor_beg = NULL;
343  if (factors.size() > 0)
344  factor_beg = &(factors[0]);
345  Vector<Real> tmp; // allocated in ComplexFftRecursive.
346  ComplexFftRecursive(v->Data(), 1, N, factor_beg, factor_beg+factors.size(), forward, (tmp_in?tmp_in:&tmp));
347 }
348 
350 template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward) {
351  KALDI_ASSERT(v != NULL);
352  MatrixIndexT N = v->Dim();
353  KALDI_ASSERT(N%2 == 0);
354  if (N == 0) return;
355  Vector<Real> vtmp(N*2); // store as complex.
356  if (forward) {
357  for (MatrixIndexT i = 0; i < N; i++) vtmp(i*2) = (*v)(i);
358  ComplexFft(&vtmp, forward); // this is already tested so we can use this.
359  v->CopyFromVec( vtmp.Range(0, N) );
360  (*v)(1) = vtmp(N); // Copy the N/2'th fourier component, which is real,
361  // to the imaginary part of the 1st complex output.
362  } else {
363  // reverse the transformation above to get the complex spectrum.
364  vtmp(0) = (*v)(0); // copy F_0 which is real
365  vtmp(N) = (*v)(1); // copy F_{N/2} which is real
366  for (MatrixIndexT i = 1; i < N/2; i++) {
367  // Copy i'th to i'th fourier component
368  vtmp(2*i) = (*v)(2*i);
369  vtmp(2*i+1) = (*v)(2*i+1);
370  // Copy i'th to N-i'th, conjugated.
371  vtmp(2*(N-i)) = (*v)(2*i);
372  vtmp(2*(N-i)+1) = -(*v)(2*i+1);
373  }
374  ComplexFft(&vtmp, forward); // actually backward since forward == false
375  // Copy back real part. Complex part should be zero.
376  for (MatrixIndexT i = 0; i < N; i++)
377  (*v)(i) = vtmp(i*2);
378  }
379 }
380 
381 template void RealFftInefficient (VectorBase<float> *v, bool forward);
382 template void RealFftInefficient (VectorBase<double> *v, bool forward);
383 
384 template
385 void ComplexFft(VectorBase<float> *v, bool forward, Vector<float> *tmp_in);
386 template
387 void ComplexFft(VectorBase<double> *v, bool forward, Vector<double> *tmp_in);
388 
389 
390 // See the long comment below for the math behind this.
391 template<typename Real> void RealFft (VectorBase<Real> *v, bool forward) {
392  KALDI_ASSERT(v != NULL);
393  MatrixIndexT N = v->Dim(), N2 = N/2;
394  KALDI_ASSERT(N%2 == 0);
395  if (N == 0) return;
396 
397  if (forward) ComplexFft(v, true);
398 
399  Real *data = v->Data();
400  Real rootN_re, rootN_im; // exp(-2pi/N), forward; exp(2pi/N), backward
401  int forward_sign = forward ? -1 : 1;
402  ComplexImExp(static_cast<Real>(M_2PI/N *forward_sign), &rootN_re, &rootN_im);
403  Real kN_re = -forward_sign, kN_im = 0.0; // exp(-2pik/N), forward; exp(-2pik/N), backward
404  // kN starts out as 1.0 for forward algorithm but -1.0 for backward.
405  for (MatrixIndexT k = 1; 2*k <= N2; k++) {
406  ComplexMul(rootN_re, rootN_im, &kN_re, &kN_im);
407 
408  Real Ck_re, Ck_im, Dk_re, Dk_im;
409  // C_k = 1/2 (B_k + B_{N/2 - k}^*) :
410  Ck_re = 0.5 * (data[2*k] + data[N - 2*k]);
411  Ck_im = 0.5 * (data[2*k + 1] - data[N - 2*k + 1]);
412  // re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})):
413  Dk_re = 0.5 * (data[2*k + 1] + data[N - 2*k + 1]);
414  // im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k}))
415  Dk_im =-0.5 * (data[2*k] - data[N - 2*k]);
416  // A_k = C_k + 1^(k/N) D_k:
417  data[2*k] = Ck_re; // A_k <-- C_k
418  data[2*k+1] = Ck_im;
419  // now A_k += D_k 1^(k/N)
420  ComplexAddProduct(Dk_re, Dk_im, kN_re, kN_im, &(data[2*k]), &(data[2*k+1]));
421 
422  MatrixIndexT kdash = N2 - k;
423  if (kdash != k) {
424  // Next we handle the index k' = N/2 - k. This is necessary
425  // to do now, to avoid invalidating data that we will later need.
426  // The quantities C_{k'} and D_{k'} are just the conjugates of C_k
427  // and D_k, so the equations are simple modifications of the above,
428  // replacing Ck_im and Dk_im with their negatives.
429  data[2*kdash] = Ck_re; // A_k' <-- C_k'
430  data[2*kdash+1] = -Ck_im;
431  // now A_k' += D_k' 1^(k'/N)
432  // We use 1^(k'/N) = 1^((N/2 - k) / N) = 1^(1/2) 1^(-k/N) = -1 * (1^(k/N))^*
433  // so it's the same as 1^(k/N) but with the real part negated.
434  ComplexAddProduct(Dk_re, -Dk_im, -kN_re, kN_im, &(data[2*kdash]), &(data[2*kdash+1]));
435  }
436  }
437 
438  { // Now handle k = 0.
439  // In simple terms: after the complex fft, data[0] becomes the sum of real
440  // parts input[0], input[2]... and data[1] becomes the sum of imaginary
441  // pats input[1], input[3]...
442  // "zeroth" [A_0] is just the sum of input[0]+input[1]+input[2]..
443  // and "n2th" [A_{N/2}] is input[0]-input[1]+input[2]... .
444  Real zeroth = data[0] + data[1],
445  n2th = data[0] - data[1];
446  data[0] = zeroth;
447  data[1] = n2th;
448  if (!forward) {
449  data[0] /= 2;
450  data[1] /= 2;
451  }
452  }
453 
454  if (!forward) {
455  ComplexFft(v, false);
456  v->Scale(2.0); // This is so we get a factor of N increase, rather than N/2 which we would
457  // otherwise get from [ComplexFft, forward] + [ComplexFft, backward] in dimension N/2.
458  // It's for consistency with our normal FFT convensions.
459  }
460 }
461 
462 template void RealFft (VectorBase<float> *v, bool forward);
463 template void RealFft (VectorBase<double> *v, bool forward);
464 
465 /* Notes for real FFTs.
466  We are using the same convention as above, 1^x to mean exp(-2\pi x) for the forward transform.
467  Actually, in a slight abuse of notation, we use this meaning for 1^x in both the forward and
468  backward cases because it's more convenient in this section.
469 
470  Suppose we have real data a[0...N-1], with N even, and want to compute its Fourier transform.
471  We can make do with the first N/2 points of the transform, since the remaining ones are complex
472  conjugates of the first. We want to compute:
473  for k = 0...N/2-1,
474  A_k = \sum_{n = 0}^{N-1} a_n 1^(kn/N) (1)
475 
476  We treat a[0..N-1] as a complex sequence of length N/2, i.e. a sequence b[0..N/2 - 1].
477  Viewed as sequences of length N/2, we have:
478  b = c + i d,
479  where c = a_0, a_2 ... and d = a_1, a_3 ...
480 
481  We can recover the length-N/2 Fourier transforms of c and d by doing FT on b and
482  then doing the equations below. Derivation is marked by (*) in a comment below (search
483  for it). Let B, C, D be the FTs.
484  We have
485  C_k = 1/2 (B_k + B_{N/2 - k}^*) (z0)
486  D_k =-1/2i (B_k - B_{N/2 - k}^*) (z1)
487 so: re(D_k)= 1/2 (im(B_k) + im(B_{N/2-k})) (z2)
488  im(D_k) = -1/2 (re(B_k) - re(B_{N/2-k})) (z3)
489 
490  To recover the FT A from C and D, we write, rearranging (1):
491 
492  A_k = \sum_{n = 0, 2, ..., N-2} a_n 1^(kn/N)
493  +\sum_{n = 1, 3, ..., N-1} a_n 1^(kn/N)
494  = \sum_{n = 0, 1, ..., N/2-1} a_n 1^(2kn/N) + a_{n+1} 1^(2kn/N) 1^(k/N)
495  = \sum_{n = 0, 1, ..., N/2-1} c_n 1^(2kn/N) + d_n 1^(2kn/N) 1^(k/N)
496  A_k = C_k + 1^(k/N) D_k (a0)
497 
498  This equation is valid for k = 0...N/2-1, which is the range of the sequences B_k and
499  C_k. We don't use is for k = 0, which is a special case considered below. For
500  1 < k < N/2, it's convenient to consider the pair k, k', where k' = N/2 - k.
501  Remember that C_k' = C_k^ *and D_k' = D_k^* [where * is conjugation]. Also,
502  1^(N/2 / N) = -1. So we have:
503  A_k' = C_k^* - 1^(k/N) D_k^* (a0b)
504  We do (a0) and (a0b) together.
505 
506 
507 
508  By symmetry this gives us the Fourier components for N/2+1, ... N, if we want
509  them. However, it doesn't give us the value for exactly k = N/2. For k = 0 and k = N/2, it
510  is easiest to argue directly about the meaning of the A_k, B_k and C_k in terms of
511  sums of points.
512  A_0 and A_{N/2} are both real, with A_0=\sum_n a_n, and A_1 an alternating sum
513  A_1 = a_0 - a_1 + a_2 ...
514  It's easy to show that
515  A_0 = B_0 + C_0 (a1)
516  A_{N/2} = B_0 - C_0. (a2)
517  Since B_0 and C_0 are both real, B_0 is the real coefficient of D_0 and C_0 is the
518  imaginary coefficient.
519 
520  *REVERSING THE PROCESS*
521 
522  Next we want to reverse this process. We just need to work out C_k and D_k from the
523  sequence A_k. Then we do the inverse complex fft and we get back where we started.
524  For 0 and N/2, working from (a1) and (a2) above, we can see that:
525  B_0 = 1/2 (A_0 + A_{N/2}) (y0)
526  C_0 = 1/2 (A_0 + A_{N/2}) (y1)
527  and we use
528  D_0 = B_0 + i C_0
529  to get the 1st complex coefficient of D. This is exactly the same as the forward process
530  except with an extra factor of 1/2.
531 
532  Consider equations (a0) and (a0b). We want to work out C_k and D_k from A_k and A_k'. Remember
533  k' = N/2 - k.
534 
535  Write down
536  A_k = C_k + 1^(k/N) D_k (copying a0)
537  A_k'^* = C_k - 1^(k/N) D_k (conjugate of a0b)
538  So
539  C_k = 0.5 (A_k + A_k'^*) (p0)
540  D_k = 1^(-k/N) . 0.5 (A_k - A_k'^*) (p1)
541  Next, we want to compute B_k and B_k' from C_k and D_k. C.f. (z0)..(z3), and remember
542  that k' = N/2-k. We can see
543  that
544  B_k = C_k + i D_k (p2)
545  B_k' = C_k - i D_k (p3)
546 
547  We would like to make the equations (p0) ... (p3) look like the forward equations (z0), (z1),
548  (a0) and (a0b) so we can reuse the code. Define E_k = -i 1^(k/N) D_k. Then write down (p0)..(p3).
549  We have
550  C_k = 0.5 (A_k + A_k'^*) (p0')
551  E_k = -0.5 i (A_k - A_k'^*) (p1')
552  B_k = C_k - 1^(-k/N) E_k (p2')
553  B_k' = C_k + 1^(-k/N) E_k (p3')
554  So these are exactly the same as (z0), (z1), (a0), (a0b) except replacing 1^(k/N) with
555  -1^(-k/N) . Remember that we defined 1^x above to be exp(-2pi x/N), so the signs here
556  might be opposite to what you see in the code.
557 
558  MODIFICATION: we need to take care of a factor of two. The complex FFT we implemented
559  does not divide by N in the reverse case. So upon inversion we get larger by N/2.
560  However, this is not consistent with normal FFT conventions where you get a factor of N.
561  For this reason we multiply by two after the process described above.
562 
563 */
564 
565 
566 /*
567  (*) [this token is referred to in a comment above].
568 
569  Notes for separating 2 real transforms from one complex one. Note that the
570  letters here (A, B, C and N) are all distinct from the same letters used in the
571  place where this comment is used.
572  Suppose we
573  have two sequences a_n and b_n, n = 0..N-1. We combine them into a complex
574  number,
575  c_n = a_n + i b_n.
576  Then we take the fourier transform to get
577  C_k = \sum_{n = 0}^{N-1} c_n 1^(n/N) .
578  Then we use symmetry. Define A_k and B_k as the DFTs of a and b.
579  We use A_k = A_{N-k}^*, and B_k = B_{N-k}^*, since a and b are real. Using
580  C_k = A_k + i B_k,
581  C_{N-k} = A_k^* + i B_k^*
582  = A_k^* - (i B_k)^*
583  So:
584  A_k = 1/2 (C_k + C_{N-k}^*)
585  i B_k = 1/2 (C_k - C_{N-k}^*)
586 -> B_k =-1/2i (C_k - C_{N-k}^*)
587 -> re(B_k) = 1/2 (im(C_k) + im(C_{N-k}))
588  im(B_k) =-1/2 (re(C_k) - re(C_{N-k}))
589 
590  */
591 
592 template<typename Real> void ComputeDctMatrix(Matrix<Real> *M) {
593  //KALDI_ASSERT(M->NumRows() == M->NumCols());
594  MatrixIndexT K = M->NumRows();
595  MatrixIndexT N = M->NumCols();
596 
597  KALDI_ASSERT(K > 0);
598  KALDI_ASSERT(N > 0);
599  Real normalizer = std::sqrt(1.0 / static_cast<Real>(N)); // normalizer for
600  // X_0.
601  for (MatrixIndexT j = 0; j < N; j++) (*M)(0, j) = normalizer;
602  normalizer = std::sqrt(2.0 / static_cast<Real>(N)); // normalizer for other
603  // elements.
604  for (MatrixIndexT k = 1; k < K; k++)
605  for (MatrixIndexT n = 0; n < N; n++)
606  (*M)(k, n) = normalizer
607  * std::cos( static_cast<double>(M_PI)/N * (n + 0.5) * k );
608 }
609 
610 
611 template void ComputeDctMatrix(Matrix<float> *M);
612 template void ComputeDctMatrix(Matrix<double> *M);
613 
614 
615 template<typename Real>
617  MatrixBase<Real> *U,
618  MatrixBase<Real> *A,
619  bool print_eigs,
620  bool exact) {
621  // Note that some of these matrices may be transposed w.r.t. the
622  // way it's most natural to describe them in math... it's the rows
623  // of X and U that correspond to the (data-points, basis elements).
624  MatrixIndexT N = X.NumRows(), D = X.NumCols();
625  // N = #points, D = feature dim.
626  KALDI_ASSERT(U != NULL && U->NumCols() == D);
627  MatrixIndexT G = U->NumRows(); // # of retained basis elements.
628  KALDI_ASSERT(A == NULL || (A->NumRows() == N && A->NumCols() == G));
629  KALDI_ASSERT(G <= N && G <= D);
630  if (D < N) { // Do conventional PCA.
631  SpMatrix<Real> Msp(D); // Matrix of outer products.
632  Msp.AddMat2(1.0, X, kTrans, 0.0); // M <-- X^T X
633  Matrix<Real> Utmp;
634  Vector<Real> l;
635  if (exact) {
636  Utmp.Resize(D, D);
637  l.Resize(D);
638  //Matrix<Real> M(Msp);
639  //M.DestructiveSvd(&l, &Utmp, NULL);
640  Msp.Eig(&l, &Utmp);
641  } else {
642  Utmp.Resize(D, G);
643  l.Resize(G);
644  Msp.TopEigs(&l, &Utmp);
645  }
646  SortSvd(&l, &Utmp);
647 
648  for (MatrixIndexT g = 0; g < G; g++)
649  U->Row(g).CopyColFromMat(Utmp, g);
650  if (print_eigs)
651  KALDI_LOG << (exact ? "" : "Retained ")
652  << "PCA eigenvalues are " << l;
653  if (A != NULL)
654  A->AddMatMat(1.0, X, kNoTrans, *U, kTrans, 0.0);
655  } else { // Do inner-product PCA.
656  SpMatrix<Real> Nsp(N); // Matrix of inner products.
657  Nsp.AddMat2(1.0, X, kNoTrans, 0.0); // M <-- X X^T
658 
659  Matrix<Real> Vtmp;
660  Vector<Real> l;
661  if (exact) {
662  Vtmp.Resize(N, N);
663  l.Resize(N);
664  Matrix<Real> Nmat(Nsp);
665  Nmat.DestructiveSvd(&l, &Vtmp, NULL);
666  } else {
667  Vtmp.Resize(N, G);
668  l.Resize(G);
669  Nsp.TopEigs(&l, &Vtmp);
670  }
671 
672  MatrixIndexT num_zeroed = 0;
673  for (MatrixIndexT g = 0; g < G; g++) {
674  if (l(g) < 0.0) {
675  KALDI_WARN << "In PCA, setting element " << l(g) << " to zero.";
676  l(g) = 0.0;
677  num_zeroed++;
678  }
679  }
680  SortSvd(&l, &Vtmp); // Make sure zero elements are last, this
681  // is necessary for Orthogonalize() to work properly later.
682 
683  Vtmp.Transpose(); // So eigenvalues are the rows.
684 
685  for (MatrixIndexT g = 0; g < G; g++) {
686  Real sqrtlg = sqrt(l(g));
687  if (l(g) != 0.0) {
688  U->Row(g).AddMatVec(1.0 / sqrtlg, X, kTrans, Vtmp.Row(g), 0.0);
689  } else {
690  U->Row(g).SetZero();
691  (*U)(g, g) = 1.0; // arbitrary direction. Will later orthogonalize.
692  }
693  if (A != NULL)
694  for (MatrixIndexT n = 0; n < N; n++)
695  (*A)(n, g) = sqrtlg * Vtmp(g, n);
696  }
697  // Now orthogonalize. This is mainly useful in
698  // case there were zero eigenvalues, but we do it
699  // for all of them.
700  U->OrthogonalizeRows();
701  if (print_eigs)
702  KALDI_LOG << "(inner-product) PCA eigenvalues are " << l;
703  }
704 }
705 
706 
707 template
708 void ComputePca(const MatrixBase<float> &X,
711  bool print_eigs,
712  bool exact);
713 
714 template
715 void ComputePca(const MatrixBase<double> &X,
718  bool print_eigs,
719  bool exact);
720 
721 
722 // Added by Dan, Feb. 13 2012.
723 // This function does: *plus += max(0, a b^T),
724 // *minus += max(0, -(a b^T)).
725 template<typename Real>
726 void AddOuterProductPlusMinus(Real alpha,
727  const VectorBase<Real> &a,
728  const VectorBase<Real> &b,
729  MatrixBase<Real> *plus,
730  MatrixBase<Real> *minus) {
731  KALDI_ASSERT(a.Dim() == plus->NumRows() && b.Dim() == plus->NumCols()
732  && a.Dim() == minus->NumRows() && b.Dim() == minus->NumCols());
733  int32 nrows = a.Dim(), ncols = b.Dim(), pskip = plus->Stride() - ncols,
734  mskip = minus->Stride() - ncols;
735  const Real *adata = a.Data(), *bdata = b.Data();
736  Real *plusdata = plus->Data(), *minusdata = minus->Data();
737 
738  for (int32 i = 0; i < nrows; i++) {
739  const Real *btmp = bdata;
740  Real multiple = alpha * *adata;
741  if (multiple > 0.0) {
742  for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
743  if (*btmp > 0.0) *plusdata += multiple * *btmp;
744  else *minusdata -= multiple * *btmp;
745  }
746  } else {
747  for (int32 j = 0; j < ncols; j++, plusdata++, minusdata++, btmp++) {
748  if (*btmp < 0.0) *plusdata += multiple * *btmp;
749  else *minusdata -= multiple * *btmp;
750  }
751  }
752  plusdata += pskip;
753  minusdata += mskip;
754  adata++;
755  }
756 }
757 
758 // Instantiate template
759 template
760 void AddOuterProductPlusMinus<float>(float alpha,
761  const VectorBase<float> &a,
762  const VectorBase<float> &b,
763  MatrixBase<float> *plus,
764  MatrixBase<float> *minus);
765 template
766 void AddOuterProductPlusMinus<double>(double alpha,
767  const VectorBase<double> &a,
768  const VectorBase<double> &b,
769  MatrixBase<double> *plus,
770  MatrixBase<double> *minus);
771 
772 
773 } // end namespace kaldi
void AddMat2(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
rank-N update: if (transM == kNoTrans) (*this) = beta*(*this) + alpha * M * M^T, or (if transM == kTr...
Definition: sp-matrix.cc:1110
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Packed symetric matrix class.
Definition: matrix-common.h:62
void ComplexFftRecursive(Real *data, int nffts, int N, const int *factor_begin, const int *factor_end, bool forward, Vector< Real > *tmp_vec)
ComplexFftRecursive is a recursive function that computes the complex FFT of size N...
void Transpose()
Transpose the matrix.
void RealFftInefficient(VectorBase< Real > *v, bool forward)
Inefficient version of Fourier transform, for testing purposes.
#define M_PI
Definition: kaldi-math.h:44
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
const Real * Data() const
Gives pointer to raw data (const).
Definition: kaldi-matrix.h:79
void ComputeDctMatrix(Matrix< Real > *M)
ComputeDctMatrix computes a matrix corresponding to the DCT, such that M * v equals the DCT of vector...
kaldi::int32 int32
void Eig(VectorBase< Real > *s, MatrixBase< Real > *P=NULL) const
Solves the symmetric eigenvalue problem: at end we should have (*this) = P * diag(s) * P^T...
Definition: qr.cc:433
A class for storing matrices.
Definition: kaldi-matrix.h:823
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void DestructiveSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt)
Singular value decomposition Major limitations: For nonsquare matrices, we assume m>=n (NumRows >= Nu...
void ComplexFt(const VectorBase< Real > &in, VectorBase< Real > *out, bool forward)
ComplexFt is the same as ComplexFft but it implements the Fourier transform in an inefficient way...
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
template void AddOuterProductPlusMinus< float >(float alpha, const VectorBase< float > &a, const VectorBase< float > &b, MatrixBase< float > *plus, MatrixBase< float > *minus)
MatrixIndexT Stride() const
Stride (distance in memory between each row). Will be >= NumCols.
Definition: kaldi-matrix.h:70
int32 MatrixIndexT
Definition: matrix-common.h:98
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void ComplexAddProduct(const Real &a_re, const Real &a_im, const Real &b_re, const Real &b_im, Real *c_re, Real *c_im)
ComplexMul implements, inline, the complex operation c += (a * b).
struct rnnlm::@11::@12 n
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
void TopEigs(VectorBase< Real > *s, MatrixBase< Real > *P, MatrixIndexT lanczos_dim=0) const
This function gives you, approximately, the largest eigenvalues of the symmetric matrix and the corre...
Definition: qr.cc:454
void ComputePca(const MatrixBase< Real > &X, MatrixBase< Real > *U, MatrixBase< Real > *A, bool print_eigs, bool exact)
ComputePCA does a PCA computation, using either outer products or inner products, whichever is more e...
#define KALDI_WARN
Definition: kaldi-error.h:150
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: kaldi-vector.h:70
void ComplexMul(const Real &a_re, const Real &a_im, Real *b_re, Real *b_im)
ComplexMul implements, inline, the complex multiplication b *= a.
void AddOuterProductPlusMinus(Real alpha, const VectorBase< Real > &a, const VectorBase< Real > &b, MatrixBase< Real > *plus, MatrixBase< Real > *minus)
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Scale(Real alpha)
Multiplies all elements by this constant.
#define KALDI_COMPLEXFFT_BLOCKSIZE
void ComplexFft(VectorBase< Real > *v, bool forward, Vector< Real > *tmp_in)
The function ComplexFft does an Fft on the vector argument v.
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
template void AddOuterProductPlusMinus< double >(double alpha, const VectorBase< double > &a, const VectorBase< double > &b, MatrixBase< double > *plus, MatrixBase< double > *minus)
#define M_2PI
Definition: kaldi-math.h:52
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void OrthogonalizeRows()
This function orthogonalizes the rows of a matrix using the Gram-Schmidt process. ...
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
#define KALDI_LOG
Definition: kaldi-error.h:153
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
void Factorize(I m, std::vector< I > *factors)
Definition: kaldi-math.h:325
void ComplexImExp(Real x, Real *a_re, Real *a_im)
ComplexImExp implements a <– exp(i x), inline.
void RealFft(VectorBase< Real > *v, bool forward)
RealFft is a fourier transform of real inputs.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94