doc/jama-svd_8h_source.html

 // matrix/jama-svd.h

 // Copyright 2009-2011 Microsoft Corporation

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 // This file consists of a port and modification of materials from
 //   JAMA: A Java Matrix Package
 // under the following notice: This software is a cooperative product of
 // The MathWorks and the National Institute of Standards and Technology (NIST)
 // which has been released to the public.  This notice and the original code are
 // available at http://math.nist.gov/javanumerics/jama/domain.notice


 #ifndef KALDI_MATRIX_JAMA_SVD_H_
 #define KALDI_MATRIX_JAMA_SVD_H_ 1


 #include "matrix/kaldi-matrix.h"
 #include "matrix/sp-matrix.h"
 #include "matrix/cblas-wrappers.h"

 namespace kaldi {

 #if defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
 // using ATLAS as our math library, which doesn't have SVD -> need
 // to implement it.

 // This routine is a modified form of jama_svd.h which is part of the TNT distribution.
 // (originally comes from JAMA).

 // Computes a singular value decomposition of *this using a port of the JAMA
 // algorithm: Householder reduction to bidiagonal form, followed by
 // implicit-shift QR iterations on the bidiagonal matrix.
 //
 // *this must be m x n with m >= n > 0.  Its contents are overwritten
 // (destroyed) by the computation.
 //
 // @param s_in [out] Must be non-NULL and of dimension n; receives the
 //                   singular values.  Each value is made non-negative and
 //                   bubbled past smaller neighbours as it converges.
 // @param U_in [out] May be NULL.  If given it must be m x n and must not be
 //                   *this; it is zeroed and then filled with the U factor.
 // @param V_in [out] May be NULL.  If given it must be n x n and must not be
 //                   *this; it is filled with the V factor.
 // @return true on convergence; false if more than 1000 QR steps are taken
 //         without a singular value converging (a warning is logged).
 //
 // NOTE(review): following JAMA, the factorization is presumably
 // A = U diag(s) V^T with V itself (not its transpose) stored in *V_in --
 // confirm against the callers.
 template<typename Real>
 bool MatrixBase<Real>::JamaSvd(VectorBase<Real> *s_in,
                                MatrixBase<Real> *U_in,
                                MatrixBase<Real> *V_in) {  //  Destructive!
   KALDI_ASSERT(s_in != NULL && U_in != this && V_in != this);
   int wantu = (U_in != NULL), wantv = (V_in != NULL);
   // When an output is not requested, bind the reference to an empty
   // temporary so the code below can use U and V unconditionally.
   Matrix<Real> Utmp, Vtmp;
   MatrixBase<Real> &U = (U_in ? *U_in : Utmp), &V = (V_in ? *V_in : Vtmp);
   VectorBase<Real> &s = *s_in;

   int m = num_rows_, n = num_cols_;
   KALDI_ASSERT(m>=n && m != 0 && n != 0);
   if (wantu) KALDI_ASSERT((int)U.num_rows_ == m && (int)U.num_cols_ == n);
   if (wantv) KALDI_ASSERT((int)V.num_rows_ == n && (int)V.num_cols_ == n);
   KALDI_ASSERT((int)s.Dim() == n);  // n<=m so n is min.

   int nu = n;
   U.SetZero();  // make sure all zero.
   Vector<Real> e(n);      // super-diagonal of the bidiagonal form.
   Vector<Real> work(m);   // scratch space for the row transformations.
   MatrixBase<Real> &A(*this);
   // Raw pointers and strides, used for the BLAS calls below.  Element (r, c)
   // of a matrix is addressed as data[r*stride + c].
   Real *adata = A.Data(), *workdata = work.Data(), *edata = e.Data(),
       *udata = U.Data(), *vdata = V.Data();
   int astride = static_cast<int>(A.Stride()),
       ustride = static_cast<int>(U.Stride()),
       vstride = static_cast<int>(V.Stride());
   int i = 0, j = 0, k = 0;

   // Reduce A to bidiagonal form, storing the diagonal elements
   // in s and the super-diagonal elements in e.

   int nct = std::min(m-1, n);                   // number of column transformations.
   int nrt = std::max(0, std::min(n-2, m));      // number of row transformations.
   for (k = 0; k < std::max(nct, nrt); k++) {
     if (k < nct) {

       // Compute the transformation for the k-th column and
       // place the k-th diagonal in s(k).
       // Compute 2-norm of k-th column without under/overflow.
       s(k) = 0;
       for (i = k; i < m; i++) {
         s(k) = hypot(s(k), A(i, k));
       }
       if (s(k) != 0.0) {
         if (A(k, k) < 0.0) {
           s(k) = -s(k);
         }
         for (i = k; i < m; i++) {
           A(i, k) /= s(k);
         }
         A(k, k) += 1.0;
       }
       s(k) = -s(k);
     }
     for (j = k+1; j < n; j++) {
       if ((k < nct) && (s(k) != 0.0))  {

         // Apply the transformation.

         // Dot product of column k with column j over rows k..m-1.
         Real t = cblas_Xdot(m - k, adata + astride*k + k, astride,
                             adata + astride*k + j, astride);
         /*for (i = k; i < m; i++) {
           t += adata[i*astride + k]*adata[i*astride + j];  //   A(i, k)*A(i, j); // 3
           }*/
         t = -t/A(k, k);
         // Column j += t * column k, over rows k..m-1.
         cblas_Xaxpy(m - k, t, adata + k*astride + k, astride,
                     adata + k*astride + j, astride);
         /*for (i = k; i < m; i++) {
           adata[i*astride + j] += t*adata[i*astride + k];  // A(i, j) += t*A(i, k); // 5
           }*/
       }

       // Place the k-th row of A into e for the
       // subsequent calculation of the row transformation.

       e(j) = A(k, j);
     }
     // Both operands are 0/1 valued, so the bitwise & acts as a logical and.
     if (wantu & (k < nct)) {

       // Place the transformation in U for subsequent back
       // multiplication.

       for (i = k; i < m; i++) {
         U(i, k) = A(i, k);
       }
     }
     if (k < nrt) {

       // Compute the k-th row transformation and place the
       // k-th super-diagonal in e(k).
       // Compute 2-norm without under/overflow.
       e(k) = 0;
       for (i = k+1; i < n; i++) {
         e(k) = hypot(e(k), e(i));
       }
       if (e(k) != 0.0) {
         if (e(k+1) < 0.0) {
           e(k) = -e(k);
         }
         for (i = k+1; i < n; i++) {
           e(i) /= e(k);
         }
         e(k+1) += 1.0;
       }
       e(k) = -e(k);
       if ((k+1 < m) & (e(k) != 0.0)) {

         // Apply the transformation.

         for (i = k+1; i < m; i++) {
           work(i) = 0.0;
         }
         for (j = k+1; j < n; j++) {
           for (i = k+1; i < m; i++) {
             workdata[i] += edata[j] * adata[i*astride + j];  // work(i) += e(j)*A(i, j); // 5
           }
         }
         for (j = k+1; j < n; j++) {
           Real t(-e(j)/e(k+1));
           // Column j of A += t * work, over rows k+1..m-1.
           cblas_Xaxpy(m - (k+1), t, workdata + (k+1), 1,
                       adata + (k+1)*astride + j, astride);
           /*
           for (i = k+1; i < m; i++) {
             adata[i*astride + j] += t*workdata[i];  // A(i, j) += t*work(i); // 5
             }*/
         }
       }
       if (wantv) {

         // Place the transformation in V for subsequent
         // back multiplication.

         for (i = k+1; i < n; i++) {
           V(i, k) = e(i);
         }
       }
     }
   }

   // Set up the final bidiagonal matrix of order p.

   int p = std::min(n, m+1);
   if (nct < n) {
     s(nct) = A(nct, nct);
   }
   if (m < p) {
     s(p-1) = 0.0;
   }
   if (nrt+1 < p) {
     e(nrt) = A(nrt, p-1);
   }
   e(p-1) = 0.0;

   // If required, generate U.

   if (wantu) {
     for (j = nct; j < nu; j++) {
       for (i = 0; i < m; i++) {
         U(i, j) = 0.0;
       }
       U(j, j) = 1.0;
     }
     for (k = nct-1; k >= 0; k--) {
       if (s(k) != 0.0) {
         for (j = k+1; j < nu; j++) {
           Real t = cblas_Xdot(m - k, udata + k*ustride + k, ustride, udata + k*ustride + j, ustride);
           //for (i = k; i < m; i++) {
           //  t += udata[i*ustride + k]*udata[i*ustride + j];  // t += U(i, k)*U(i, j); // 8
           // }
           t = -t/U(k, k);
           cblas_Xaxpy(m - k, t, udata + ustride*k + k, ustride,
                       udata + k*ustride + j, ustride);
           /*for (i = k; i < m; i++) {
             udata[i*ustride + j] += t*udata[i*ustride + k];  // U(i, j) += t*U(i, k); // 4
             }*/
         }
         for (i = k; i < m; i++ ) {
           U(i, k) = -U(i, k);
         }
         U(k, k) = 1.0 + U(k, k);
         // NOTE(review): the bound k-1 (rather than k) leaves U(k-1, k)
         // untouched.  U was zeroed above and rows above k of column k do not
         // appear to be written before this point, so that element is
         // presumably already zero -- confirm before changing the bound.
         for (i = 0; i < k-1; i++) {
           U(i, k) = 0.0;
         }
       } else {
         for (i = 0; i < m; i++) {
           U(i, k) = 0.0;
         }
         U(k, k) = 1.0;
       }
     }
   }

   // If required, generate V.

   if (wantv) {
     for (k = n-1; k >= 0; k--) {
       if ((k < nrt) & (e(k) != 0.0)) {
         for (j = k+1; j < nu; j++) {
           Real t = cblas_Xdot(n - (k+1), vdata + (k+1)*vstride + k, vstride,
                               vdata + (k+1)*vstride + j, vstride);
           /*Real t (0.0);
           for (i = k+1; i < n; i++) {
             t += vdata[i*vstride + k]*vdata[i*vstride + j];  // t += V(i, k)*V(i, j); // 7
             }*/
           t = -t/V(k+1, k);
           cblas_Xaxpy(n - (k+1), t, vdata + (k+1)*vstride + k, vstride,
                       vdata + (k+1)*vstride + j, vstride);
           /*for (i = k+1; i < n; i++) {
             vdata[i*vstride + j] += t*vdata[i*vstride + k];  // V(i, j) += t*V(i, k); // 7
             }*/
         }
       }
       for (i = 0; i < n; i++) {
         V(i, k) = 0.0;
       }
       V(k, k) = 1.0;
     }
   }

   // Main iteration loop for the singular values.

   int pp = p-1;
   int iter = 0;  // QR steps taken since the last singular value converged.
   // note: -52.0 is from Jama code; the -23 is the extension
   // to float, because mantissa length in (double, float)
   // is (52, 23) bits respectively.
   Real eps(pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));
   // Note: the -966 was taken from Jama code, but the -120 is a guess
   // of how to extend this to float... the exponent in double goes
   // from -1022 .. 1023, and in float from -126..127.  I'm not sure
   // what the significance of 966 is, so -120 just represents a number
   // that's a bit less negative than -126.  If we get convergence
   // failure in float only, this may mean that we have to make the
   // -120 value less negative.
   Real tiny(pow(2.0, sizeof(Real) == 4 ? -120.0: -966.0 ));

   while (p > 0) {
     int k = 0;
     int kase = 0;

     if (iter == 500 || iter == 750) {
       KALDI_WARN << "Svd taking a long time: making convergence criterion less exact.";
       // Raise the (sub-unity) thresholds to the power 0.8, which makes them
       // moderately larger.  The arguments were previously swapped
       // (pow(0.8, eps)), which evaluates to roughly 1.0 and so treated
       // nearly every element as negligible.
       eps = pow(eps, static_cast<Real>(0.8));
       tiny = pow(tiny, static_cast<Real>(0.8));
     }
     if (iter > 1000) {
       KALDI_WARN << "Svd not converging on matrix of size " << m << " by " <<n;
       return false;
     }

     // This section of the program inspects for
     // negligible elements in the s and e arrays.  On
     // completion the variables kase and k are set as follows.

     // kase = 1     if s(p) and e(k-1) are negligible and k < p
     // kase = 2     if s(k) is negligible and k < p
     // kase = 3     if e(k-1) is negligible, k < p, and
     //              s(k), ..., s(p) are not negligible (qr step).
     // kase = 4     if e(p-1) is negligible (convergence).

     // Scan the super-diagonal upwards for the first negligible e(k);
     // k == -1 means none was found.
     for (k = p-2; k >= -1; k--) {
       if (k == -1) {
         break;
       }
       if (std::abs(e(k)) <=
           tiny + eps*(std::abs(s(k)) + std::abs(s(k+1)))) {
         e(k) = 0.0;
         break;
       }
     }
     if (k == p-2) {
       kase = 4;
     } else {
       // Scan the diagonal of the active block for a negligible s(ks).
       int ks;
       for (ks = p-1; ks >= k; ks--) {
         if (ks == k) {
           break;
         }
         Real t( (ks != p ? std::abs(e(ks)) : 0.) +
                 (ks != k+1 ? std::abs(e(ks-1)) : 0.));
         if (std::abs(s(ks)) <= tiny + eps*t)  {
           s(ks) = 0.0;
           break;
         }
       }
       if (ks == k) {
         kase = 3;
       } else if (ks == p-1) {
         kase = 1;
       } else {
         kase = 2;
         k = ks;
       }
     }
     k++;

     // Perform the task indicated by kase.

     switch (kase) {

       // Deflate negligible s(p).

       case 1: {
         Real f(e(p-2));
         e(p-2) = 0.0;
         for (j = p-2; j >= k; j--) {
           Real t( hypot(s(j), f));
           Real cs(s(j)/t);
           Real sn(f/t);
           s(j) = t;
           if (j != k) {
             f = -sn*e(j-1);
             e(j-1) = cs*e(j-1);
           }
           if (wantv) {
             // Rotate columns j and p-1 of V.
             for (i = 0; i < n; i++) {
               t = cs*V(i, j) + sn*V(i, p-1);
               V(i, p-1) = -sn*V(i, j) + cs*V(i, p-1);
               V(i, j) = t;
             }
           }
         }
       }
         break;

         // Split at negligible s(k).

       case 2: {
         Real f(e(k-1));
         e(k-1) = 0.0;
         for (j = k; j < p; j++) {
           Real t(hypot(s(j), f));
           Real cs( s(j)/t);
           Real sn(f/t);
           s(j) = t;
           f = -sn*e(j);
           e(j) = cs*e(j);
           if (wantu) {
             // Rotate columns j and k-1 of U.
             for (i = 0; i < m; i++) {
               t = cs*U(i, j) + sn*U(i, k-1);
               U(i, k-1) = -sn*U(i, j) + cs*U(i, k-1);
               U(i, j) = t;
             }
           }
         }
       }
         break;

         // Perform one qr step.

       case 3: {

         // Calculate the shift.

         // Scale by the largest magnitude involved so the products below
         // stay in range.
         Real scale = std::max(std::max(std::max(std::max(
             std::abs(s(p-1)), std::abs(s(p-2))), std::abs(e(p-2))),
                                        std::abs(s(k))), std::abs(e(k)));
         Real sp = s(p-1)/scale;
         Real spm1 = s(p-2)/scale;
         Real epm1 = e(p-2)/scale;
         Real sk = s(k)/scale;
         Real ek = e(k)/scale;
         Real b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/2.0;
         Real c = (sp*epm1)*(sp*epm1);
         Real shift = 0.0;
         if ((b != 0.0) || (c != 0.0)) {
           shift = std::sqrt(b*b + c);
           if (b < 0.0) {
             shift = -shift;
           }
           shift = c/(b + shift);
         }
         Real f = (sk + sp)*(sk - sp) + shift;
         Real g = sk*ek;

         // Chase zeros.

         for (j = k; j < p-1; j++) {
           Real t = hypot(f, g);
           Real cs = f/t;
           Real sn = g/t;
           if (j != k) {
             e(j-1) = t;
           }
           f = cs*s(j) + sn*e(j);
           e(j) = cs*e(j) - sn*s(j);
           g = sn*s(j+1);
           s(j+1) = cs*s(j+1);
           if (wantv) {
             // Rotate columns j and j+1 of V.
             cblas_Xrot(n, vdata + j, vstride, vdata + j+1, vstride, cs, sn);
             /*for (i = 0; i < n; i++) {
               t = cs*vdata[i*vstride + j] + sn*vdata[i*vstride + j+1];  // t = cs*V(i, j) + sn*V(i, j+1);         // 13
               vdata[i*vstride + j+1] = -sn*vdata[i*vstride + j] + cs*vdata[i*vstride + j+1];  // V(i, j+1) = -sn*V(i, j) + cs*V(i, j+1); // 5
               vdata[i*vstride + j] = t;  // V(i, j) = t; // 4
               }*/
           }
           t = hypot(f, g);
           cs = f/t;
           sn = g/t;
           s(j) = t;
           f = cs*e(j) + sn*s(j+1);
           s(j+1) = -sn*e(j) + cs*s(j+1);
           g = sn*e(j+1);
           e(j+1) = cs*e(j+1);
           if (wantu && (j < m-1)) {
             // Rotate columns j and j+1 of U.
             cblas_Xrot(m, udata + j, ustride, udata + j+1, ustride, cs, sn);
             /*for (i = 0; i < m; i++) {
               t = cs*udata[i*ustride + j] + sn*udata[i*ustride + j+1];  // t = cs*U(i, j) + sn*U(i, j+1); // 7
               udata[i*ustride + j+1] = -sn*udata[i*ustride + j] +cs*udata[i*ustride + j+1];  // U(i, j+1) = -sn*U(i, j) + cs*U(i, j+1); // 8
               udata[i*ustride + j] = t;  // U(i, j) = t; // 1
               }*/
           }
         }
         e(p-2) = f;
         iter = iter + 1;
       }
         break;

         // Convergence.

       case 4: {

         // Make the singular values positive.

         if (s(k) <= 0.0) {
           s(k) = (s(k) < 0.0 ? -s(k) : 0.0);
           if (wantv) {
             for (i = 0; i <= pp; i++) {
               V(i, k) = -V(i, k);
             }
           }
         }

         // Order the singular values.

         // Bubble s(k) past any larger successor, swapping the matching
         // columns of V and U along with it.
         while (k < pp) {
           if (s(k) >= s(k+1)) {
             break;
           }
           Real t = s(k);
           s(k) = s(k+1);
           s(k+1) = t;
           if (wantv && (k < n-1)) {
             for (i = 0; i < n; i++) {
               t = V(i, k+1); V(i, k+1) = V(i, k); V(i, k) = t;
             }
           }
           if (wantu && (k < m-1)) {
             for (i = 0; i < m; i++) {
               t = U(i, k+1); U(i, k+1) = U(i, k); U(i, k) = t;
             }
           }
           k++;
         }
         // One more singular value is final: restart the step counter and
         // shrink the active block.
         iter = 0;
         p--;
       }
         break;
     }
   }
   return true;
 }

 #endif // defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)

 } // namespace kaldi

 #endif // KALDI_MATRIX_JAMA_SVD_H_
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

cblas-wrappers.h

rnnlm::j
int j
Definition: mikolov-rnnlm-lib.cc:66

sp-matrix.h

kaldi-matrix.h

kaldi::cblas_Xdot
float cblas_Xdot(const int N, const float *const X, const int incX, const float *const Y, const int incY)
Definition: cblas-wrappers.h:64

rnnlm::n
struct rnnlm::@11::@12 n

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::cblas_Xaxpy
void cblas_Xaxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY)
Definition: cblas-wrappers.h:74

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::cblas_Xrot
void cblas_Xrot(const int N, float *X, const int incX, float *Y, const int incY, const float c, const float s)
Definition: cblas-wrappers.h:56