35 using namespace kaldi;
    45 template<
typename Real>
    55   std::vector<int32> copy_from_idx_vec;
    58     copy_from_idx_vec.push_back(
Rand() % n_rows);
    64     for (
int32 j = 0; 
j < n_columns; 
j++) {
    65       Real src_val = src(copy_from_idx_vec.at(
i), 
j);
    66       Real tgt_val = tgt(
i, 
j);
    72 template<
typename Real>
    84     Real src = x_cpu(r, c), dest = y_cpu(r, c);
    85     if (src <= -epsilon || src >= epsilon) {
    87     } 
else if (src >= 0) {
    96 template<
typename Real>
   106   std::vector<int32> copy_from_idx_vec;
   108   for (
int32 i = 0; 
i < n_columns; 
i++) {
   109     copy_from_idx_vec.push_back(
Rand() % n_columns);
   114   for (
int32 i = 0; 
i < n_rows; 
i++) {
   115     for (
int32 j = 0; 
j < n_columns; 
j++) {
   116       Real src_val = src(
i, copy_from_idx_vec.at(
j));
   117       Real tgt_val = tgt(
i, 
j);
   123 template<
typename Real>
   132   std::vector<int32> frame_offsets_vec;
   138   for (
int32 i = 0; 
i < n_frame_offsets; 
i++) {
   139     frame_offsets_vec.push_back(
Rand() % 2 * n_columns - n_columns);
   147   for (
int32 i = 0; 
i < n_rows; 
i++) {
   148     for (
int32 k = 0; k < n_frame_offsets; k++) {
   149       for (
int32 j = 0; 
j < n_columns; 
j++) {
   151         if (
i + frame_offsets_vec.at(k) >= n_rows) {
   152           src_val = src_copy(n_rows-1, 
j);
   153         } 
else if (
i + frame_offsets_vec.at(k) <= 0) {
   154           src_val = src_copy(0, 
j);
   156           src_val = src_copy(
i + frame_offsets_vec.at(k), 
j);
   158         Real tgt_val = tgt_copy(
i, k * n_columns + 
j);
   165 template<
typename Real>
   167   for (
int i = 0; 
i < 3; 
i++) {
   171     Matrix<Real> Hinput(num_rows, 5 * cell_dim + dropout_dim);
   188   for (
int i = 16; 
i <= 1024; 
i *= 2) {
   201     for (; tim.
Elapsed() < time_in_secs; iter++)
   205     KALDI_LOG << 
"For ComputeLstmNonlinearity"   206               << (
sizeof(Real)==8 ? 
"<double>" : 
"<float>") << 
", for dim = "   207               << 
i << 
", speed was " << gflops << 
" gigaflops";
   214   for (
int32 loop = 0; loop < 10; loop++) {
   219         dropout_dim = (
RandInt(0, 1) == 0 ? 0 : 3);
   238         output_deriv(num_rows, cell_dim * 2);
   242     output_deriv.
ColRange(test_output * cell_dim, cell_dim).SetRandn();
   255         value_sum(5, cell_dim);
   258         self_repair_sum(5, cell_dim),
   259         input_deriv(num_rows, 5 * cell_dim + dropout_dim),
   260         params_deriv(3, cell_dim);
   262     double count_in = 0.0;
   266                                  self_repair_config, count_in,
   267                                  &input_deriv, &params_deriv,
   268                                  &value_sum, &deriv_sum, &self_repair_sum);
   275         measured_objf_change(test_dim);
   277     for (
int32 i = 0; 
i < test_dim; 
i++) {
   279           delta_params(3, cell_dim);
   280       if (test_input >= 0) {
   281         delta_input.ColRange(test_input * cell_dim, cell_dim).SetRandn();
   282         delta_input.Scale(delta);
   284       if (test_params >= 0) {
   285         delta_params.
Row(test_params).SetRandn();
   286         delta_params.
Scale(delta);
   293       perturbed_input.
AddMat(1.0, delta_input);
   296       perturbed_params.
AddMat(1.0, delta_params);
   302           objf_change = new_objf - baseline_objf;
   303       measured_objf_change(
i) = objf_change;
   305     KALDI_LOG << 
"LSTM nonlinearity test: num_rows=" << num_rows
   306               << 
", cell_dim=" << cell_dim
   307               << 
", dropout_dim=" << dropout_dim
   308               << 
", test_input=" << test_input
   309               << 
", test_params=" << test_params
   310               << 
", test_output=" << test_output
   311               << 
", predicted_objf_change=" << predicted_objf_change
   312               << 
", measured_objf_change=" << measured_objf_change;
   315       KALDI_ERR << 
"LSTM nonlinearity test failed.";
   320 template<
typename Real>
   322   for (
int i = 0; 
i < 3; 
i++) {
   325        dropout_dim = (
RandInt(0, 1) == 0 ? 0 : 3);
   328     Matrix<Real> hinput(num_rows, 5 * cell_dim + dropout_dim);
   334     Matrix<Real> hinput_deriv(num_rows, 5 * cell_dim + dropout_dim);
   345     count_in = 
Rand() % num_rows;
   366                                     hderiv_sum_in, hself_repair_config,
   373                                  dself_repair_config, count_in,
   381                                     hderiv_sum_in, hself_repair_config,
   383                                     &hparams_deriv, &hvalue_sum_out,
   384                                     &hderiv_sum_out, &hself_repair_sum_out);
   386                                  dself_repair_config, count_in,
   388                                  &dvalue_sum_out, &dderiv_sum_out,
   389                                  &dself_repair_sum_out);
   392                                     hderiv_sum_in, hself_repair_config,
   393                                     count_in, &hinput_deriv,
   399                                  dself_repair_config, count_in, &dinput_deriv,
   406                                     hderiv_sum_in, hself_repair_config,
   407                                     count_in, &hinput_deriv, &hparams_deriv,
   408                                     &hvalue_sum_out, &hderiv_sum_out,
   409                                     &hself_repair_sum_out);
   411                                  dself_repair_config, count_in, &dinput_deriv,
   412                                  &dparams_deriv, &dvalue_sum_out,
   413                                  &dderiv_sum_out, &dself_repair_sum_out);
   419     Matrix<Real> hdself_repair_sum_out(dself_repair_sum_out);
   431     AssertEqual(hself_repair_sum_out, hdself_repair_sum_out);
   434   for (
int i = 16; 
i <= 2048; 
i *= 2) {
   458     count_in = 
Rand() % num_rows;
   462     for (; tim.
Elapsed() < time_in_secs; iter++)
   464                                    self_repair_config, count_in, &input_deriv,
   465                                    &params_deriv, &value_sum_out,
   466                                    &deriv_sum_out, &self_repair_sum_out);
   470     KALDI_LOG << 
"For BackpropLstmNonlinearity"   471               << (
sizeof(Real) == 8 ? 
"<double>" : 
"<float>") << 
", for dim = "   472               << 
i << 
", speed was " << gflops << 
" gigaflops";
   478 template<
typename Real>
   482     int row = 10 + 
Rand() % 40;
   483     int col = 10 + 
Rand() % 50;
   494     Real target_rms = 0.3456;
   495     bool add_log_stddev = 
true;
   496     const Real kSquaredNormFloor = 1.35525271560688e-20; 
   505       Real target_rms=0.3456;
   507       if (in.
Data() != out_no_log.Data())
   510       Real d_scaled = in.
NumCols() * target_rms * target_rms;
   511       in_norm.AddDiagMat2(1.0 / d_scaled, in, 
kNoTrans, 0.0);
   512       in_norm.ApplyFloor(kSquaredNormFloor);
   513       in_norm.ApplyPow(-0.5);
   514       out_no_log.MulRowsVec(in_norm);
   515       if (add_log_stddev) {
   518         in_norm.Add(log(target_rms));
   527   for (
int dim = 16; dim <= 1024; dim *= 2) {
   534     for (; tim.
Elapsed() < time_in_secs; iter++) {
   540     KALDI_LOG << 
"For CuMath::NormalizePerRow"   541               << (
sizeof(Real)==8?
"<double>":
"<float>") << 
", for dim = "   542               << dim << 
", speed was " << gflops << 
" gigaflops.";
   549 template<
typename Real>
   565   Real target_rms = 0.3456;
   566   bool add_log_stddev = 
false;
   567   const Real kSquaredNormFloor = 1.35525271560688e-20; 
   576     Real target_rms=0.3456;
   578     Real d_scaled = in.
NumCols() * target_rms * target_rms;
   579     in_norm.AddDiagMat2(1.0 / d_scaled, in, 
kNoTrans, 0.0);
   580     in_norm.ApplyFloor(kSquaredNormFloor);
   581     in_norm.ApplyPow(-0.5);
   592 template<
typename Real>
   595     int row = 10 + 
Rand() % 40;
   596     int col = 10 + 
Rand() % 50;
   613     Real target_rms = 0.3456;
   614     bool add_log_stddev = 
true;
   615     const Real kSquaredNormFloor = 1.3552527156068805425e-20; 
   632       Real d_scaled = (in_value.
NumCols() * target_rms * target_rms);
   634       if (add_log_stddev) {
   643         log_stddev_deriv.ApplyPow(-1.0);
   644         out_deriv_for_stddev.CopyColFromMat(out_deriv,
   646         log_stddev_deriv.MulElements(out_deriv_for_stddev);
   651       in_norm.Scale(1.0 / d_scaled);
   652       in_norm.ApplyFloor(kSquaredNormFloor);
   653       in_norm.ApplyPow(-0.5);
   655         if (in_deriv->
Data() != out_deriv_no_log.Data())
   660         in_norm.ReplaceValue(1.0 / sqrt(kSquaredNormFloor), 0.0);
   661         in_norm.ApplyPow(3.0);
   662         dot_products.MulElements(in_norm);
   664         in_deriv->
AddDiagVecMat(-1.0 / d_scaled, dot_products, in_value,
   673   for (
int dim = 16; dim <= 1024; dim *= 2) {
   680     for (; tim.
Elapsed() < time_in_secs; iter++) {
   685     KALDI_LOG << 
"For CuMath::DiffNormalizePerRow"   686               << (
sizeof(Real)==8?
"<double>":
"<float>")
   687               << 
", for dim = " << dim << 
", speed was " << gflops
   696   if (CuDevice::Instantiate().DoublePrecisionSupported())
   699   UnitTestCuMathComputeLstmNonlinearity<Real>();
   700   UnitTestCuMathRandomize<Real>();
   701   UnitTestCuMathSplice<Real>();
   702   UnitTestCuMathCopy<Real>();
   704   UnitTestEnsureNonzero<Real>();
   705   UnitTestBackpropLstmNonlinearity<Real>();
   706   UnitTestCuMathNormalizePerRow<Real>();
   707   UnitTestCuMathNormalizePerRow_v2<Real>();
   708   UnitTestCuDiffNormalizePerRow<Real>();
   718   for (; loop < 2; loop++) {
   719     CuDevice::Instantiate().SetDebugStrideMode(
true);
   721       CuDevice::Instantiate().SelectGpuId(
"no"); 
   723       CuDevice::Instantiate().SelectGpuId(
"yes"); 
   726     kaldi::CudaMathUnitTest<float>();
   729     if (CuDevice::Instantiate().DoublePrecisionSupported()) {
   730       kaldi::CudaMathUnitTest<double>();
   732       KALDI_WARN << 
"Double precision not supported";
   735     kaldi::CudaMathUnitTest<float>();
   739       KALDI_LOG << 
"Tests without GPU use succeeded.";
   741       KALDI_LOG << 
"Tests with GPU use (if available) succeeded.";
   744   CuDevice::Instantiate().PrintProfile();
 void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
void CopyColFromVec(const VectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix. 
 
void CopyFromVec(const std::vector< T > &src)
This function resizes if needed. 
 
void UnitTestLstmNonlinearity()
 
void CpuComputeLstmNonlinearity(const MatrixBase< Real > &input_mat, const MatrixBase< Real > &params_mat, MatrixBase< Real > *output)
 
const CuSubVector< Real > Row(MatrixIndexT i) const
 
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix). 
 
Base class which provides matrix operations not involving resizing or allocation. ...
 
const Real * Data() const
Gives pointer to raw data (const). 
 
void Randomize(const CuMatrixBase< Real > &src, const CuArray< int32 > &copy_from_idx, CuMatrixBase< Real > *tgt)
Copies a permutation of src into tgt. 
 
static void UnitTestBackpropLstmNonlinearity()
 
static void UnitTestCuMathCopy()
 
void AddDiagMat2(Real alpha, const MatrixBase< Real > &M, MatrixTransposeType trans=kNoTrans, Real beta=1.0)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
 
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A 
 
A class for storing matrices. 
 
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
 
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done). 
 
static void UnitTestCuMathNormalizePerRow()
 
static void UnitTestCuDiffNormalizePerRow()
 
static void UnitTestCuMathComputeLstmNonlinearity()
 
void BackpropLstmNonlinearity(const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &params, const CuMatrixBase< Real > &output_deriv, const CuMatrixBase< double > &deriv_sum_in, const CuVectorBase< Real > &self_repair_config, double count_in, CuMatrixBase< Real > *input_deriv, CuMatrixBase< Real > *params_deriv, CuMatrixBase< double > *value_sum_out, CuMatrixBase< double > *deriv_sum_out, CuMatrixBase< Real > *self_repair_sum_out)
This function does the 'backward' pass corresponding to the function ComputeLstmNonlinearity. 
 
void SetVerboseLevel(int32 i)
This should be rarely used, except by programs using Kaldi as library; command-line programs set the ...
 
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements. 
 
static void UnitTestCuMathRandomize()
 
void DiffNormalizePerRow(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_deriv, const Real target_rms, const bool add_log_stddev, CuMatrixBase< Real > *in_deriv)
 
static void UnitTestEnsureNonzero()
 
void Scale(Real alpha)
Multiply each element with a scalar value. 
 
void SetRandn()
Sets to random values of a normal distribution. 
 
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function. 
 
int Rand(struct RandomState *state)
 
void SetRandn()
Set vector to random normally-distributed noise. 
 
void MulRowsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = diag(scale) * (*this). 
 
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
 
void EnsureNonzero(const CuMatrixBase< Real > &src, Real epsilon, CuMatrixBase< Real > *dest)
This function requires that src and dest have the same dimension and epsilon > 0. ...
 
void Splice(const CuMatrixBase< Real > &src, const CuArray< int32 > &frame_offsets, CuMatrixBase< Real > *tgt)
Splice concatenates frames of src as specified in frame_offsets into tgt. 
 
Matrix for CUDA computing. 
 
MatrixIndexT NumCols() const
 
A class representing a vector. 
 
#define KALDI_ASSERT(cond)
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b)) 
 
void AddDiagMatMat(Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const MatrixBase< Real > &N, MatrixTransposeType transN, Real beta=1.0)
Add the diagonal of a matrix product: *this = diag(M N), assuming the "trans" arguments are both kNoT...
 
void AddDiagVecMat(const Real alpha, const VectorBase< Real > &v, const MatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T]. 
 
MatrixIndexT NumRows() const
Dimensions. 
 
void NormalizePerRow(const CuMatrixBase< Real > &in, const Real target_rms, const bool add_log_stddev, CuMatrixBase< Real > *out)
Normalize nonlinearity modifies the vector of activations by scaling it so that the root-mean-square ...
 
void ApplyFloor(Real floor_val)
 
static void UnitTestCuMathNormalizePerRow_v2()
 
double Elapsed() const
Returns time in seconds. 
 
Sub-matrix representation. 
 
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)). 
 
static void UnitTestCuMathSplice()
 
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
 
void Copy(const CuMatrixBase< Real > &src, const CuArray< int32 > &copy_from_indices, CuMatrixBase< Real > *tgt)
Copies elements from src into tgt as given by copy_from_indices. 
 
void CpuBackpropLstmNonlinearity(const MatrixBase< Real > &input, const MatrixBase< Real > &params, const MatrixBase< Real > &output_deriv, const MatrixBase< double > &deriv_sum_in, const VectorBase< Real > &self_repair_config, double count_in, MatrixBase< Real > *input_deriv, MatrixBase< Real > *params_deriv, MatrixBase< double > *value_sum_out, MatrixBase< double > *deriv_sum_out, MatrixBase< Real > *self_repair_sum_out)
 
void ComputeLstmNonlinearity(const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &params, CuMatrixBase< Real > *output)
this is a special-purpose function used by class LstmNonlinearityComponent, to do its forward propaga...