doc/nnet-parse_8cc_source.html

 // nnet3/nnet-parse.cc

 // Copyright      2015  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include <iterator>
 #include <sstream>
 #include <iomanip>
 #include "nnet3/nnet-parse.h"
 #include "cudamatrix/cu-vector.h"
 #include "cudamatrix/cu-matrix.h"

 namespace kaldi {
 namespace nnet3 {

 // Splits a Descriptor configuration string into tokens.
 //
 // The characters '(', ')' and ',' are always emitted as single-character
 // tokens.  Spaces and tabs separate tokens and are discarded.  Every other
 // maximal run of characters must either satisfy IsValidName() or be
 // convertible by ConvertStringToReal(); if one is not, a warning is logged
 // and false is returned, leaving in *tokens whatever was tokenized so far.
 //
 // Only ' ' and '\t' are skipped as whitespace; a token starting with any
 // other whitespace character (e.g. '\n') trips the assertion in the loop.
 //
 //  @param [in]  input   The text to tokenize.
 //  @param [out] tokens  Cleared, then filled with the tokens; must be non-NULL.
 //  @return  true on success; false if some token was neither a valid name
 //           nor a number.
 bool DescriptorTokenize(const std::string &input,
                         std::vector<std::string> *tokens) {
   KALDI_ASSERT(tokens != NULL);
   const char *whitespace = " \t";
   const char *delimiters = " \t(),";
   const size_t size = input.size();
   tokens->clear();
   size_t start = input.find_first_not_of(whitespace);
   // Note: if nothing but whitespace remains, 'start' becomes npos, which is
   // >= size, so the loop terminates.
   while (start < size) {
     // isspace() is only defined for values representable as unsigned char
     // (or EOF), so cast before calling it.
     KALDI_ASSERT(!isspace(static_cast<unsigned char>(input[start])));
     const char c = input[start];
     if (c == '(' || c == ')' || c == ',') {
       // Punctuation is always a token on its own.
       tokens->push_back(std::string(1, c));
       start = input.find_first_not_of(whitespace, start + 1);
       continue;
     }
     // The token extends up to the next delimiter, or to the end of the input.
     const size_t found = input.find_first_of(delimiters, start);
     KALDI_ASSERT(found != start);
     const size_t length = (found == std::string::npos ? size : found) - start;
     const std::string str(input, start, length);
     BaseFloat tmp;
     if (!IsValidName(str) && !ConvertStringToReal(str, &tmp)) {
       KALDI_WARN << "Could not tokenize line " << ErrorContext(std::string(input, start));
       return false;
     }
     tokens->push_back(str);
     if (found == std::string::npos)
       break;  // that was the last token.
     // If we stopped at '(' , ')' or ',', input[found] is not whitespace, so
     // this leaves start == found and the punctuation is handled next time
     // round; if we stopped at whitespace, it is skipped.
     start = input.find_first_not_of(whitespace, found);
   }
   return true;
 }

 // Returns a short excerpt of what comes next in 'is', for use in error
 // messages.  Note that this consumes the characters it reads from 'is'.
 //  - If the stream is not in a good state on entry, returns "end of line".
 //  - If more than 20 characters could be read, returns the first 20 followed
 //    by "...".
 //  - Otherwise returns exactly the characters that were read.
 std::string ErrorContext(std::istream &is) {
   const int max_shown = 20;
   if (!is.good())
     return "end of line";
   // Ask for one character more than we show, so that a successful read tells
   // us the remaining input is longer than 'max_shown'.
   char buf[max_shown + 1];
   is.read(buf, max_shown + 1);
   if (!is) {
     // Short read: return just what was actually extracted.
     return std::string(buf, is.gcount());
   }
   std::string excerpt(buf, max_shown);
   excerpt += "...";
   return excerpt;
 }

 // Returns a short excerpt of 'str' for use in error messages: "end of line"
 // if 'str' is empty, 'str' itself if it has at most 20 characters, and
 // otherwise its first 20 characters followed by "...".
 std::string ErrorContext(const std::string &str) {
   const std::string::size_type max_shown = 20;
   if (str.empty())
     return "end of line";
   if (str.size() > max_shown)
     return str.substr(0, max_shown) + "...";
   return str;
 }

 // Writes 'f' to 'os' using only a few digits, chosen by magnitude:
 //   10 <= |f| < 10000       fixed notation, 0 decimals
 //   otherwise |f| >= 0.995  fixed notation, 1 decimal
 //   otherwise |f| >= 0.01   fixed notation, 2 decimals
 //   anything else           precision 1, without selecting fixed notation
 // Afterwards the stream's floatfield flags are cleared and its precision is
 // set to 6, regardless of what they were on entry.
 static void PrintFloatSuccinctly(std::ostream &os, BaseFloat f) {
   const double magnitude = fabs(f);
   int num_digits;
   bool fixed_notation = true;
   if (magnitude >= 10.0 && magnitude < 10000.0) {
     num_digits = 0;
   } else if (magnitude >= 0.995) {
     num_digits = 1;
   } else if (magnitude >= 0.01) {
     num_digits = 2;
   } else {
     num_digits = 1;
     fixed_notation = false;
   }
   if (fixed_notation)
     os << std::fixed;
   os << std::setprecision(num_digits) << f;
   os.unsetf(std::ios_base::floatfield);
   os << std::setprecision(6);  // Restore the default.
 }


 // Returns a string that summarizes a vector fairly succintly, for
 // printing stats in info lines.
 std::string SummarizeVector(const VectorBase<float> &vec) {
   std::ostringstream os;
   if (vec.Dim() < 10) {
     os << "[ ";
     for (int32 i = 0; i < vec.Dim(); i++) {
       PrintFloatSuccinctly(os, vec(i));
       os << ' ';
     }
     os << "]";
   } else {
     // print out mean and standard deviation, and some selected values.
     BaseFloat mean = vec.Sum() / vec.Dim(),
         stddev = sqrt(VecVec(vec, vec) / vec.Dim() - mean * mean);

     std::string percentiles_str = "0,1,2,5 10,20,50,80,90 95,98,99,100";
     std::vector<int32> percentiles;
     bool ans = SplitStringToIntegers(percentiles_str, ", ", false,
                                      &percentiles);
     KALDI_ASSERT(ans);
     os << "[percentiles(" << percentiles_str << ")=(";
     Vector<BaseFloat> vec_sorted(vec);
     std::sort(vec_sorted.Data(), vec_sorted.Data() + vec_sorted.Dim());
     int32 n = vec.Dim() - 1;
     for (size_t i = 0; i < percentiles.size(); i++) {
       int32 percentile = percentiles[i];
       BaseFloat value = vec_sorted((n * percentile) / 100);
       PrintFloatSuccinctly(os, value);
       if (i + 1 < percentiles.size())
         os << (i == 3 || i == 8 ? ' ' : ',');
     }
     os << std::setprecision(3);
     os << "), mean=" << mean << ", stddev=" << stddev << "]";
   }
   return os.str();
 }

 std::string SummarizeVector(const VectorBase<double> &vec) {
   Vector<float> vec_copy(vec);
   return SummarizeVector(vec_copy);
 }

 std::string SummarizeVector(const CuVectorBase<BaseFloat> &cu_vec) {
   Vector<float> vec(cu_vec);
   return SummarizeVector(vec);
 }

 void PrintParameterStats(std::ostringstream &os,
                          const std::string &name,
                          const CuVectorBase<BaseFloat> &params,
                          bool include_mean) {
   os << std::setprecision(4);
   os << ", " << name << '-';
   if (include_mean) {
     BaseFloat mean = params.Sum() / params.Dim(),
         stddev = std::sqrt(VecVec(params, params) / params.Dim() - mean * mean);
     os << "{mean,stddev}=" << mean << ',' << stddev;
   } else {
     BaseFloat rms = std::sqrt(VecVec(params, params) / params.Dim());
     os << "rms=" << rms;
   }
   os << std::setprecision(6);  // restore the default precision.
 }

 void PrintParameterStats(std::ostringstream &os,
                          const std::string &name,
                          const CuMatrix<BaseFloat> &params,
                          bool include_mean,
                          bool include_row_norms,
                          bool include_column_norms,
                          bool include_singular_values) {
   os << std::setprecision(4);
   os << ", " << name << '-';
   int32 dim = params.NumRows() * params.NumCols();
   if (include_mean) {
     BaseFloat mean = params.Sum() / dim,
         stddev = std::sqrt(TraceMatMat(params, params, kTrans) / dim -
                            mean * mean);
     os << "{mean,stddev}=" << mean << ',' << stddev;
   } else {
     BaseFloat rms = std::sqrt(TraceMatMat(params, params, kTrans) / dim);
     os << "rms=" << rms;
   }
   os << std::setprecision(6);  // restore the default precision.

   if (include_row_norms) {
     CuVector<BaseFloat> row_norms(params.NumRows());
     row_norms.AddDiagMat2(1.0, params, kNoTrans, 0.0);
     row_norms.ApplyPow(0.5);
     Vector<BaseFloat> row_norms_cpu;
     row_norms.Swap(&row_norms_cpu);
     os << ", " << name << "-row-norms="
        << SummarizeVector(row_norms_cpu);
   }
   if (include_column_norms) {
     CuVector<BaseFloat> col_norms(params.NumCols());
     col_norms.AddDiagMat2(1.0, params, kTrans, 0.0);
     col_norms.ApplyPow(0.5);
     Vector<BaseFloat> col_norms_cpu;
     col_norms.Swap(&col_norms_cpu);
     os << ", " << name << "-col-norms="
        << SummarizeVector(col_norms_cpu);
   }
   if (include_singular_values) {
     Matrix<BaseFloat> params_cpu(params);
     Vector<BaseFloat> s(std::min(params.NumRows(), params.NumCols()));
     params_cpu.Svd(&s);
     std::string singular_values_str = SummarizeVector(s);
     os << ", " << name << "-singular-values=" << singular_values_str;
     std::ostringstream name_os;
   }
 }


 void ParseConfigLines(const std::vector<std::string> &lines,
                       std::vector<ConfigLine> *config_lines) {
   config_lines->resize(lines.size());
   for (size_t i = 0; i < lines.size(); i++) {
     bool ret = (*config_lines)[i].ParseLine(lines[i]);
     if (!ret) {
       KALDI_ERR << "Error parsing config line: " << lines[i];
     }
   }
 }

 // Returns true if the whole of 'name' matches 'pattern', where each '*' in
 // 'pattern' matches any sequence of zero or more characters and every other
 // character matches only itself.  Both arguments must be non-NULL,
 // null-terminated strings.  A '*' in 'pattern' is always a wildcard, so a
 // literal '*' in 'name' can only be matched by a wildcard.
 //
 // This is implemented iteratively with a single backtracking point (the most
 // recent '*'), which takes at most O(strlen(name) * strlen(pattern)) time and
 // constant stack space.  A naive recursive formulation gives the same
 // answers but can take exponential time on patterns containing several '*'.
 bool NameMatchesPattern(const char *name, const char *pattern) {
   const char *last_star = NULL;  // most recent '*' seen in 'pattern'.
   const char *retry_from = NULL;  // where in 'name' that '*' stops matching.
   while (*name != '\0') {
     if (*pattern == '*') {
       // Start by letting this '*' match the empty string, and remember where
       // to come back to if that turns out not to work.
       last_star = pattern++;
       retry_from = name;
     } else if (*pattern == *name) {
       // Literal match (cannot be the end of 'pattern', as *name != '\0').
       ++pattern;
       ++name;
     } else if (last_star != NULL) {
       // Mismatch: let the most recent '*' absorb one more character of
       // 'name' and try again from just after it.  Earlier '*'s never need
       // revisiting, since a later '*' can absorb anything they could.
       pattern = last_star + 1;
       name = ++retry_from;
     } else {
       return false;
     }
   }
   // 'name' is used up; what is left of 'pattern' must match the empty string.
   while (*pattern == '*')
     ++pattern;
   return *pattern == '\0';
 }


 } // namespace nnet3
 } // namespace kaldi
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::CuVector
Definition: matrix-common.h:74

kaldi::SplitStringToIntegers
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68

kaldi::CuVectorBase::Sum
Real Sum() const
Definition: cu-vector.cc:297

kaldi::CuMatrixBase::Sum
Real Sum() const
Definition: cu-matrix.cc:3012

kaldi::nnet3::DescriptorTokenize
bool DescriptorTokenize(const std::string &input, std::vector< std::string > *tokens)
This function tokenizes input when parsing Descriptor configuration values.
Definition: nnet-parse.cc:30

kaldi::nnet3::SummarizeVector
std::string SummarizeVector(const VectorBase< float > &vec)
Returns a string that summarizes a vector fairly succinctly, for printing stats in info lines...
Definition: nnet-parse.cc:111

kaldi::CuVectorBase::AddDiagMat2
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
Definition: cu-vector.cc:595

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

cu-matrix.h

kaldi::Matrix< BaseFloat >

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::IsValidName
bool IsValidName(const std::string &name)
Returns true if 'name' would be a valid name for a component or node in a nnet3Nnet.
Definition: text-utils.cc:553

kaldi::nnet3::NameMatchesPattern
bool NameMatchesPattern(const char *name, const char *pattern)
Definition: nnet-parse.cc:235

kaldi::nnet3::ErrorContext
std::string ErrorContext(std::istream &is)
Return a string used in error messages.
Definition: nnet-parse.cc:78

cu-vector.h

kaldi::kTrans
Definition: matrix-common.h:33

kaldi::BaseFloat
float BaseFloat
Definition: kaldi-types.h:29

kaldi::nnet3::ParseConfigLines
void ParseConfigLines(const std::vector< std::string > &lines, std::vector< ConfigLine > *config_lines)
Definition: nnet-parse.cc:224

kaldi::Vector::Swap
void Swap(Vector< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
Definition: kaldi-vector.cc:1297

rnnlm::n
struct rnnlm::@11::@12 n

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

kaldi::kNoTrans
Definition: matrix-common.h:34

kaldi::ConvertStringToReal
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
Definition: text-utils.cc:238

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::TraceMatMat
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
Definition: kaldi-matrix.cc:2692

kaldi::VectorBase::Data
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

nnet-parse.h

kaldi::VectorBase::Sum
Real Sum() const
Returns sum of the elements.
Definition: kaldi-vector.cc:688

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::CuMatrixBase::NumCols
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::nnet3::PrintFloatSuccinctly
static void PrintFloatSuccinctly(std::ostream &os, BaseFloat f)
Definition: nnet-parse.cc:94

kaldi::MatrixBase::Svd
void Svd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt) const
Compute SVD (*this) = U diag(s) Vt.
Definition: kaldi-matrix.cc:1825

kaldi::nnet3::PrintParameterStats
void PrintParameterStats(std::ostringstream &os, const std::string &name, const CuVectorBase< BaseFloat > &params, bool include_mean)
Print to 'os' some information about the mean and standard deviation of some parameters, used in Info() functions in nnet-simple-component.cc.
Definition: nnet-parse.cc:157

kaldi::CuMatrixBase::NumRows
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::VecVec
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37

kaldi::CuVectorBase::Dim
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

kaldi::CuVectorBase
Vector for CUDA computing.
Definition: matrix-common.h:72