nnet-parse.cc
Go to the documentation of this file.
1 // nnet3/nnet-parse.cc
2 
3 // Copyright 2015 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <iterator>
21 #include <sstream>
22 #include <iomanip>
23 #include "nnet3/nnet-parse.h"
24 #include "cudamatrix/cu-vector.h"
25 #include "cudamatrix/cu-matrix.h"
26 
27 namespace kaldi {
28 namespace nnet3 {
29 
30 bool DescriptorTokenize(const std::string &input,
31  std::vector<std::string> *tokens) {
32  KALDI_ASSERT(tokens != NULL);
33  size_t start = input.find_first_not_of(" \t"), size = input.size();
34  tokens->clear();
35  while (start < size) {
36  KALDI_ASSERT(!isspace(input[start]));
37  if (input[start] == '(' || input[start] == ')' || input[start] == ',') {
38  tokens->push_back(std::string(input, start, 1));
39  start = input.find_first_not_of(" \t", start + 1);
40  } else {
41  size_t found = input.find_first_of(" \t(),", start);
42  KALDI_ASSERT(found != start);
43  if (found == std::string::npos) {
44  std::string str(input, start, input.size() - start);
45  BaseFloat tmp;
46  if (!IsValidName(str) && !ConvertStringToReal(str, &tmp)) {
47  KALDI_WARN << "Could not tokenize line " << ErrorContext(std::string(input, start));
48  return false;
49  }
50  tokens->push_back(str);
51  break;
52  } else {
53  if (input[found] == '(' || input[found] == ')' || input[found] == ',') {
54  std::string str(input, start, found - start);
55  BaseFloat tmp;
56  if (!IsValidName(str) && !ConvertStringToReal(str, &tmp)) {
57  KALDI_WARN << "Could not tokenize line " << ErrorContext(std::string(input, start));
58  return false;
59  }
60  tokens->push_back(str);
61  start = found;
62  } else {
63  std::string str(input, start, found - start);
64  BaseFloat tmp;
65  if (!IsValidName(str) && !ConvertStringToReal(str, &tmp)) {
66  KALDI_WARN << "Could not tokenize line " << ErrorContext(std::string(input, start));
67  return false;
68  }
69  tokens->push_back(str);
70  start = input.find_first_not_of(" \t", found);
71  }
72  }
73  }
74  }
75  return true;
76 }
77 
// Returns a short excerpt of what remains in 'is', for use in error messages.
// Note that this consumes up to 21 characters from the stream.
//   - If 'is' is not in a good state, or has no characters left to read,
//     returns "end of line".
//   - If at most 20 characters remain, returns all of them.
//   - Otherwise returns the first 20 characters followed by "...".
std::string ErrorContext(std::istream &is) {
  if (!is.good()) return "end of line";
  enum { kMaxChars = 20 };
  // Try to read one character more than we will print, so we can tell whether
  // the excerpt had to be truncated.
  char buf[kMaxChars + 1];
  is.read(buf, kMaxChars + 1);
  if (is) {
    return std::string(buf, kMaxChars) + "...";
  }
  const std::streamsize num_read = is.gcount();
  // Nothing left although the stream had not flagged EOF yet: report it the
  // same way as an exhausted stream, matching the std::string overload's
  // handling of an empty string.
  if (num_read <= 0) return "end of line";
  return std::string(buf, static_cast<size_t>(num_read));
}
87 
// Returns a short excerpt of 'str' for use in error messages: "end of line"
// if 'str' is empty, 'str' itself if it has at most 20 characters, and
// otherwise its first 20 characters followed by "...".
std::string ErrorContext(const std::string &str) {
  const size_t max_chars = 20;
  if (str.empty())
    return "end of line";
  if (str.size() > max_chars)
    return str.substr(0, max_chars) + "...";
  return str;
}
93 
94 static void PrintFloatSuccinctly(std::ostream &os, BaseFloat f) {
95  if (fabs(f) < 10000.0 && fabs(f) >= 10.0) {
96  os << std::fixed << std::setprecision(0) << f;
97  } else if (fabs(f) >= 0.995) {
98  os << std::fixed << std::setprecision(1) << f;
99  } else if (fabs(f) >= 0.01) {
100  os << std::fixed << std::setprecision(2) << f;
101  } else {
102  os << std::setprecision(1) << f;
103  }
104  os.unsetf(std::ios_base::floatfield);
105  os << std::setprecision(6); // Restore the default.
106 }
107 
108 
109 // Returns a string that summarizes a vector fairly succintly, for
110 // printing stats in info lines.
111 std::string SummarizeVector(const VectorBase<float> &vec) {
112  std::ostringstream os;
113  if (vec.Dim() < 10) {
114  os << "[ ";
115  for (int32 i = 0; i < vec.Dim(); i++) {
116  PrintFloatSuccinctly(os, vec(i));
117  os << ' ';
118  }
119  os << "]";
120  } else {
121  // print out mean and standard deviation, and some selected values.
122  BaseFloat mean = vec.Sum() / vec.Dim(),
123  stddev = sqrt(VecVec(vec, vec) / vec.Dim() - mean * mean);
124 
125  std::string percentiles_str = "0,1,2,5 10,20,50,80,90 95,98,99,100";
126  std::vector<int32> percentiles;
127  bool ans = SplitStringToIntegers(percentiles_str, ", ", false,
128  &percentiles);
129  KALDI_ASSERT(ans);
130  os << "[percentiles(" << percentiles_str << ")=(";
131  Vector<BaseFloat> vec_sorted(vec);
132  std::sort(vec_sorted.Data(), vec_sorted.Data() + vec_sorted.Dim());
133  int32 n = vec.Dim() - 1;
134  for (size_t i = 0; i < percentiles.size(); i++) {
135  int32 percentile = percentiles[i];
136  BaseFloat value = vec_sorted((n * percentile) / 100);
137  PrintFloatSuccinctly(os, value);
138  if (i + 1 < percentiles.size())
139  os << (i == 3 || i == 8 ? ' ' : ',');
140  }
141  os << std::setprecision(3);
142  os << "), mean=" << mean << ", stddev=" << stddev << "]";
143  }
144  return os.str();
145 }
146 
147 std::string SummarizeVector(const VectorBase<double> &vec) {
148  Vector<float> vec_copy(vec);
149  return SummarizeVector(vec_copy);
150 }
151 
152 std::string SummarizeVector(const CuVectorBase<BaseFloat> &cu_vec) {
153  Vector<float> vec(cu_vec);
154  return SummarizeVector(vec);
155 }
156 
157 void PrintParameterStats(std::ostringstream &os,
158  const std::string &name,
159  const CuVectorBase<BaseFloat> &params,
160  bool include_mean) {
161  os << std::setprecision(4);
162  os << ", " << name << '-';
163  if (include_mean) {
164  BaseFloat mean = params.Sum() / params.Dim(),
165  stddev = std::sqrt(VecVec(params, params) / params.Dim() - mean * mean);
166  os << "{mean,stddev}=" << mean << ',' << stddev;
167  } else {
168  BaseFloat rms = std::sqrt(VecVec(params, params) / params.Dim());
169  os << "rms=" << rms;
170  }
171  os << std::setprecision(6); // restore the default precision.
172 }
173 
174 void PrintParameterStats(std::ostringstream &os,
175  const std::string &name,
176  const CuMatrix<BaseFloat> &params,
177  bool include_mean,
178  bool include_row_norms,
179  bool include_column_norms,
180  bool include_singular_values) {
181  os << std::setprecision(4);
182  os << ", " << name << '-';
183  int32 dim = params.NumRows() * params.NumCols();
184  if (include_mean) {
185  BaseFloat mean = params.Sum() / dim,
186  stddev = std::sqrt(TraceMatMat(params, params, kTrans) / dim -
187  mean * mean);
188  os << "{mean,stddev}=" << mean << ',' << stddev;
189  } else {
190  BaseFloat rms = std::sqrt(TraceMatMat(params, params, kTrans) / dim);
191  os << "rms=" << rms;
192  }
193  os << std::setprecision(6); // restore the default precision.
194 
195  if (include_row_norms) {
196  CuVector<BaseFloat> row_norms(params.NumRows());
197  row_norms.AddDiagMat2(1.0, params, kNoTrans, 0.0);
198  row_norms.ApplyPow(0.5);
199  Vector<BaseFloat> row_norms_cpu;
200  row_norms.Swap(&row_norms_cpu);
201  os << ", " << name << "-row-norms="
202  << SummarizeVector(row_norms_cpu);
203  }
204  if (include_column_norms) {
205  CuVector<BaseFloat> col_norms(params.NumCols());
206  col_norms.AddDiagMat2(1.0, params, kTrans, 0.0);
207  col_norms.ApplyPow(0.5);
208  Vector<BaseFloat> col_norms_cpu;
209  col_norms.Swap(&col_norms_cpu);
210  os << ", " << name << "-col-norms="
211  << SummarizeVector(col_norms_cpu);
212  }
213  if (include_singular_values) {
214  Matrix<BaseFloat> params_cpu(params);
215  Vector<BaseFloat> s(std::min(params.NumRows(), params.NumCols()));
216  params_cpu.Svd(&s);
217  std::string singular_values_str = SummarizeVector(s);
218  os << ", " << name << "-singular-values=" << singular_values_str;
219  std::ostringstream name_os;
220  }
221 }
222 
223 
224 void ParseConfigLines(const std::vector<std::string> &lines,
225  std::vector<ConfigLine> *config_lines) {
226  config_lines->resize(lines.size());
227  for (size_t i = 0; i < lines.size(); i++) {
228  bool ret = (*config_lines)[i].ParseLine(lines[i]);
229  if (!ret) {
230  KALDI_ERR << "Error parsing config line: " << lines[i];
231  }
232  }
233 }
234 
// Returns true if the whole of 'name' matches 'pattern'.  In 'pattern' the
// character '*' matches any sequence of characters, including the empty one;
// every other character matches only itself.  Both arguments must be
// null-terminated strings.
//
// The match is done iteratively, backtracking only to the most recent '*',
// so the running time is bounded by strlen(name) * strlen(pattern) rather
// than growing exponentially with the number of '*' in the pattern.
bool NameMatchesPattern(const char *name, const char *pattern) {
  const char *last_star = NULL;  // Most recent '*' seen in 'pattern'.
  const char *retry_from = NULL;  // Where in 'name' that '*' currently ends.
  while (*name != '\0') {
    if (*pattern == '*') {
      // First let the '*' match nothing; it is extended on a later mismatch.
      last_star = pattern++;
      retry_from = name;
    } else if (*pattern == *name) {
      ++pattern;
      ++name;
    } else if (last_star != NULL) {
      // Mismatch: let the most recent '*' absorb one more character of
      // 'name', and resume matching just after that '*'.
      pattern = last_star + 1;
      name = ++retry_from;
    } else {
      return false;
    }
  }
  // 'name' is used up; whatever is left of 'pattern' must be able to match
  // the empty string, i.e. consist only of '*'.
  while (*pattern == '*')
    ++pattern;
  return *pattern == '\0';
}
245 
246 
247 
248 } // namespace nnet3
249 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
Real Sum() const
Definition: cu-vector.cc:297
Real Sum() const
Definition: cu-matrix.cc:3012
bool DescriptorTokenize(const std::string &input, std::vector< std::string > *tokens)
This function tokenizes input when parsing Descriptor configuration values.
Definition: nnet-parse.cc:30
std::string SummarizeVector(const VectorBase< float > &vec)
Returns a string that summarizes a vector fairly succinctly, for printing stats in info lines...
Definition: nnet-parse.cc:111
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
Definition: cu-vector.cc:595
kaldi::int32 int32
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
bool IsValidName(const std::string &name)
Returns true if 'name' would be a valid name for a component or node in a nnet3 Nnet.
Definition: text-utils.cc:553
bool NameMatchesPattern(const char *name, const char *pattern)
Definition: nnet-parse.cc:235
std::string ErrorContext(std::istream &is)
Return a string used in error messages.
Definition: nnet-parse.cc:78
float BaseFloat
Definition: kaldi-types.h:29
void ParseConfigLines(const std::vector< std::string > &lines, std::vector< ConfigLine > *config_lines)
Definition: nnet-parse.cc:224
void Swap(Vector< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
struct rnnlm::@11::@12 n
#define KALDI_ERR
Definition: kaldi-error.h:147
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
Definition: text-utils.cc:238
#define KALDI_WARN
Definition: kaldi-error.h:150
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
Real Sum() const
Returns sum of the elements.
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
static void PrintFloatSuccinctly(std::ostream &os, BaseFloat f)
Definition: nnet-parse.cc:94
void Svd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt) const
Compute SVD (*this) = U diag(s) Vt.
void PrintParameterStats(std::ostringstream &os, const std::string &name, const CuVectorBase< BaseFloat > &params, bool include_mean)
Print to 'os' some information about the mean and standard deviation of some parameters, used in Info() functions in nnet-simple-component.cc.
Definition: nnet-parse.cc:157
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69
Vector for CUDA computing.
Definition: matrix-common.h:72