matrix-sum.cc
Go to the documentation of this file.
1 // bin/matrix-sum.cc
2 
3 // Copyright 2012-2014 Johns Hopkins University (author: Daniel Povey)
4 // 2014 Vimal Manohar
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "matrix/kaldi-matrix.h"
24 
25 namespace kaldi {
26 
27 // sums a bunch of archives to produce one archive
28 // for back-compatibility with an older form, we support scaling
29 // of the first two input archives.
31  BaseFloat scale1,
32  BaseFloat scale2) {
33  int32 num_args = po.NumArgs();
34  std::string matrix_in_fn1 = po.GetArg(1),
35  matrix_out_fn = po.GetArg(num_args);
36 
37  // Output matrix
38  BaseFloatMatrixWriter matrix_writer(matrix_out_fn);
39 
40  // Input matrices
41  SequentialBaseFloatMatrixReader matrix_reader1(matrix_in_fn1);
42  std::vector<RandomAccessBaseFloatMatrixReader*>
43  matrix_readers(num_args-2,
44  static_cast<RandomAccessBaseFloatMatrixReader*>(NULL));
45  std::vector<std::string> matrix_in_fns(num_args-2);
46  for (int32 i = 2; i < num_args; ++i) {
47  matrix_readers[i-2] = new RandomAccessBaseFloatMatrixReader(po.GetArg(i));
48  matrix_in_fns[i-2] = po.GetArg(i);
49  }
50 
51  int32 n_utts = 0, n_total_matrices = 0,
52  n_success = 0, n_missing = 0, n_other_errors = 0;
53 
54  for (; !matrix_reader1.Done(); matrix_reader1.Next()) {
55  std::string key = matrix_reader1.Key();
56  Matrix<BaseFloat> matrix1 = matrix_reader1.Value();
57  matrix_reader1.FreeCurrent();
58  n_utts++;
59  n_total_matrices++;
60 
61  matrix1.Scale(scale1);
62 
63  Matrix<BaseFloat> matrix_out(matrix1);
64 
65  for (int32 i = 0; i < num_args-2; ++i) {
66  if (matrix_readers[i]->HasKey(key)) {
67  Matrix<BaseFloat> matrix2 = matrix_readers[i]->Value(key);
68  n_total_matrices++;
69  if (SameDim(matrix2, matrix_out)) {
70  BaseFloat scale = (i == 0 ? scale2 : 1.0);
71  // note: i == 0 corresponds to the 2nd input archive.
72  matrix_out.AddMat(scale, matrix2, kNoTrans);
73  } else {
74  KALDI_WARN << "Dimension mismatch for utterance " << key
75  << " : " << matrix2.NumRows() << " by "
76  << matrix2.NumCols() << " for "
77  << "system " << (i + 2) << ", rspecifier: "
78  << matrix_in_fns[i] << " vs " << matrix_out.NumRows()
79  << " by " << matrix_out.NumCols()
80  << " primary matrix, rspecifier:" << matrix_in_fn1;
81  n_other_errors++;
82  }
83  } else {
84  KALDI_WARN << "No matrix found for utterance " << key << " for "
85  << "system " << (i + 2) << ", rspecifier: "
86  << matrix_in_fns[i];
87  n_missing++;
88  }
89  }
90 
91  matrix_writer.Write(key, matrix_out);
92  n_success++;
93  }
94 
95  KALDI_LOG << "Processed " << n_utts << " utterances: with a total of "
96  << n_total_matrices << " matrices across " << (num_args-1)
97  << " different systems";
98  KALDI_LOG << "Produced output for " << n_success << " utterances; "
99  << n_missing << " total missing matrices";
100 
101  DeletePointers(&matrix_readers);
102 
103  return (n_success != 0 && n_missing < (n_success - n_missing)) ? 0 : 1;
104 }
105 
107  int32 num_args = po.NumArgs();
108  std::string matrix_in_fn1 = po.GetArg(1),
109  matrix_out_fn = po.GetArg(num_args);
110  BaseFloat scale = 1.0 / (num_args - 1);
111 
112  // Output matrix
113  BaseFloatMatrixWriter matrix_writer(matrix_out_fn);
114 
115  // Input matrices
116  SequentialBaseFloatMatrixReader matrix_reader1(matrix_in_fn1);
117  std::vector<RandomAccessBaseFloatMatrixReader*>
118  matrix_readers(num_args-2,
119  static_cast<RandomAccessBaseFloatMatrixReader*>(NULL));
120  std::vector<std::string> matrix_in_fns(num_args-2);
121  for (int32 i = 2; i < num_args; ++i) {
122  matrix_readers[i-2] = new RandomAccessBaseFloatMatrixReader(po.GetArg(i));
123  matrix_in_fns[i-2] = po.GetArg(i);
124  }
125 
126  int32 n_utts = 0, n_total_matrices = 0,
127  n_success = 0, n_missing = 0, n_other_errors = 0;
128 
129  for (; !matrix_reader1.Done(); matrix_reader1.Next()) {
130  std::string key = matrix_reader1.Key();
131  Matrix<BaseFloat> matrix1 = matrix_reader1.Value();
132  matrix_reader1.FreeCurrent();
133  n_utts++;
134  n_total_matrices++;
135 
136  matrix1.Scale(scale);
137 
138  Matrix<BaseFloat> matrix_out(matrix1);
139 
140  for (int32 i = 0; i < num_args-2; ++i) {
141  if (matrix_readers[i]->HasKey(key)) {
142  Matrix<BaseFloat> matrix2 = matrix_readers[i]->Value(key);
143  n_total_matrices++;
144  if (SameDim(matrix2, matrix_out)) {
145  matrix_out.AddMat(scale, matrix2, kNoTrans);
146  } else {
147  KALDI_WARN << "Dimension mismatch for utterance " << key
148  << " : " << matrix2.NumRows() << " by "
149  << matrix2.NumCols() << " for "
150  << "system " << (i + 2) << ", rspecifier: "
151  << matrix_in_fns[i] << " vs " << matrix_out.NumRows()
152  << " by " << matrix_out.NumCols()
153  << " primary matrix, rspecifier:" << matrix_in_fn1;
154  n_other_errors++;
155  }
156  } else {
157  KALDI_WARN << "No matrix found for utterance " << key << " for "
158  << "system " << (i + 2) << ", rspecifier: "
159  << matrix_in_fns[i];
160  n_missing++;
161  }
162  }
163 
164  matrix_writer.Write(key, matrix_out);
165  n_success++;
166  }
167 
168  KALDI_LOG << "Processed " << n_utts << " utterances: with a total of "
169  << n_total_matrices << " matrices across " << (num_args-1)
170  << " different systems";
171  KALDI_LOG << "Produced output for " << n_success << " utterances; "
172  << n_missing << " total missing matrices";
173 
174  DeletePointers(&matrix_readers);
175 
176  return (n_success != 0 && n_missing < (n_success - n_missing)) ? 0 : 1;
177 }
178 
180  bool binary) {
181  KALDI_ASSERT(po.NumArgs() == 2);
182  KALDI_ASSERT(ClassifyRspecifier(po.GetArg(1), NULL, NULL) != kNoRspecifier &&
183  "matrix-sum: first argument must be an rspecifier");
184  // if next assert fails it would be bug in the code as otherwise we shouldn't
185  // be called.
186  KALDI_ASSERT(ClassifyWspecifier(po.GetArg(2), NULL, NULL, NULL) ==
187  kNoWspecifier);
188 
189  SequentialBaseFloatMatrixReader mat_reader(po.GetArg(1));
190 
191  Matrix<double> sum;
192 
193  int32 num_done = 0, num_err = 0;
194 
195  for (; !mat_reader.Done(); mat_reader.Next()) {
196  const Matrix<BaseFloat> &mat = mat_reader.Value();
197  if (mat.NumRows() == 0) {
198  KALDI_WARN << "Zero matrix input for key " << mat_reader.Key();
199  num_err++;
200  } else {
201  if (sum.NumRows() == 0) sum.Resize(mat.NumRows(), mat.NumCols());
202  if (sum.NumRows() != mat.NumRows() || sum.NumCols() != mat.NumCols()) {
203  KALDI_WARN << "Dimension mismatch for key " << mat_reader.Key()
204  << ": " << mat.NumRows() << " by " << mat.NumCols() << " vs. "
205  << sum.NumRows() << " by " << sum.NumCols();
206  num_err++;
207  } else {
208  Matrix<double> dmat(mat);
209  sum.AddMat(1.0, dmat, kNoTrans);
210  num_done++;
211  }
212  }
213  }
214 
215  Matrix<BaseFloat> sum_float(sum);
216  WriteKaldiObject(sum_float, po.GetArg(2), binary);
217 
218  KALDI_LOG << "Summed " << num_done << " matrices, "
219  << num_err << " with errors; wrote sum to "
220  << PrintableWxfilename(po.GetArg(2));
221  return (num_done > 0 && num_err < num_done) ? 0 : 1;
222 }
223 
224 // sum a bunch of single files to produce a single file [including
225 // extended filenames, of course]
227  bool binary, bool average) {
228  KALDI_ASSERT(po.NumArgs() >= 2);
229  for (int32 i = 1; i < po.NumArgs(); i++) {
230  if (ClassifyRspecifier(po.GetArg(i), NULL, NULL) != kNoRspecifier) {
231  KALDI_ERR << "Wrong usage (type 3): if first and last arguments are not "
232  << "tables, the intermediate arguments must not be tables.";
233  }
234  }
235  if (ClassifyWspecifier(po.GetArg(po.NumArgs()), NULL, NULL, NULL) !=
236  kNoWspecifier) {
237  KALDI_ERR << "Wrong usage (type 3): if first and last arguments are not "
238  << "tables, the intermediate arguments must not be tables.";
239  }
240 
241  Matrix<BaseFloat> sum;
242  for (int32 i = 1; i < po.NumArgs(); i++) {
243  Matrix<BaseFloat> this_mat;
244  ReadKaldiObject(po.GetArg(i), &this_mat);
245  if (sum.NumRows() < this_mat.NumRows() ||
246  sum.NumCols() < this_mat.NumCols())
247  sum.Resize(std::max(sum.NumRows(), this_mat.NumRows()),
248  std::max(sum.NumCols(), this_mat.NumCols()),
249  kCopyData);
250  sum.AddMat(1.0, this_mat);
251  }
252  if (average)
253  sum.Scale(1.0 / (po.NumArgs() - 1));
254  WriteKaldiObject(sum, po.GetArg(po.NumArgs()), binary);
255  KALDI_LOG << "Summed " << (po.NumArgs() - 1) << " matrices; "
256  << "wrote sum to " << PrintableWxfilename(po.GetArg(po.NumArgs()));
257  return 0;
258 }
259 
260 
261 } // namespace kaldi
262 
263 
264 int main(int argc, char *argv[]) {
265  try {
266  using namespace kaldi;
267 
268 
269  const char *usage =
270  "Add matrices (supports various forms)\n"
271  "\n"
272  "Type one usage:\n"
273  " matrix-sum [options] <matrix-in-rspecifier1> [<matrix-in-rspecifier2>"
274  " <matrix-in-rspecifier3> ...] <matrix-out-wspecifier>\n"
275  " e.g.: matrix-sum ark:1.weights ark:2.weights ark:combine.weights\n"
276  " This usage supports the --scale1 and --scale2 options to scale the\n"
277  " first two input tables.\n"
278  "Type two usage (sums a single table input to produce a single output):\n"
279  " matrix-sum [options] <matrix-in-rspecifier> <matrix-out-wxfilename>\n"
280  " e.g.: matrix-sum --binary=false mats.ark sum.mat\n"
281  "Type three usage (sums or averages single-file inputs to produce\n"
282  "a single output):\n"
283  " matrix-sum [options] <matrix-in-rxfilename1> <matrix-in-rxfilename2> ..."
284  " <matrix-out-wxfilename>\n"
285  " e.g.: matrix-sum --binary=false 1.mat 2.mat 3.mat sum.mat\n"
286  "See also: matrix-sum-rows, copy-matrix\n";
287 
288 
289  BaseFloat scale1 = 1.0, scale2 = 1.0;
290  bool average = false;
291  bool binary = true;
292 
293  ParseOptions po(usage);
294 
295  po.Register("scale1", &scale1, "Scale applied to first matrix "
296  "(only for type one usage)");
297  po.Register("scale2", &scale2, "Scale applied to second matrix "
298  "(only for type one usage)");
299  po.Register("binary", &binary, "If true, write output as binary (only "
300  "relevant for usage types two or three");
301  po.Register("average", &average, "If true, compute average instead of "
302  "sum; currently compatible with type 3 or type 1 usage.");
303 
304  po.Read(argc, argv);
305 
306  int32 N = po.NumArgs(), exit_status;
307 
308  if (po.NumArgs() >= 2 &&
309  ClassifyWspecifier(po.GetArg(N), NULL, NULL, NULL) != kNoWspecifier) {
310  if (average)
311  // average option with type one usage.";
312  exit_status = TypeOneUsageAverage(po);
313  else
314  // output to table.
315  exit_status = TypeOneUsage(po, scale1, scale2);
316  } else if (po.NumArgs() == 2 &&
317  ClassifyRspecifier(po.GetArg(1), NULL, NULL) != kNoRspecifier &&
318  ClassifyWspecifier(po.GetArg(N), NULL, NULL, NULL) ==
319  kNoWspecifier) {
320  KALDI_ASSERT(scale1 == 1.0 && scale2 == 1.0);
321  if (average)
322  KALDI_ERR << "--average option not compatible with type two usage.";
323  // input from a single table, output not to table.
324  exit_status = TypeTwoUsage(po, binary);
325  } else if (po.NumArgs() >= 2 &&
326  ClassifyRspecifier(po.GetArg(1), NULL, NULL) == kNoRspecifier &&
327  ClassifyWspecifier(po.GetArg(N), NULL, NULL, NULL) == kNoWspecifier) {
328  KALDI_ASSERT(scale1 == 1.0 && scale2 == 1.0);
329  // summing flat files.
330  exit_status = TypeThreeUsage(po, binary, average);
331  } else {
332  po.PrintUsage();
333  exit(1);
334  }
335  return exit_status;
336  } catch(const std::exception &e) {
337  std::cerr << e.what();
338  return -1;
339  }
340 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
Definition: stl-utils.h:184
RandomAccessTableReader< KaldiObjectHolder< Matrix< BaseFloat > > > RandomAccessBaseFloatMatrixReader
Definition: table-types.h:41
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
int32 TypeTwoUsage(const ParseOptions &po, bool binary)
Definition: matrix-sum.cc:179
int32 TypeOneUsageAverage(const ParseOptions &po)
Definition: matrix-sum.cc:106
int32 TypeOneUsage(const ParseOptions &po, BaseFloat scale1, BaseFloat scale2)
Definition: matrix-sum.cc:30
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void Scale(Real alpha)
Multiply each element with a scalar value.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
WspecifierType ClassifyWspecifier(const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
Definition: kaldi-table.cc:135
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
int main(int argc, char *argv[])
Definition: matrix-sum.cc:264
#define KALDI_LOG
Definition: kaldi-error.h:153
int32 TypeThreeUsage(const ParseOptions &po, bool binary, bool average)
Definition: matrix-sum.cc:226