compose-transforms.cc
Go to the documentation of this file.
1 // featbin/compose-transforms.cc
2 
3 // Copyright 2009-2012 Microsoft Corporation
4 // Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "matrix/kaldi-matrix.h"
25 
26 
27 int main(int argc, char *argv[]) {
28  try {
29  using namespace kaldi;
30 
31  const char *usage =
32  "Compose (affine or linear) feature transforms\n"
33  "Usage: compose-transforms [options] (<transform-A-rspecifier>|<transform-A-rxfilename>) "
34  "(<transform-B-rspecifier>|<transform-B-rxfilename>) (<transform-out-wspecifier>|<transform-out-wxfilename>)\n"
35  " Note: it does matrix multiplication (A B) so B is the transform that gets applied\n"
36  " to the features first. If b-is-affine = true, then assume last column of b corresponds to offset\n"
37  " e.g.: compose-transforms 1.mat 2.mat 3.mat\n"
38  " compose-transforms 1.mat ark:2.trans ark:3.trans\n"
39  " compose-transforms ark:1.trans ark:2.trans ark:3.trans\n"
40  " See also: transform-feats, transform-vec, extend-transform-dim, est-lda, est-pca\n";
41 
42  bool b_is_affine = false;
43  bool binary = true;
44  std::string utt2spk_rspecifier;
45  ParseOptions po(usage);
46 
47  po.Register("utt2spk", &utt2spk_rspecifier, "rspecifier for utterance to speaker map (if mixing utterance and speaker ids)");
48 
49  po.Register("b-is-affine", &b_is_affine, "If true, treat last column of transform b as an offset term (only relevant if a is affine)");
50  po.Register("binary", &binary, "Write in binary mode (only relevant if output is a wxfilename)");
51 
52  po.Read(argc, argv);
53 
54  if (po.NumArgs() != 3) {
55  po.PrintUsage();
56  exit(1);
57  }
58 
59 
60  std::string transform_a_fn = po.GetArg(1);
61  std::string transform_b_fn = po.GetArg(2);
62  std::string transform_c_fn = po.GetArg(3);
63 
64  // all these "fn"'s are either rspecifiers or filenames.
65 
66  bool a_is_rspecifier =
67  (ClassifyRspecifier(transform_a_fn, NULL, NULL)
68  != kNoRspecifier),
69  b_is_rspecifier =
70  (ClassifyRspecifier(transform_b_fn, NULL, NULL)
71  != kNoRspecifier),
72  c_is_wspecifier =
73  (ClassifyWspecifier(transform_c_fn, NULL, NULL, NULL)
74  != kNoWspecifier);
75 
76 
77  RandomAccessTokenReader utt2spk_reader;
78  if (utt2spk_rspecifier != "") {
79  if (!(a_is_rspecifier && b_is_rspecifier))
80  KALDI_ERR << "Error: utt2spk option provided compose transforms but "
81  "at least one of the inputs is a global transform.";
82  if (!utt2spk_reader.Open(utt2spk_rspecifier))
83  KALDI_ERR << "Error upening utt2spk map from "
84  << utt2spk_rspecifier;
85  }
86 
87 
88  if ( (a_is_rspecifier || b_is_rspecifier) != c_is_wspecifier)
89  KALDI_ERR << "Formats of the input and output rspecifiers/rxfilenames do "
90  "not match (if either a or b is an rspecifier, then the output must "
91  "be a wspecifier.";
92 
93 
94  if (a_is_rspecifier || b_is_rspecifier) {
95  BaseFloatMatrixWriter c_writer(transform_c_fn);
96  if (a_is_rspecifier) {
97  SequentialBaseFloatMatrixReader a_reader(transform_a_fn);
98  if (b_is_rspecifier) { // both are rspecifiers.
99  RandomAccessBaseFloatMatrixReader b_reader(transform_b_fn);
100  for (;!a_reader.Done(); a_reader.Next()) {
101  if (utt2spk_rspecifier != "") { // assume a is per-utt, b is per-spk.
102  std::string utt = a_reader.Key();
103  if (!utt2spk_reader.HasKey(utt)) {
104  KALDI_WARN << "No speaker provided for utterance " << utt
105  << " (perhaps you wrongly provided utt2spk option to "
106  " compose-transforms?)";
107  continue;
108  }
109  std::string spk = utt2spk_reader.Value(utt);
110  if (!b_reader.HasKey(spk)) {
111  KALDI_WARN << "Second table does not have key " << spk;
112  continue;
113  }
115  if (!ComposeTransforms(a_reader.Value(), b_reader.Value(a_reader.Key()),
116  b_is_affine, &c))
117  continue; // warning will have been printed already.
118  c_writer.Write(utt, c);
119  } else { // Normal case: either both per-utterance or both per-speaker.
120  if (!b_reader.HasKey(a_reader.Key())) {
121  KALDI_WARN << "Second table does not have key " << a_reader.Key();
122  } else {
124  if (!ComposeTransforms(a_reader.Value(), b_reader.Value(a_reader.Key()),
125  b_is_affine, &c))
126  continue; // warning will have been printed already.
127  c_writer.Write(a_reader.Key(), c);
128  }
129  }
130  }
131  } else { // a is rspecifier, b is rxfilename
133  ReadKaldiObject(transform_b_fn, &b);
134  for (;!a_reader.Done(); a_reader.Next()) {
136  if (!ComposeTransforms(a_reader.Value(), b,
137  b_is_affine, &c))
138  continue; // warning will have been printed already.
139  c_writer.Write(a_reader.Key(), c);
140  }
141  }
142  } else {
144  ReadKaldiObject(transform_a_fn, &a);
145  SequentialBaseFloatMatrixReader b_reader(transform_b_fn);
146  for (; !b_reader.Done(); b_reader.Next()) {
148  if (!ComposeTransforms(a, b_reader.Value(),
149  b_is_affine, &c))
150  continue; // warning will have been printed already.
151  c_writer.Write(b_reader.Key(), c);
152  }
153  }
154  } else { // all are just {rx, wx}filenames.
156  ReadKaldiObject(transform_a_fn, &a);
158  ReadKaldiObject(transform_b_fn, &b);
160  if (!b_is_affine && a.NumRows() == a.NumCols()+1 && a.NumRows() == b.NumRows()
161  && a.NumCols() == b.NumCols())
162  KALDI_WARN << "It looks like you are trying to compose two affine transforms"
163  << ", but you omitted the --b-is-affine option.";
164  if (!ComposeTransforms(a, b, b_is_affine, &c)) exit (1);
165 
166  WriteKaldiObject(c, transform_c_fn, binary);
167  }
168  return 0;
169  } catch(const std::exception &e) {
170  std::cerr << e.what();
171  return -1;
172  }
173 }
174 
175 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
bool Open(const std::string &rspecifier)
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
WspecifierType ClassifyWspecifier(const std::string &wspecifier, std::string *archive_wxfilename, std::string *script_wxfilename, WspecifierOptions *opts)
Definition: kaldi-table.cc:135
int NumArgs() const
Number of positional parameters (c.f. argc-1).
bool ComposeTransforms(const Matrix< BaseFloat > &a, const Matrix< BaseFloat > &b, bool b_is_affine, Matrix< BaseFloat > *c)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
int main(int argc, char *argv[])
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257