paste-feats.cc
Go to the documentation of this file.
1 // featbin/paste-feats.cc
2 
3 // Copyright 2012 Korbinian Riedhammer
4 // 2013 Brno University of Technology (Author: Karel Vesely)
5 // 2013 Johns Hopkins University (Author: Daniel Povey)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABILITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 
23 #include "base/kaldi-common.h"
24 #include "util/common-utils.h"
25 #include "matrix/kaldi-matrix.h"
26 
27 namespace kaldi {
28 
29 // returns true if successfully appended.
30 bool AppendFeats(const std::vector<Matrix<BaseFloat> > &in,
31  const std::string &utt,
32  int32 tolerance,
33  Matrix<BaseFloat> *out) {
34  // Check the lengths
35  int32 min_len = in[0].NumRows(),
36  max_len = in[0].NumRows(),
37  tot_dim = in[0].NumCols();
38  for (int32 i = 1; i < in.size(); i++) {
39  int32 len = in[i].NumRows(), dim = in[i].NumCols();
40  tot_dim += dim;
41  if(len < min_len) min_len = len;
42  if(len > max_len) max_len = len;
43  }
44  if (max_len - min_len > tolerance || min_len == 0) {
45  KALDI_WARN << "Length mismatch " << max_len << " vs. " << min_len
46  << (utt.empty() ? "" : " for utt ") << utt
47  << " exceeds tolerance " << tolerance;
48  out->Resize(0, 0);
49  return false;
50  }
51  if (max_len - min_len > 0) {
52  KALDI_VLOG(2) << "Length mismatch " << max_len << " vs. " << min_len
53  << (utt.empty() ? "" : " for utt ") << utt
54  << " within tolerance " << tolerance;
55  }
56  out->Resize(min_len, tot_dim);
57  int32 dim_offset = 0;
58  for (int32 i = 0; i < in.size(); i++) {
59  int32 this_dim = in[i].NumCols();
60  out->Range(0, min_len, dim_offset, this_dim).CopyFromMat(
61  in[i].Range(0, min_len, 0, this_dim));
62  dim_offset += this_dim;
63  }
64  return true;
65 }
66 
67 
68 }
69 
70 int main(int argc, char *argv[]) {
71  try {
72  using namespace kaldi;
73  using namespace std;
74 
75  const char *usage =
76  "Paste feature files (assuming they have about the same durations,\n"
77  "see --length-tolerance), appending the features on each frame;\n"
78  "think of the unix command 'paste'.\n"
79  "Usage: paste-feats <in-rspecifier1> <in-rspecifier2> [<in-rspecifier3> ...] <out-wspecifier>\n"
80  " or: paste-feats <in-rxfilename1> <in-rxfilename2> [<in-rxfilename3> ...] <out-wxfilename>\n"
81  " e.g. paste-feats ark:feats1.ark \"ark:select-feats 0-3 ark:feats2.ark ark:- |\" ark:feats-out.ark\n"
82  " or: paste-feats foo.mat bar.mat baz.mat\n"
83  "See also: copy-feats, copy-matrix, append-vector-to-feats, concat-feats\n";
84 
85  ParseOptions po(usage);
86 
87  int32 length_tolerance = 0;
88  bool binary = true;
89  po.Register("length-tolerance", &length_tolerance,
90  "If length is different, trim as shortest up to a frame "
91  " difference of length-tolerance, otherwise exclude segment.");
92  po.Register("binary", &binary, "If true, output files in binary "
93  "(only relevant for single-file operation, i.e. no tables)");
94 
95  po.Read(argc, argv);
96 
97  if (po.NumArgs() < 3) {
98  po.PrintUsage();
99  exit(1);
100  }
101 
102  if (ClassifyRspecifier(po.GetArg(1), NULL, NULL)
103  != kNoRspecifier) {
104  // We're operating on tables, e.g. archives.
105 
106  // Last argument is output
107  string wspecifier = po.GetArg(po.NumArgs());
108  BaseFloatMatrixWriter feat_writer(wspecifier);
109 
110  // First input is sequential
111  string rspecifier1 = po.GetArg(1);
112  SequentialBaseFloatMatrixReader input1(rspecifier1);
113 
114  // Assemble vector of other input readers (with random-access)
115  vector<RandomAccessBaseFloatMatrixReader *> input;
116  for (int32 i = 2; i < po.NumArgs(); i++) {
117  string rspecifier = po.GetArg(i);
119  input.push_back(rd);
120  }
121 
122  int32 num_done = 0, num_err = 0;
123 
124  // Main loop
125  for (; !input1.Done(); input1.Next()) {
126  string utt = input1.Key();
127  KALDI_VLOG(2) << "Merging features for utterance " << utt;
128 
129  // Collect features from streams to vector 'feats'
130  vector<Matrix<BaseFloat> > feats(po.NumArgs() - 1);
131  feats[0] = input1.Value();
132  int32 i;
133  for (i = 0; i < static_cast<int32>(input.size()); i++) {
134  if (input[i]->HasKey(utt)) {
135  feats[i + 1] = input[i]->Value(utt);
136  } else {
137  KALDI_WARN << "Missing utt " << utt << " from input "
138  << po.GetArg(i+2);
139  num_err++;
140  break;
141  }
142  }
143  if (i != static_cast<int32>(input.size()))
144  continue;
145  Matrix<BaseFloat> output;
146  if (!AppendFeats(feats, utt, length_tolerance, &output)) {
147  num_err++;
148  continue; // it will have printed a warning.
149  }
150  feat_writer.Write(utt, output);
151  num_done++;
152  }
153 
154  for (int32 i=0; i < input.size(); i++)
155  delete input[i];
156  input.clear();
157 
158  KALDI_LOG << "Done " << num_done << " utts, errors on "
159  << num_err;
160 
161  return (num_done == 0 ? -1 : 0);
162  } else {
163  // We're operating on rxfilenames|wxfilenames, most likely files.
164  std::vector<Matrix<BaseFloat> > feats(po.NumArgs() - 1);
165  for (int32 i = 1; i < po.NumArgs(); i++)
166  ReadKaldiObject(po.GetArg(i), &(feats[i-1]));
167  Matrix<BaseFloat> output;
168  if (!AppendFeats(feats, "", length_tolerance, &output))
169  return 1; // it will have printed a warning.
170  std::string output_wxfilename = po.GetArg(po.NumArgs());
171  WriteKaldiObject(output, output_wxfilename, binary);
172  KALDI_LOG << "Wrote appended features to " << output_wxfilename;
173  return 0;
174  }
175  } catch(const std::exception &e) {
176  std::cerr << e.what();
177  return -1;
178  }
179 }
180 
181 /*
182  Testing:
183 
184 cat <<EOF >1.mat
185 [ 0 1 2
186  3 4 5
187  8 9 10 ]
188 EOF
189 cat <<EOF > 2.mat
190  [ 0 1
191  2 3 ]
192 EOF
193 paste-feats --length-tolerance=1 --binary=false 1.mat 2.mat 3a.mat
194 cat <<EOF > 3b.mat
195  [ 0 1 2 0 1
196  3 4 5 2 3 ]
197 EOF
198 cmp <(../bin/copy-matrix 3b.mat -) <(../bin/copy-matrix 3a.mat -) || echo 'Bad!'
199 
200 paste-feats --length-tolerance=1 'scp:echo foo 1.mat|' 'scp:echo foo 2.mat|' 'scp,t:echo foo 3a.mat|'
201 cmp <(../bin/copy-matrix 3b.mat -) <(../bin/copy-matrix 3a.mat -) || echo 'Bad!'
202 
203 rm {1,2,3?}.mat
204  */
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
RandomAccessTableReader< KaldiObjectHolder< Matrix< BaseFloat > > > RandomAccessBaseFloatMatrixReader
Definition: table-types.h:41
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int main(int argc, char *argv[])
Definition: paste-feats.cc:70
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
bool AppendFeats(const std::vector< Matrix< BaseFloat > > &in, const std::string &utt, int32 tolerance, Matrix< BaseFloat > *out)
Definition: paste-feats.cc:30
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
#define KALDI_WARN
Definition: kaldi-error.h:150
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
#define KALDI_LOG
Definition: kaldi-error.h:153