subsample-feats.cc
Go to the documentation of this file.
1 // featbin/subsample-feats.cc
2 
3 // Copyright 2012-2014 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <sstream>
21 #include <algorithm>
22 #include <iterator>
23 #include <utility>
24 
25 #include "base/kaldi-common.h"
26 #include "util/common-utils.h"
27 #include "matrix/kaldi-matrix.h"
28 
29 
30 int main(int argc, char *argv[]) {
31  try {
32  using namespace kaldi;
33  using namespace std;
34 
35  const char *usage =
36  "Sub-samples features by taking every n'th frame.\n"
37  "With negative values of n, will repeat each frame n times\n"
38  "(e.g. --n=-2 will repeat each frame twice)\n"
39  "\n"
40  "Usage: subsample-feats [options] <in-rspecifier> <out-wspecifier>\n"
41  " e.g. subsample-feats --n=2 ark:- ark:-\n";
42 
43  ParseOptions po(usage);
44 
45  int32 n = 1, offset = 0;
46 
47  po.Register("n", &n, "Take every n'th feature, for this value of n"
48  "(with negative value, repeats each feature n times)");
49  po.Register("offset", &offset, "Start with the feature with this offset, "
50  "then take every n'th feature.");
51 
52  KALDI_ASSERT(n != 0);
53  if (n < 0)
54  KALDI_ASSERT(offset == 0 &&
55  "--offset option cannot be used with negative n.");
56 
57  po.Read(argc, argv);
58 
59  if (po.NumArgs() != 2) {
60  po.PrintUsage();
61  exit(1);
62  }
63 
64  string rspecifier = po.GetArg(1);
65  string wspecifier = po.GetArg(2);
66 
67  SequentialBaseFloatMatrixReader feat_reader(rspecifier);
68  BaseFloatMatrixWriter feat_writer(wspecifier);
69 
70  int32 num_done = 0, num_err = 0;
71  int64 frames_in = 0, frames_out = 0;
72 
73  // process all keys
74  for (; !feat_reader.Done(); feat_reader.Next()) {
75  std::string utt = feat_reader.Key();
76  const Matrix<BaseFloat> feats(feat_reader.Value());
77 
78 
79  if (n > 0) {
80  // This code could, of course, be much more efficient; I'm just
81  // keeping it simple.
82  int32 num_indexes = 0;
83  for (int32 k = offset; k < feats.NumRows(); k += n)
84  num_indexes++; // k is the index.
85 
86  frames_in += feats.NumRows();
87  frames_out += num_indexes;
88 
89  if (num_indexes == 0) {
90  KALDI_WARN << "For utterance " << utt << ", output would have no rows, "
91  << "producing no output.";
92  num_err++;
93  continue;
94  }
95  Matrix<BaseFloat> output(num_indexes, feats.NumCols());
96  int32 i = 0;
97  for (int32 k = offset; k < feats.NumRows(); k += n, i++) {
98  SubVector<BaseFloat> src(feats, k), dest(output, i);
99  dest.CopyFromVec(src);
100  }
101  KALDI_ASSERT(i == num_indexes);
102  feat_writer.Write(utt, output);
103  num_done++;
104  } else {
105  int32 repeat = -n;
106  Matrix<BaseFloat> output(feats.NumRows() * repeat, feats.NumCols());
107  for (int32 i = 0; i < output.NumRows(); i++)
108  output.Row(i).CopyFromVec(feats.Row(i / repeat));
109  frames_in += feats.NumRows();
110  frames_out += feats.NumRows() * repeat;
111  feat_writer.Write(utt, output);
112  num_done++;
113  }
114  }
115  KALDI_LOG << "Processed " << num_done << " feature matrices; " << num_err
116  << " with errors.";
117  KALDI_LOG << "Processed " << frames_in << " input frames and "
118  << frames_out << " output frames.";
119  return (num_done != 0 ? 0 : 1);
120  } catch(const std::exception &e) {
121  std::cerr << e.what();
122  return -1;
123  }
124 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
int main(int argc, char *argv[])
struct rnnlm::@11::@12 n
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501