apply-cmvn-sliding.cc
Go to the documentation of this file.
1 // featbin/apply-cmvn-sliding.cc
2 
3 // Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
21 #include "util/common-utils.h"
22 #include "matrix/kaldi-matrix.h"
23 #include "feat/feature-functions.h"
24 
25 
26 int main(int argc, char *argv[]) {
27  try {
28  using namespace kaldi;
29  using kaldi::int32;
30 
31  const char *usage =
32  "Apply sliding-window cepstral mean (and optionally variance)\n"
33  "normalization per utterance. If center == true, window is centered\n"
34  "on frame being normalized; otherwise it precedes it in time.\n"
35  "Useful for speaker-id; see also apply-cmvn-online\n"
36  "\n"
37  "Usage: apply-cmvn-sliding [options] <feats-rspecifier> <feats-wspecifier>\n";
38 
39  ParseOptions po(usage);
41  opts.Register(&po);
42 
43  po.Read(argc, argv);
44 
45  if (po.NumArgs() != 2) {
46  po.PrintUsage();
47  exit(1);
48  }
49 
50  int32 num_done = 0, num_err = 0;
51 
52  std::string feat_rspecifier = po.GetArg(1);
53  std::string feat_wspecifier = po.GetArg(2);
54 
55  SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
56  BaseFloatMatrixWriter feat_writer(feat_wspecifier);
57 
58  for (;!feat_reader.Done(); feat_reader.Next()) {
59  std::string utt = feat_reader.Key();
60  Matrix<BaseFloat> feat(feat_reader.Value());
61  if (feat.NumRows() == 0) {
62  KALDI_WARN << "Empty feature matrix for utterance " << utt;
63  num_err++;
64  continue;
65  }
66  Matrix<BaseFloat> cmvn_feat(feat.NumRows(),
67  feat.NumCols(), kUndefined);
68 
69  SlidingWindowCmn(opts, feat, &cmvn_feat);
70 
71  feat_writer.Write(utt, cmvn_feat);
72  num_done++;
73  }
74 
75  KALDI_LOG << "Applied sliding-window cepstral mean "
76  << (opts.normalize_variance ? "and variance " : "")
77  << "normalization to " << num_done << " utterances, "
78  << num_err << " had errors.";
79  return (num_done != 0 ? 0 : 1);
80  } catch(const std::exception &e) {
81  std::cerr << e.what();
82  return -1;
83  }
84 }
85 
86 
int main(int argc, char *argv[])
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void Register(OptionsItf *opts)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_LOG
Definition: kaldi-error.h:153
void SlidingWindowCmn(const SlidingWindowCmnOptions &opts, const MatrixBase< BaseFloat > &input, MatrixBase< BaseFloat > *output)
Applies sliding-window cepstral mean and/or variance normalization.