apply-cmvn.cc File Reference
Include dependency graph for apply-cmvn.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 27 of file apply-cmvn.cc.

References kaldi::ApplyCmvn(), kaldi::ApplyCmvnReverse(), kaldi::ClassifyRspecifier(), SequentialTableReader< Holder >::Done(), kaldi::FakeStatsForSomeDims(), ParseOptions::GetArg(), RandomAccessTableReaderMapped< Holder >::HasKey(), KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), kaldi::kNoRspecifier, SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), Matrix< Real >::Read(), ParseOptions::Register(), kaldi::SplitStringToIntegers(), Input::Stream(), SequentialTableReader< Holder >::Value(), RandomAccessTableReaderMapped< Holder >::Value(), and TableWriter< Holder >::Write().

27  {
28  try {
29  using namespace kaldi;
30 
31  const char *usage =
32  "Apply cepstral mean and (optionally) variance normalization\n"
33  "Per-utterance by default, or per-speaker if utt2spk option provided\n"
34  "Usage: apply-cmvn [options] (<cmvn-stats-rspecifier>|<cmvn-stats-rxfilename>) <feats-rspecifier> <feats-wspecifier>\n"
35  "e.g.: apply-cmvn --utt2spk=ark:data/train/utt2spk scp:data/train/cmvn.scp scp:data/train/feats.scp ark:-\n"
36  "See also: modify-cmvn-stats, matrix-sum, compute-cmvn-stats\n";
37 
38  ParseOptions po(usage);
39  std::string utt2spk_rspecifier;
40  bool norm_vars = false;
41  bool norm_means = true;
42  bool reverse = false;
43  std::string skip_dims_str;
44 
45  po.Register("utt2spk", &utt2spk_rspecifier,
46  "rspecifier for utterance to speaker map");
47  po.Register("norm-vars", &norm_vars, "If true, normalize variances.");
48  po.Register("norm-means", &norm_means, "You can set this to false to turn off mean "
49  "normalization. Note, the same can be achieved by using 'fake' CMVN stats; "
50  "see the --fake option to compute_cmvn_stats.sh");
51  po.Register("skip-dims", &skip_dims_str, "Dimensions for which to skip "
52  "normalization: colon-separated list of integers, e.g. 13:14:15)");
53  po.Register("reverse", &reverse, "If true, apply CMVN in a reverse sense, "
54  "so as to transform zero-mean, unit-variance input into data "
55  "with the given mean and variance.");
56 
57  po.Read(argc, argv);
58 
59  if (po.NumArgs() != 3) {
60  po.PrintUsage();
61  exit(1);
62  }
63  if (norm_vars && !norm_means)
64  KALDI_ERR << "You cannot normalize the variance but not the mean.";
65 
66 
67  std::string cmvn_rspecifier_or_rxfilename = po.GetArg(1);
68  std::string feat_rspecifier = po.GetArg(2);
69  std::string feat_wspecifier = po.GetArg(3);
70 
71  if (!norm_means) {
72  // CMVN is a no-op, we're not doing anything. Just echo the input
73  // don't even uncompress, if it was a CompressedMatrix.
74  SequentialGeneralMatrixReader reader(feat_rspecifier);
75  GeneralMatrixWriter writer(feat_wspecifier);
76  kaldi::int32 num_done = 0;
77  for (;!reader.Done(); reader.Next()) {
78  writer.Write(reader.Key(), reader.Value());
79  num_done++;
80  }
81  KALDI_LOG << "Copied " << num_done << " utterances.";
82  return (num_done != 0 ? 0 : 1);
83  }
84 
85 
86  std::vector<int32> skip_dims; // optionally use "fake"
87  // (zero-mean/unit-variance) stats for some
88  // dims to disable normalization.
89  if (!SplitStringToIntegers(skip_dims_str, ":", false, &skip_dims)) {
90  KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
91  << "integers)";
92  }
93 
94 
95  kaldi::int32 num_done = 0, num_err = 0;
96 
97  SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
98  BaseFloatMatrixWriter feat_writer(feat_wspecifier);
99 
100  if (ClassifyRspecifier(cmvn_rspecifier_or_rxfilename, NULL, NULL)
101  != kNoRspecifier) { // reading from a Table: per-speaker or per-utt CMN/CVN.
102  std::string cmvn_rspecifier = cmvn_rspecifier_or_rxfilename;
103 
104  RandomAccessDoubleMatrixReaderMapped cmvn_reader(cmvn_rspecifier,
105  utt2spk_rspecifier);
106 
107  for (; !feat_reader.Done(); feat_reader.Next()) {
108  std::string utt = feat_reader.Key();
109  Matrix<BaseFloat> feat(feat_reader.Value());
110  if (norm_means) {
111  if (!cmvn_reader.HasKey(utt)) {
112  KALDI_WARN << "No normalization statistics available for key "
113  << utt << ", producing no output for this utterance";
114  num_err++;
115  continue;
116  }
117  Matrix<double> cmvn_stats = cmvn_reader.Value(utt);
118  if (!skip_dims.empty())
119  FakeStatsForSomeDims(skip_dims, &cmvn_stats);
120 
121  if (reverse) {
122  ApplyCmvnReverse(cmvn_stats, norm_vars, &feat);
123  } else {
124  ApplyCmvn(cmvn_stats, norm_vars, &feat);
125  }
126  feat_writer.Write(utt, feat);
127  } else {
128  feat_writer.Write(utt, feat);
129  }
130  num_done++;
131  }
132  } else {
133  if (utt2spk_rspecifier != "")
134  KALDI_ERR << "--utt2spk option not compatible with rxfilename as input "
135  << "(did you forget ark:?)";
136  std::string cmvn_rxfilename = cmvn_rspecifier_or_rxfilename;
137  bool binary;
138  Input ki(cmvn_rxfilename, &binary);
139  Matrix<double> cmvn_stats;
140  cmvn_stats.Read(ki.Stream(), binary);
141  if (!skip_dims.empty())
142  FakeStatsForSomeDims(skip_dims, &cmvn_stats);
143 
144  for (;!feat_reader.Done(); feat_reader.Next()) {
145  std::string utt = feat_reader.Key();
146  Matrix<BaseFloat> feat(feat_reader.Value());
147  if (norm_means) {
148  if (reverse) {
149  ApplyCmvnReverse(cmvn_stats, norm_vars, &feat);
150  } else {
151  ApplyCmvn(cmvn_stats, norm_vars, &feat);
152  }
153  }
154  feat_writer.Write(utt, feat);
155  num_done++;
156  }
157  }
158  if (norm_vars)
159  KALDI_LOG << "Applied cepstral mean and variance normalization to "
160  << num_done << " utterances, errors on " << num_err;
161  else
162  KALDI_LOG << "Applied cepstral mean normalization to "
163  << num_done << " utterances, errors on " << num_err;
164  return (num_done != 0 ? 0 : 1);
165  } catch(const std::exception &e) {
166  std::cerr << e.what();
167  return -1;
168  }
169 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. "1:2:3") into a vector of integers.
Definition: text-utils.h:68
This class is for when you are reading something in random access, but it may actually be stored per-...
Definition: kaldi-table.h:432
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
void ApplyCmvnReverse(const MatrixBase< double > &stats, bool var_norm, MatrixBase< BaseFloat > *feats)
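This is as ApplyCmvn, but does so in the reverse sense, i.e. it transforms zero-mean, unit-variance input into data with the mean and variance given by the stats.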
Definition: cmvn.cc:117
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_LOG
Definition: kaldi-error.h:153
void ApplyCmvn(const MatrixBase< double > &stats, bool var_norm, MatrixBase< BaseFloat > *feats)
Apply cepstral mean and variance normalization to a matrix of features.
Definition: cmvn.cc:64
void FakeStatsForSomeDims(const std::vector< int32 > &dims, MatrixBase< double > *stats)
Modify the stats so that for some dimensions (specified in "dims"), we replace them with "fake" stats...
Definition: cmvn.cc:168