apply-cmvn-online.cc
Go to the documentation of this file.
1 // online2bin/apply-cmvn-online.cc
2 
3 // Copyright 2014 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <string>
21 #include <vector>
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "feat/online-feature.h"
25 
26 int main(int argc, char *argv[]) {
27  try {
28  typedef kaldi::int32 int32;
29  using namespace kaldi;
30  const char *usage =
31  "Apply online cepstral mean (and possibly variance) computation online,\n"
32  "using the same code as used for online decoding in the 'new' setup in\n"
33  "online2/ and online2bin/. If the --spk2utt option is used, it uses\n"
34  "prior utterances from the same speaker to back off to at the utterance\n"
35  "beginning. See also apply-cmvn-sliding.\n"
36  "\n"
37  "Usage: apply-cmvn-online [options] <global-cmvn-stats> <feature-rspecifier> "
38  "<feature-wspecifier>\n"
39  "e.g. apply-cmvn-online 'matrix-sum scp:data/train/cmvn.scp -|' data/train/split8/1/feats.scp ark:-\n"
40  "or: apply-cmvn-online --spk2utt=ark:data/train/split8/1/spk2utt 'matrix-sum scp:data/train/cmvn.scp -|' "
41  " data/train/split8/1/feats.scp ark:-\n";
42 
43  ParseOptions po(usage);
44 
45  OnlineCmvnOptions cmvn_opts;
46 
47  std::string spk2utt_rspecifier;
48  po.Register("spk2utt", &spk2utt_rspecifier, "rspecifier for speaker to "
49  "utterance-list map");
50  cmvn_opts.Register(&po);
51 
52  po.Read(argc, argv);
53 
54  if (po.NumArgs() != 3) {
55  po.PrintUsage();
56  exit(1);
57  }
58 
59  std::string global_stats_rxfilename = po.GetArg(1),
60  feature_rspecifier = po.GetArg(2),
61  feature_wspecifier = po.GetArg(3);
62 
63  // global_cmvn_stats helps us initialize to online CMVN to
64  // reasonable values at the beginning of the utterance.
65  Matrix<double> global_cmvn_stats;
66  ReadKaldiObject(global_stats_rxfilename, &global_cmvn_stats);
67 
68 
69 
70  BaseFloatMatrixWriter feature_writer(feature_wspecifier);
71  int32 num_done = 0, num_err = 0;
72  int64 tot_t = 0;
73 
74  if (spk2utt_rspecifier != "") {
75  SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier);
76  RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
77 
78  for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
79  OnlineCmvnState cmvn_state(global_cmvn_stats);
80  const std::vector<std::string> &uttlist = spk2utt_reader.Value();
81  for (size_t i = 0; i < uttlist.size(); i++) {
82  std::string utt = uttlist[i];
83  if (!feature_reader.HasKey(utt)) {
84  KALDI_WARN << "No features for utterance " << utt;
85  num_err++;
86  continue;
87  }
88  const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
89 
90  Matrix<BaseFloat> normalized_feats(feats.NumRows(), feats.NumCols(),
91  kUndefined);
92 
93  OnlineMatrixFeature online_matrix(feats);
94  OnlineCmvn online_cmvn(cmvn_opts,
95  cmvn_state,
96  &online_matrix);
97 
98  for (int32 t = 0; t < feats.NumRows(); t++) {
99  SubVector<BaseFloat> row(normalized_feats, t);
100  online_cmvn.GetFrame(t, &row);
101  }
102  online_cmvn.GetState(feats.NumRows() - 1, &cmvn_state);
103 
104  num_done++;
105  tot_t += feats.NumRows();
106  feature_writer.Write(utt, normalized_feats);
107  }
108  }
109  } else {
110  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
111  for (; !feature_reader.Done(); feature_reader.Next()) {
112  std::string utt = feature_reader.Key();
113  const Matrix<BaseFloat> &feats = feature_reader.Value();
114  OnlineCmvnState cmvn_state(global_cmvn_stats);
115 
116  Matrix<BaseFloat> normalized_feats(feats.NumRows(), feats.NumCols(),
117  kUndefined);
118  OnlineMatrixFeature online_matrix(feats);
119  OnlineCmvn online_cmvn(cmvn_opts,
120  cmvn_state,
121  &online_matrix);
122 
123  for (int32 t = 0; t < feats.NumRows(); t++) {
124  SubVector<BaseFloat> row(normalized_feats, t);
125  online_cmvn.GetFrame(t, &row);
126  }
127  num_done++;
128  tot_t += feats.NumRows();
129  feature_writer.Write(utt, normalized_feats);
130  }
131  }
132 
133  KALDI_LOG << "Applied online CMVN to " << num_done << " files, or "
134  << tot_t << " frames.";
135  return (num_done != 0 ? 0 : 1);
136  } catch(const std::exception &e) {
137  std::cerr << e.what();
138  return -1;
139  }
140 }
141 
This class takes a Matrix<BaseFloat> and wraps it as an OnlineFeatureInterface: this can be useful wh...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
This class does an online version of the cepstral mean and [optionally] variance, but note that this ...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
void GetState(int32 cur_frame, OnlineCmvnState *cmvn_state)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const T & Value(const std::string &key)
int main(int argc, char *argv[])
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void Register(ParseOptions *po)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501