compute-cmvn-stats-two-channel.cc File Reference
Include dependency graph for compute-cmvn-stats-two-channel.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks; the reference:
 

Functions

void GetUtterancePairs (const std::string &reco2file_and_channel_rxfilename, std::vector< std::vector< std::string > > *utt_pairs)
 
void AccCmvnStatsForPair (const std::string &utt1, const std::string &utt2, const MatrixBase< BaseFloat > &feats1, const MatrixBase< BaseFloat > &feats2, BaseFloat quieter_channel_weight, MatrixBase< double > *cmvn_stats1, MatrixBase< double > *cmvn_stats2)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 100 of file compute-cmvn-stats-two-channel.cc.

References kaldi::AccCmvnStats(), kaldi::AccCmvnStatsForPair(), ParseOptions::GetArg(), kaldi::GetUtterancePairs(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, KALDI_ASSERT, KALDI_LOG, KALDI_WARN, ParseOptions::NumArgs(), MatrixBase< Real >::NumCols(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), RandomAccessTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

100  {
101  try {
102  using namespace kaldi;
103  using kaldi::int32;
104 
105  const char *usage =
106  "Compute cepstral mean and variance normalization statistics\n"
107  "Specialized for two-sided telephone data where we only accumulate\n"
108  "the louder of the two channels at each frame (and add it to that\n"
109  "side's stats). Reads a 'reco2file_and_channel' file, normally like\n"
110  "sw02001-A sw02001 A\n"
111  "sw02001-B sw02001 B\n"
112  "sw02005-A sw02005 A\n"
113  "sw02005-B sw02005 B\n"
114  "interpreted as <utterance-id> <call-id> <side> and for each <call-id>\n"
115  "that has two sides, does the 'only-the-louder' computation, else doesn\n"
116  "per-utterance stats in the normal way.\n"
117  "Note: loudness is judged by the first feature component, either energy or c0;\n"
118  "only applicable to MFCCs or PLPs (this code could be modified to handle filterbanks).\n"
119  "\n"
120  "Usage: compute-cmvn-stats-two-channel [options] <reco2file-and-channel> <feats-rspecifier> <stats-wspecifier>\n"
121  "e.g.: compute-cmvn-stats-two-channel data/train_unseg/reco2file_and_channel scp:data/train_unseg/feats.scp ark,t:-\n";
122 
123 
124  ParseOptions po(usage);
125  BaseFloat quieter_channel_weight = 0.01;
126 
127  po.Register("quieter-channel-weight", &quieter_channel_weight,
128  "For the quieter channel, apply this weight to the stats, so "
129  "that we still get stats if one channel always dominates.");
130 
131  po.Read(argc, argv);
132 
133  if (po.NumArgs() != 3) {
134  po.PrintUsage();
135  exit(1);
136  }
137 
138  int32 num_done = 0, num_err = 0;
139 
140  std::string reco2file_and_channel_rxfilename = po.GetArg(1),
141  feats_rspecifier = po.GetArg(2),
142  stats_wspecifier = po.GetArg(3);
143 
144 
145  std::vector<std::vector<std::string> > utt_pairs;
146  GetUtterancePairs(reco2file_and_channel_rxfilename, &utt_pairs);
147 
148  RandomAccessBaseFloatMatrixReader feat_reader(feats_rspecifier);
149  DoubleMatrixWriter writer(stats_wspecifier);
150 
151  for (size_t i = 0; i < utt_pairs.size(); i++) {
152  std::vector<std::string> this_pair(utt_pairs[i]);
153 
154  KALDI_ASSERT(this_pair.size() == 2 || this_pair.size() == 1);
155  if (this_pair.size() == 2) {
156  std::string utt1 = this_pair[0], utt2 = this_pair[1];
157  if (!feat_reader.HasKey(utt1)) {
158  KALDI_WARN << "No feature data for utterance " << utt1;
159  num_err++;
160  this_pair[0] = utt2;
161  this_pair.pop_back();
162  // and fall through to the singleton code below.
163  } else if (!feat_reader.HasKey(utt2)) {
164  KALDI_WARN << "No feature data for utterance " << utt2;
165  num_err++;
166  this_pair.pop_back();
167  // and fall through to the singleton code below.
168  } else {
169  Matrix<BaseFloat> feats1 = feat_reader.Value(utt1),
170  feats2 = feat_reader.Value(utt2);
171  int32 dim = feats1.NumCols();
172  Matrix<double> cmvn_stats1(2, dim + 1), cmvn_stats2(2, dim + 1);
173  AccCmvnStatsForPair(utt1, utt2, feats1, feats2, quieter_channel_weight,
174  &cmvn_stats1, &cmvn_stats2);
175  writer.Write(utt1, cmvn_stats1);
176  writer.Write(utt2, cmvn_stats2);
177  num_done += 2;
178  continue; // continue so we don't go to the singleton-processing code
179  // below.
180  }
181  }
182  // process singletons.
183  std::string utt = this_pair[0];
184  if (!feat_reader.HasKey(utt)) {
185  KALDI_WARN << "No feature data for utterance " << utt;
186  num_err++;
187  continue;
188  }
189  const Matrix<BaseFloat> &feats = feat_reader.Value(utt);
190  Matrix<double> cmvn_stats(2, feats.NumCols() + 1);
191  AccCmvnStats(feats, NULL, &cmvn_stats);
192  writer.Write(utt, cmvn_stats);
193  num_done++;
194  }
195  KALDI_LOG << "Done accumulating CMVN stats for " << num_done
196  << " utterances; " << num_err << " had errors.";
197  return (num_done != 0 ? 0 : 1);
198  } catch(const std::exception &e) {
199  std::cerr << e.what();
200  return -1;
201  }
202 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void AccCmvnStatsForPair(const std::string &utt1, const std::string &utt2, const MatrixBase< BaseFloat > &feats1, const MatrixBase< BaseFloat > &feats2, BaseFloat quieter_channel_weight, MatrixBase< double > *cmvn_stats1, MatrixBase< double > *cmvn_stats2)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
#define KALDI_WARN
Definition: kaldi-error.h:150
void GetUtterancePairs(const std::string &reco2file_and_channel_rxfilename, std::vector< std::vector< std::string > > *utt_pairs)
void AccCmvnStats(const VectorBase< BaseFloat > &feats, BaseFloat weight, MatrixBase< double > *stats)
Accumulation from a single frame (weighted).
Definition: cmvn.cc:30
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_LOG
Definition: kaldi-error.h:153