102 using namespace kaldi;
106 "Compute cepstral mean and variance normalization statistics\n" 107 "Specialized for two-sided telephone data where we only accumulate\n" 108 "the louder of the two channels at each frame (and add it to that\n" 109 "side's stats). Reads a 'reco2file_and_channel' file, normally like\n" 110 "sw02001-A sw02001 A\n" 111 "sw02001-B sw02001 B\n" 112 "sw02005-A sw02005 A\n" 113 "sw02005-B sw02005 B\n" 114 "interpreted as <utterance-id> <call-id> <side> and for each <call-id>\n" 115 "that has two sides, does the 'only-the-louder' computation, else doesn\n" 116 "per-utterance stats in the normal way.\n" 117 "Note: loudness is judged by the first feature component, either energy or c0;\n" 118 "only applicable to MFCCs or PLPs (this code could be modified to handle filterbanks).\n" 120 "Usage: compute-cmvn-stats-two-channel [options] <reco2file-and-channel> <feats-rspecifier> <stats-wspecifier>\n" 121 "e.g.: compute-cmvn-stats-two-channel data/train_unseg/reco2file_and_channel scp:data/train_unseg/feats.scp ark,t:-\n";
127 po.Register(
"quieter-channel-weight", &quieter_channel_weight,
128 "For the quieter channel, apply this weight to the stats, so " 129 "that we still get stats if one channel always dominates.");
133 if (po.NumArgs() != 3) {
138 int32 num_done = 0, num_err = 0;
140 std::string reco2file_and_channel_rxfilename = po.GetArg(1),
141 feats_rspecifier = po.GetArg(2),
142 stats_wspecifier = po.GetArg(3);
145 std::vector<std::vector<std::string> > utt_pairs;
151 for (
size_t i = 0;
i < utt_pairs.size();
i++) {
152 std::vector<std::string> this_pair(utt_pairs[
i]);
154 KALDI_ASSERT(this_pair.size() == 2 || this_pair.size() == 1);
155 if (this_pair.size() == 2) {
156 std::string utt1 = this_pair[0], utt2 = this_pair[1];
157 if (!feat_reader.HasKey(utt1)) {
158 KALDI_WARN <<
"No feature data for utterance " << utt1;
161 this_pair.pop_back();
163 }
else if (!feat_reader.HasKey(utt2)) {
164 KALDI_WARN <<
"No feature data for utterance " << utt2;
166 this_pair.pop_back();
170 feats2 = feat_reader.Value(utt2);
174 &cmvn_stats1, &cmvn_stats2);
175 writer.Write(utt1, cmvn_stats1);
176 writer.Write(utt2, cmvn_stats2);
183 std::string utt = this_pair[0];
184 if (!feat_reader.HasKey(utt)) {
185 KALDI_WARN <<
"No feature data for utterance " << utt;
192 writer.Write(utt, cmvn_stats);
195 KALDI_LOG <<
"Done accumulating CMVN stats for " << num_done
196 <<
" utterances; " << num_err <<
" had errors.";
197 return (num_done != 0 ? 0 : 1);
198 }
catch(
const std::exception &e) {
199 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void AccCmvnStatsForPair(const std::string &utt1, const std::string &utt2, const MatrixBase< BaseFloat > &feats1, const MatrixBase< BaseFloat > &feats2, BaseFloat quieter_channel_weight, MatrixBase< double > *cmvn_stats1, MatrixBase< double > *cmvn_stats2)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
A templated class for writing objects to an archive or script file; see The Table concept...
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void GetUtterancePairs(const std::string &reco2file_and_channel_rxfilename, std::vector< std::vector< std::string > > *utt_pairs)
void AccCmvnStats(const VectorBase< BaseFloat > &feats, BaseFloat weight, MatrixBase< double > *stats)
Accumulation from a single frame (weighted).
#define KALDI_ASSERT(cond)