35 std::vector<std::vector<std::string> > *utt_pairs) {
36 Input ki(reco2file_and_channel_rxfilename);
38 std::map<std::string, std::vector<std::string> > call_to_uttlist;
39 while (std::getline(ki.
Stream(), line)) {
40 std::vector<std::string> split_line;
42 if (split_line.size() != 3) {
43 KALDI_ERR <<
"Expecting 3 fields per line of reco2file_and_channel file " 48 std::string utt = split_line[0],
50 call_to_uttlist[call].push_back(utt);
52 for (std::map<std::string, std::vector<std::string> >::const_iterator
53 iter = call_to_uttlist.begin(); iter != call_to_uttlist.end(); ++iter) {
54 const std::vector<std::string> &uttlist = iter->second;
55 if (uttlist.size() == 2) {
56 utt_pairs->push_back(uttlist);
58 KALDI_WARN <<
"Call " << iter->first <<
" has " << uttlist.size()
59 <<
" utterances, expected two; treating them singly.";
60 for (
size_t i = 0;
i < uttlist.size();
i++) {
61 std::vector<std::string> singleton_list;
62 singleton_list.push_back(uttlist[
i]);
63 utt_pairs->push_back(singleton_list);
77 KALDI_WARN <<
"Number of frames differ between " << utt1 <<
" and " << utt2
79 <<
", treating them separately.";
86 if (feats1(
i, 0) > feats2(
i, 0)) {
100 int main(
int argc,
char *argv[]) {
102 using namespace kaldi;
106 "Compute cepstral mean and variance normalization statistics\n" 107 "Specialized for two-sided telephone data where we only accumulate\n" 108 "the louder of the two channels at each frame (and add it to that\n" 109 "side's stats). Reads a 'reco2file_and_channel' file, normally like\n" 110 "sw02001-A sw02001 A\n" 111 "sw02001-B sw02001 B\n" 112 "sw02005-A sw02005 A\n" 113 "sw02005-B sw02005 B\n" 114 "interpreted as <utterance-id> <call-id> <side> and for each <call-id>\n" 115 "that has two sides, does the 'only-the-louder' computation, else doesn\n" 116 "per-utterance stats in the normal way.\n" 117 "Note: loudness is judged by the first feature component, either energy or c0;\n" 118 "only applicable to MFCCs or PLPs (this code could be modified to handle filterbanks).\n" 120 "Usage: compute-cmvn-stats-two-channel [options] <reco2file-and-channel> <feats-rspecifier> <stats-wspecifier>\n" 121 "e.g.: compute-cmvn-stats-two-channel data/train_unseg/reco2file_and_channel scp:data/train_unseg/feats.scp ark,t:-\n";
127 po.
Register(
"quieter-channel-weight", &quieter_channel_weight,
128 "For the quieter channel, apply this weight to the stats, so " 129 "that we still get stats if one channel always dominates.");
138 int32 num_done = 0, num_err = 0;
140 std::string reco2file_and_channel_rxfilename = po.
GetArg(1),
141 feats_rspecifier = po.
GetArg(2),
142 stats_wspecifier = po.
GetArg(3);
145 std::vector<std::vector<std::string> > utt_pairs;
151 for (
size_t i = 0;
i < utt_pairs.size();
i++) {
152 std::vector<std::string> this_pair(utt_pairs[
i]);
154 KALDI_ASSERT(this_pair.size() == 2 || this_pair.size() == 1);
155 if (this_pair.size() == 2) {
156 std::string utt1 = this_pair[0], utt2 = this_pair[1];
157 if (!feat_reader.
HasKey(utt1)) {
158 KALDI_WARN <<
"No feature data for utterance " << utt1;
161 this_pair.pop_back();
163 }
else if (!feat_reader.
HasKey(utt2)) {
164 KALDI_WARN <<
"No feature data for utterance " << utt2;
166 this_pair.pop_back();
170 feats2 = feat_reader.
Value(utt2);
174 &cmvn_stats1, &cmvn_stats2);
175 writer.
Write(utt1, cmvn_stats1);
176 writer.
Write(utt2, cmvn_stats2);
183 std::string utt = this_pair[0];
184 if (!feat_reader.
HasKey(utt)) {
185 KALDI_WARN <<
"No feature data for utterance " << utt;
192 writer.
Write(utt, cmvn_stats);
195 KALDI_LOG <<
"Done accumulating CMVN stats for " << num_done
196 <<
" utterances; " << num_err <<
" had errors.";
197 return (num_done != 0 ? 0 : 1);
198 }
catch(
const std::exception &e) {
199 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void AccCmvnStatsForPair(const std::string &utt1, const std::string &utt2, const MatrixBase< BaseFloat > &feats1, const MatrixBase< BaseFloat > &feats2, BaseFloat quieter_channel_weight, MatrixBase< double > *cmvn_stats1, MatrixBase< double > *cmvn_stats2)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
void GetUtterancePairs(const std::string &reco2file_and_channel_rxfilename, std::vector< std::vector< std::string > > *utt_pairs)
void AccCmvnStats(const VectorBase< BaseFloat > &feats, BaseFloat weight, MatrixBase< double > *stats)
Accumulation from a single frame (weighted).
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
int main(int argc, char *argv[])