58 using namespace kaldi;
60 "Accumulate gradient scatter from training set, either per utterance or \n" 61 "for the supplied set of speakers (spk2utt option). Reads posterior to accumulate \n" 62 "fMLLR stats for each speaker/utterance. Writes gradient scatter matrix.\n" 63 "Usage: gmm-basis-fmllr-accs [options] <model-in> <feature-rspecifier>" 64 "<post-rspecifier> <accs-wspecifier>\n";
66 bool binary_write =
true;
67 string spk2utt_rspecifier;
69 po.Register(
"binary", &binary_write,
"Write output in binary mode");
70 po.Register(
"spk2utt", &spk2utt_rspecifier,
"rspecifier for speaker to " 71 "utterance-list map");
74 if (po.NumArgs() != 4) {
80 model_rxfilename = po.GetArg(1),
81 feature_rspecifier = po.GetArg(2),
82 post_rspecifier = po.GetArg(3),
83 accs_wspecifier = po.GetArg(4);
89 Input ki(model_rxfilename, &binary);
90 trans_model.
Read(ki.Stream(), binary);
91 am_gmm.
Read(ki.Stream(), binary);
97 int32 num_done = 0, num_no_post = 0, num_other_error = 0;
98 if (spk2utt_rspecifier !=
"") {
103 for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
105 string spk = spk2utt_reader.Key();
106 const vector<string> &uttlist = spk2utt_reader.Value();
107 for (
size_t i = 0;
i < uttlist.size();
i++) {
108 std::string utt = uttlist[
i];
109 if (!feature_reader.HasKey(utt)) {
110 KALDI_WARN <<
"Did not find features for utterance " << utt;
114 if (!post_reader.HasKey(utt)) {
115 KALDI_WARN <<
"Did not find posteriors for utterance " << utt;
120 const Posterior &post = post_reader.Value(utt);
121 if (static_cast<int32>(post.size()) != feats.
NumRows()) {
122 KALDI_WARN <<
"Posterior vector has wrong size " << (post.size())
123 <<
" vs. " << (feats.
NumRows());
132 basis_accs.AccuGradientScatter(spk_stats);
135 KALDI_LOG <<
"Accumulate statistics from " << num_spk <<
" speakers";
139 for (; !feature_reader.Done(); feature_reader.Next()) {
140 string utt = feature_reader.Key();
141 if (!post_reader.HasKey(utt)) {
142 KALDI_WARN <<
"Did not find posts for utterance " 148 const Posterior &post = post_reader.Value(utt);
150 if (static_cast<int32>(post.size()) != feats.
NumRows()) {
151 KALDI_WARN <<
"Posterior has wrong size " << (post.size())
152 <<
" vs. " << (feats.
NumRows());
161 basis_accs.AccuGradientScatter(utt_stats);
166 Output ko(accs_wspecifier, binary_write);
167 basis_accs.Write(ko.Stream(), binary_write);
169 KALDI_LOG <<
"Done " << num_done <<
" files, " << num_no_post
170 <<
" with no posts, " << num_other_error <<
" with other errors.";
171 KALDI_LOG <<
"Written gradient scatter to " << accs_wspecifier;
172 return (num_done != 0 ? 0 : 1);
173 }
catch(
const std::exception& e) {
174 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
This does not work with multiple feature transforms.
void AccumulateForUtterance(const Matrix< BaseFloat > &feats, const GaussPost &gpost, const TransitionModel &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats)
Allows random access to a collection of objects in an archive or script file; see "The Table concept".
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
The class ParseOptions is for parsing command-line options; see "Parsing command-line options" for more documentation.
Stats for fMLLR subspace estimation.
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see "The Table concept".
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void Read(std::istream &in_stream, bool binary)