30 int main(
int argc,
char *argv[]) {
32 using namespace kaldi;
35 "Accumulate stats for training a diagonal-covariance GMM, two-feature version\n" 36 "First features are used to get posteriors, second to accumulate stats\n" 37 "Usage: gmm-global-acc-stats-twofeats [options] <model-in> " 38 "<feature1-rspecifier> <feature2-rspecifier> <stats-out>\n" 39 "e.g.: gmm-global-acc-stats-twofeats 1.mdl scp:train.scp scp:train2.scp 1.acc\n";
43 std::string update_flags_str =
"mvw";
44 std::string gselect_rspecifier, weights_rspecifier;
45 po.
Register(
"binary", &binary,
"Write output in binary mode");
46 po.
Register(
"update-flags", &update_flags_str,
"Which GMM parameters will be " 47 "updated: subset of mvw.");
48 po.
Register(
"gselect", &gselect_rspecifier,
"rspecifier for gselect objects " 49 "to limit the #Gaussians accessed on each frame.");
50 po.
Register(
"weights", &weights_rspecifier,
"rspecifier for a vector of floats " 51 "for each utterance, that's a per-frame weight.");
59 std::string model_filename = po.
GetArg(1),
60 feature1_rspecifier = po.
GetArg(2),
61 feature2_rspecifier = po.
GetArg(3),
62 accs_wxfilename = po.
GetArg(4);
67 Input ki(model_filename, &binary_read);
76 double tot_like = 0.0, tot_weight = 0.0;
82 int32 num_done = 0, num_err = 0;
84 for (; !feature1_reader.
Done(); feature1_reader.
Next()) {
85 std::string key = feature1_reader.
Key();
86 if (!feature2_reader.
HasKey(key)) {
87 KALDI_WARN <<
"For utterance " << key <<
", second features not present.";
104 if (weights_rspecifier !=
"") {
105 if (!weights_reader.
HasKey(key)) {
106 KALDI_WARN <<
"No per-frame weights available for utterance " << key;
110 weights = weights_reader.
Value(key);
111 if (weights.
Dim() != file_frames) {
112 KALDI_WARN <<
"Weights for utterance " << key <<
" have wrong dim " 113 << weights.
Dim() <<
" vs. " << file_frames;
118 if (gselect_rspecifier !=
"") {
119 if (!gselect_reader.
HasKey(key)) {
120 KALDI_WARN <<
"No gselect information for utterance " << key;
124 const std::vector<std::vector<int32> > &gselect =
125 gselect_reader.
Value(key);
126 if (gselect.size() !=
static_cast<size_t>(file_frames)) {
127 KALDI_WARN <<
"gselect information for utterance " << key
128 <<
" has wrong size " << gselect.size() <<
" vs. " 134 for (
int32 i = 0;
i < file_frames;
i++) {
136 if (weight == 0.0)
continue;
137 file_weight += weight;
139 const std::vector<int32> &this_gselect = gselect[
i];
140 int32 gselect_size = this_gselect.size();
145 loglikes.
Scale(weight);
151 for (
int32 i = 0;
i < file_frames;
i++) {
153 if (weight == 0.0)
continue;
154 file_weight += weight;
156 posteriors.
Scale(weight);
160 KALDI_VLOG(2) <<
"File '" << key <<
"': Average likelihood = " 161 << (file_like/file_weight) <<
" over " 162 << file_weight <<
" frames.";
163 tot_like += file_like;
164 tot_weight += file_weight;
167 KALDI_LOG <<
"Done " << num_done <<
" files; " 168 << num_err <<
" with errors.";
170 <<
"frame = " << (tot_like/tot_weight) <<
" over " << tot_weight
171 <<
" (weighted) frames.";
174 KALDI_LOG <<
"Written accs to " << accs_wxfilename;
175 return (num_done != 0 ? 0 : 1);
176 }
catch(
const std::exception &e) {
177 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
GmmFlagsType StringToGmmFlags(std::string str)
Convert string which is some subset of "mvwta" to flags.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept.
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
void AccumulateForComponent(const VectorBase< BaseFloat > &data, int32 comp_index, BaseFloat weight)
Accumulate for a single component, given the posterior.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
int main(int argc, char *argv[])
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void Read(std::istream &in, bool binary)
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition for Gaussian Mixture Model with diagonal covariances.
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
void Resize(int32 num_gauss, int32 dim, GmmFlagsType flags)
Allocates memory for accumulators.
void AccumulateFromPosteriors(const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &gauss_posteriors)
Accumulate for all components, given the posteriors.
Represents a non-allocating general vector which can be defined as a sub-vector of a higher-level vector.