29 int main(
int argc,
char *argv[]) {
31 using namespace kaldi;
34 "Accumulate stats for training a full-covariance GMM.\n" 35 "Usage: fgmm-global-acc-stats [options] <model-in> <feature-rspecifier> " 37 "e.g.: fgmm-global-acc-stats 1.mdl scp:train.scp 1.acc\n";
41 std::string update_flags_str =
"mvw";
42 std::string gselect_rspecifier, weights_rspecifier;
43 po.
Register(
"binary", &binary,
"Write output in binary mode");
44 po.
Register(
"update-flags", &update_flags_str,
"Which GMM parameters will be " 45 "updated: subset of mvw.");
46 po.
Register(
"gselect", &gselect_rspecifier,
"rspecifier for gselect objects " 47 "to limit the #Gaussians accessed on each frame.");
48 po.
Register(
"weights", &weights_rspecifier,
"rspecifier for a vector of floats " 49 "for each utterance, that's a per-frame weight.");
57 std::string model_filename = po.
GetArg(1),
58 feature_rspecifier = po.
GetArg(2),
59 accs_wxfilename = po.
GetArg(3);
64 Input ki(model_filename, &binary_read);
71 double tot_like = 0.0, tot_weight = 0.0;
76 int32 num_done = 0, num_err = 0;
78 for (; !feature_reader.
Done(); feature_reader.
Next()) {
79 std::string key = feature_reader.
Key();
86 if (weights_rspecifier !=
"") {
87 if (!weights_reader.
HasKey(key)) {
88 KALDI_WARN <<
"No per-frame weights available for utterance " << key;
92 weights = weights_reader.
Value(key);
93 if (weights.
Dim() != file_frames) {
94 KALDI_WARN <<
"Weights for utterance " << key <<
" have wrong dim " 95 << weights.
Dim() <<
" vs. " << file_frames;
101 if (gselect_rspecifier !=
"") {
102 if (!gselect_reader.
HasKey(key)) {
103 KALDI_WARN <<
"No gselect information for utterance " << key;
107 const std::vector<std::vector<int32> > &gselect =
108 gselect_reader.
Value(key);
109 if (gselect.size() !=
static_cast<size_t>(file_frames)) {
110 KALDI_WARN <<
"gselect information for utterance " << key
111 <<
" has wrong size " << gselect.size() <<
" vs. " 117 for (
int32 i = 0;
i < file_frames;
i++) {
119 if (weight == 0.0)
continue;
120 file_weight += weight;
122 const std::vector<int32> &this_gselect = gselect[
i];
123 int32 gselect_size = this_gselect.size();
128 loglikes.
Scale(weight);
133 for (
int32 i = 0;
i < file_frames;
i++) {
135 if (weight == 0.0)
continue;
136 file_weight += weight;
137 file_like += weight *
141 KALDI_VLOG(2) <<
"File '" << key <<
"': Average likelihood = " 142 << (file_like/file_weight) <<
" over " 143 << file_weight <<
" frames.";
144 tot_like += file_like;
145 tot_weight += file_weight;
148 KALDI_LOG <<
"Done " << num_done <<
" files; " 149 << num_err <<
" with errors.";
151 <<
"frame = " << (tot_like/tot_weight) <<
" over " << tot_weight
152 <<
" (weighted) frames.";
155 KALDI_LOG <<
"Written accs to " << accs_wxfilename;
156 return (num_done != 0 ? 0 : 1);
157 }
catch(
const std::exception &e) {
158 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
GmmFlagsType StringToGmmFlags(std::string str)
Convert a string of GMM flag characters (e.g. some subset of "mvw", as accepted by the --update-flags option) to flags.
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition for Gaussian Mixture Model with full covariances.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void Resize(int32 num_components, int32 dim, GmmFlagsType flags)
Allocates memory for accumulators.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
Class for computing the maximum-likelihood estimates of the parameters of a Gaussian mixture model...
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
bool HasKey(const std::string &key)
void Read(std::istream &is, bool binary)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
int main(int argc, char *argv[])
BaseFloat AccumulateFromFull(const FullGmm &gmm, const VectorBase< BaseFloat > &data, BaseFloat frame_posterior)
Accumulate for all components given a full-covariance GMM.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
void AccumulateForComponent(const VectorBase< BaseFloat > &data, int32 comp_index, BaseFloat weight)
Accumulate for a single component, given the posterior.