// Command-line entry point. Per the usage text below, the tool takes features
// and Gaussian-selection (gselect) information for a full-covariance GMM and
// writes per-frame posteriors for the selected indices, optionally pruning
// posteriors below a threshold and renormalizing the rest.
// NOTE(review): this listing looks like a partial extract -- names such as
// `po`, `min_post`, `mat`, `post`, `sum` and `tot_posts` are used below without
// a visible declaration, and some statement bodies are missing. The comments
// here describe only the statements that are actually shown.
// Returns 0 when num_done is nonzero, otherwise 1.
27 int main(
int argc,
char *argv[]) {
29 using namespace kaldi;
31 typedef kaldi::int64 int64;
34 "Given features and Gaussian-selection (gselect) information for\n" 35 "a full-covariance GMM, output per-frame posteriors for the selected\n" 36 "indices. Also supports pruning the posteriors if they are below\n" 37 "a stated threshold, (and renormalizing the rest to sum to one)\n" 38 "See also: gmm-gselect, fgmm-gselect, gmm-global-get-post,\n" 39 " gmm-global-gselect-to-post\n" 41 "Usage: fgmm-global-gselect-to-post [options] <model-in> <feature-rspecifier> " 42 "<gselect-rspecifier> <post-wspecifier>\n" 43 "e.g.: fgmm-global-gselect-to-post 1.ubm ark:- 'ark:gunzip -c 1.gselect|' ark:-\n";
// Register the "min-post" option, bound to min_post. The four positional
// arguments that follow are, in order: model input, feature rspecifier,
// gselect rspecifier and posterior wspecifier.
48 po.
Register(
"min-post", &min_post,
"If nonzero, posteriors below this " 49 "threshold will be pruned away and the rest will be renormalized " 59 std::string model_rxfilename = po.
GetArg(1),
60 feature_rspecifier = po.
GetArg(2),
61 gselect_rspecifier = po.
GetArg(3),
62 post_wspecifier = po.
GetArg(4);
// Running totals across all utterances; used in the final summary log line.
67 double tot_loglike = 0.0, tot_frames = 0.0;
// Counters reported in the final "Done ... files; ... had errors." log line.
73 int32 num_done = 0, num_err = 0;
// Iterate over every entry of feature_reader; the key is used as the
// utterance id in lookups and log messages.
75 for (; !feature_reader.
Done(); feature_reader.
Next()) {
76 std::string utt = feature_reader.
Key();
79 int32 num_frames = mat.
NumRows();
// Warn when the gselect table has no entry for this utterance.
83 if (!gselect_reader.
HasKey(utt)) {
84 KALDI_WARN <<
"No gselect information for utterance " << utt;
// Per-frame lists of selected indices for this utterance.
88 const std::vector<std::vector<int32> > &gselect(gselect_reader.
Value(utt));
// The gselect data must contain exactly one entry per row of mat.
89 if (static_cast<int32>(gselect.size()) != num_frames) {
90 KALDI_WARN <<
"gselect information for utterance " << utt
91 <<
" has wrong size " << gselect.size() <<
" vs. " 97 double this_tot_loglike = 0;
// Process each frame t of the utterance.
100 for (int32 t = 0; t < num_frames; t++) {
102 const std::vector<int32> &this_gselect = gselect[t];
// Sanity check: the entries of loglikes are expected to sum to roughly 1
// (tolerance 0.01), i.e. despite its name the vector is treated as a
// normalized distribution at this point.
// NOTE(review): the step that normalizes it is not visible in this listing
// -- confirm against the full source.
108 if (fabs(loglikes.
Sum() - 1.0) > 0.01) {
// Optional pruning, active only when min_post is nonzero.
111 if (min_post != 0.0) {
// Presumably records the index of the largest entry in max_index -- confirm.
113 loglikes.
Max(&max_index);
// Visit every entry below the min_post threshold (the action taken on such
// entries is not visible in this listing).
114 for (int32
i = 0;
i < loglikes.
Dim();
i++)
115 if (loglikes(
i) < min_post)
// Assign all of the mass to the largest entry.
// NOTE(review): the condition guarding this assignment is not shown here.
119 loglikes(max_index) = 1.0;
// Rescale by 1/sum (presumably so the surviving entries sum to one, as the
// usage text describes -- the computation of `sum` is not shown).
121 loglikes.
Scale(1.0 / sum);
// Emit every nonzero entry as a (selected index, value) pair for frame t.
124 for (int32
i = 0;
i < loglikes.
Dim();
i++) {
125 if (loglikes(
i) != 0.0) {
126 post[t].push_back(std::make_pair(this_gselect[
i], loglikes(i)));
135 <<
" because bad posterior-sum encountered (NaN?)";
// Write this utterance's posteriors under its key.
138 post_writer.
Write(utt, post);
// Verbose (level 2) log of this_tot_loglike averaged over the frames.
140 KALDI_VLOG(2) <<
"Like/frame for utt " << utt <<
" was " 141 << (this_tot_loglike/num_frames) <<
" per frame over " 142 << num_frames <<
" frames.";
// Fold this utterance into the overall totals.
143 tot_loglike += this_tot_loglike;
144 tot_frames += num_frames;
// Final summary: file counts, then overall per-frame averages.
148 KALDI_LOG <<
"Done " << num_done <<
" files; " << num_err <<
" had errors.";
149 KALDI_LOG <<
"Overall loglike per frame is " << (tot_loglike / tot_frames)
150 <<
" with " << (tot_posts / tot_frames) <<
" entries per frame, " 151 <<
" over " << tot_frames <<
" frames";
// Exit status is 0 only if num_done is nonzero.
152 return (num_done != 0 ? 0 : 1);
153 }
// Print the message of any std::exception to stderr.
catch(
const std::exception &e) {
154 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition for Gaussian Mixture Model with full covariances.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const T & Value(const std::string &key)
int main(int argc, char *argv[])
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
Real Max() const
Returns the maximum value of any element, or -infinity for the empty vector.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
bool HasKey(const std::string &key)
Real Sum() const
Returns sum of the elements.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...