31 int main(
int argc,
char *argv[]) {
32 using namespace kaldi;
35 "Convert posteriors to Gaussian-level posteriors for SGMM training.\n" 36 "Usage: sgmm2-post-to-gpost [options] <model-in> <feature-rspecifier> " 37 "<posteriors-rspecifier> <gpost-wspecifier>\n" 38 "e.g.: sgmm2-post-to-gpost 1.mdl 1.ali scp:train.scp 'ark:ali-to-post ark:1.ali ark:-|' ark:-";
41 std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier;
43 po.
Register(
"gselect", &gselect_rspecifier,
"Precomputed Gaussian indices (rspecifier)");
44 po.
Register(
"spk-vecs", &spkvecs_rspecifier,
"Speaker vectors (rspecifier)");
45 po.
Register(
"utt2spk", &utt2spk_rspecifier,
46 "rspecifier for utterance to speaker map");
54 if (gselect_rspecifier ==
"")
55 KALDI_ERR <<
"--gselect option is required";
57 std::string model_filename = po.
GetArg(1),
58 feature_rspecifier = po.
GetArg(2),
59 posteriors_rspecifier = po.
GetArg(3),
60 gpost_wspecifier = po.
GetArg(4);
62 using namespace kaldi;
69 Input ki(model_filename, &binary);
74 double tot_like = 0.0;
75 kaldi::int64 tot_t = 0;
87 int32 num_done = 0, num_err = 0;
88 for (; !feature_reader.
Done(); feature_reader.
Next()) {
90 std::string utt = feature_reader.
Key();
91 if (!posteriors_reader.
HasKey(utt)
92 || posteriors_reader.
Value(utt).size() != mat.
NumRows()) {
93 KALDI_WARN <<
"No posteriors available for utterance " << utt
94 <<
" (or wrong size)";
100 if (!gselect_reader.
HasKey(utt) ||
102 KALDI_WARN <<
"No Gaussian-selection info available for utterance " 103 << utt <<
" (or wrong size)";
107 const std::vector<std::vector<int32> > &gselect =
108 gselect_reader.
Value(utt);
111 if (spkvecs_reader.
IsOpen()) {
112 if (spkvecs_reader.
HasKey(utt)) {
116 KALDI_WARN <<
"Cannot find speaker vector for " << utt;
123 BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
128 int32 prev_pdf_id = -1;
131 for (
size_t i = 0;
i < posterior.size();
i++) {
133 spk_vars, &per_frame_vars);
135 gpost[
i].gselect = gselect[
i];
136 gpost[
i].tids.resize(posterior[
i].size());
137 gpost[
i].posteriors.resize(posterior[
i].size());
140 for (
size_t j = 0;
j < posterior[
i].size();
j++) {
141 int32 tid = posterior[
i][
j].first,
144 gpost[
i].tids[
j] = tid;
146 if (pdf_id != prev_pdf_id) {
149 prev_pdf_id = pdf_id;
155 gpost[
i].posteriors[
j] = prev_posterior;
156 tot_like_this_file += prev_like * weight;
157 tot_weight += weight;
158 gpost[
i].posteriors[
j].
Scale(weight);
162 KALDI_VLOG(2) <<
"Average like for this file is " 163 << (tot_like_this_file/posterior.size()) <<
" over " 164 << posterior.size() <<
" frames.";
165 tot_like += tot_like_this_file;
166 tot_t += posterior.size();
167 if (num_done % 10 == 0)
168 KALDI_LOG <<
"Avg like per frame so far is " 170 gpost_writer.
Write(utt, gpost);
173 KALDI_LOG <<
"Overall like per frame (Gaussian only) = " 174 << (tot_like/tot_t) <<
" over " << tot_t <<
" frames.";
176 KALDI_LOG <<
"Done " << num_done <<
" files, " << num_err
179 return (num_done != 0 ? 0 : 1);
180 }
catch(
const std::exception &e) {
181 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
Class for definition of the subspace Gmm acoustic model.
BaseFloat ComponentPosteriors(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j2, Sgmm2PerSpkDerivedVars *spk_vars, Matrix< BaseFloat > *post) const
Similar to LogLikelihood() function above, but also computes the posterior probabilities for the pre-...
int main(int argc, char *argv[])
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
This class is for when you are reading something in random access, but it may actually be stored per-...
void Read(std::istream &is, bool binary)
A templated class for writing objects to an archive or script file; see The Table concept...
int32 TransitionIdToPdf(int32 trans_id) const
void ComputePerSpkDerivedVars(Sgmm2PerSpkDerivedVars *vars) const
Computes the per-speaker derived vars; assumes vars->v_s is already set up.
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void SortPosteriorByPdfs(const TransitionModel &tmodel, Posterior *post)
Sorts posterior entries so that transition-ids with same pdf-id are next to each other.
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
void Scale(Real alpha)
Multiply each element with a scalar value.
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
void ComputePerFrameVars(const VectorBase< BaseFloat > &data, const std::vector< int32 > &gselect, const Sgmm2PerSpkDerivedVars &spk_vars, Sgmm2PerFrameDerivedVars *per_frame_vars) const
This needs to be called with each new frame of data, prior to accumulation or likelihood evaluation: ...
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
const T & Value(const std::string &key)
void SetSpeakerVector(const Vector< BaseFloat > &v_s_in)
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...