// Command-line entry point. The usage text below names the tool sgmm2-init:
// it initializes an SGMM from a trained full-covariance UBM or, when the
// --init-from-sgmm flag is used, from another SGMM.
// NOTE(review): the bare integers fused into this listing (28, 30, 34, ...)
// increase monotonically and look like line numbers carried over from a
// rendered source view, not program tokens; several numbered lines are absent
// from this listing - confirm against the original file.
28 int main(
int argc,
char *argv[]) {
// Make Kaldi's names usable without qualification inside main.
30 using namespace kaldi;
// Usage/help text pieces: purpose, the four positional arguments, and hints
// for --init-from-sgmm and --pdf-map. The declaration these literals
// initialize is not visible in this listing.
34 "Initialize an SGMM from a trained full-covariance UBM and a specified" 36 "Usage: sgmm2-init [options] <topology> <tree> <init-model> <sgmm-out>\n" 37 "The <init-model> argument can be a UBM (the default case) or another\n" 38 "SGMM (if the --init-from-sgmm flag is used).\n" 39 "For systems with two-level tree, use --pdf-map argument.";
41 bool binary =
true, init_from_sgmm =
false, spk_dep_weights =
false;
43 int32 phn_space_dim = 0, spk_space_dim = 0;
44 std::string pdf_map_rxfilename;
45 double self_weight = 1.0;
48 po.
Register(
"binary", &binary,
"Write output in binary mode");
49 po.
Register(
"phn-space-dim", &phn_space_dim,
"Phonetic space dimension.");
50 po.
Register(
"spk-space-dim", &spk_space_dim,
"Speaker space dimension.");
51 po.
Register(
"spk-dep-weights", &spk_dep_weights,
"If true, have speaker-" 52 "dependent weights (symmetric SGMM)");
53 po.
Register(
"init-from-sgmm", &init_from_sgmm,
54 "Initialize from another SGMM (instead of a UBM).");
55 po.
Register(
"self-weight", &self_weight,
56 "If < 1.0, will be the weight of a pdf with its \"own\" mixture, " 57 "where we initialize each group with a number of mixtures. If" 58 "1.0, we initialize each group with just one mixture component.");
59 po.
Register(
"pdf-map", &pdf_map_rxfilename,
60 "For systems with 2-level trees [SCTM systems], the file that " 61 "maps from pdfs to groups (from build-tree-two-level)");
70 std::string topo_in_filename = po.
GetArg(1),
71 tree_in_filename = po.
GetArg(2),
72 init_model_filename = po.
GetArg(3),
73 sgmm_out_filename = po.
GetArg(4);
// Open the tree file named by the second positional argument.
// NOTE(review): binary_in is declared outside this listing; presumably Input
// stores the detected file mode in it - confirm.
78 Input ki(tree_in_filename.c_str(), &binary_in);
// Read ctx_dep from the opened stream, passing along that mode flag.
79 ctx_dep.
Read(ki.Stream(), binary_in);
// pdf2group holds one int32 entry per pdf; the --pdf-map help text describes
// the mapping as going from pdfs to groups.
82 std::vector<int32> pdf2group;
// A non-empty --pdf-map value selects the branch that opens the map file.
83 if (pdf_map_rxfilename !=
"") {
85 Input ki(pdf_map_rxfilename, &binary_in);
// NOTE(review): the statements between the Input above and the loop below are
// not in this listing; the loop is presumably the fallback used when no map
// file is given - confirm. As written, it appends 0, 1, ...,
// ctx_dep.NumPdfs()-1 to pdf2group in order.
88 for (int32
i = 0;
i < ctx_dep.
NumPdfs();
i++) pdf2group.push_back(
i);
// NOTE(review): the next line is the middle of an argument list; the call it
// belongs to is not visible in this listing.
114 spk_space_dim, spk_dep_weights,
// Log the path held in sgmm_out_filename as the place the model was written.
125 KALDI_LOG <<
"Written model to " << sgmm_out_filename;
126 }
// Handler for std::exception: prints the exception's what() text and a
// newline to std::cerr. The rest of the handler is not visible here.
catch(
const std::exception &e) {
127 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void Write(std::ostream &os, bool binary, SgmmWriteFlagsType write_params) const
Class for definition of the subspace Gmm acoustic model.
A class for storing topology information for phones.
Definition for Gaussian Mixture Model with full covariances.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Read(std::istream &is, bool binary)
void InitializeFromFullGmm(const FullGmm &gmm, const std::vector< int32 > &pdf2group, int32 phn_subspace_dim, int32 spk_subspace_dim, bool speaker_dependent_weights, BaseFloat self_weight)
Initializes the SGMM parameters from a full-covariance UBM.
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
virtual int32 NumPdfs() const
NumPdfs() returns the number of acoustic pdfs (they are numbered 0 .. NumPdfs()-1).
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
void CopyGlobalsInitVecs(const AmSgmm2 &other, const std::vector< int32 > &pdf2group, BaseFloat self_weight)
Copies the global parameters from the supplied model, but sets the state vectors to zero...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void Read(std::istream &is, bool binary)
int NumArgs() const
Number of positional parameters (cf. argc-1).
void Write(std::ostream &os, bool binary) const
int main(int argc, char *argv[])
void Read(std::istream &is, bool binary)
Read context-dependency object from disk; throws on error.
void ComputeNormalizers()
Computes the data-independent terms in the log-likelihood computation for each Gaussian component and...