35 KALDI_ASSERT(num_frames >= 10 * num_gauss &&
"Too few frames to train on");
37 for (
int32 i = 0;
i < num_frames;
i++) {
38 mean.AddVec(1.0 / num_frames, feats.
Row(
i));
43 KALDI_ERR <<
"Features do not have positive variance " << var;
47 std::set<int32> used_frames;
48 for (
int32 g = 0; g < num_gauss; g++) {
50 while (used_frames.count(random_frame) != 0)
51 random_frame =
RandInt(0, num_frames - 1);
52 used_frames.insert(random_frame);
53 gmm_normal.
weights_(g) = 1.0 / num_gauss;
54 gmm_normal.
means_.
Row(g).CopyFromVec(feats.
Row(random_frame));
55 gmm_normal.
vars_.
Row(g).CopyFromVec(var);
69 frame_weights.
Set(1.0);
75 KALDI_LOG <<
"Likelihood per frame on iteration " << iter
76 <<
" was " << (tot_like / feats.
NumRows()) <<
" over " 77 << feats.
NumRows() <<
" frames.";
82 KALDI_LOG <<
"Objective-function change on iteration " << iter <<
" was " 83 << (objf_change /
count) <<
" over " << count <<
" frames.";
88 int main(
int argc,
char *argv[]) {
90 using namespace kaldi;
93 "This program initializes a single diagonal GMM and does multiple iterations of\n" 94 "training from features stored in memory.\n" 95 "Usage: gmm-global-init-from-feats [options] <feature-rspecifier> <model-out>\n" 96 "e.g.: gmm-global-init-from-feats scp:train.scp 1.mdl\n";
102 int32 num_gauss = 100;
103 int32 num_gauss_init = 0;
104 int32 num_iters = 50;
105 int32 num_frames = 200000;
106 int32 srand_seed = 0;
107 int32 num_threads = 4;
109 po.
Register(
"binary", &binary,
"Write output in binary mode");
110 po.
Register(
"num-gauss", &num_gauss,
"Number of Gaussians in the model");
111 po.
Register(
"num-gauss-init", &num_gauss_init,
"Number of Gaussians in " 112 "the model initially (if nonzero and less than num_gauss, " 113 "we'll do mixture splitting)");
114 po.
Register(
"num-iters", &num_iters,
"Number of iterations of training");
115 po.
Register(
"num-frames", &num_frames,
"Number of feature vectors to store in " 116 "memory and train on (randomly chosen from the input features)");
117 po.
Register(
"srand", &srand_seed,
"Seed for random number generator ");
118 po.
Register(
"num-threads", &num_threads,
"Number of threads used for " 119 "statistics accumulation");
132 std::string feature_rspecifier = po.
GetArg(1),
133 model_wxfilename = po.
GetArg(2);
142 int64 num_read = 0, dim = 0;
144 KALDI_LOG <<
"Reading features (will keep " << num_frames <<
" frames.)";
146 for (; !feature_reader.
Done(); feature_reader.
Next()) {
152 feats.
Resize(num_frames, dim);
153 }
else if (this_feats.
NumCols() != dim) {
154 KALDI_ERR <<
"Features have inconsistent dims " 155 << this_feats.
NumCols() <<
" vs. " << dim
156 <<
" (current utt is) " << feature_reader.
Key();
158 if (num_read <= num_frames) {
159 feats.
Row(num_read - 1).CopyFromVec(this_feats.
Row(t));
163 feats.
Row(
RandInt(0, num_frames - 1)).CopyFromVec(this_feats.
Row(t));
169 if (num_read < num_frames) {
170 KALDI_WARN <<
"Number of frames read " << num_read <<
" was less than " 171 <<
"target number " << num_frames <<
", using all we read.";
174 BaseFloat percent = num_frames * 100.0 / num_read;
175 KALDI_LOG <<
"Kept " << num_frames <<
" out of " << num_read
176 <<
" input frames = " << percent <<
"%.";
179 if (num_gauss_init <= 0 || num_gauss_init > num_gauss)
180 num_gauss_init = num_gauss;
182 DiagGmm gmm(num_gauss_init, dim);
184 KALDI_LOG <<
"Initializing GMM means from random frames to " 185 << num_gauss_init <<
" Gaussians.";
190 int32 cur_num_gauss = num_gauss_init,
191 gauss_inc = (num_gauss - num_gauss_init) / (num_iters / 2);
193 for (
int32 iter = 0; iter < num_iters; iter++) {
196 int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc);
197 if (next_num_gauss > gmm.
NumGauss()) {
198 KALDI_LOG <<
"Splitting to " << next_num_gauss <<
" Gaussians.";
199 gmm.
Split(next_num_gauss, 0.1);
200 cur_num_gauss = next_num_gauss;
205 KALDI_LOG <<
"Wrote model to " << model_wxfilename;
207 }
catch(
const std::exception &e) {
208 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
BaseFloat AccumulateFromDiagMultiThreaded(const DiagGmm &gmm, const MatrixBase< BaseFloat > &data, const VectorBase< BaseFloat > &frame_weights, int32 num_threads)
This does the same job as AccumulateFromDiag, but using multiple threads.
void Split(int32 target_components, float perturb_factor, std::vector< int32 > *history=NULL)
Split the components and remember the order in which the components were split.
Definition for Gaussian Mixture Model with diagonal covariances in normal mode: where the parameters ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void MleDiagGmmUpdate(const MleDiagGmmOptions &config, const AccumDiagGmm &diag_gmm_acc, GmmFlagsType flags, DiagGmm *gmm, BaseFloat *obj_change_out, BaseFloat *count_out, int32 *floored_elements_out, int32 *floored_gaussians_out, int32 *removed_gaussians_out)
Computes the maximum-likelihood estimates of the parameters of a Gaussian mixture model...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
bool WithProb(BaseFloat prob, struct RandomState *state)
int32 ComputeGconsts()
Sets the gconsts.
int main(int argc, char *argv[])
void Register(const std::string &name, bool *ptr, const std::string &doc)
void TrainOneIter(const Matrix< BaseFloat > &feats, const MleDiagGmmOptions &gmm_opts, int32 iter, int32 num_threads, DiagGmm *gmm)
void AddVec2(const Real alpha, const VectorBase< Real > &v)
Add vector: *this = *this + alpha * v^2 [element-wise squaring].
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
void Register(OptionsItf *opts)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
Real Max() const
Returns the maximum value of any element, or -infinity for the empty vector.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
Matrix< double > vars_
diagonal variance
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Configuration variables like variance floor, minimum occupancy, etc.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
Matrix< double > means_
Means.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void Set(Real f)
Set all members of a vector to a specified value.
void InitGmmFromRandomFrames(const Matrix< BaseFloat > &feats, DiagGmm *gmm)
Definition for Gaussian Mixture Model with diagonal covariances.
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Vector< double > weights_
weights (not log).
void CopyFromNormal(const DiagGmmNormal &diag_gmm_normal)
Copies from DiagGmmNormal; does not resize.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)