53 if (
inv_covars_.size() !=
static_cast<size_t>(nmix))
71 for (
int32 mix = 0; mix < ncomp; mix++) {
83 for (
int32 mix = 0; mix < ncomp; mix++) {
102 for (
int32 mix = 0; mix < num_mix; mix++) {
117 <<
", not a number in gconst computation";
123 if (gc > 0) gc = -gc;
133 vector<int32> *history) {
136 << target_components <<
" components";
149 for (
int32 mix = 0; mix < current_components; mix++) {
152 for (
int32 mix = current_components; mix < target_components; mix++) {
160 while (current_components < target_components) {
163 for (
int32 i = 1;
i < current_components;
i++) {
172 history->push_back(max_idx);
186 current_components++;
195 for (
int32 i = 0;
i < num_comps;
i++) {
207 if (target_components <= 0 ||
NumGauss() < target_components) {
208 KALDI_ERR <<
"Invalid argument for target number of Gaussians (=" 209 << target_components <<
")";
211 if (
NumGauss() == target_components) {
212 KALDI_WARN <<
"No components merged, as target = total.";
218 if (target_components == 1) {
221 vector<SpMatrix<BaseFloat> > covars(num_comp);
223 for (
int32 i = 0;
i < num_comp;
i++) {
224 covars[
i].Resize(dim);
226 covars[
i].InvertDouble();
228 covars[
i].AddVec2(1.0, means.
Row(i));
239 for (
int32 i = 0;
i < num_comp;
i++) {
260 vector<bool> discarded_component(num_comp);
263 for (
int32 i = 0;
i < num_comp;
i++) {
264 discarded_component[
i] =
false;
271 vector<SpMatrix<BaseFloat> > vars(num_comp);
273 for (
int32 i = 0;
i < num_comp;
i++) {
276 vars[
i].InvertDouble();
281 vars[
i].AddVec2(1.0, means.
Row(i));
286 for (
int32 i = 0;
i < num_comp;
i++) {
290 means.
Row(i), means.
Row(
j), vars[
i], vars[
j]);
291 delta_like(i,
j) = w_sum * merged_logdet
292 - w1 * logdet(i) - w2 * logdet(
j);
297 for (
int32 removed = 0; removed < num_comp - target_components; removed++) {
300 BaseFloat max_delta_like = -std::numeric_limits<BaseFloat>::max();
301 int32 max_i = 0, max_j = 0;
303 if (discarded_component[
i])
continue;
305 if (discarded_component[
j])
continue;
306 if (delta_like(i, j) > max_delta_like) {
307 max_delta_like = delta_like(i, j);
318 if (history != NULL) {
319 history->push_back(max_i);
320 history->push_back(max_j);
327 means.
Row(max_i).AddVec(w2/w1, means.
Row(max_j));
328 means.
Row(max_i).Scale(w1/w_sum);
330 vars[max_i].AddSp(w2/w1, vars[max_j]);
331 vars[max_i].Scale(w1/w_sum);
345 means.
Row(max_i), 0.0);
348 logdet(max_i) += 0.5 *
inv_covars_[max_i].LogPosDefDet();
352 discarded_component[max_j] =
true;
355 for (
int32 j = 0;
j < num_comp;
j++) {
356 if ((
j == max_i) || (discarded_component[
j]))
continue;
359 means.
Row(max_i), means.
Row(j), vars[max_i], vars[
j]);
360 delta_like(max_i, j) = w_sum * merged_logdet
361 - w1 * logdet(max_i) - w2 * logdet(j);
369 for (
int32 i = 0;
i < num_comp;
i++) {
370 if (discarded_component[
i]) {
383 const vector<pair<int32, int32> > &preselect) {
386 if (target_components <= 0 ||
NumGauss() < target_components) {
387 KALDI_WARN <<
"Invalid argument for target number of Gaussians (=" 388 << target_components <<
"), currently " 389 <<
NumGauss() <<
", not mixing down";
392 if (
NumGauss() == target_components) {
393 KALDI_WARN <<
"No components merged, as target = total.";
397 typedef pair<BaseFloat, pair<int32, int32> > QueueElem;
398 std::priority_queue<QueueElem> queue;
404 vector<bool> discarded_component(num_comp);
407 for (
int32 i = 0;
i < num_comp;
i++) {
408 discarded_component[
i] =
false;
416 vector<SpMatrix<BaseFloat> > vars(num_comp);
418 for (
int32 i = 0;
i < num_comp;
i++) {
421 vars[
i].InvertDouble();
426 vars[
i].AddVec2(1.0, means.
Row(i));
430 for (
int32 i = 0;
i < preselect.size();
i++) {
431 int32 idx1 = preselect[
i].first, idx2 = preselect[
i].second;
432 KALDI_ASSERT(static_cast<size_t>(idx1) < static_cast<size_t>(num_comp));
433 KALDI_ASSERT(static_cast<size_t>(idx2) < static_cast<size_t>(num_comp));
436 means.
Row(idx1), means.
Row(idx2),
437 vars[idx1], vars[idx2]),
438 delta_like = w_sum * merged_logdet - w1 * logdet(idx1) - w2 * logdet(idx2);
439 queue.push(std::make_pair(delta_like, preselect[
i]));
442 vector<int32> mapping(num_comp);
444 for (
int32 i = 0;
i < num_comp;
i++) mapping[
i] =
i;
449 removed < num_comp - target_components && !queue.empty(); ) {
450 QueueElem qelem = queue.top();
452 BaseFloat delta_log_like_old = qelem.first;
453 int32 idx1 = qelem.second.first, idx2 = qelem.second.second;
458 while (discarded_component[idx1]) idx1 = mapping[idx1];
459 while (discarded_component[idx2]) idx2 = mapping[idx2];
460 if (idx1 == idx2)
continue;
466 means.
Row(idx1), means.
Row(idx2),
467 vars[idx1], vars[idx2]);
468 delta_log_like = w_sum * merged_logdet - w1 * logdet(idx1) - w2 * logdet(idx2);
470 if (
ApproxEqual(delta_log_like, delta_log_like_old) ||
471 delta_log_like > delta_log_like_old) {
479 means.
Row(idx1).AddVec(w2/w1, means.
Row(idx2));
480 means.
Row(idx1).Scale(w1/w_sum);
482 vars[idx1].AddSp(w2/w1, vars[idx2]);
483 vars[idx1].Scale(w1/w_sum);
497 means.
Row(idx1), 0.0);
500 logdet(idx1) = 0.5 *
inv_covars_[idx1].LogPosDefDet();
504 discarded_component[idx2] =
true;
505 KALDI_VLOG(2) <<
"Delta-log-like is " << delta_log_like <<
" (merging " 506 << idx1 <<
" and " << idx2 <<
")";
507 ans += delta_log_like;
508 mapping[idx2] = idx1;
511 QueueElem new_elem(delta_log_like, std::make_pair(idx1, idx2));
512 queue.push(new_elem);
518 for (
int32 i = 0;
i < num_comp;
i++) {
519 if (mapping[
i] ==
i) {
550 tmp_mean.
AddVec(w2/w1, f2);
551 tmp_mean.
Scale(w1/w_sum);
553 tmp_var.
AddSp(w2/w1, s2);
554 tmp_var.
Scale(w1/w_sum);
555 tmp_var.
AddVec2(-1.0, tmp_mean);
558 return merged_logdet;
563 int32 comp_id)
const {
565 KALDI_ERR <<
"Must call ComputeGconsts() before computing likelihood";
566 if (data.
Dim() !=
Dim()) {
567 KALDI_ERR <<
"DiagGmm::ComponentLogLikelihood, dimension " 568 <<
"mismatch " << (data.
Dim()) <<
"vs. "<< (
Dim());
587 KALDI_ERR <<
"Invalid answer (overflow or invalid variances/features?)";
598 data_sq.AddVec2(1.0, data);
601 data_sq.ScaleDiag(0.5);
607 for (
int32 mix = 0; mix < num_comp; mix++) {
614 const vector<int32> &indices,
618 int32 num_indices =
static_cast<int32>(indices.size());
627 for (
int32 i = 0;
i < num_indices;
i++) {
639 std::vector<int32> *output)
const {
646 if (num_gselect < num_gauss) {
649 std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
650 thresh = ptr[num_gauss-num_gselect];
652 thresh = -std::numeric_limits<BaseFloat>::infinity();
654 BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
655 std::vector<std::pair<BaseFloat, int32> > pairs;
656 for (
int32 p = 0; p < num_gauss; p++) {
657 if (loglikes(p) >= thresh) {
658 pairs.push_back(std::make_pair(loglikes(p), p));
661 std::sort(pairs.begin(), pairs.end(),
662 std::greater<std::pair<BaseFloat, int32> >());
664 j < num_gselect && j < static_cast<int32>(pairs.size());
666 output->push_back(pairs[
j].second);
667 tot_loglike =
LogAdd(tot_loglike, pairs[
j].first);
676 const std::vector<int32> &preselect,
678 std::vector<int32> *output)
const {
679 static bool warned_size =
false;
680 int32 preselect_sz = preselect.size();
681 int32 this_num_gselect = std::min(num_gselect, preselect_sz);
682 if (preselect_sz <= num_gselect && !warned_size) {
684 KALDI_WARN <<
"Preselect size is less or equal to than final size, " 685 <<
"doing nothing: " << preselect_sz <<
" < " << num_gselect
686 <<
" [won't warn again]";
693 std::nth_element(ptr, ptr+preselect_sz-this_num_gselect,
695 BaseFloat thresh = ptr[preselect_sz-this_num_gselect];
697 BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
700 std::vector<std::pair<BaseFloat, int32> > pairs;
701 for (
int32 p = 0; p < preselect_sz; p++)
702 if (loglikes(p) >= thresh)
703 pairs.push_back(std::make_pair(loglikes(p), preselect[p]));
704 std::sort(pairs.begin(), pairs.end(),
705 std::greater<std::pair<BaseFloat, int32> >());
708 j < this_num_gselect && j < static_cast<int32>(pairs.size());
710 output->push_back(pairs[
j].second);
711 tot_loglike =
LogAdd(tot_loglike, pairs[
j].first);
721 if (posterior == NULL)
KALDI_ERR <<
"NULL pointer passed as return argument.";
726 KALDI_ERR <<
"Invalid answer (overflow or invalid variances/features?)";
738 if (renorm_weights) {
746 vector<int32> gauss(gauss_in);
747 std::sort(gauss.begin(), gauss.end());
751 for (
size_t i = 0;
i < gauss.size();
i++) {
753 for (
size_t j = i + 1;
j < gauss.size();
j++)
760 KALDI_ERR <<
"Must call ComputeGconsts() before writing the model.";
762 if (!binary) out_stream <<
"\n";
767 WriteToken(out_stream, binary,
"<MEANS_INVCOVARS>");
769 WriteToken(out_stream, binary,
"<INV_COVARS>");
774 if (!binary) out_stream <<
"\n";
779 gmm.
Write(out_stream,
false);
792 us.weights_.Scale(1.0 - rho);
793 us.weights_.AddVec(rho, them.
weights_);
794 us.weights_.Scale(1.0 / us.weights_.Sum());
798 us.means_.Scale(1.0 - rho);
799 us.means_.AddMat(rho, them.
means_);
804 us.vars_[
i].Scale(1.0 - rho);
805 us.vars_[
i].AddSp(rho, them.
vars_[
i]);
809 us.CopyToFullGmm(
this);
818 if (token !=
"<FullGMMBegin>" && token !=
"<FullGMM>")
819 KALDI_ERR <<
"Expected <FullGMM>, got " << token;
822 if (token ==
"<GCONSTS>") {
826 if (token !=
"<WEIGHTS>")
827 KALDI_ERR <<
"FullGmm::Read, expected <WEIGHTS> or <GCONSTS>, got " 831 ExpectToken(in_stream, binary,
"<MEANS_INVCOVARS>");
842 if (token !=
"<FullGMMEnd>" && token !=
"</FullGMM>")
843 KALDI_ERR <<
"Expected </FullGMM>, got " << token;
849 gmm.
Read(in_stream,
false);
std::ostream & operator<<(std::ostream &os, const MatrixBase< Real > &M)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Packed symmetric matrix class.
void Write(std::ostream &out, bool binary) const
write to stream.
void Merge(int32 target_components, std::vector< int32 > *history=NULL)
Merge the components and remember the order in which the components were merged (flat list of pairs) ...
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, VectorBase< BaseFloat > *posterior) const
Computes the posterior probabilities of all Gaussian components given a data point.
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition for Gaussian Mixture Model with full covariances in normal mode: where the parameters are ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
const Matrix< BaseFloat > & means_invvars() const
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
int32 ComputeGconsts()
Sets the gconsts.
void Split(int32 target_components, float perturb_factor, std::vector< int32 > *history=NULL)
Split the components and remember the order in which the components were split ...
Definition for Gaussian Mixture Model with full covariances.
void InvertDouble(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
const Vector< BaseFloat > & gconsts() const
Const accessors.
Vector< BaseFloat > weights_
weights (not log).
void Write(std::ostream &os, bool binary) const
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
void MulTp(const TpMatrix< Real > &M, const MatrixTransposeType trans)
Multiplies this vector by lower-triangular matrix: *this <– *this *M.
uint16 GmmFlagsType
Bitwise OR of the above flags.
void ScaleDiag(const Real alpha)
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real LogSumExp(Real prune=-1.0) const
Returns log(sum(exp())) without exp overflow. If prune > 0.0, ignores terms less than the max - prune...
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
void CopyFromSp(const SpMatrix< Real > &other)
std::vector< SpMatrix< double > > vars_
covariances
void Perturb(float perturb_factor)
Perturbs the component means with a random vector multiplied by the perturb factor.
void RemoveComponent(int32 gauss, bool renorm_weights)
Mutators for single component, supports float or double. Removes single component from model...
FullGmm()
Empty constructor.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void CopyFromFullGmm(const FullGmm &fullgmm)
Copies from given FullGmm.
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v'
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void Cholesky(const SpMatrix< Real > &orig)
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Vector< double > weights_
weights (not log).
void ResizeInvCovars(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Real LogPosDefDet() const
Computes log determinant but only for +ve-def matrices (it uses Cholesky).
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
void RemoveComponents(const std::vector< int32 > &gauss, bool renorm_weights)
Removes multiple components from model; "gauss" must not have dups.
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
bool valid_gconsts_
Recompute gconsts_ if false.
BaseFloat GaussianSelectionPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &preselect, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
BaseFloat GaussianSelection(const VectorBase< BaseFloat > &data, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
Real TraceSpSpLower(const SpMatrix< Real > &A, const SpMatrix< Real > &B)
Packed symmetric matrix class.
BaseFloat MergedComponentsLogdet(BaseFloat w1, BaseFloat w2, const VectorBase< BaseFloat > &f1, const VectorBase< BaseFloat > &f2, const SpMatrix< BaseFloat > &s1, const SpMatrix< BaseFloat > &s2) const
Real * Data()
Returns a pointer to the start of the vector's data.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
const Vector< BaseFloat > & weights() const
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
const Vector< BaseFloat > & weights() const
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
void SetRandn()
Set vector to random normally-distributed noise.
void Read(std::istream &is, bool binary)
BaseFloat MergePreselect(int32 target_components, const std::vector< std::pair< int32, int32 > > &preselect_pairs)
Merge the components and remember the order in which the components were merged (flat list of pairs);...
double LogAdd(double x, double y)
std::istream & operator>>(std::istream &is, Matrix< Real > &M)
BaseFloat ComponentLogLikelihood(const VectorBase< BaseFloat > &data, int32 comp_id) const
Computes the contribution log-likelihood of a data point from a single Gaussian component.
A class representing a vector.
void LogLikelihoods(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *loglikes) const
Outputs the per-component contributions to the log-likelihood.
void CopyFromDiagGmm(const DiagGmm &diaggmm)
Copies from given DiagGmm.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition for Gaussian Mixture Model with diagonal covariances.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void RemoveRow(MatrixIndexT i)
Remove a specified row.
std::vector< SpMatrix< BaseFloat > > inv_covars_
Inverse covariances.
void Interpolate(BaseFloat rho, const FullGmm &source, GmmFlagsType flags=kGmmAll)
this = rho x source + (1-rho) x this
Provides a vector abstraction class.
Matrix< BaseFloat > means_invcovars_
Means times inverse covariances.
void SetZero()
Set vector to all zeros.
Matrix< double > means_
Means.
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
BaseFloat LogLikelihood(const VectorBase< BaseFloat > &data) const
Returns the log-likelihood of a data point (vector) given the GMM.
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
const Matrix< BaseFloat > & inv_vars() const
Vector< BaseFloat > gconsts_
Equals log(weight) - 0.5 * (log det(var) + mean'*inv(var)*mean)