40 : valid_gconsts_(false) {
44 int32 num_gauss = 0, dim = gmms[0].second->Dim();
45 for (
size_t i = 0;
i < gmms.size();
i++)
46 num_gauss += gmms[
i].second->NumGauss();
49 for (
size_t i = 0;
i < gmms.size();
i++) {
52 const DiagGmm &gmm = *(gmms[
i].second);
100 for (
int32 mix = 0; mix < ncomp; mix++) {
121 if (num_mix != static_cast<int32>(
gconsts_.Dim()))
124 for (
int32 mix = 0; mix < num_mix; mix++) {
139 <<
", not a number in gconst computation";
145 if (gc > 0) gc = -gc;
155 std::vector<int32> *history) {
158 << target_components <<
" components";
160 if (target_components ==
NumGauss()) {
161 KALDI_WARN <<
"Already have the target # of Gaussians. Doing nothing.";
181 while (current_components < target_components) {
184 for (
int32 i = 1;
i < current_components;
i++) {
193 history->push_back(max_idx);
209 current_components++;
219 for (
int32 i = 0;
i < num_comps;
i++) {
233 if (target_components <= 0 ||
NumGauss() < target_components) {
234 KALDI_ERR <<
"Invalid argument for target number of Gaussians (=" 235 << target_components <<
"), #Gauss = " <<
NumGauss();
237 if (
NumGauss() == target_components) {
238 KALDI_VLOG(2) <<
"No components merged, as target (" << target_components
242 double min_var = 1.0e-10;
243 std::vector<Clusterable*> clusterable_vec;
246 KALDI_WARN <<
"Not using zero-weight Gaussians in clustering.";
255 x_stats.AddVecDivVec(1.0, mean_invvar, inv_var, count);
258 x2_stats.
AddVec2(1.0, x_stats);
259 x_stats.Scale(count);
260 x2_stats.
Scale(count);
264 if (clusterable_vec.size() <= target_components) {
265 KALDI_WARN <<
"Not doing clustering phase since lost too many Gaussians " 266 <<
"due to zero weight. Warning: zero-weight Gaussians are " 271 std::vector<Clusterable*> clusters;
274 &clusters, NULL, cfg);
276 for (
int32 g = 0; g < static_cast<int32>(clusters.size()); g++) {
281 inv_var.CopyFromVec(gc->
x2_stats());
282 inv_var.Scale(1.0 / gc->
count());
285 inv_var.AddVec2(-1.0, mean_invvar);
286 inv_var.InvertElements();
296 if (target_components <= 0 ||
NumGauss() < target_components) {
297 KALDI_ERR <<
"Invalid argument for target number of Gaussians (=" 298 << target_components <<
"), #Gauss = " <<
NumGauss();
300 if (
NumGauss() == target_components) {
301 KALDI_VLOG(2) <<
"No components merged, as target (" << target_components
308 if (target_components == 1) {
316 for (
int32 i = 0;
i < num_comp;
i++) {
317 vars.
Row(
i).AddVec2(1.0, means.
Row(
i));
326 for (
int32 i = 0;
i < num_comp;
i++) {
346 std::vector<bool> discarded_component(num_comp);
348 for (
int32 i = 0;
i < num_comp;
i++) {
349 discarded_component[
i] =
false;
364 for (
int32 i = 0;
i < num_comp;
i++) {
365 vars.
Row(
i).AddVec2(1.0, means.
Row(
i));
370 for (
int32 i = 0;
i < num_comp;
i++) {
375 delta_like(i,
j) = w_sum * merged_logdet
376 - w1 * logdet(i) - w2 * logdet(
j);
381 for (
int32 removed = 0; removed < num_comp - target_components; removed++) {
384 BaseFloat max_delta_like = -std::numeric_limits<BaseFloat>::max();
385 int32 max_i = -1, max_j = -1;
387 if (discarded_component[
i])
continue;
389 if (discarded_component[
j])
continue;
390 if (delta_like(i, j) > max_delta_like) {
391 max_delta_like = delta_like(i, j);
399 KALDI_ASSERT(max_i != max_j && max_i != -1 && max_j != -1);
402 if (history != NULL) {
403 history->push_back(max_i);
404 history->push_back(max_j);
411 means.
Row(max_i).AddVec(w2/w1, means.
Row(max_j));
412 means.
Row(max_i).Scale(w1/w_sum);
414 vars.
Row(max_i).AddVec(w2/w1, vars.
Row(max_j));
415 vars.
Row(max_i).Scale(w1/w_sum);
439 discarded_component[max_j] =
true;
442 for (
int32 j = 0;
j < num_comp;
j++) {
443 if ((
j == max_i) || (discarded_component[
j]))
continue;
448 means.
Row(max_i), means.
Row(j), vars.
Row(max_i), vars.
Row(j));
449 delta_like(max_i, j) = w_sum * merged_logdet - w1 * logdet(max_i)
458 for (
int32 i = 0;
i < num_comp;
i++) {
459 if (discarded_component[
i]) {
483 tmp_mean.
AddVec(w2/w1, f2);
484 tmp_mean.
Scale(w1/w_sum);
486 tmp_var.
AddVec(w2/w1, s2);
487 tmp_var.
Scale(w1/w_sum);
488 tmp_var.
AddVec2(-1.0, tmp_mean);
491 merged_logdet -= 0.5 *
Log(tmp_var(
d));
494 return merged_logdet;
498 int32 comp_id)
const {
500 KALDI_ERR <<
"Must call ComputeGconsts() before computing likelihood";
501 if (static_cast<int32>(data.
Dim()) !=
Dim()) {
502 KALDI_ERR <<
"DiagGmm::ComponentLogLikelihood, dimension " 503 <<
"mismatch " << (data.
Dim()) <<
" vs. "<< (
Dim());
519 KALDI_ERR <<
"Must call ComputeGconsts() before computing likelihood";
524 KALDI_ERR <<
"Invalid answer (overflow or invalid variances/features?)";
532 if (data.
Dim() !=
Dim()) {
533 KALDI_ERR <<
"DiagGmm::LogLikelihoods, dimension " 534 <<
"mismatch " << data.
Dim() <<
" vs. "<<
Dim();
552 KALDI_ERR <<
"DiagGmm::LogLikelihoods, dimension " 553 <<
"mismatch " << data.
NumCols() <<
" vs. "<<
Dim();
567 const std::vector<int32> &indices,
571 data_sq.ApplyPow(2.0);
573 int32 num_indices =
static_cast<int32>(indices.size());
575 if (indices.back() + 1 - indices.front() == num_indices) {
577 int32 start_idx = indices.front();
588 for (
int32 i = 0;
i < num_indices;
i++) {
593 (*loglikes)(
i) = this_loglike;
604 KALDI_ERR <<
"Must call ComputeGconsts() before computing likelihood";
605 if (posterior == NULL)
KALDI_ERR <<
"NULL pointer passed as return argument.";
610 KALDI_ERR <<
"Invalid answer (overflow or invalid variances/features?)";
611 if (posterior->
Dim() != loglikes.
Dim())
620 KALDI_ERR <<
"Attempting to remove the only remaining component.";
626 if (renorm_weights) {
633 bool renorm_weights) {
634 std::vector<int32> gauss(gauss_in);
635 std::sort(gauss.begin(), gauss.end());
638 for (
size_t i = 0;
i < gauss.size();
i++) {
640 for (
size_t j = i + 1;
j < gauss.size();
j++)
654 us.weights_.Scale(1.0 - rho);
655 us.weights_.AddVec(rho, them.
weights_);
656 us.weights_.Scale(1.0 / us.weights_.Sum());
660 us.means_.Scale(1.0 - rho);
661 us.means_.AddMat(rho, them.
means_);
665 us.vars_.Scale(1.0 - rho);
666 us.vars_.AddMat(rho, them.
vars_);
669 us.CopyToDiagGmm(
this);
681 us.weights_.Scale(1.0 - rho);
682 us.weights_.AddVec(rho, them.
weights_);
683 us.weights_.Scale(1.0 / us.weights_.Sum());
687 us.means_.Scale(1.0 - rho);
688 us.means_.AddMat(rho, them.
means_);
693 us.vars_.Scale(1. - rho);
697 us.vars_.Row(
i).AddVec(rho, diag);
701 us.CopyToDiagGmm(
this);
707 KALDI_ERR <<
"Must call ComputeGconsts() before writing the model.";
709 if (!binary) out_stream <<
"\n";
714 WriteToken(out_stream, binary,
"<MEANS_INVVARS>");
719 if (!binary) out_stream <<
"\n";
724 gmm.
Write(os,
false);
733 if (token !=
"<DiagGMMBegin>" && token !=
"<DiagGMM>")
734 KALDI_ERR <<
"Expected <DiagGMM>, got " << token;
736 if (token ==
"<GCONSTS>") {
740 if (token !=
"<WEIGHTS>")
741 KALDI_ERR <<
"DiagGmm::Read, expected <WEIGHTS> or <GCONSTS>, got " 752 if (token !=
"<DiagGMMEnd>" && token !=
"</DiagGMM>")
753 KALDI_ERR <<
"Expected </DiagGMM>, got " << token;
767 std::vector<int32> *output)
const {
774 if (num_gselect < num_gauss) {
777 std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
778 thresh = ptr[num_gauss-num_gselect];
780 thresh = -std::numeric_limits<BaseFloat>::infinity();
782 BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
783 std::vector<std::pair<BaseFloat, int32> > pairs;
784 for (
int32 p = 0; p < num_gauss; p++) {
785 if (loglikes(p) >= thresh) {
786 pairs.push_back(std::make_pair(loglikes(p), p));
789 std::sort(pairs.begin(), pairs.end(),
790 std::greater<std::pair<BaseFloat, int32> >());
792 j < num_gselect && j < static_cast<int32>(pairs.size());
794 output->push_back(pairs[
j].second);
795 tot_loglike =
LogAdd(tot_loglike, pairs[
j].first);
803 std::vector<std::vector<int32> > *output)
const {
807 int32 max_mem = 10000000;
810 if (mem_needed > max_mem) {
813 int32 num_parts = (mem_needed + max_mem - 1) / max_mem;
814 int32 part_frames = (data.
NumRows() + num_parts - 1) / num_parts;
815 double tot_ans = 0.0;
816 std::vector<std::vector<int32> > part_output;
818 output->resize(num_frames);
819 for (
int32 p = 0; p < num_parts; p++) {
820 int32 start_frame = p * part_frames,
821 this_num_frames = std::min(num_frames - start_frame, part_frames);
825 for (
int32 t = 0; t < this_num_frames; t++)
826 (*output)[start_frame + t].swap(part_output[t]);
837 output->resize(num_frames);
839 for (
int32 i = 0;
i < num_frames;
i++) {
843 if (num_gselect < num_gauss) {
846 std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
847 thresh = ptr[num_gauss-num_gselect];
849 thresh = -std::numeric_limits<BaseFloat>::infinity();
851 BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
852 std::vector<std::pair<BaseFloat, int32> > pairs;
853 for (
int32 p = 0; p < num_gauss; p++) {
854 if (loglikes(p) >= thresh) {
855 pairs.push_back(std::make_pair(loglikes(p), p));
858 std::sort(pairs.begin(), pairs.end(),
859 std::greater<std::pair<BaseFloat, int32> >());
860 std::vector<int32> &this_output = (*output)[
i];
862 j < num_gselect && j < static_cast<int32>(pairs.size());
864 this_output.push_back(pairs[
j].second);
865 tot_loglike =
LogAdd(tot_loglike, pairs[
j].first);
877 const std::vector<int32> &preselect,
879 std::vector<int32> *output)
const {
880 static bool warned_size =
false;
881 int32 preselect_sz = preselect.size();
882 int32 this_num_gselect = std::min(num_gselect, preselect_sz);
883 if (preselect_sz <= num_gselect && !warned_size) {
885 KALDI_WARN <<
"Preselect size is less or equal to than final size, " 886 <<
"doing nothing: " << preselect_sz <<
" < " << num_gselect
887 <<
" [won't warn again]";
894 std::nth_element(ptr, ptr+preselect_sz-this_num_gselect,
896 BaseFloat thresh = ptr[preselect_sz-this_num_gselect];
898 BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
901 std::vector<std::pair<BaseFloat, int32> > pairs;
902 for (
int32 p = 0; p < preselect_sz; p++)
903 if (loglikes(p) >= thresh)
904 pairs.push_back(std::make_pair(loglikes(p), preselect[p]));
905 std::sort(pairs.begin(), pairs.end(),
906 std::greater<std::pair<BaseFloat, int32> >());
909 j < this_num_gselect && j < static_cast<int32>(pairs.size());
911 output->push_back(pairs[
j].second);
912 tot_loglike =
LogAdd(tot_loglike, pairs[
j].first);
937 for (
int32 d = 0;
d < inv_var.Dim();
d++) {
939 mean = mean_invvar(
d) / inv_var(
d);
954 x2.ApplyFloor(var_floor);
958 mean.
Row(0).CopyFromVec(x);
960 inv_var.
Row(0).CopyFromVec(x2);
std::ostream & operator<<(std::ostream &os, const MatrixBase< Real > &M)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
void CopyFromDiagGmm(const DiagGmm &diaggmm)
Copies from given DiagGmm.
void Write(std::ostream &out, bool binary) const
write to stream.
void Perturb(float perturb_factor)
Perturbs the component means with a random vector multiplied by the perturb factor.
void Interpolate(BaseFloat rho, const DiagGmm &source, GmmFlagsType flags=kGmmAll)
this = rho x source + (1-rho) x this
const std::vector< SpMatrix< BaseFloat > > & inv_covars() const
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
void SetInvVarsAndMeans(const MatrixBase< Real > &invvars, const MatrixBase< Real > &means)
Use SetInvVarsAndMeans if updating both means and (inverse) variances.
void Write(std::ostream &os, bool binary) const
void Merge(int32 target_components, std::vector< int32 > *history=NULL)
Merge the components and remember the order in which the components were merged (flat list of pairs) ...
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
void Split(int32 target_components, float perturb_factor, std::vector< int32 > *history=NULL)
Split the components and remember the order in which the components were split.
Definition for Gaussian Mixture Model with full covariances in normal mode: where the parameters are ...
Definition for Gaussian Mixture Model with diagonal covariances in normal mode: where the parameters ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
const Matrix< BaseFloat > & means_invvars() const
Definition for Gaussian Mixture Model with full covariances.
BaseFloat ClusterKMeans(const std::vector< Clusterable *> &points, int32 num_clust, std::vector< Clusterable *> *clusters_out, std::vector< int32 > *assignments_out, ClusterKMeansOptions cfg)
ClusterKMeans is a K-means-like clustering algorithm.
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
int32 ComputeGconsts()
Sets the gconsts.
float RandGauss(struct RandomState *state=NULL)
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
uint16 GmmFlagsType
Bitwise OR of the above flags.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real LogSumExp(Real prune=-1.0) const
Returns log(sum(exp())) without exp overflow. If prune > 0.0, ignores terms less than the max - prune...
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
const Vector< BaseFloat > & gconsts() const
Const accessors.
void CopyFromSp(const SpMatrix< Real > &other)
std::vector< SpMatrix< double > > vars_
covariances
void AddVec2(const Real alpha, const VectorBase< Real > &v)
Add vector : *this = *this + alpha * rv^2 [element-wise squaring].
BaseFloat ComponentLogLikelihood(const VectorBase< BaseFloat > &data, int32 comp_id) const
Computes the log-likelihood of a data point given a single Gaussian component.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void CopyDiagFromPacked(const PackedMatrix< Real > &M)
Extracts the diagonal of a packed matrix M; works for Sp or Tp.
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
Vector< double > weights_
weights (not log).
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
void RemoveComponent(int32 gauss, bool renorm_weights)
Removes single component from model.
void Scale(Real alpha)
Multiply each element with a scalar value.
void MulElements(const VectorBase< Real > &v)
Multiply element-by-element by another vector.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
BaseFloat LogLikelihood(const VectorBase< BaseFloat > &data) const
Returns the log-likelihood of a data point (vector) given the GMM.
void RemoveComponents(const std::vector< int32 > &gauss, bool renorm_weights)
Removes multiple components from model; "gauss" must not have dups.
bool valid_gconsts_
Recompute gconsts_ if false.
Matrix< BaseFloat > inv_vars_
Inverted (diagonal) variances.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
Real * Data()
Returns a pointer to the start of the vector's data.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
const Vector< BaseFloat > & weights() const
Matrix< double > vars_
diagonal variance
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
const Vector< BaseFloat > & weights() const
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
void LogLikelihoods(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods.
void MulElements(const MatrixBase< Real > &A)
Element by element multiplication with a given matrix.
BaseFloat GaussianSelectionPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &preselect, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
void MergeKmeans(int32 target_components, ClusterKMeansOptions cfg=ClusterKMeansOptions())
double LogAdd(double x, double y)
void GetMeans(Matrix< Real > *m) const
Accessor for means.
std::istream & operator>>(std::istream &is, Matrix< Real > &M)
void CopyFromFullGmm(const FullGmm &fullgmm)
Copies from given FullGmm.
Matrix< double > means_
Means.
void InvertElements()
Inverts all the elements of the matrix.
void Read(std::istream &in, bool binary)
Vector< BaseFloat > weights_
weights (not log).
void InvertElements()
Invert all elements.
SubVector< double > x2_stats() const
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Vector< BaseFloat > gconsts_
Equals log(weight) - 0.5 * (log det(var) + mean*mean*inv(var))
void ApplyPow(Real power)
Take all elements of vector to a power.
BaseFloat GaussianSelection(const VectorBase< BaseFloat > &data, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition for Gaussian Mixture Model with diagonal covariances.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
void Generate(VectorBase< BaseFloat > *output)
Generates a random data-point from this distribution.
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void RemoveRow(MatrixIndexT i)
Remove a specified row.
DiagGmm()
Empty constructor.
void SetWeights(const VectorBase< Real > &w)
Mutators for both float or double.
Vector< double > weights_
weights (not log).
Provides a vector abstraction class.
GaussClusterable wraps Gaussian statistics in a form accessible to generic clustering algorithms...
void CopyFromNormal(const DiagGmmNormal &diag_gmm_normal)
Copies from DiagGmmNormal; does not resize.
void ApplyPow(Real power)
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
Matrix< double > means_
Means.
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
void Invert(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
SubVector< double > x_stats() const
Sub-matrix representation.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
Returns true if abs(a - b) <= relative_tolerance * (abs(a) + abs(b)).
Matrix< BaseFloat > means_invvars_
Means times inverted variance.
void Set(Real)
Sets all elements to a specific value.
const Matrix< BaseFloat > & inv_vars() const
void CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags=kGmmAll) const
Copies to DiagGmm the requested parameters.
BaseFloat merged_components_logdet(BaseFloat w1, BaseFloat w2, const VectorBase< BaseFloat > &f1, const VectorBase< BaseFloat > &f2, const VectorBase< BaseFloat > &s1, const VectorBase< BaseFloat > &s2) const