34 vector<string> ctx_vec;
37 for (
size_t i = 0;
i < ctx_vec.size();
i++) {
38 vector<string> pair_vec;
41 for (
size_t j = 0;
j < pair_vec.size();
j++) {
42 vector<string> one_pair;
45 "Mal-formed context string: bad --context-expansion option?");
51 KALDI_ERR <<
"Mal-formed context string: bad --context-expansion option?";
52 contexts_[
i].push_back(std::make_pair(pos, weight));
65 double tot_count = 0.0;
75 x2_stats.
Scale(1.0 / tot_count);
76 x_stats.
Scale(1.0 / tot_count);
77 x2_stats.
AddVec2(-1.0, x_stats);
84 KALDI_ERR <<
"Error initializing fMPE object: cholesky of " 85 "feature variance failed. Probably code error, or NaN/inf in model";
104 && feat_out->
NumCols() == dim);
106 for (
int32 i = 0;
i < ncontexts;
i++) {
117 for (
int32 t_out = 0; t_out < T; t_out++) {
118 int32 t_in = t_out + t_offset;
119 if (t_in >= 0 && t_in < T)
120 feat_out->
Row(t_out).AddVec(weight, this_intermed_feat.
Row(t_in));
135 && feat_deriv.
NumCols() == dim);
137 for (
int32 i = 0;
i < ncontexts;
i++) {
149 for (
int32 t_out = 0; t_out < T; t_out++) {
150 int32 t_in = t_out + t_offset;
151 if (t_in >= 0 && t_in < T)
152 this_intermed_feat_deriv.
Row(t_in).AddVec(weight,
153 feat_deriv.
Row(t_out));
164 for (
int32 t = 0; t < T; t++) {
183 const std::vector<std::vector<int32> > &gselect,
199 std::vector<std::pair<std::pair<int32, int32>,
BaseFloat> > all_posts;
208 int32 gauss = gselect[t][
i];
209 all_posts.push_back(std::make_pair(std::make_pair(gauss, t), post(
i)));
212 std::sort(all_posts.begin(), all_posts.end());
214 bool optimize =
true;
221 for (
size_t i = 0;
i < all_posts.size();
i++) {
222 int32 gauss = all_posts[
i].first.first, t = all_posts[
i].first.second;
236 input_chunk.
Range(0, dim).AddVecDivVec(this_post, this_feat, this_stddev,
245 kTrans, input_chunk, 1.0);
251 while (i < all_posts.size()) {
252 int32 gauss = all_posts[
i].first.first;
259 batch_size+i < static_cast<int32>(all_posts.size()) &&
260 all_posts[batch_size+i].first.first == gauss;
264 for (
int32 j = 0;
j < batch_size;
j++) {
267 int32 t = all_posts[i+
j].first.second;
271 this_input_chunk.
Range(0, dim).AddVecVec(-this_post,
274 this_input_chunk.
Range(0, dim).AddVecDivVec(this_post, this_feat,
284 for (
int32 j = 0;
j < batch_size;
j++) {
286 int32 t = all_posts[i+
j].first.second;
290 this_intermed_feat.
AddVec(1.0, this_intermed_temp);
303 const std::vector<std::vector<int32> > &gselect,
318 std::vector<std::pair<std::pair<int32, int32>,
BaseFloat> > all_posts;
329 int32 gauss = gselect[t][
i];
330 all_posts.push_back(std::make_pair(std::make_pair(gauss, t), post(
i)));
333 std::sort(all_posts.begin(), all_posts.end());
334 for (
size_t i = 0;
i < all_posts.size();
i++) {
335 int32 gauss = all_posts[
i].first.first, t = all_posts[
i].first.second;
342 input_chunk.
Range(0, dim).AddVecDivVec(this_post, this_feat, this_stddev,
353 gauss*(dim+1), dim+1,
355 minus_chunk(*proj_deriv_minus,
356 gauss*(dim+1), dim+1,
365 this_intermed_feat_deriv,
366 &plus_chunk, &minus_chunk);
371 const std::vector<std::vector<int32> > &gselect,
396 const std::vector<std::vector<int32> > &gselect,
406 SameDim(feat_in, direct_feat_deriv));
408 if (indirect_feat_deriv != NULL)
409 fmpe_stats->
AccumulateChecks(feat_in, direct_feat_deriv, *indirect_feat_deriv);
412 if (indirect_feat_deriv != NULL)
413 feat_deriv.
AddMat(1.0, *indirect_feat_deriv);
422 &stats_plus, &stats_minus);
431 ReadToken(is, binary, &context_expansion);
483 BaseFloat z = ((p-
n) + x*(p+
n)/learning_rate) / (2*l2_weight + (p+
n)/learning_rate);
486 tot_linear_objf_impr += (z-x) * (p-
n);
488 if (z*x < 0) changed++;
491 KALDI_LOG <<
"Objf impr (assuming linear) is " << tot_linear_objf_impr;
493 <<
"% of matrix elements changed sign.";
494 return tot_linear_objf_impr;
502 KALDI_ERR <<
"Fmpe::Write, object not initialized.";
507 C_.Write(os, binary);
529 KALDI_ASSERT((model_diff != NULL) == (indirect_deriv != NULL));
533 if (indirect_deriv != NULL)
537 for (
size_t i = 0;
i < posterior.size();
i++) {
538 for (
size_t j = 0;
j < posterior[
i].size();
j++) {
539 int32 tid = posterior[
i][
j].first,
549 gauss_posteriors.
Scale(weight);
559 gauss_posteriors, 1.0);
565 gauss_posteriors, 0.0);
572 this_direct_deriv.
AddVecVec(-1.0, this_feat, temp_vec, 1.0);
573 if (model_diff != NULL && weight > 0.0) {
588 gauss_posteriors_dbl, 0.0);
589 this_indirect_deriv.
AddVec(1.0, temp_vec_dbl);
591 gauss_posteriors_dbl, 0.0);
592 temp_vec.CopyFromVec(temp_vec_dbl);
595 this_indirect_deriv.
AddVecVec(2.0, this_feat, temp_vec, 1.0);
605 int32 proj_num_rows = deriv.NumRows(),
606 proj_num_cols = deriv.NumCols()/2;
612 int32 proj_num_rows = deriv.NumRows(),
613 proj_num_cols = deriv.NumCols()/2;
615 proj_num_cols, proj_num_cols);
621 deriv.Resize(num_rows, num_cols*2);
624 checks.Resize(8, feat_dim);
632 indirect_deriv.
NumRows() == T && indirect_deriv.
NumCols() == dim);
633 KALDI_ASSERT(checks.NumRows() == 8 && checks.NumCols() == dim);
634 for (
int32 t = 0; t < T; t++) {
637 checks(0,
d) += std::max(zero, direct_deriv(t,
d));
638 checks(1,
d) += std::max(zero, -direct_deriv(t,
d));
639 checks(2,
d) += std::max(zero, indirect_deriv(t,
d));
640 checks(3,
d) += std::max(zero, -indirect_deriv(t,
d));
641 checks(4,
d) += std::max(zero, feats(t,
d)*direct_deriv(t,
d));
642 checks(5,
d) += std::max(zero, -feats(t,
d)*direct_deriv(t,
d));
643 checks(6,
d) += std::max(zero, feats(t,
d)*indirect_deriv(t,
d));
644 checks(7,
d) += std::max(zero, -feats(t,
d)*indirect_deriv(t,
d));
650 if (checks.IsZero()) {
651 KALDI_LOG <<
"No checks will be done, probably indirect derivative was not used.";
654 int32 dim = checks.NumCols();
655 Vector<double> shift_check(dim), shift_check2(dim), scale_check(dim), scale_check2(dim);
658 double shift_num = checks(0,
d) - checks(1,
d) + checks(2,
d) - checks(3,
d),
659 shift_den = checks(0,
d) + checks(1,
d) + checks(2,
d) + checks(3,
d),
660 shift_den2 = fabs(checks(0,
d) - checks(1,
d)) + fabs(checks(2,
d) - checks(3,
d));
661 shift_check(
d) = shift_num / shift_den;
662 shift_check2(
d) = shift_num / shift_den2;
663 double scale_num = checks(4,
d) - checks(5,
d) + checks(6,
d) - checks(7,
d),
664 scale_den = checks(4,
d) + checks(5,
d) + checks(6,
d) + checks(7,
d),
665 scale_den2 = fabs(checks(4,
d) - checks(5,
d)) + fabs(checks(6,
d) - checks(7,
d));
666 scale_check(
d) = scale_num / scale_den;
667 scale_check2(
d) = scale_num / scale_den2;
670 KALDI_LOG <<
"Shift-check is as follows (should be in range +- 0.01 or less)." 672 KALDI_LOG <<
"Scale-check is as follows (should be in range +- 0.01 or less)." 674 KALDI_LOG <<
"Shift-check(2) is as follows: most elements should be in range +-0.1: " 676 KALDI_LOG <<
"Scale-check(2) is as follows: most elements should be in range +-0.1: " 681 deriv.Write(os, binary);
682 checks.Write(os, binary);
686 deriv.Read(is, binary, add);
687 checks.Read(is, binary, add);
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Real Min() const
Returns minimum element of matrix.
void Write(std::ostream &out, bool binary) const
write to stream.
void Write(std::ostream &os, bool binary) const
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
void ComputeFeatures(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, Matrix< BaseFloat > *feat_out) const
Definition for Gaussian Mixture Model with diagonal covariances in normal mode: where the parameters ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
const Matrix< BaseFloat > & means_invvars() const
BaseFloat Update(const FmpeUpdateOptions &config, const FmpeStats &stats)
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
void ApplyC(MatrixBase< BaseFloat > *feat_out, bool reverse=false) const
void ApplyContext(const MatrixBase< BaseFloat > &intermed_feat, MatrixBase< BaseFloat > *feat_out) const
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
void ApplyProjection(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, MatrixBase< BaseFloat > *intermed_feat) const
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
BaseFloat ComputeAmGmmFeatureDeriv(const AmDiagGmm &am_gmm, const TransitionModel &trans_model, const Posterior &posterior, const MatrixBase< BaseFloat > &features, Matrix< BaseFloat > *direct_deriv, const AccumAmDiagGmm *model_diff, Matrix< BaseFloat > *indirect_deriv)
Computes derivatives of the likelihood of these states (weighted), w.r.t.
Matrix< BaseFloat > projT_
void ApplyContextReverse(const MatrixBase< BaseFloat > &feat_deriv, MatrixBase< BaseFloat > *intermed_feat_deriv) const
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
int32 TransitionIdToPdf(int32 trans_id) const
std::string context_expansion
std::vector< std::vector< std::pair< int32, BaseFloat > > > contexts_
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
void ApplyProjectionReverse(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, const MatrixBase< BaseFloat > &intermed_feat_deriv, MatrixBase< BaseFloat > *proj_deriv_plus, MatrixBase< BaseFloat > *proj_deriv_minus) const
void AddVecVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element product of vectors:
void Init(const Fmpe &fmpe)
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v'
void Read(std::istream &is, bool binary)
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void Cholesky(const SpMatrix< Real > &orig)
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
void SetContexts(std::string context_str)
int32 ProjectionTNumCols() const
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const MatrixBase< double > & variance_accumulator() const
const MatrixBase< double > & mean_accumulator() const
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
SubMatrix< BaseFloat > DerivMinus() const
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
void Read(std::istream &is, bool binary)
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
void AddOuterProductPlusMinus(Real alpha, const VectorBase< Real > &a, const VectorBase< Real > &b, MatrixBase< Real > *plus, MatrixBase< Real > *minus)
Matrix< double > vars_
diagonal variance
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
int32 NumContexts() const
Matrix< double > means_
Means.
DiagGmm & GetPdf(int32 pdf_index)
Accessors.
void Read(std::istream &in, bool binary)
void Write(std::ostream &os, bool binary) const
const AccumDiagGmm & GetAcc(int32 index) const
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void AccumulateChecks(const MatrixBase< BaseFloat > &feats, const MatrixBase< BaseFloat > &direct_deriv, const MatrixBase< BaseFloat > &indirect_deriv)
If we're using the indirect differential, accumulates certain quantities that will be used in the upd...
Definition for Gaussian Mixture Model with diagonal covariances.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Matrix< BaseFloat > stddevs_
Vector< double > weights_
weights (not log).
int32 ProjectionTNumRows() const
void ApplyPow(Real power)
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
void Read(std::istream &is, bool binary, bool add=false)
void ApplyCReverse(MatrixBase< BaseFloat > *deriv) const
Sub-matrix representation.
void AddDiagVec(const Real alpha, const VectorBase< OtherReal > &v)
diagonal update, this <– this + diag(v)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
void AccStats(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, const MatrixBase< BaseFloat > &direct_feat_deriv, const MatrixBase< BaseFloat > *indirect_feat_deriv, FmpeStats *stats) const
SubMatrix< BaseFloat > DerivPlus() const
void Write(std::ostream &os, bool binary) const
const Matrix< BaseFloat > & inv_vars() const
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
void Write(std::ostream &os, bool binary) const