Fmpe Class Reference

#include <fmpe.h>

Collaboration diagram for Fmpe:

Public Member Functions

 Fmpe ()
 
 Fmpe (const DiagGmm &gmm, const FmpeOptions &config)
 
int32 FeatDim () const
 
int32 NumGauss () const
 
int32 NumContexts () const
 
int32 ProjectionTNumRows () const
 
int32 ProjectionTNumCols () const
 
void ComputeFeatures (const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, Matrix< BaseFloat > *feat_out) const
 
void AccStats (const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, const MatrixBase< BaseFloat > &direct_feat_deriv, const MatrixBase< BaseFloat > *indirect_feat_deriv, FmpeStats *stats) const
 
void Write (std::ostream &os, bool binary) const
 
void Read (std::istream &is, bool binary)
 
BaseFloat Update (const FmpeUpdateOptions &config, const FmpeStats &stats)
 

Private Member Functions

void SetContexts (std::string context_str)
 
void ComputeC ()
 
void ComputeStddevs ()
 
void ApplyProjection (const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, MatrixBase< BaseFloat > *intermed_feat) const
 
void ApplyProjectionReverse (const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, const MatrixBase< BaseFloat > &intermed_feat_deriv, MatrixBase< BaseFloat > *proj_deriv_plus, MatrixBase< BaseFloat > *proj_deriv_minus) const
 
void ApplyContext (const MatrixBase< BaseFloat > &intermed_feat, MatrixBase< BaseFloat > *feat_out) const
 
void ApplyContextReverse (const MatrixBase< BaseFloat > &feat_deriv, MatrixBase< BaseFloat > *intermed_feat_deriv) const
 
void ApplyC (MatrixBase< BaseFloat > *feat_out, bool reverse=false) const
 
void ApplyCReverse (MatrixBase< BaseFloat > *deriv) const
 

Private Attributes

DiagGmm gmm_
 
FmpeOptions config_
 
Matrix< BaseFloat > stddevs_
 
Matrix< BaseFloat > projT_
 
TpMatrix< BaseFloat > C_
 
std::vector< std::vector< std::pair< int32, BaseFloat > > > contexts_
 

Detailed Description

Definition at line 138 of file fmpe.h.

Constructor & Destructor Documentation

◆ Fmpe() [1/2]

Fmpe ( )
inline

Definition at line 140 of file fmpe.h.

140 {}

◆ Fmpe() [2/2]

Fmpe ( const DiagGmm &  gmm,
const FmpeOptions &  config 
)

Definition at line 435 of file fmpe.cc.

References Fmpe::ComputeC(), Fmpe::ComputeStddevs(), FmpeOptions::context_expansion, Fmpe::FeatDim(), Fmpe::NumContexts(), Fmpe::NumGauss(), Fmpe::projT_, Matrix< Real >::Resize(), and Fmpe::SetContexts().

435  : gmm_(gmm),
436  config_(config) {
437  SetContexts(config.context_expansion);
438  ComputeC();
439  ComputeStddevs();
440  projT_.Resize(NumGauss() * (FeatDim()+1), FeatDim() * NumContexts());
441 }
int32 NumGauss() const
Definition: fmpe.h:144
Matrix< BaseFloat > projT_
Definition: fmpe.h:228
void ComputeStddevs()
Definition: fmpe.cc:89
void SetContexts(std::string context_str)
Definition: fmpe.cc:29
int32 FeatDim() const
Definition: fmpe.h:143
FmpeOptions config_
Definition: fmpe.h:223
void ComputeC()
Definition: fmpe.cc:57
int32 NumContexts() const
Definition: fmpe.h:145
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
DiagGmm gmm_
Definition: fmpe.h:222

Member Function Documentation

◆ AccStats()

void AccStats ( const MatrixBase< BaseFloat > &  feat_in,
const std::vector< std::vector< int32 > > &  gselect,
const MatrixBase< BaseFloat > &  direct_feat_deriv,
const MatrixBase< BaseFloat > *  indirect_feat_deriv,
FmpeStats *  stats 
) const

Definition at line 395 of file fmpe.cc.

References FmpeStats::AccumulateChecks(), MatrixBase< Real >::AddMat(), Fmpe::ApplyContextReverse(), Fmpe::ApplyCReverse(), Fmpe::ApplyProjectionReverse(), FmpeStats::DerivMinus(), FmpeStats::DerivPlus(), Fmpe::FeatDim(), KALDI_ASSERT, MatrixBase< Real >::NumCols(), Fmpe::NumContexts(), MatrixBase< Real >::NumRows(), Fmpe::projT_, and kaldi::SameDim().

Referenced by main(), and kaldi::TestFmpe().

399  {
400  SubMatrix<BaseFloat> stats_plus(fmpe_stats->DerivPlus());
401  SubMatrix<BaseFloat> stats_minus(fmpe_stats->DerivMinus());
402  int32 dim = FeatDim(), ncontexts = NumContexts();
403  KALDI_ASSERT(feat_in.NumRows() != 0 && feat_in.NumCols() == dim);
404  KALDI_ASSERT(feat_in.NumRows() == static_cast<int32>(gselect.size()));
405  KALDI_ASSERT(SameDim(stats_plus, projT_) && SameDim(stats_minus, projT_) &&
406  SameDim(feat_in, direct_feat_deriv));
407 
408  if (indirect_feat_deriv != NULL)
409  fmpe_stats->AccumulateChecks(feat_in, direct_feat_deriv, *indirect_feat_deriv);
410 
411  Matrix<BaseFloat> feat_deriv(direct_feat_deriv); // "feat_deriv" is initially direct+indirect.
412  if (indirect_feat_deriv != NULL)
413  feat_deriv.AddMat(1.0, *indirect_feat_deriv);
414 
415  // We do the "*Reverse" version of each stage now, in reverse order.
416  ApplyCReverse(&feat_deriv);
417 
418  Matrix<BaseFloat> intermed_feat_deriv(feat_in.NumRows(), dim*ncontexts);
419  ApplyContextReverse(feat_deriv, &intermed_feat_deriv);
420 
421  ApplyProjectionReverse(feat_in, gselect, intermed_feat_deriv,
422  &stats_plus, &stats_minus);
423 }
kaldi::int32 int32
Matrix< BaseFloat > projT_
Definition: fmpe.h:228
void ApplyContextReverse(const MatrixBase< BaseFloat > &feat_deriv, MatrixBase< BaseFloat > *intermed_feat_deriv) const
Definition: fmpe.cc:126
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
void ApplyProjectionReverse(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, const MatrixBase< BaseFloat > &intermed_feat_deriv, MatrixBase< BaseFloat > *proj_deriv_plus, MatrixBase< BaseFloat > *proj_deriv_minus) const
Definition: fmpe.cc:302
int32 FeatDim() const
Definition: fmpe.h:143
int32 NumContexts() const
Definition: fmpe.h:145
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void ApplyCReverse(MatrixBase< BaseFloat > *deriv) const
Definition: fmpe.h:218

◆ ApplyC()

void ApplyC ( MatrixBase< BaseFloat > *  feat_out,
bool  reverse = false 
) const
private

Definition at line 161 of file fmpe.cc.

References Fmpe::C_, VectorBase< Real >::CopyFromVec(), kaldi::kNoTrans, kaldi::kTrans, MatrixBase< Real >::NumCols(), and MatrixBase< Real >::NumRows().

Referenced by Fmpe::ComputeFeatures().

161  {
162  int32 T = feat_out->NumRows();
163  Vector<BaseFloat> tmp(feat_out->NumCols());
164  for (int32 t = 0; t < T; t++) {
165  SubVector<BaseFloat> row(*feat_out, t);
166  // Next line does: tmp = C_ * row
167  tmp.AddTpVec(1.0, C_, (reverse ? kTrans : kNoTrans), row, 0.0);
168  row.CopyFromVec(tmp);
169  }
170 }
kaldi::int32 int32
TpMatrix< BaseFloat > C_
Definition: fmpe.h:232

◆ ApplyContext()

void ApplyContext ( const MatrixBase< BaseFloat > &  intermed_feat,
MatrixBase< BaseFloat > *  feat_out 
) const
private

Definition at line 97 of file fmpe.cc.

References Fmpe::contexts_, Fmpe::FeatDim(), rnnlm::i, rnnlm::j, KALDI_ASSERT, MatrixBase< Real >::NumCols(), Fmpe::NumContexts(), MatrixBase< Real >::NumRows(), and MatrixBase< Real >::Row().

Referenced by Fmpe::ComputeFeatures().

98  {
99  // Applies the temporal-context part of the transformation.
100  int32 dim = FeatDim(), ncontexts = NumContexts(),
101  T = intermed_feat.NumRows();
102  KALDI_ASSERT(intermed_feat.NumCols() == dim * ncontexts &&
103  intermed_feat.NumRows() == feat_out->NumRows()
104  && feat_out->NumCols() == dim);
105  // note: ncontexts == contexts_.size().
106  for (int32 i = 0; i < ncontexts; i++) {
107  // this_intermed_feat is the chunk of the "intermediate features"
108  // that corresponds to this "context"
109  SubMatrix<BaseFloat> this_intermed_feat(intermed_feat, 0, T,
110  dim*i, dim);
111  for (int32 j = 0; j < static_cast<int32>(contexts_[i].size()); j++) {
112  int32 t_offset = contexts_[i][j].first;
113  BaseFloat weight = contexts_[i][j].second;
114  // Note: we could do this more efficiently using matrix operations,
115  // but this doesn't dominate the computation and I think this is
116  // clearer.
117  for (int32 t_out = 0; t_out < T; t_out++) { // t_out indexes the output
118  int32 t_in = t_out + t_offset; // t_in indexes the input.
119  if (t_in >= 0 && t_in < T) // Discard frames outside range.
120  feat_out->Row(t_out).AddVec(weight, this_intermed_feat.Row(t_in));
121  }
122  }
123  }
124 }
kaldi::int32 int32
std::vector< std::vector< std::pair< int32, BaseFloat > > > contexts_
Definition: fmpe.h:242
float BaseFloat
Definition: kaldi-types.h:29
int32 FeatDim() const
Definition: fmpe.h:143
int32 NumContexts() const
Definition: fmpe.h:145
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ ApplyContextReverse()

void ApplyContextReverse ( const MatrixBase< BaseFloat > &  feat_deriv,
MatrixBase< BaseFloat > *  intermed_feat_deriv 
) const
private

Definition at line 126 of file fmpe.cc.

References Fmpe::contexts_, Fmpe::FeatDim(), rnnlm::i, rnnlm::j, KALDI_ASSERT, MatrixBase< Real >::NumCols(), Fmpe::NumContexts(), MatrixBase< Real >::NumRows(), and MatrixBase< Real >::Row().

Referenced by Fmpe::AccStats().

128  {
129  // Applies the temporal-context part of the transformation,
130  // in reverse, for getting derivatives for training.
131  int32 dim = FeatDim(), ncontexts = NumContexts(),
132  T = feat_deriv.NumRows();
133  KALDI_ASSERT(intermed_feat_deriv->NumCols() == dim * ncontexts &&
134  intermed_feat_deriv->NumRows() == feat_deriv.NumRows()
135  && feat_deriv.NumCols() == dim);
136  // note: ncontexts == contexts_.size().
137  for (int32 i = 0; i < ncontexts; i++) {
138  // this_intermed_feat is the chunk of the derivative of
139  // "intermediate features" that corresponds to this "context"
140  // (this is output, in this routine).
141  SubMatrix<BaseFloat> this_intermed_feat_deriv(*intermed_feat_deriv, 0, T,
142  dim*i, dim);
143  for (int32 j = 0; j < static_cast<int32>(contexts_[i].size()); j++) {
144  int32 t_offset = contexts_[i][j].first;
145  BaseFloat weight = contexts_[i][j].second;
146  // Note: we could do this more efficiently using matrix operations,
147  // but this doesn't dominate the computation and I think this is
148  // clearer.
149  for (int32 t_out = 0; t_out < T; t_out++) { // t_out indexes the output
150  int32 t_in = t_out + t_offset; // t_in indexes the input.
151  if (t_in >= 0 && t_in < T) // Discard frames outside range.
152  this_intermed_feat_deriv.Row(t_in).AddVec(weight,
153  feat_deriv.Row(t_out));
154  // Note: the line above is where the work happens; it's the same
155  // as in ApplyContext except reversing the input and output.
156  }
157  }
158  }
159 }
kaldi::int32 int32
std::vector< std::vector< std::pair< int32, BaseFloat > > > contexts_
Definition: fmpe.h:242
float BaseFloat
Definition: kaldi-types.h:29
int32 FeatDim() const
Definition: fmpe.h:143
int32 NumContexts() const
Definition: fmpe.h:145
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ ApplyCReverse()

void ApplyCReverse ( MatrixBase< BaseFloat > *  deriv) const
inline private

Definition at line 218 of file fmpe.h.

Referenced by Fmpe::AccStats().

218 { ApplyC(deriv, true); }
void ApplyC(MatrixBase< BaseFloat > *feat_out, bool reverse=false) const
Definition: fmpe.cc:161

◆ ApplyProjection()

void ApplyProjection ( const MatrixBase< BaseFloat > &  feat_in,
const std::vector< std::vector< int32 > > &  gselect,
MatrixBase< BaseFloat > *  intermed_feat 
) const
private

Definition at line 182 of file fmpe.cc.

References MatrixBase< Real >::AddMatMat(), VectorBase< Real >::AddMatVec(), VectorBase< Real >::AddVec(), VectorBase< Real >::ApplySoftMax(), Fmpe::config_, VectorBase< Real >::Dim(), Fmpe::FeatDim(), Fmpe::gmm_, rnnlm::i, rnnlm::j, kaldi::kNoTrans, kaldi::kTrans, DiagGmm::LogLikelihoodsPreselect(), DiagGmm::means_invvars(), Fmpe::NumContexts(), MatrixBase< Real >::NumRows(), FmpeOptions::post_scale, Fmpe::projT_, VectorBase< Real >::Range(), MatrixBase< Real >::Range(), MatrixBase< Real >::Row(), and Fmpe::stddevs_.

Referenced by Fmpe::ComputeFeatures().

184  {
185  int32 dim = FeatDim(), ncontexts = NumContexts();
186 
187  Vector<BaseFloat> post; // will be posteriors of selected Gaussians.
188  Vector<BaseFloat> input_chunk(dim+1); // will be a segment of
189  // the high-dimensional features.
190 
191  // "all_posts" is a vector of ((gauss-index, time-index), gaussian
192  // posterior).
193  // We'll compute the posterior information, sort it, and then
194  // go through it in sorted order, which maintains memory locality
195  // when accessing the projection matrix.
196  // Note: if we really cared we could make this use level-3 BLAS
197  // (matrix-matrix multiply), but we'd need to have a temporary
198  // matrix for the output and input.
199  std::vector<std::pair<std::pair<int32, int32>, BaseFloat> > all_posts;
200 
201  for (int32 t = 0; t < feat_in.NumRows(); t++) {
202  SubVector<BaseFloat> this_feat(feat_in, t);
203  gmm_.LogLikelihoodsPreselect(this_feat, gselect[t], &post);
204  // At this point, post will contain log-likes of the selected
205  // Gaussians.
206  post.ApplySoftMax(); // Now they are posteriors (which sum to one).
207  for (int32 i = 0; i < post.Dim(); i++) {
208  int32 gauss = gselect[t][i];
209  all_posts.push_back(std::make_pair(std::make_pair(gauss, t), post(i)));
210  }
211  }
212  std::sort(all_posts.begin(), all_posts.end());
213 
214  bool optimize = true;
215 
216  if (!optimize) { // Why do we keep this un-optimized code around?
217  // For clarity, so you can see what's going on, and for easier
218  // comparison with ApplyProjectionReverse which is similar to this
219  // un-optimized segment. Both un-optimized and optimized versions
220  // should give identical transforms (up to tiny roundoff differences).
221  for (size_t i = 0; i < all_posts.size(); i++) {
222  int32 gauss = all_posts[i].first.first, t = all_posts[i].first.second;
223  SubVector<BaseFloat> this_feat(feat_in, t);
224  SubVector<BaseFloat> this_intermed_feat(*intermed_feat, t);
225  BaseFloat this_post = all_posts[i].second;
226  SubVector<BaseFloat> this_stddev(stddevs_, gauss);
227 
228  // The next line is equivalent to setting input_chunk to
229  // -this_post * the gaussian mean / (gaussian stddev). Note: we use
230  // the fact that mean * inv_var * stddev == mean / stddev.
231  input_chunk.Range(0, dim).AddVecVec(-this_post, gmm_.means_invvars().Row(gauss),
232  this_stddev, 0.0);
233  // The next line is equivalent to adding (feat / gaussian stddev) to
234  // input_chunk, so now it contains (feat - mean) / stddev, which is
235  // our "normalized" feature offset.
236  input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat, this_stddev,
237  1.0);
238  // The last element of this input_chunk is the posterior itself
239  // (between 0 and 1).
240  input_chunk(dim) = this_post * config_.post_scale;
241 
242  // this_intermed_feat += [appropriate chunk of projT_] * input_chunk.
243  this_intermed_feat.AddMatVec(1.0, projT_.Range(gauss*(dim+1), dim+1,
244  0, dim*ncontexts),
245  kTrans, input_chunk, 1.0);
246  }
247  } else {
248  size_t i = 0;
249  // We process the "posts" vector in chunks, where each chunk corresponds to
250  // the same Gaussian index (but different times).
251  while (i < all_posts.size()) {
252  int32 gauss = all_posts[i].first.first;
253  SubVector<BaseFloat> this_stddev(stddevs_, gauss),
254  this_mean_invvar(gmm_.means_invvars(), gauss);
255  SubMatrix<BaseFloat> this_projT_chunk(projT_, gauss*(dim+1), dim+1,
256  0, dim*ncontexts);
257  int32 batch_size; // number of posteriors with same Gaussian..
258  for (batch_size = 0;
259  batch_size+i < static_cast<int32>(all_posts.size()) &&
260  all_posts[batch_size+i].first.first == gauss;
261  batch_size++); // empty loop body.
262  Matrix<BaseFloat> input_chunks(batch_size, dim+1);
263  Matrix<BaseFloat> intermed_temp(batch_size, dim*ncontexts);
264  for (int32 j = 0; j < batch_size; j++) { // set up "input_chunks".
265  // To understand this code, first examine code and comments in "non-optimized"
266  // code chunk above (the other branch of the if/else statement).
267  int32 t = all_posts[i+j].first.second;
268  SubVector<BaseFloat> this_feat(feat_in, t);
269  SubVector<BaseFloat> this_input_chunk(input_chunks, j);
270  BaseFloat this_post = all_posts[i+j].second;
271  this_input_chunk.Range(0, dim).AddVecVec(-this_post,
272  this_mean_invvar,
273  this_stddev, 0.0);
274  this_input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat,
275  this_stddev, 1.0);
276  this_input_chunk(dim) = this_post * config_.post_scale;
277  }
278  // The next line is where most of the computation will happen,
279  // during the feature computation phase. We have rearranged
280  // stuff so it's a matrix-matrix operation, for greater
281  // efficiency (when using optimized libraries like ATLAS).
282  intermed_temp.AddMatMat(1.0, input_chunks, kNoTrans,
283  this_projT_chunk, kNoTrans, 0.0);
284  for (int32 j = 0; j < batch_size; j++) { // add data from
285  // intermed_temp to the output "intermed_feat"
286  int32 t = all_posts[i+j].first.second;
287  SubVector<BaseFloat> this_intermed_feat(*intermed_feat, t);
288  SubVector<BaseFloat> this_intermed_temp(intermed_temp, j);
289  // this_intermed_feat += this_intermed_temp.
290  this_intermed_feat.AddVec(1.0, this_intermed_temp);
291  }
292  i += batch_size;
293  }
294  }
295 }
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition: diag-gmm.cc:566
const Matrix< BaseFloat > & means_invvars() const
Definition: diag-gmm.h:179
kaldi::int32 int32
Matrix< BaseFloat > projT_
Definition: fmpe.h:228
float BaseFloat
Definition: kaldi-types.h:29
int32 FeatDim() const
Definition: fmpe.h:143
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
FmpeOptions config_
Definition: fmpe.h:223
int32 NumContexts() const
Definition: fmpe.h:145
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
Matrix< BaseFloat > stddevs_
Definition: fmpe.h:224
BaseFloat post_scale
Definition: fmpe.h:67
DiagGmm gmm_
Definition: fmpe.h:222

◆ ApplyProjectionReverse()

void ApplyProjectionReverse ( const MatrixBase< BaseFloat > &  feat_in,
const std::vector< std::vector< int32 > > &  gselect,
const MatrixBase< BaseFloat > &  intermed_feat_deriv,
MatrixBase< BaseFloat > *  proj_deriv_plus,
MatrixBase< BaseFloat > *  proj_deriv_minus 
) const
private

Definition at line 302 of file fmpe.cc.

References kaldi::AddOuterProductPlusMinus(), VectorBase< Real >::ApplySoftMax(), Fmpe::config_, VectorBase< Real >::Dim(), Fmpe::FeatDim(), Fmpe::gmm_, rnnlm::i, DiagGmm::LogLikelihoodsPreselect(), DiagGmm::means_invvars(), Fmpe::NumContexts(), MatrixBase< Real >::NumRows(), FmpeOptions::post_scale, VectorBase< Real >::Range(), MatrixBase< Real >::Row(), and Fmpe::stddevs_.

Referenced by Fmpe::AccStats().

306  {
307  int32 dim = FeatDim(), ncontexts = NumContexts();
308 
309  Vector<BaseFloat> post; // will be posteriors of selected Gaussians.
310  Vector<BaseFloat> input_chunk(dim+1); // will be a segment of
311  // the high-dimensional features.
312 
313  // "all_posts" is a vector of ((gauss-index, time-index), gaussian
314  // posterior).
315  // We'll compute the posterior information, sort it, and then
316  // go through it in sorted order, which maintains memory locality
317  // when accessing the projection matrix.
318  std::vector<std::pair<std::pair<int32, int32>, BaseFloat> > all_posts;
319 
320  for (int32 t = 0; t < feat_in.NumRows(); t++) {
321  SubVector<BaseFloat> this_feat(feat_in, t);
322  gmm_.LogLikelihoodsPreselect(this_feat, gselect[t], &post);
323  // At this point, post will contain log-likes of the selected
324  // Gaussians.
325  post.ApplySoftMax(); // Now they are posteriors (which sum to one).
326  for (int32 i = 0; i < post.Dim(); i++) {
327  // The next few lines (where we set up "input_chunk") are identical
328  // to ApplyProjection.
329  int32 gauss = gselect[t][i];
330  all_posts.push_back(std::make_pair(std::make_pair(gauss, t), post(i)));
331  }
332  }
333  std::sort(all_posts.begin(), all_posts.end());
334  for (size_t i = 0; i < all_posts.size(); i++) {
335  int32 gauss = all_posts[i].first.first, t = all_posts[i].first.second;
336  BaseFloat this_post = all_posts[i].second;
337  SubVector<BaseFloat> this_feat(feat_in, t);
338  SubVector<BaseFloat> this_intermed_feat_deriv(intermed_feat_deriv, t);
339  SubVector<BaseFloat> this_stddev(stddevs_, gauss);
340  input_chunk.Range(0, dim).AddVecVec(-this_post, gmm_.means_invvars().Row(gauss),
341  this_stddev, 0.0);
342  input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat, this_stddev,
343  1.0);
344  input_chunk(dim) = this_post * config_.post_scale;
345 
346  // If not for accumulating the + and - parts separately, we would be
347  // doing something like:
348  // proj_deriv_.Range(0, dim*ncontexts, gauss*(dim+1), dim+1).AddVecVec(
349  // 1.0, this_intermed_feat_deriv, input_chunk);
350 
351 
352  SubMatrix<BaseFloat> plus_chunk(*proj_deriv_plus,
353  gauss*(dim+1), dim+1,
354  0, dim*ncontexts),
355  minus_chunk(*proj_deriv_minus,
356  gauss*(dim+1), dim+1,
357  0, dim*ncontexts);
358 
359  // This next function takes the rank-one matrix
360  // (input_chunk * this_intermed_deriv'), and adds the positive
361  // part to proj_deriv_plus, and minus the negative part to
362  // proj_deriv_minus.
363  AddOuterProductPlusMinus(static_cast<BaseFloat>(1.0),
364  input_chunk,
365  this_intermed_feat_deriv,
366  &plus_chunk, &minus_chunk);
367  }
368 }
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition: diag-gmm.cc:566
const Matrix< BaseFloat > & means_invvars() const
Definition: diag-gmm.h:179
kaldi::int32 int32
float BaseFloat
Definition: kaldi-types.h:29
int32 FeatDim() const
Definition: fmpe.h:143
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void AddOuterProductPlusMinus(Real alpha, const VectorBase< Real > &a, const VectorBase< Real > &b, MatrixBase< Real > *plus, MatrixBase< Real > *minus)
FmpeOptions config_
Definition: fmpe.h:223
int32 NumContexts() const
Definition: fmpe.h:145
Matrix< BaseFloat > stddevs_
Definition: fmpe.h:224
BaseFloat post_scale
Definition: fmpe.h:67
DiagGmm gmm_
Definition: fmpe.h:222

◆ ComputeC()

void ComputeC ( )
private

Definition at line 57 of file fmpe.cc.

References SpMatrix< Real >::AddDiagVec(), VectorBase< Real >::AddVec(), SpMatrix< Real >::AddVec2(), Fmpe::C_, TpMatrix< Real >::Cholesky(), DiagGmm::Dim(), Fmpe::gmm_, KALDI_ASSERT, KALDI_ERR, DiagGmmNormal::means_, DiagGmmNormal::NumGauss(), DiagGmm::NumGauss(), MatrixBase< Real >::Row(), PackedMatrix< Real >::Scale(), VectorBase< Real >::Scale(), DiagGmmNormal::vars_, and DiagGmmNormal::weights_.

Referenced by Fmpe::Fmpe().

57  {
58  KALDI_ASSERT(gmm_.NumGauss() != 0.0);
59  int32 dim = gmm_.Dim();
60 
61  // Getting stats from the GMM... assume the model is
62  // correct.
63  SpMatrix<double> x2_stats(dim);
64  Vector<double> x_stats(dim);
65  double tot_count = 0.0;
66  DiagGmmNormal ngmm(gmm_);
67  for (int32 pdf = 0; pdf < ngmm.NumGauss(); pdf++) {
68  x2_stats.AddVec2(ngmm.weights_(pdf), ngmm.means_.Row(pdf));
69  x2_stats.AddDiagVec(ngmm.weights_(pdf), ngmm.vars_.Row(pdf)); // add diagonal
70  // covar to diagonal elements of x2_stats.
71  x_stats.AddVec(ngmm.weights_(pdf), ngmm.means_.Row(pdf));
72  tot_count += ngmm.weights_(pdf);
73  }
74  KALDI_ASSERT(tot_count != 0.0);
75  x2_stats.Scale(1.0 / tot_count);
76  x_stats.Scale(1.0 / tot_count);
77  x2_stats.AddVec2(-1.0, x_stats); // subtract outer product of mean,
78  // to get centered covariance.
79  C_.Resize(dim);
80  try {
81  TpMatrix<double> Ctmp(dim); Ctmp.Cholesky(x2_stats);
82  C_.CopyFromTp(Ctmp);
83  } catch (...) {
84  KALDI_ERR << "Error initializing fMPE object: cholesky of "
85  "feature variance failed. Probably code error, or NaN/inf in model";
86  }
87 }
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74
kaldi::int32 int32
#define KALDI_ERR
Definition: kaldi-error.h:147
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
TpMatrix< BaseFloat > C_
Definition: fmpe.h:232
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
DiagGmm gmm_
Definition: fmpe.h:222

◆ ComputeFeatures()

void ComputeFeatures ( const MatrixBase< BaseFloat > &  feat_in,
const std::vector< std::vector< int32 > > &  gselect,
Matrix< BaseFloat > *  feat_out 
) const

Definition at line 370 of file fmpe.cc.

References Fmpe::ApplyC(), Fmpe::ApplyContext(), Fmpe::ApplyProjection(), Fmpe::FeatDim(), KALDI_ASSERT, MatrixBase< Real >::NumCols(), Fmpe::NumContexts(), MatrixBase< Real >::NumRows(), and Matrix< Real >::Resize().

Referenced by main(), and kaldi::TestFmpe().

372  {
373  int32 dim = FeatDim();
374  KALDI_ASSERT(feat_in.NumRows() != 0 && feat_in.NumCols() == dim);
375  KALDI_ASSERT(feat_in.NumRows() == static_cast<int32>(gselect.size()));
376  feat_out->Resize(feat_in.NumRows(), feat_in.NumCols()); // will zero it.
377 
378  // Intermediate-dimension features
379  Matrix<BaseFloat> intermed_feat(feat_in.NumRows(),
380  dim * NumContexts());
381 
382  // Apply the main projection, from high-dim to intermediate
383  // dimension (dim * NumContexts()).
384  ApplyProjection(feat_in, gselect, &intermed_feat);
385 
386  // Apply the temporal context and reduces from
387  // dimension dim*ncontexts to dim.
388  ApplyContext(intermed_feat, feat_out);
389 
390  // Lastly, apply the "C" matrix-- linear transform on the offsets.
391  ApplyC(feat_out);
392 }
void ApplyC(MatrixBase< BaseFloat > *feat_out, bool reverse=false) const
Definition: fmpe.cc:161
void ApplyContext(const MatrixBase< BaseFloat > &intermed_feat, MatrixBase< BaseFloat > *feat_out) const
Definition: fmpe.cc:97
void ApplyProjection(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, MatrixBase< BaseFloat > *intermed_feat) const
Definition: fmpe.cc:182
kaldi::int32 int32
int32 FeatDim() const
Definition: fmpe.h:143
int32 NumContexts() const
Definition: fmpe.h:145
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).

◆ ComputeStddevs()

void ComputeStddevs ( )
private

Definition at line 89 of file fmpe.cc.

References MatrixBase< Real >::ApplyPow(), MatrixBase< Real >::CopyFromMat(), Fmpe::gmm_, DiagGmm::inv_vars(), MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), Matrix< Real >::Resize(), and Fmpe::stddevs_.

Referenced by Fmpe::Fmpe(), and Fmpe::Read().

89  {
90  const Matrix<BaseFloat> &inv_vars = gmm_.inv_vars();
91  stddevs_.Resize(inv_vars.NumRows(), inv_vars.NumCols());
92  stddevs_.CopyFromMat(inv_vars);
93  stddevs_.ApplyPow(-0.5);
94 }
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Matrix< BaseFloat > stddevs_
Definition: fmpe.h:224
void ApplyPow(Real power)
Definition: kaldi-matrix.h:341
DiagGmm gmm_
Definition: fmpe.h:222
const Matrix< BaseFloat > & inv_vars() const
Definition: diag-gmm.h:180

◆ FeatDim()

int32 FeatDim ( ) const
inline

Definition at line 143 of file fmpe.h.

Referenced by Fmpe::AccStats(), Fmpe::ApplyContext(), Fmpe::ApplyContextReverse(), Fmpe::ApplyProjection(), Fmpe::ApplyProjectionReverse(), Fmpe::ComputeFeatures(), Fmpe::Fmpe(), and FmpeStats::Init().

143 { return gmm_.Dim(); }
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74
DiagGmm gmm_
Definition: fmpe.h:222

◆ NumContexts()

int32 NumContexts ( ) const
inline

Definition at line 145 of file fmpe.h.

Referenced by Fmpe::AccStats(), Fmpe::ApplyContext(), Fmpe::ApplyContextReverse(), Fmpe::ApplyProjection(), Fmpe::ApplyProjectionReverse(), Fmpe::ComputeFeatures(), and Fmpe::Fmpe().

145 { return static_cast<int32>(contexts_.size()); }
kaldi::int32 int32
std::vector< std::vector< std::pair< int32, BaseFloat > > > contexts_
Definition: fmpe.h:242

◆ NumGauss()

int32 NumGauss ( ) const
inline

Definition at line 144 of file fmpe.h.

Referenced by Fmpe::Fmpe().

144 { return gmm_.NumGauss(); }
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
DiagGmm gmm_
Definition: fmpe.h:222

◆ ProjectionTNumCols()

int32 ProjectionTNumCols ( ) const
inline

Definition at line 152 of file fmpe.h.

References Fmpe::FeatDim(), and Fmpe::NumContexts().

Referenced by FmpeStats::Init().

152 { return FeatDim() * NumContexts(); }
int32 FeatDim() const
Definition: fmpe.h:143
int32 NumContexts() const
Definition: fmpe.h:145

◆ ProjectionTNumRows()

int32 ProjectionTNumRows ( ) const
inline

Definition at line 151 of file fmpe.h.

Referenced by FmpeStats::Init().

151 { return (FeatDim()+1) * NumGauss(); }
int32 NumGauss() const
Definition: fmpe.h:144
int32 FeatDim() const
Definition: fmpe.h:143

◆ Read()

void Read ( std::istream &  is,
bool  binary 
)

Definition at line 512 of file fmpe.cc.

References Fmpe::C_, Fmpe::ComputeStddevs(), Fmpe::config_, FmpeOptions::context_expansion, Fmpe::gmm_, Fmpe::projT_, FmpeOptions::Read(), DiagGmm::Read(), Matrix< Real >::Read(), and Fmpe::SetContexts().

Referenced by kaldi::TestFmpe().

512  {
513  gmm_.Read(is, binary);
514  config_.Read(is, binary);
515  ComputeStddevs(); // computed from gmm.
516  projT_.Read(is, binary);
517  C_.Read(is, binary);
518  SetContexts(config_.context_expansion);
519 }
Matrix< BaseFloat > projT_
Definition: fmpe.h:228
std::string context_expansion
Definition: fmpe.h:47
void ComputeStddevs()
Definition: fmpe.cc:89
void Read(std::istream &is, bool binary)
Definition: fmpe.cc:430
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void SetContexts(std::string context_str)
Definition: fmpe.cc:29
TpMatrix< BaseFloat > C_
Definition: fmpe.h:232
FmpeOptions config_
Definition: fmpe.h:223
void Read(std::istream &in, bool binary)
Definition: diag-gmm.cc:728
DiagGmm gmm_
Definition: fmpe.h:222

◆ SetContexts()

void SetContexts ( std::string  context_str)
private

Definition at line 29 of file fmpe.cc.

References Fmpe::contexts_, kaldi::ConvertStringToInteger(), kaldi::ConvertStringToReal(), rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_ERR, and kaldi::SplitStringToVector().

Referenced by Fmpe::Fmpe(), and Fmpe::Read().

29  {
30  // sets the contexts_ variable.
31  using std::vector;
32  using std::string;
33  contexts_.clear();
34  vector<string> ctx_vec; // splitting context_str on ":"
35  SplitStringToVector(context_str, ":", false, &ctx_vec);
36  contexts_.resize(ctx_vec.size());
37  for (size_t i = 0; i < ctx_vec.size(); i++) {
38  vector<string> pair_vec; // splitting ctx_vec[i] on ";"
39  SplitStringToVector(ctx_vec[i], ";", false, &pair_vec);
40  KALDI_ASSERT(pair_vec.size() != 0 && "empty context!");
41  for (size_t j = 0; j < pair_vec.size(); j++) {
42  vector<string> one_pair;
43  SplitStringToVector(pair_vec[j], ",", false, &one_pair);
44  KALDI_ASSERT(one_pair.size() == 2 &&
45  "Mal-formed context string: bad --context-expansion option?");
46  int32 pos = 0;
47  BaseFloat weight = BaseFloat(0);
48  bool ok = ConvertStringToInteger(one_pair[0], &pos);
49  ok = ConvertStringToReal(one_pair[1], &weight) && ok;
50  if (!ok)
51  KALDI_ERR << "Mal-formed context string: bad --context-expansion option?";
52  contexts_[i].push_back(std::make_pair(pos, weight));
53  }
54  }
55 }
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
kaldi::int32 int32
std::vector< std::vector< std::pair< int32, BaseFloat > > > contexts_
Definition: fmpe.h:242
float BaseFloat
Definition: kaldi-types.h:29
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
#define KALDI_ERR
Definition: kaldi-error.h:147
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
Definition: text-utils.cc:238
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Update()

BaseFloat Update ( const FmpeUpdateOptions &  config,
const FmpeStats &  stats 
)

Definition at line 443 of file fmpe.cc.

References FmpeStats::DerivMinus(), FmpeStats::DerivPlus(), rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_LOG, FmpeUpdateOptions::l2_weight, FmpeUpdateOptions::learning_rate, MatrixBase< Real >::Min(), rnnlm::n, MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), Fmpe::projT_, and kaldi::SameDim().

Referenced by main(), and kaldi::TestFmpe().

444  {  // Updates projT_ element by element from the plus/minus derivative stats and returns the summed linear objective-function improvement.
445  SubMatrix<BaseFloat> proj_deriv_plus = stats.DerivPlus(),
446  proj_deriv_minus = stats.DerivMinus();
447  // tot_linear_objf_impr is the change in the actual
448  // objective function if it were linear, i.e.
449  // objf-gradient . parameter-change
450  // Note: none of this is normalized by the #frames (we don't have
451  // this info here), so that is done at the script level.
452  BaseFloat tot_linear_objf_impr = 0.0;
453  int32 changed = 0; // Keep track of how many elements change sign.
454  KALDI_ASSERT(SameDim(proj_deriv_plus, projT_) && SameDim(proj_deriv_minus, projT_));  // both stats matrices must match projT_'s dimensions
455  KALDI_ASSERT(proj_deriv_plus.Min() >= 0);  // the "plus" accumulator must be nonnegative
456  KALDI_ASSERT(proj_deriv_minus.Min() >= 0);  // the "minus" accumulator must be nonnegative
457  BaseFloat learning_rate = config.learning_rate,
458  l2_weight = config.l2_weight;
459 
460  for (int32 i = 0; i < projT_.NumRows(); i++) {
461  for (int32 j = 0; j < projT_.NumCols(); j++) {
462  BaseFloat p = proj_deriv_plus(i, j), n = proj_deriv_minus(i, j),
463  x = projT_(i, j);
464  // Suppose the basic update (before regularization) is:
465  // z <-- x + learning_rate * (p - n) / (p + n),
466  // where z is the new parameter and x is the old one.
467  // Here, we view (learning_rate / (p + n)) as a parameter-specific
468  // learning rate. In fact we view this update as the maximization
469  // of an auxiliary function of the form:
470  // (z-x).(p-n) - 0.5 (z - x)^2 (p+n)/learning_rate
471  // and taking the derivative w.r.t z, we get:
472  // Q'(z) = (p-n) - (z - x) (p+n) / learning_rate
473  // which we set to zero and solve for z, to get z = x + learning_rate.(p-n)/(p+n)
474  // At this point we add regularization, a term of the form -l2_weight * z^2.
475  // Our new auxiliary function derivative is:
476  // Q'(z) = -2.l2_weight.z + (p-n) - (z - x) (p+n) / learning_rate
477  // We can write this as:
478  // Q'(z) = z . (-2.l2_weight - (p+n)/learning_rate)
479  // + (p-n) + x(p+n)/learning_rate
480  // setting this to zero and solving for z, we get:
481  // z = ((p-n) + x (p+n)/learning_rate) / (2.l2_weight + (p+n)/learning_rate)
482 
483  BaseFloat z = ((p-n) + x*(p+n)/learning_rate) / (2*l2_weight + (p+n)/learning_rate);
484  // z is the new parameter value. NOTE(review): if p + n == 0 and l2_weight == 0 the expression above is 0/0 -- confirm the stats/config rule this out.
485 
486  tot_linear_objf_impr += (z-x) * (p-n); // objf impr based on linear assumption.
487  projT_(i, j) = z;
488  if (z*x < 0) changed++;  // strictly opposite signs; a zero old or new value is not counted
489  }
490  }
491  KALDI_LOG << "Objf impr (assuming linear) is " << tot_linear_objf_impr;
492  KALDI_LOG << ((100.0*changed)/(projT_.NumRows()*projT_.NumCols()))
493  << "% of matrix elements changed sign.";
494  return tot_linear_objf_impr;  // un-normalized (see the note above about #frames)
495 }
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
kaldi::int32 int32
Matrix< BaseFloat > projT_
Definition: fmpe.h:228
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
float BaseFloat
Definition: kaldi-types.h:29
struct rnnlm::@11::@12 n
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153

◆ Write()

void Write ( std::ostream &  os,
bool  binary 
) const

Definition at line 500 of file fmpe.cc.

References Fmpe::C_, Fmpe::config_, Fmpe::gmm_, KALDI_ERR, DiagGmm::NumGauss(), Fmpe::projT_, FmpeOptions::Write(), DiagGmm::Write(), and MatrixBase< Real >::Write().

Referenced by main(), and kaldi::TestFmpe().

500  {  // Writes gmm_, config_, projT_ and C_ to os, in that order; binary is passed through to each member's Write().
501  if (gmm_.NumGauss() == 0)  // a GMM with no Gaussians is treated as "not initialized"
502  KALDI_ERR << "Fmpe::Write, object not initialized.";
503  gmm_.Write(os, binary);
504  config_.Write(os, binary);
505  // stddevs_ are derived, don't write them.
506  projT_.Write(os, binary);
507  C_.Write(os, binary);
508  // contexts_ are derived from config, don't write them.
509 }
void Write(std::ostream &out, bool binary) const
write to stream.
void Write(std::ostream &os, bool binary) const
Definition: diag-gmm.cc:705
Matrix< BaseFloat > projT_
Definition: fmpe.h:228
#define KALDI_ERR
Definition: kaldi-error.h:147
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
TpMatrix< BaseFloat > C_
Definition: fmpe.h:232
FmpeOptions config_
Definition: fmpe.h:223
DiagGmm gmm_
Definition: fmpe.h:222
void Write(std::ostream &os, bool binary) const
Definition: fmpe.cc:426

Member Data Documentation

◆ C_

TpMatrix<BaseFloat> C_
private

Definition at line 232 of file fmpe.h.

Referenced by Fmpe::ApplyC(), Fmpe::ComputeC(), Fmpe::Read(), and Fmpe::Write().

◆ config_

FmpeOptions config_
private

◆ contexts_

std::vector<std::vector<std::pair<int32, BaseFloat> > > contexts_
private

Definition at line 242 of file fmpe.h.

Referenced by Fmpe::ApplyContext(), Fmpe::ApplyContextReverse(), and Fmpe::SetContexts().

◆ gmm_

DiagGmm gmm_
private

◆ projT_

Matrix<BaseFloat> projT_
private

◆ stddevs_

Matrix<BaseFloat> stddevs_
private

The documentation for this class was generated from the following files: