void LogisticRegression::Train(const Matrix<BaseFloat> &xs,
                               const std::vector<int32> &ys,
                               const LogisticRegressionConfig &conf) {
  int32 num_classes = *std::max_element(ys.begin(), ys.end()) + 1;
  // Append a constant 1.0 column to each input row so that the last column
  // of the weight matrix acts as a prior (bias) term.
  for (int32 i = 0; i < xs_num_rows; i++) {
    xs_with_prior(i, xs_num_cols) = 1.0;
  }
  // Before mixing up, weight row i corresponds directly to class i.
  for (int32 i = 0; i < num_classes; i++) {
    class_[i] = i;
  }
  KALDI_LOG << "Finished training parameters without mixture components.";
  // Optionally split classes into mixture components and re-train.
  if (conf.mix_up > num_classes) {
    MixUp(ys, num_classes, conf);
    KALDI_LOG << "Finished training mixture components.";
  }
}
void LogisticRegression::MixUp(const std::vector<int32> &ys,
                               const int32 &num_classes,
                               const LogisticRegressionConfig &conf) {
  // Count how many training examples fall in each class.
  for (int32 i = 0; i < ys.size(); i++) {
  // Per-class targets for the number of mixture components, filled in by
  // GetSplitTargets() according to a power rule on the class counts.
  std::vector<int32> targets;
  int32 new_dim = std::accumulate(targets.begin(), targets.end(),
                                  static_cast<int32>(0));
            << ". Training " << new_dim << " mixture components.";
      num_components = old_dim,
  // For each class, add mixture components (new rows of the weight matrix)
  // until it reaches its target count.
  for (int32 i = 0; i < targets.size(); i++) {
    int32 offset = num_components;
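// LogisticRegression::TrainParameters(const Matrix<BaseFloat> &xs,
//     const std::vector<int32> &ys, const LogisticRegressionConfig &conf,
//     Matrix<BaseFloat> *xw):
// Runs the L-BFGS optimizer for max_steps iterations, one DoStep() call per
// iteration.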
  for (int32 step = 0; step < max_steps; step++) {
    DoStep(xs, xw, ys, &lbfgs, normalizer);
  }
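// LogisticRegression::GetLogPosteriors(const Matrix<BaseFloat> &xs,
//     Matrix<BaseFloat> *log_posteriors):
// Computes one row of log class posteriors per row of xs; mixture components
// that share a class are summed in the log domain, then each row is normalized.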
  log_posteriors->Resize(xs_num_rows, num_classes);
  // As in Train(), append a constant 1.0 column for the prior term.
  for (int32 i = 0; i < xs_num_rows; i++) {
    xs_with_prior(i, xs_num_cols) = 1.0;
  }
  // Initialize to log(0) so the LogAdd accumulation below starts from zero
  // probability mass.
  log_posteriors->Set(-std::numeric_limits<BaseFloat>::infinity());
  for (int32 i = 0; i < xs_num_rows; i++) {
    for (int32 j = 0; j < num_mixes; j++) {
      // Accumulate, in the log domain, the mass of every mixture component j
      // that belongs to class k = class_[j].
      int32 k = class_[j];
      (*log_posteriors)(i, k) = LogAdd((*log_posteriors)(i, k), xw(i, j));
    }
    // Normalize row i by the log of its total (unnormalized) mass.
    log_posteriors->Row(i).Add(-xw.Row(i).LogSumExp());
  }
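// Single-input overload of GetLogPosteriors: the same computation for one
// feature vector x, filling a Vector<BaseFloat> of per-class log posteriors.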
  log_posteriors->Resize(num_classes);
  // Append the constant 1.0 element for the prior term.
  x_with_prior(x_dim) = 1.0;
  log_posteriors->Set(-std::numeric_limits<BaseFloat>::infinity());
  for (int32 i = 0; i < num_mixes; i++) {
    // j = class_[i] is the class that mixture component i belongs to.
    int32 j = class_[i];
    (*log_posteriors)(j) = LogAdd((*log_posteriors)(j), xw(i));
  }
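// LogisticRegression::DoStep(const Matrix<BaseFloat> &xs, Matrix<BaseFloat> *xw,
//     const std::vector<int32> &ys, OptimizeLbfgs<BaseFloat> *lbfgs,
//     BaseFloat normalizer):
// Evaluates the objective and gradient at the current weights and hands them
// to the L-BFGS optimizer.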
  // Flatten the gradient matrix into a vector for the L-BFGS update.
  grad_vec.CopyRowsFromMat(gradient);
  lbfgs->DoStep(objf, grad_vec);
  KALDI_LOG << "Objective function is " << objf;
  int32 num_classes = *std::max_element(ys.begin(), ys.end()) + 1;
  // class_to_cols[c] lists the rows of the weight matrix (mixture components)
  // that belong to class c.
  std::vector< std::vector<int32> > class_to_cols(num_classes, std::vector<int32>());
  for (int32 i = 0; i < class_.size(); i++) {
    class_to_cols[class_[i]].push_back(i);
  }
  // Accumulate the objective and gradient over all training examples.
  for (int32 i = 0; i < ys.size(); i++) {
    // Rows of the weight matrix that correspond to the true class ys[i].
    const std::vector<int32> &cols = class_to_cols[ys[i]];
    for (int32 j = 0; j < cols.size(); j++) {
      class_sum += row(cols[j]);
    }
    if (class_sum < 1.0e-20) class_sum = 1.0e-20;  // Guard against log(0).
    raw_objf += Log(class_sum);
        // If row k belongs to the true class of example i:
        grad->Row(k).AddVec(p/class_sum - p, x);
      } else {
        // Otherwise only the negative (normalization) term contributes.
        grad->Row(k).AddVec(-1.0 * p, x);
      }
  grad->Scale(1.0/ys.size());
  raw_objf /= ys.size();
  BaseFloat regularizer = - 0.5 * normalizer
      * TraceMatMat(weights_, weights_, kTrans);
  KALDI_VLOG(2) << "Objf is " << raw_objf << " + " << regularizer
                << " = " << (raw_objf + regularizer);
  return raw_objf + regularizer;
}
void LogisticRegression::SetWeights(const Matrix<BaseFloat> &weights,
                                    const std::vector<int32> classes) {
  class_.resize(classes.size());
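// LogisticRegression::Write(std::ostream &os, bool binary) const:
// Serializes weights_ and class_ between "<LogisticRegression>" and
// "</LogisticRegression>" markers.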
  WriteToken(os, binary, "<LogisticRegression>");
  WriteToken(os, binary, "</LogisticRegression>");
}
  // Older model files may not contain a "<class>" token; in that case class_
  // is filled with the identity mapping below.
  if (token == "<class>") {
    for (int32 i = 0; i < num_classes; i++) {
      class_[i] = i;
    }
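A minimal usage sketch for this class, based only on the signatures that appear above (Train, GetLogPosteriors, Write). The header path, the config field names max_steps and normalizer, and all data and values are assumptions for illustration, not taken from this file.

#include <fstream>
#include <vector>
#include "ivector/logistic-regression.h"  // assumed location of this class's header

int main() {
  using namespace kaldi;

  // Toy data: 100 feature rows of dimension 20 with labels in {0, 1, 2}.
  Matrix<BaseFloat> xs(100, 20);
  xs.SetRandn();
  std::vector<int32> ys(100);
  for (int32 i = 0; i < 100; i++) ys[i] = i % 3;

  LogisticRegressionConfig conf;
  conf.max_steps = 20;       // assumed option: number of L-BFGS steps
  conf.normalizer = 0.0025;  // assumed option: L2 regularization weight
  conf.mix_up = 0;           // <= num_classes, so no mixture components are added

  LogisticRegression classifier;
  classifier.Train(xs, ys, conf);

  // One row of log class posteriors per input row.
  Matrix<BaseFloat> log_posteriors;
  classifier.GetLogPosteriors(xs, &log_posteriors);

  // Serialize the trained model in binary form.
  std::ofstream os("logistic_regression.mdl", std::ios::binary);
  classifier.Write(os, true);
  return 0;
}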