logistic-regression.cc
Go to the documentation of this file.
1 // ivector/logistic-regression.cc
2 
3 // Copyright 2014 David Snyder
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
22 #include "gmm/model-common.h" // For GetSplitTargets()
23 #include <numeric> // For std::accumulate
24 
25 namespace kaldi {
26 
28  const std::vector<int32> &ys,
29  const LogisticRegressionConfig &conf) {
30 
31  int32 xs_num_rows = xs.NumRows(), xs_num_cols = xs.NumCols(),
32  num_ys = ys.size();
33  KALDI_ASSERT(xs_num_rows == num_ys);
34 
35  // Adding on extra column for each x to handle the prior.
36  Matrix<BaseFloat> xs_with_prior(xs_num_rows, xs_num_cols + 1);
37  SubMatrix<BaseFloat> sub_xs(xs_with_prior, 0, xs_num_rows, 0, xs_num_cols);
38  sub_xs.CopyFromMat(xs);
39 
40  int32 num_classes = *std::max_element(ys.begin(), ys.end()) + 1;
41  weights_.Resize(num_classes, xs_num_cols + 1);
42  Matrix<BaseFloat> xw(xs_num_rows, num_classes);
43 
44  // Adding on extra column for each x to handle the prior.
45  for (int32 i = 0; i < xs_num_rows; i++) {
46  xs_with_prior(i, xs_num_cols) = 1.0;
47  }
48 
49  // At the beginning of training we have no mixture components,
50  // therefore class_ is the "identity" mapping, that is
51  // class_[i] = i.
52  for (int32 i = 0; i < num_classes; i++) {
53  class_.push_back(i);
54  }
55 
56  weights_.SetZero();
57  TrainParameters(xs_with_prior, ys, conf, &xw);
58  KALDI_LOG << "Finished training parameters without mixture components.";
59 
60  // If we are using mixture components, we add those components
61  // in MixUp and retrain with the extra weights.
62  if (conf.mix_up > num_classes) {
63  MixUp(ys, num_classes, conf);
64  Matrix<BaseFloat> xw(xs_num_rows, weights_.NumRows());
65  TrainParameters(xs_with_prior, ys, conf, &xw);
66  KALDI_LOG << "Finished training mixture components.";
67  }
68 }
69 
70 
71 void LogisticRegression::MixUp(const std::vector<int32> &ys,
72  const int32 &num_classes,
73  const LogisticRegressionConfig &conf) {
74 
75  Vector<BaseFloat> counts(num_classes);
76  for (int32 i = 0; i < ys.size(); i++) {
77  counts(ys[i]) += 1.0;
78  }
79 
80  // TODO: Figure out what min_count should be
81  int32 min_count = 1;
82  std::vector<int32> targets;
83  GetSplitTargets(counts, conf.mix_up, conf.power, min_count, &targets);
84  int32 new_dim = std::accumulate(targets.begin(), targets.end(),
85  static_cast<int32>(0));
86 
87  KALDI_LOG << "Target number mixture components was " << conf.mix_up
88  << ". Training " << new_dim << " mixture components.";
89 
90  int32 old_dim = weights_.NumRows(),
91  num_components = old_dim,
92  num_feats = weights_.NumCols();
93 
94  Matrix<BaseFloat> old_weights(weights_);
95  weights_.Resize(new_dim, num_feats);
96  SubMatrix<BaseFloat> sub_weights(weights_, 0, num_classes, 0, num_feats);
97  // We need to retain the original weights
98  sub_weights.CopyFromMat(old_weights);
99  class_.resize(new_dim);
100  // For each class i
101  for (int32 i = 0; i < targets.size(); i++) {
102  int32 mixes = targets[i];
103  // We start at j = 1 since one copy of the components already
104  // exists in weights_.
105  for (int32 j = 1; j < mixes; j++) {
106  int32 offset = num_components;
107  weights_.Row(offset).CopyRowFromMat(weights_, i);
108  Vector<BaseFloat> noise(num_feats);
109  noise.SetRandn();
110  weights_.Row(offset).AddVec(1.0e-05, noise);
111  class_[offset] = i; // The class i maps to the row at offset
112  num_components += 1;
113  }
114  }
115 }
116 
118  const std::vector<int32> &ys, const LogisticRegressionConfig &conf,
119  Matrix<BaseFloat> *xw) {
120  int32 max_steps = conf.max_steps;
121  BaseFloat normalizer = conf.normalizer;
122  LbfgsOptions lbfgs_opts;
123  lbfgs_opts.minimize = false;
124  // Get initial w vector
126  init_w.CopyRowsFromMat(weights_);
127  OptimizeLbfgs<BaseFloat> lbfgs(init_w, lbfgs_opts);
128 
129  for (int32 step = 0; step < max_steps; step++) {
130  DoStep(xs, xw, ys, &lbfgs, normalizer);
131  }
132 
133  Vector<BaseFloat> best_w(lbfgs.GetValue());
134  weights_.CopyRowsFromVec(best_w);
135 }
136 
138  Matrix<BaseFloat> *log_posteriors) {
139  int32 xs_num_rows = xs.NumRows(),
140  xs_num_cols = xs.NumCols(),
141  num_mixes = weights_.NumRows();
142 
143  int32 num_classes = *std::max_element(class_.begin(), class_.end()) + 1;
144 
145  log_posteriors->Resize(xs_num_rows, num_classes);
146  Matrix<BaseFloat> xw(xs_num_rows, num_mixes);
147 
148  Matrix<BaseFloat> xs_with_prior(xs_num_rows, xs_num_cols + 1);
149  SubMatrix<BaseFloat> sub_xs(xs_with_prior, 0, xs_num_rows, 0, xs_num_cols);
150  sub_xs.CopyFromMat(xs);
151  // Adding on extra column for each x to handle the prior.
152  for (int32 i = 0; i < xs_num_rows; i++) {
153  xs_with_prior(i, xs_num_cols) = 1.0;
154  }
155  xw.AddMatMat(1.0, xs_with_prior, kNoTrans, weights_,
156  kTrans, 0.0);
157 
158  log_posteriors->Set(-std::numeric_limits<BaseFloat>::infinity());
159 
160  // i is the training example
161  for (int32 i = 0; i < xs_num_rows; i++) {
162  for (int32 j = 0; j < num_mixes; j++) {
163  int32 k = class_[j];
164  (*log_posteriors)(i,k) = LogAdd((*log_posteriors)(i,k), xw(i, j));
165  }
166  // Normalize the row.
167  log_posteriors->Row(i).Add(-xw.Row(i).LogSumExp());
168  }
169 }
170 
172  Vector<BaseFloat> *log_posteriors) {
173  int32 x_dim = x.Dim();
174  int32 num_classes = *std::max_element(class_.begin(), class_.end()) + 1,
175  num_mixes = weights_.NumRows();
176  log_posteriors->Resize(num_classes);
178 
179  Vector<BaseFloat> x_with_prior(x_dim + 1);
180  SubVector<BaseFloat> sub_x(x_with_prior, 0, x_dim);
181  sub_x.CopyFromVec(x);
182  // Adding on extra element to handle the prior
183  x_with_prior(x_dim) = 1.0;
184 
185  xw.AddMatVec(1.0, weights_, kNoTrans, x_with_prior, kNoTrans);
186 
187  log_posteriors->Set(-std::numeric_limits<BaseFloat>::infinity());
188 
189  for (int32 i = 0; i < num_mixes; i++) {
190  int32 j = class_[i];
191  (*log_posteriors)(j) = LogAdd((*log_posteriors)(j), xw(i));
192  }
193  log_posteriors->Add(-log_posteriors->LogSumExp());
194 }
195 
197  Matrix<BaseFloat> *xw,
198  const std::vector<int32> &ys, OptimizeLbfgs<BaseFloat> *lbfgs,
199  BaseFloat normalizer) {
201  // Vector form of the above matrix
203 
204  // Calculate XW.T. The rows correspond to the x
205  // training examples and the columns to the class labels.
206  xw->AddMatMat(1.0, xs, kNoTrans, weights_, kTrans, 0.0);
207 
208  // Calculate both the gradient and the objective function.
209  BaseFloat objf = GetObjfAndGrad(xs, ys, *xw, &gradient, normalizer);
210 
211  // Convert gradient (a matrix) into a vector of size
212  // gradient.NumCols * gradient.NumRows.
213  grad_vec.CopyRowsFromMat(gradient);
214 
215  // Compute next step in L-BFGS.
216  lbfgs->DoStep(objf, grad_vec);
217 
218  // Update weights
219  Vector<BaseFloat> new_w(lbfgs->GetProposedValue());
220  weights_.CopyRowsFromVec(new_w);
221  KALDI_LOG << "Objective function is " << objf;
222  return objf;
223 }
224 
226  const Matrix<BaseFloat> &xs,
227  const std::vector<int32> &ys, const Matrix<BaseFloat> &xw,
228  Matrix<BaseFloat> *grad, BaseFloat normalizer) {
229  BaseFloat raw_objf = 0.0;
230  int32 num_classes = *std::max_element(ys.begin(), ys.end()) + 1;
231  std::vector< std::vector<int32> > class_to_cols(num_classes, std::vector<int32>());
232  for (int32 i = 0; i < class_.size(); i++) {
233  class_to_cols[class_[i]].push_back(i);
234  }
235  // For each training example class
236  for (int32 i = 0; i < ys.size(); i++) {
237  Vector<BaseFloat> row(xw.NumCols());
238  row.CopyFromVec(xw.Row(i));
239  row.ApplySoftMax();
240  // Identify the rows of weights_ (which are a set of columns in wx)
241  // which correspond to class ys[i]
242  const std::vector<int32> &cols = class_to_cols[ys[i]];
243  SubVector<BaseFloat> x = xs.Row(i);
244  BaseFloat class_sum = 0.0;
245  for (int32 j = 0; j < cols.size(); j++) {
246  class_sum += row(cols[j]);
247  }
248  if (class_sum < 1.0e-20) class_sum = 1.0e-20;
249  raw_objf += Log(class_sum);
250  // Iterate over weights for each component. If there are no
251  // mixtures each row corresponds to a class.
252  for (int32 k = 0; k < weights_.NumRows(); k++) {
253  // p(y = k | x_i) where k is a component.
254  BaseFloat p = row(k);
255  if (class_[k] == ys[i]) {
256  // If the classes aren't split into mixture components
257  // then p/class_sum = 1.0.
258  grad->Row(k).AddVec(p/class_sum - p, x);
259  } else {
260  grad->Row(k).AddVec(-1.0 * p, x);
261  }
262  }
263  }
264  // Scale and add regularization term.
265  grad->Scale(1.0/ys.size());
266  grad->AddMat(-1.0 * normalizer, weights_);
267  raw_objf /= ys.size();
268  BaseFloat regularizer = - 0.5 * normalizer
270  KALDI_VLOG(2) << "Objf is " << raw_objf << " + " << regularizer
271  << " = " << (raw_objf + regularizer);
272  return raw_objf + regularizer;
273 }
274 
276  const std::vector<int32> classes) {
277  weights_.Resize(weights.NumRows(), weights.NumCols());
278  weights_.CopyFromMat(weights);
279  class_.resize(classes.size());
280  for (int32 i = 0; i < class_.size(); i++)
281  class_[i] = classes[i];
282 }
283 
285  Vector<BaseFloat> log_scales(scales);
286  log_scales.ApplyLog();
287 
288  for (int32 i = 0; i < weights_.NumRows(); i++)
289  weights_(i, weights_.NumCols() - 1) += log_scales(class_[i]);
290 }
291 
292 void LogisticRegression::Write(std::ostream &os, bool binary) const {
293  WriteToken(os, binary, "<LogisticRegression>");
294  WriteToken(os, binary, "<weights>");
295  weights_.Write(os, binary);
296  WriteToken(os, binary, "<class>");
297  WriteIntegerVector(os, binary, class_);
298  WriteToken(os, binary, "</LogisticRegression>");
299 }
300 
301 void LogisticRegression::Read(std::istream &is, bool binary) {
302  ExpectToken(is, binary, "<LogisticRegression>");
303  ExpectToken(is, binary, "<weights>");
304  weights_.Read(is, binary);
305  std::string token;
306  ReadToken(is, binary, &token);
307  if (token == "<class>") {
308  ReadIntegerVector(is, binary, &class_);
309  } else {
310  int32 num_classes = weights_.NumRows();
311  for (int32 i = 0; i < num_classes; i++) {
312  class_.push_back(i);
313  }
314  }
315  ExpectToken(is, binary, "</LogisticRegression>");
316 }
317 
318 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void DoStep(Real function_value, const VectorBase< Real > &gradient)
The user calls this function to provide the class with the function and gradient info at the point Ge...
void Write(std::ostream &out, bool binary) const
write to stream.
BaseFloat GetObjfAndGrad(const Matrix< BaseFloat > &xs, const std::vector< int32 > &ys, const Matrix< BaseFloat > &xw, Matrix< BaseFloat > *grad, BaseFloat normalizer)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void TrainParameters(const Matrix< BaseFloat > &xs, const std::vector< int32 > &ys, const LogisticRegressionConfig &conf, Matrix< BaseFloat > *xw)
void Write(std::ostream &os, bool binary) const
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
const VectorBase< Real > & GetValue(Real *objf_value=NULL) const
This returns the value of the variable x that has the best objective function so far, and the corresponding objective function value if requested.
void Train(const Matrix< BaseFloat > &xs, const std::vector< int32 > &ys, const LogisticRegressionConfig &conf)
void ApplyLog()
Apply natural log to all elements.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real LogSumExp(Real prune=-1.0) const
Returns log(sum(exp())) without exp overflow If prune > 0.0, ignores terms less than the max - prune...
void GetSplitTargets(const Vector< BaseFloat > &state_occs, int32 target_components, BaseFloat power, BaseFloat min_count, std::vector< int32 > *targets)
Get Gaussian-mixture or substate-mixture splitting targets, according to a power rule (e...
void GetLogPosteriors(const Matrix< BaseFloat > &xs, Matrix< BaseFloat > *log_posteriors)
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
Definition: io-funcs-inl.h:232
double Log(double x)
Definition: kaldi-math.h:100
void Scale(Real alpha)
Multiply each element with a scalar value.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void SetZero()
Sets matrix to zero.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
void SetRandn()
Set vector to random normally-distributed noise.
double LogAdd(double x, double y)
Definition: kaldi-math.h:184
void ScalePriors(const Vector< BaseFloat > &prior_scales)
A class representing a vector.
Definition: kaldi-vector.h:406
void SetWeights(const Matrix< BaseFloat > &weights, const std::vector< int32 > classes)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Set(Real f)
Set all members of a vector to a specified value.
void Read(std::istream &is, bool binary)
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198
void CopyRowsFromMat(const MatrixBase< Real > &M)
Performs a row stack of the matrix M.
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
std::vector< int32 > class_
This is an implementation of L-BFGS.
Definition: optimization.h:84
Matrix< BaseFloat > weights_
void Add(Real c)
Add a constant to each element of a vector.
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
#define KALDI_LOG
Definition: kaldi-error.h:153
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
const VectorBase< Real > & GetProposedValue() const
This returns the value at which the function wants us to compute the objective function and gradient...
Definition: optimization.h:134
void MixUp(const std::vector< int32 > &ys, const int32 &num_classes, const LogisticRegressionConfig &conf)
void Set(Real)
Sets all elements to a specific value.
BaseFloat DoStep(const Matrix< BaseFloat > &xs, Matrix< BaseFloat > *xw, const std::vector< int32 > &ys, OptimizeLbfgs< BaseFloat > *lbfgs, BaseFloat normalizer)