differentiable-transform.h
Go to the documentation of this file.
1 // transform/differentiable-transform.h
2 
3 // Copyright 2018 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_TRANSFORM_DIFFERENTIABLE_TRANSFORM_H_
22 #define KALDI_TRANSFORM_DIFFERENTIABLE_TRANSFORM_H_
23 
24 #include <vector>
25 
26 #include "base/kaldi-common.h"
27 #include "util/kaldi-table.h"
28 #include "util/kaldi-holder.h"
29 
30 namespace kaldi {
31 
32 
33 namespace differentiable_transform {
34 
36  public:
37 
38  virtual ~MinibatchInfoItf() { }
39 };
40 
41 
43 
44  virtual ~SpeakerStatsItf() { }
45 };
46 
47 
48 
86  public:
87 
89  virtual int32 Dim() const = 0;  // Returns the dimension of the input and output features.
90 
91 
98  virtual int32 NumClasses() const { return num_classes_; }  // Number of classes in the model used for adaptation; virtual because subclasses declare it with 'override'.
99 
100 
107  virtual void SetNumClasses(int32 num_classes) { num_classes_ = num_classes; }  // Changes the number of classes (stored in num_classes_).
108 
147  virtual MinibatchInfoItf* TrainingForward(  // Training-time forward pass; it adapts the features.
148  const CuMatrixBase<BaseFloat> &input,
149  int32 num_chunks,
150  int32 num_spk,
151  const Posterior &posteriors,
152  CuMatrixBase<BaseFloat> *output) const = 0;  // NOTE(review): the returned object is presumably what TrainingBackward() takes as 'minibatch_info' -- confirm.
153 
154 
181  virtual void TrainingBackward(  // Does the backpropagation, during the training pass.
182  const CuMatrixBase<BaseFloat> &input,
183  const CuMatrixBase<BaseFloat> &output_deriv,
184  int32 num_chunks,
185  int32 num_spk,
186  const Posterior &posteriors,
187  const MinibatchInfoItf &minibatch_info,
188  CuMatrixBase<BaseFloat> *input_deriv) const = 0;  // NOTE(review): the no-op implementation in this file adds to *input_deriv rather than overwriting it -- confirm that is the general contract.
189 
190 
195  virtual int32 NumFinalIterations() = 0;  // Number of times you have to (call Accumulate() on a subset of data, then call Estimate()).
196 
213  virtual void Accumulate(  // Typically called sequentially, minibatch by minibatch, for a subset of training data.
214  int32 final_iter,  // NOTE(review): presumably an index below NumFinalIterations() -- confirm.
215  const CuMatrixBase<BaseFloat> &input,
216  int32 num_chunks,
217  int32 num_spk,
218  const Posterior &posteriors) = 0;
219 
220  // To be called after repeated calls to Accumulate(); does any estimation that
221  // is required in training time (normally per-speaker means and possibly
222  // variances).
223  virtual void Estimate(int32 final_iter) = 0;
224 
225  // Returns an object representing sufficient statistics for estimating a
226  // speaker-dependent transform. This object will initially have zero
227  // counts in its statistics. It will represent the stats for a single
228  // speaker.
229  virtual SpeakerStatsItf *GetEmptySpeakerStats() = 0;
230 
231 
232  // Accumulate statistics for a segment of test data, storing them in the
233  // object 'speaker_stats'. There is no assumption that the soft-counts in
234  // 'posteriors' are positive; this allows you to change your mind about the
235  // traceback, in test-time, by subtracting the stats that you no longer want
236  // to use.
237  virtual void TestingAccumulate(
238  const MatrixBase<BaseFloat> &input,
239  const Posterior &posteriors,
240  SpeakerStatsItf *speaker_stats) const = 0;
241 
242  // Applies the transformation implied by the statistics in 'speaker_stats' to
243  // 'input', storing the result in 'output'. It will do any estimation
244  // procedure that is required first, if applicable.
245  virtual void TestingForward(
246  const MatrixBase<BaseFloat> &input,
247  const SpeakerStatsItf &speaker_stats,
248  MatrixBase<BaseFloat> *output) const = 0;
249 
250 
251  // Read transform from stream (works out its type). Dies on error.
252  static DifferentiableTransform* ReadNew(std::istream &is, bool binary);
253 
254  // Copies transform (deep copy).
255  virtual DifferentiableTransform* Copy() const = 0;
256 
257  // Returns a new transform of the given type e.g. "MeanNormalize",
258  // or NULL if no such component type exists.
259  static DifferentiableTransform *NewTransformOfType(const std::string &type);
260 
261  // Write transform to stream
262  virtual void Write(std::ostream &os, bool binary) const = 0;
263 
264  // Reads transform from stream (normally you would previously have created
265  // the transform object of the correct type using ReadNew()).
266  virtual void Read(std::istream &is, bool binary) = 0;
267 
268  protected:
270 
271 
272 };
273 
274 
281  public:
282 
283  int32 Dim() const override { return dim_; }
284  int32 NumClasses() const override { return num_classes_; }
286  const CuMatrixBase<BaseFloat> &input,
287  int32 num_chunks,
288  int32 num_spk,
289  const Posterior &posteriors,
290  CuMatrixBase<BaseFloat> *output) const override {
291  output->CopyFromMat(input);
292  return NULL;
293  }
294  virtual void TrainingBackward(  // Backward pass of the do-nothing transform: the output derivative is passed straight through.
295  const CuMatrixBase<BaseFloat> &input,
296  const CuMatrixBase<BaseFloat> &output_deriv,
297  int32 num_chunks,
298  int32 num_spk,
299  const Posterior &posteriors,
300  const MinibatchInfoItf &minibatch_info,
301  CuMatrixBase<BaseFloat> *input_deriv) const override {
302  input_deriv->AddMat(1.0, output_deriv);  // *input_deriv += output_deriv (accumulates; does not overwrite)
303  }
304 
305  virtual int32 NumFinalIterations() { return 0; }
306 
308  int32 final_iter,
309  const CuMatrixBase<BaseFloat> &input,
310  int32 num_chunks,
311  int32 num_spk,
312  const Posterior &posteriors) override { }
313 
314 
315 
316  SpeakerStatsItf *GetEmptySpeakerStats() override { return NULL; }
317 
319  const MatrixBase<BaseFloat> &input,
320  const Posterior &posteriors,
321  SpeakerStatsItf *speaker_stats) const override { }
323  const MatrixBase<BaseFloat> &input,
324  const SpeakerStatsItf &speaker_stats,
325  MatrixBase<BaseFloat> *output) override {
326  output->CopyFromMat(input);
327  }
328 
329  void Estimate(int32 final_iter) override { }
330 
332  dim_(other.dim_), num_classes_(other.num_classes_) { }
333 
334  DifferentiableTransform* Copy() const override {
335  return new NoOpTransform(*this);
336  }
337 
338  void Write(std::ostream &os, bool binary) const override;
339 
340  void Read(std::istream &is, bool binary) override;
341 
342  private:
345 };
346 
347 
354  public:
355 
356  int32 Dim() const override;
357  void SetNumClasses(int32 num_classes) override;  // Changes the number of classes; signature matches the virtual SetNumClasses(int32) it overrides.
358 
359  MinibatchInfoItf* TrainingForward(
360  const CuMatrixBase<BaseFloat> &input,
361  int32 num_chunks,
362  int32 num_spk,
363  const Posterior &posteriors,
364  CuMatrixBase<BaseFloat> *output) const override;
365  virtual void TrainingBackward(
366  const CuMatrixBase<BaseFloat> &input,
367  const CuMatrixBase<BaseFloat> &output_deriv,
368  int32 num_chunks,
369  int32 num_spk,
370  const Posterior &posteriors,
371  const MinibatchInfoItf &minibatch_info,
372  CuMatrixBase<BaseFloat> *input_deriv) const override;
373 
374  virtual int32 NumFinalIterations();
375 
376  void Accumulate(
377  int32 final_iter,
378  const CuMatrixBase<BaseFloat> &input,
379  int32 num_chunks,
380  int32 num_spk,
381  const Posterior &posteriors) override;
382 
383  SpeakerStatsItf *GetEmptySpeakerStats() override;
384 
385  void TestingAccumulate(
386  const MatrixBase<BaseFloat> &input,
387  const Posterior &posteriors,
388  SpeakerStatsItf *speaker_stats) const override;
389 
390  virtual void TestingForward(  // Test-time forward pass; const so that it matches the const virtual it overrides.
391  const MatrixBase<BaseFloat> &input,
392  const SpeakerStatsItf &speaker_stats,
393  MatrixBase<BaseFloat> *output) const override;
394 
395  void Estimate(int32 final_iter) override;
396 
397  SequenceTransform(const SequenceTransform &other);
398 
399  DifferentiableTransform* Copy() const override {
400  return new SequenceTransform(*this);
401  }
402 
403  void Write(std::ostream &os, bool binary) const override;
404 
405  void Read(std::istream &is, bool binary) override;
406 
407  private:
408  std::vector<DifferentiableTransform*> transforms_;
409 };
410 
411 
419  public:
420 
421  int32 Dim() const override;
422  void SetNumClasses(int32 num_classes) override;  // Changes the number of classes; signature matches the virtual SetNumClasses(int32) it overrides.
423 
424  MinibatchInfoItf* TrainingForward(
425  const CuMatrixBase<BaseFloat> &input,
426  int32 num_chunks,
427  int32 num_spk,
428  const Posterior &posteriors,
429  CuMatrixBase<BaseFloat> *output) const override;
430  virtual void TrainingBackward(
431  const CuMatrixBase<BaseFloat> &input,
432  const CuMatrixBase<BaseFloat> &output_deriv,
433  int32 num_chunks,
434  int32 num_spk,
435  const Posterior &posteriors,
436  const MinibatchInfoItf &minibatch_info,
437  CuMatrixBase<BaseFloat> *input_deriv) const override;
438 
439  virtual int32 NumFinalIterations();
440 
441  void Accumulate(
442  int32 final_iter,
443  const CuMatrixBase<BaseFloat> &input,
444  int32 num_chunks,
445  int32 num_spk,
446  const Posterior &posteriors) override;
447 
448  virtual void TestingForward(  // Test-time forward pass; const so that it matches the const virtual it overrides.
449  const MatrixBase<BaseFloat> &input,
450  const SpeakerStatsItf &speaker_stats,
451  MatrixBase<BaseFloat> *output) const override;
452 
453  void Estimate(int32 final_iter) override;
454 
455  AppendTransform(const AppendTransform &other);
456 
457  DifferentiableTransform* Copy() const override {
458  return new AppendTransform(*this);
459  }
460 
461  void Write(std::ostream &os, bool binary) const override;
462 
463  void Read(std::istream &is, bool binary) override;
464 
465  private:
466  std::vector<DifferentiableTransform*> transforms_;
467 };
468 
469 
470 
478  int32 Dim() const override;
479  int32 NumClasses() const override;
481  const CuMatrixBase<BaseFloat> &input,
482  int32 num_chunks,
483  int32 num_spk,
484  const Posterior &posteriors,
485  CuMatrixBase<BaseFloat> *output) const override {
486  output->CopyFromMat(input);
487  return NULL;
488  }
489  virtual void TrainingBackward(
490  const CuMatrixBase<BaseFloat> &input,
491  const CuMatrixBase<BaseFloat> &output_deriv,
492  int32 num_chunks,
493  int32 num_spk,
494  const Posterior &posteriors,
495  const MinibatchInfoItf &minibatch_info,
496  CuMatrixBase<BaseFloat> *input_deriv) const override;
497 
498  void Accumulate(int32 final_iter,  // Accumulates stats for one minibatch; final_iter added so the parameter list matches the virtual being overridden.
499  const CuMatrixBase<BaseFloat> &input,
500  int32 num_chunks,
501  int32 num_spk,
502  const Posterior &posteriors) override;
503 
504  void Estimate(int32 final_iter) override { }  // Nothing to estimate here; takes final_iter so that it overrides Estimate(int32).
505 
506  AppendTransform(const AppendTransform &other);
507 
508  DifferentiableTransform* Copy() const override;
509 
510  void Write(std::ostream &os, bool binary) const override;
511 
512  void Read(std::istream &is, bool binary) override;
513  private:
514  std::vector<DifferentiableTransform*> transforms_;
515 };
516 
517 
525  public:
526  int32 Dim() const override;
527  int32 NumClasses() const override;
529  const CuMatrixBase<BaseFloat> &input,
530  int32 num_chunks,
531  int32 num_spk,
532  const Posterior &posteriors,
533  CuMatrixBase<BaseFloat> *output) const override {
534  output->CopyFromMat(input);
535  return NULL;
536  }
537  virtual void TrainingBackward(
538  const CuMatrixBase<BaseFloat> &input,
539  const CuMatrixBase<BaseFloat> &output_deriv,
540  int32 num_chunks,
541  int32 num_spk,
542  const Posterior &posteriors,
543  const MinibatchInfoItf &minibatch_info,
544  CuMatrixBase<BaseFloat> *input_deriv) const override;
545 
546  void Accumulate(int32 final_iter,  // Accumulates stats for one minibatch; final_iter added so the parameter list matches the virtual being overridden.
547  const CuMatrixBase<BaseFloat> &input,
548  int32 num_chunks,
549  int32 num_spk,
550  const Posterior &posteriors) override;
551 
552  virtual void TestingForward(  // Test-time forward pass; const so that it matches the const virtual it overrides.
553  const MatrixBase<BaseFloat> &input,
554  const SpeakerStatsItf &speaker_stats,
555  MatrixBase<BaseFloat> *output) const override;
556 
557 
558  void Estimate(int32 final_iter) override { }  // Nothing to estimate here; takes final_iter so that it overrides Estimate(int32).
559 
560  AppendTransform(const AppendTransform &other);
561 
562  DifferentiableTransform* Copy() const override;
563 
564  void Write(std::ostream &os, bool binary) const override;
565 
566  void Read(std::istream &is, bool binary) override;
567  private:
568 
569  // OK: how to compute stats
571 
572  // Stores the total weights, per frame, that correspond to the Posteriors
573  // supplied to TrainingForward().
575 
576  // The total of frame_weights.
578  };
579 
580  // dim_ is the feature dimension
582 
583  // The class-dependent means. Dimension is num_classes_ by dim_.
584  // Note: these will not be set up during training, they will only
585  // be set up after calling Accumulate() and Estimate(), which happens
586  // in test time.
588 
589  // mean_stats_ and count_ are used in Accumulate() to accumulate
590  // statistics to adapt the mean.
592  double count_;
593 
594 };
595 
596 
603  public:
604  int32 Dim() const override;
605  int32 NumClasses() const override;
606  MinibatchInfoItf* TrainingForward(
607  const CuMatrixBase<BaseFloat> &input,
608  int32 num_chunks,
609  int32 num_spk,
610  const Posterior &posteriors,
611  CuMatrixBase<BaseFloat> *output) const override;
612  virtual void TrainingBackward(
613  const CuMatrixBase<BaseFloat> &input,
614  const CuMatrixBase<BaseFloat> &output_deriv,
615  int32 num_chunks,
616  int32 num_spk,
617  const Posterior &posteriors,
618  const MinibatchInfoItf &minibatch_info,
619  CuMatrixBase<BaseFloat> *input_deriv) const override;
620  void Accumulate(
621  int32 final_iter,
622  const CuMatrixBase<BaseFloat> &input,
623  int32 num_chunks,
624  int32 num_spk,
625  const Posterior &posteriors) override;
626 
627  SpeakerStatsItf *GetEmptySpeakerStats() override;
628 
629  void TestingAccumulate(
630  const MatrixBase<BaseFloat> &input,
631  const Posterior &posteriors,
632  SpeakerStatsItf *speaker_stats) const override;
633 
634  virtual void TestingForward(  // Test-time forward pass; const so that it matches the const virtual it overrides.
635  const MatrixBase<BaseFloat> &input,
636  const SpeakerStatsItf &speaker_stats,
637  MatrixBase<BaseFloat> *output) const override;
638 
639  void Estimate(int32 final_iter) override { }
640 
641  FmllrTransform(const FmllrTransform &other);
642 
643  DifferentiableTransform* Copy() const override;
644 
645  void Write(std::ostream &os, bool binary) const override;
646 
647  void Read(std::istream &is, bool binary) override;
648  private:
649 
650  // OK: how to compute stats
652 
653  // Stores the total weights, per frame, that correspond to the Posteriors
654  // supplied to TrainingForward(). frame_weights.Dim() equals
655  // input.NumRows().
657 
658  // The total of frame_weights per speaker.
659  CuVector<BaseFloat> frame_weights;
660 
662  };
663 
665 
666  };
667 
668  // dim_ is the feature dimension
670 
671  // The class-dependent means. Dimension is num_classes_ by dim_.
672  // Note: these will not be set up during training, they will only
673  // be set up after calling Accumulate() and Estimate(), which happens
674  // in test time.
676 
677  // mean_stats_ and count_ are used in Accumulate() to accumulate
678  // statistics to adapt the mean.
680  double count_;
681 
682 };
683 
684 
685 } // namespace differentiable_transform
686 } // namespace kaldi
687 
688 #endif // KALDI_TRANSFORM_DIFFERENTIABLE_TRANSFORM_H_
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
This class is for speaker-dependent feature-space transformations – principally various varieties of...
int32 Dim() const override
Return the dimension of the input and output features.
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
This is a version of the transform class that consists of a number of other transforms, appended dimension-wise– e.g.
DifferentiableTransform * Copy() const override
kaldi::int32 int32
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
Definition: cu-matrix.cc:954
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
void Accumulate(int32 final_iter, const CuMatrixBase< BaseFloat > &input, int32 num_chunks, int32 num_spk, const Posterior &posteriors) override
This will typically be called sequentially, minibatch by minibatch, for a subset of training data...
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
virtual int32 NumFinalIterations()
Returns the number of times you have to (call Accumulate() on a subset of data, then call Estimate())...
This version of the transform class does a mean normalization: adding an offset to its input so that ...
MinibatchInfoItf * TrainingForward(const CuMatrixBase< BaseFloat > &input, int32 num_chunks, int32 num_spk, const Posterior &posteriors, CuMatrixBase< BaseFloat > *output) const override
This is the function you call in training time, for the forward pass; it adapts the features...
virtual void SetNumClasses(int32 num_classes)
This can be used to change the number of classes.
void TestingAccumulate(const MatrixBase< BaseFloat > &input, const Posterior &posteriors, SpeakerStatsItf *speaker_stats) const override
MinibatchInfoItf * TrainingForward(const CuMatrixBase< BaseFloat > &input, int32 num_chunks, int32 num_spk, const Posterior &posteriors, CuMatrixBase< BaseFloat > *output) const override
This is the function you call in training time, for the forward pass; it adapts the features...
Notes on the math behind differentiable fMLLR transform.
MinibatchInfoItf * TrainingForward(const CuMatrixBase< BaseFloat > &input, int32 num_chunks, int32 num_spk, const Posterior &posteriors, CuMatrixBase< BaseFloat > *output) const override
This is the function you call in training time, for the forward pass; it adapts the features...
DifferentiableTransform * Copy() const override
Matrix for CUDA computing.
Definition: matrix-common.h:69
This is a version of the transform class that does a sequence of other transforms, specified by other instances of the DifferentiableTransform interface.
std::vector< DifferentiableTransform * > transforms_
void TestingForward(const MatrixBase< BaseFloat > &input, const SpeakerStatsItf &speaker_stats, MatrixBase< BaseFloat > *output) override
This is a version of the transform class that does nothing.
int32 NumClasses() const
Return the number of classes in the model used for adaptation.
virtual void TrainingBackward(const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &output_deriv, int32 num_chunks, int32 num_spk, const Posterior &posteriors, const MinibatchInfoItf &minibatch_info, CuMatrixBase< BaseFloat > *input_deriv) const override
This does the backpropagation, during the training pass.
DifferentiableTransform * Copy() const override
std::vector< DifferentiableTransform * > transforms_
void Copy(const CuMatrixBase< Real > &src, const CuArray< int32 > &copy_from_indices, CuMatrixBase< Real > *tgt)
Copies elements from src into tgt as given by copy_from_indices.
Definition: cu-math.cc:173