MatrixRandomizer Class Reference

Shuffles rows of a matrix according to the indices in the mask. More...

#include <nnet-randomizer.h>

Collaboration diagram for MatrixRandomizer:

Public Member Functions

 MatrixRandomizer ()
 
 MatrixRandomizer (const NnetDataRandomizerOptions &conf)
 
void Init (const NnetDataRandomizerOptions &conf)
 Set the randomizer parameters (size) More...
 
void AddData (const CuMatrixBase< BaseFloat > &m)
 Add data to randomization buffer. More...
 
bool IsFull ()
 Returns true, when capacity is full. More...
 
int32 NumFrames ()
 Number of frames stored inside the Randomizer. More...
 
void Randomize (const std::vector< int32 > &mask)
 Randomize matrix row-order using mask. More...
 
bool Done ()
 Returns true, if no more data for another mini-batch (after current one) More...
 
void Next ()
 Sets cursor to next mini-batch. More...
 
const CuMatrixBase< BaseFloat > & Value ()
 Returns matrix-window with next mini-batch. More...
 

Private Attributes

CuMatrix< BaseFloat > data_
 
CuMatrix< BaseFloat > data_aux_
 
CuMatrix< BaseFloat > minibatch_
 
int32 data_begin_
 A cursor, pointing to the 'row' where the next mini-batch begins. More...
 
int32 data_end_
 A cursor, pointing to the 'row' after the end of data. More...
 
NnetDataRandomizerOptions conf_
 

Detailed Description

Shuffles rows of a matrix according to the indices in the mask.

Definition at line 87 of file nnet-randomizer.h.

Constructor & Destructor Documentation

◆ MatrixRandomizer() [1/2]

MatrixRandomizer ( )
inline

Definition at line 89 of file nnet-randomizer.h.

89  :
90  data_begin_(0),
91  data_end_(0)
92  { }
int32 data_begin_
A cursor, pointing to the 'row' where the next mini-batch begins.
int32 data_end_
A cursor, pointing to the 'row' after the end of data.

◆ MatrixRandomizer() [2/2]

MatrixRandomizer ( const NnetDataRandomizerOptions &  conf)
inline explicit

Definition at line 94 of file nnet-randomizer.h.

94  :
95  data_begin_(0),
96  data_end_(0)
97  {
98  Init(conf);
99  }
int32 data_begin_
A cursor, pointing to the 'row' where the next mini-batch begins.
void Init(const NnetDataRandomizerOptions &conf)
Set the randomizer parameters (size)
int32 data_end_
A cursor, pointing to the 'row' after the end of data.

Member Function Documentation

◆ AddData()

void AddData ( const CuMatrixBase< BaseFloat > &  m)

Add data to randomization buffer.

Definition at line 47 of file nnet-randomizer.cc.

References data_, KALDI_ASSERT, CuMatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), and CuMatrixBase< Real >::NumRows().

Referenced by main(), and UnitTestMatrixRandomizer().

47  {
48  // pre-allocate before 1st use
49  if (data_.NumCols() == 0) {
50  data_.Resize(conf_.randomizer_size, m.NumCols());
51  }
52  // optionally put previous left-over to front
53  if (data_begin_ > 0) {
54  KALDI_ASSERT(data_begin_ <= data_end_); // sanity check,
55  int32 leftover = data_end_ - data_begin_;
56  KALDI_ASSERT(leftover < data_begin_); // no overlap,
57  if (leftover > 0) {
58  data_.RowRange(0, leftover).CopyFromMat(data_.RowRange(data_begin_, leftover));
59  }
60  data_begin_ = 0;
61  data_end_ = leftover;
62  // set zero to the rest of the buffer,
63  data_.RowRange(leftover, data_.NumRows() - leftover).SetZero();
64  }
65  // extend the buffer if necessary,
66  if (data_.NumRows() < data_end_ + m.NumRows()) {
67  // CuMatrix -> Matrix -> CuMatrix (needs less GPU memory),
68  Matrix<BaseFloat> data_aux(data_);
69  // Add extra 3% rows, so we don't reallocate soon:
70  int32 extra_rows = 0.03 * data_.NumRows();
71  data_.Resize(data_end_ + m.NumRows() + extra_rows, data_.NumCols());
72  data_.RowRange(0, data_aux.NumRows()).CopyFromMat(data_aux);
73  }
74  // copy the data
75  data_.RowRange(data_end_, m.NumRows()).CopyFromMat(m);
76  data_end_ += m.NumRows();
77 }
int32 data_begin_
A cursor, pointing to the 'row' where the next mini-batch begins.
kaldi::int32 int32
int32 randomizer_size
Maximum number of samples we have in memory.
CuMatrix< BaseFloat > data_
int32 data_end_
A cursor, pointing to the 'row' after the end of data.
NnetDataRandomizerOptions conf_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Done()

bool Done ( )
inline

Returns true, if no more data for another mini-batch (after current one)

Definition at line 123 of file nnet-randomizer.h.

Referenced by main(), and UnitTestMatrixRandomizer().

123  {
125  }
int32 data_begin_
A cursor, pointing to the 'row' where the next mini-batch begins.
int32 data_end_
A cursor, pointing to the 'row' after the end of data.
NnetDataRandomizerOptions conf_

◆ Init()

void Init ( const NnetDataRandomizerOptions &  conf)
inline

Set the randomizer parameters (size)

Definition at line 102 of file nnet-randomizer.h.

Referenced by UnitTestMatrixRandomizer().

102  {
103  conf_ = conf;
104  }
NnetDataRandomizerOptions conf_

◆ IsFull()

bool IsFull ( )
inline

Returns true, when capacity is full.

Definition at line 110 of file nnet-randomizer.h.

Referenced by main(), and UnitTestMatrixRandomizer().

110  {
111  return ((data_begin_ == 0) && (data_end_ > conf_.randomizer_size ));
112  }
int32 data_begin_
A cursor, pointing to the 'row' where the next mini-batch begins.
int32 randomizer_size
Maximum number of samples we have in memory.
int32 data_end_
A cursor, pointing to the 'row' after the end of data.
NnetDataRandomizerOptions conf_

◆ Next()

void Next ( )

Sets cursor to next mini-batch.

Definition at line 98 of file nnet-randomizer.cc.

Referenced by main(), and UnitTestMatrixRandomizer().

98  {
100 }
int32 data_begin_
A cursor, pointing to the 'row' where the next mini-batch begins.
NnetDataRandomizerOptions conf_

◆ NumFrames()

int32 NumFrames ( )
inline

Number of frames stored inside the Randomizer.

Definition at line 115 of file nnet-randomizer.h.

References kaldi::cu::Randomize().

Referenced by main(), and UnitTestMatrixRandomizer().

115  {
116  return data_end_;
117  }
int32 data_end_
A cursor, pointing to the 'row' after the end of data.

◆ Randomize()

void Randomize ( const std::vector< int32 > &  mask)

Randomize matrix row-order using mask.

Definition at line 79 of file nnet-randomizer.cc.

References CuArray< T >::CopyFromVec(), data_, KALDI_ASSERT, and kaldi::cu::Randomize().

Referenced by main(), and UnitTestMatrixRandomizer().

79  {
82  KALDI_ASSERT(data_end_ == mask.size());
83  // Copy to auxiliary buffer for unshuffled data
84  data_aux_ = data_;
85  // Put the mask to GPU
86  CuArray<int32> mask_in_gpu(mask.size());
87  mask_in_gpu.CopyFromVec(mask);
88  // Randomize the data, mask is used to index rows in source matrix:
89  // (Here the vector 'mask_in_gpu' is typically shorter than number
90  // of rows in 'data_aux_', because the buffer 'data_aux_'
91  // is larger than capacity 'randomizer_size'.
92  // The extra rows in 'data_aux_' do not contain speech frames and
93  // are not copied from 'data_aux_', the extra rows in 'data_' are
94  // unchanged by cu::Randomize.)
95  cu::Randomize(data_aux_, mask_in_gpu, &data_);
96 }
int32 data_begin_
A cursor, pointing to the 'row' where the next mini-batch begins.
void Randomize(const CuMatrixBase< Real > &src, const CuArray< int32 > &copy_from_idx, CuMatrixBase< Real > *tgt)
Copies a permutation of src into tgt.
Definition: cu-math.cc:80
CuMatrix< BaseFloat > data_
int32 data_end_
A cursor, pointing to the 'row' after the end of data.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
CuMatrix< BaseFloat > data_aux_

◆ Value()

const CuMatrixBase< BaseFloat > & Value ( )

Returns matrix-window with next mini-batch.

Definition at line 102 of file nnet-randomizer.cc.

References data_, KALDI_ASSERT, and kaldi::kUndefined.

Referenced by main(), and UnitTestMatrixRandomizer().

102  {
103  // make sure we have data for next minibatch,
105  // prepare the mini-batch buffer,
106  minibatch_.Resize(conf_.minibatch_size, data_.NumCols(), kUndefined);
107  minibatch_.CopyFromMat(data_.RowRange(data_begin_, conf_.minibatch_size));
108  return minibatch_;
109 }
int32 data_begin_
A cursor, pointing to the 'row' where the next mini-batch begins.
CuMatrix< BaseFloat > data_
int32 data_end_
A cursor, pointing to the 'row' after the end of data.
NnetDataRandomizerOptions conf_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
CuMatrix< BaseFloat > minibatch_

Member Data Documentation

◆ conf_

Definition at line 143 of file nnet-randomizer.h.

◆ data_

CuMatrix<BaseFloat> data_
private

Definition at line 134 of file nnet-randomizer.h.

◆ data_aux_

CuMatrix<BaseFloat> data_aux_
private

Definition at line 135 of file nnet-randomizer.h.

◆ data_begin_

int32 data_begin_
private

A cursor, pointing to the 'row' where the next mini-batch begins.

Definition at line 139 of file nnet-randomizer.h.

◆ data_end_

int32 data_end_
private

A cursor, pointing to the 'row' after the end of data.

Definition at line 141 of file nnet-randomizer.h.

◆ minibatch_

CuMatrix<BaseFloat> minibatch_
private

Definition at line 136 of file nnet-randomizer.h.


The documentation for this class was generated from the following files: