StdVectorRandomizer< T > Class Template Reference

Randomizes elements of a vector according to a mask. More...

#include <nnet-randomizer.h>

Collaboration diagram for StdVectorRandomizer< T >:

Public Member Functions

 StdVectorRandomizer ()
 
 StdVectorRandomizer (const NnetDataRandomizerOptions &conf)
 
void Init (const NnetDataRandomizerOptions &conf)
 Set the randomizer parameters (size) More...
 
void AddData (const std::vector< T > &v)
 Add data to randomization buffer. More...
 
bool IsFull ()
 Returns true when the capacity is full. More...
 
int32 NumFrames ()
 Number of frames stored inside the Randomizer. More...
 
void Randomize (const std::vector< int32 > &mask)
 Randomize the order of the vector elements using mask. More...
 
bool Done ()
 Returns true if there is no more data for another mini-batch (after the current one). More...
 
void Next ()
 Sets cursor to next mini-batch. More...
 
const std::vector< T > & Value ()
 Returns a vector with the next mini-batch. More...
 

Private Attributes

std::vector< T > data_
 
std::vector< T > minibatch_
 
int32 data_begin_
 A cursor pointing to the 'row' where the next mini-batch begins. More...
 
int32 data_end_
 A cursor pointing to the 'row' after the end of data. More...
 
NnetDataRandomizerOptions conf_
 

Detailed Description

template<typename T>
class kaldi::nnet1::StdVectorRandomizer< T >

Randomizes elements of a vector according to a mask.

Definition at line 209 of file nnet-randomizer.h.

Constructor & Destructor Documentation

◆ StdVectorRandomizer() [1/2]

StdVectorRandomizer ( )
inline

Definition at line 211 of file nnet-randomizer.h.

211  :
212  data_begin_(0),
213  data_end_(0)
214  { }
int32 data_end_
A cursor pointing to the 'row' after the end of data.
int32 data_begin_
A cursor pointing to the 'row' where the next mini-batch begins.

◆ StdVectorRandomizer() [2/2]

StdVectorRandomizer ( const NnetDataRandomizerOptions &  conf)
inline explicit

Definition at line 216 of file nnet-randomizer.h.

216  :
217  data_begin_(0),
218  data_end_(0)
219  {
220  Init(conf);
221  }
int32 data_end_
A cursor pointing to the 'row' after the end of data.
int32 data_begin_
A cursor pointing to the 'row' where the next mini-batch begins.
void Init(const NnetDataRandomizerOptions &conf)
Set the randomizer parameters (size)

Member Function Documentation

◆ AddData()

void AddData ( const std::vector< T > &  v)

Add data to randomization buffer.

Definition at line 171 of file nnet-randomizer.cc.

References data_, and KALDI_ASSERT.

Referenced by main(), and UnitTestStdVectorRandomizer().

171  {
172  // pre-allocate before 1st use
173  if (data_.size() == 0) {
174  data_.resize(conf_.randomizer_size);
175  }
176  // optionally put previous left-over to front
177  if (data_begin_ > 0) {
178  KALDI_ASSERT(data_begin_ <= data_end_); // sanity check
179  int32 leftover = data_end_ - data_begin_;
180  KALDI_ASSERT(leftover < data_begin_); // no overlap
181  if (leftover > 0) {
182  typename std::vector<T>::iterator leftover_begin = data_.begin() + data_begin_;
183  std::copy(leftover_begin, leftover_begin + leftover, data_.begin());
184  }
185  data_begin_ = 0;
186  data_end_ = leftover;
187  }
188  // extend the buffer if necessary
189  if (data_.size() < data_end_ + v.size()) {
190  data_.resize(data_end_ + v.size() + 1000); // +1000 row surplus
191  }
192  // copy the data
193  std::copy(v.begin(), v.end(), data_.begin()+data_end_);
194  data_end_ += v.size();
195 }
NnetDataRandomizerOptions conf_
int32 data_end_
A cursor pointing to the 'row' after the end of data.
int32 data_begin_
A cursor pointing to the 'row' where the next mini-batch begins.
kaldi::int32 int32
int32 randomizer_size
Maximum number of samples we have in memory.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Done()

bool Done ( )
inline

Returns true if there is no more data for another mini-batch (after the current one).

Definition at line 245 of file nnet-randomizer.h.

Referenced by UnitTestStdVectorRandomizer().

245  {
247  }
NnetDataRandomizerOptions conf_
int32 data_end_
A cursor pointing to the 'row' after the end of data.
int32 data_begin_
A cursor pointing to the 'row' where the next mini-batch begins.

◆ Init()

void Init ( const NnetDataRandomizerOptions &  conf)
inline

Set the randomizer parameters (size)

Definition at line 224 of file nnet-randomizer.h.

Referenced by UnitTestStdVectorRandomizer().

224  {
225  conf_ = conf;
226  }
NnetDataRandomizerOptions conf_

◆ IsFull()

bool IsFull ( )
inline

Returns true when the capacity is full.

Definition at line 232 of file nnet-randomizer.h.

Referenced by UnitTestStdVectorRandomizer().

232  {
233  return ((data_begin_ == 0) && (data_end_ > conf_.randomizer_size ));
234  }
NnetDataRandomizerOptions conf_
int32 data_end_
A cursor pointing to the 'row' after the end of data.
int32 data_begin_
A cursor pointing to the 'row' where the next mini-batch begins.
int32 randomizer_size
Maximum number of samples we have in memory.

◆ Next()

void Next ( )

Sets cursor to next mini-batch.

Definition at line 211 of file nnet-randomizer.cc.

Referenced by main(), and UnitTestStdVectorRandomizer().

211  {
213 }
NnetDataRandomizerOptions conf_
int32 data_begin_
A cursor pointing to the 'row' where the next mini-batch begins.

◆ NumFrames()

int32 NumFrames ( )
inline

Number of frames stored inside the Randomizer.

Definition at line 237 of file nnet-randomizer.h.

References kaldi::cu::Randomize().

Referenced by UnitTestStdVectorRandomizer().

237  {
238  return data_end_;
239  }
int32 data_end_
A cursor pointing to the 'row' after the end of data.

◆ Randomize()

void Randomize ( const std::vector< int32 > &  mask)

Randomize the order of the vector elements using mask.

Definition at line 198 of file nnet-randomizer.cc.

References data_, rnnlm::i, and KALDI_ASSERT.

Referenced by main(), and UnitTestStdVectorRandomizer().

198  {
200  KALDI_ASSERT(data_end_ > 0);
201  KALDI_ASSERT(data_end_ == mask.size());
202  // Use auxiliary buffer for unshuffled data
203  std::vector<T> data_aux(data_);
204  // randomize the data, mask is used to index elements in source vector
205  for (int32 i = 0; i < mask.size(); i++) {
206  data_.at(i) = data_aux.at(mask.at(i));
207  }
208 }
int32 data_end_
A cursor pointing to the 'row' after the end of data.
int32 data_begin_
A cursor pointing to the 'row' where the next mini-batch begins.
kaldi::int32 int32
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Value()

const std::vector< T > & Value ( )

Returns a vector with the next mini-batch.

Definition at line 216 of file nnet-randomizer.cc.

References data_, and KALDI_ASSERT.

Referenced by main(), and UnitTestStdVectorRandomizer().

216  {
217  // make sure we have enough data for minibatch,
219  // prepare the mini-batch buffer,
221  typename std::vector<T>::iterator first = data_.begin() + data_begin_;
222  typename std::vector<T>::iterator last = first + conf_.minibatch_size;
223  std::copy(first, last, minibatch_.begin());
224  return minibatch_;
225 }
NnetDataRandomizerOptions conf_
int32 data_end_
A cursor pointing to the 'row' after the end of data.
int32 data_begin_
A cursor pointing to the 'row' where the next mini-batch begins.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

Member Data Documentation

◆ conf_

Definition at line 264 of file nnet-randomizer.h.

◆ data_

std::vector<T> data_
private

Definition at line 256 of file nnet-randomizer.h.

◆ data_begin_

int32 data_begin_
private

A cursor pointing to the 'row' where the next mini-batch begins.

Definition at line 260 of file nnet-randomizer.h.

◆ data_end_

int32 data_end_
private

A cursor pointing to the 'row' after the end of data.

Definition at line 262 of file nnet-randomizer.h.

◆ minibatch_

std::vector<T> minibatch_
private

Definition at line 257 of file nnet-randomizer.h.


The documentation for this class was generated from the following files: