MatrixBuffer Class Reference

A buffer for caching (utterance-key, feature-matrix) pairs. More...

#include <nnet-matrix-buffer.h>

Collaboration diagram for MatrixBuffer:

Public Member Functions

 MatrixBuffer ()
 
 ~MatrixBuffer ()
 
void Init (SequentialBaseFloatMatrixReader *reader, MatrixBufferOptions opts=MatrixBufferOptions())
 
bool Done ()
 
void Next ()
 
void ResetLength ()
 
std::string Key ()
 
Matrix< BaseFloat > Value ()
 
size_t SizeInBytes () const
 Total amount of features in the buffer (bytes). More...
 
size_t SizeInMegaBytes () const
 Total amount of features in the buffer (Mega-bytes). More...
 
size_t NumPairs () const
 Total number of (key,matrix) pairs in the buffer. More...
 

Private Types

typedef std::pair< std::string, Matrix< BaseFloat > > PairType
 
typedef std::list< PairType > ListType
 
typedef std::map< size_t, ListType > BufferType
 

Private Member Functions

void Read ()
 fills the buffer, More...
 
void DisposeValue ()
 removes 'current_' from data structure, More...
 

Private Attributes

SequentialBaseFloatMatrixReader * reader_
 
BufferType buffer_
 Buffer indexed by 'NumRows()'. More...
 
PairType * current_
 The currently active (key,value) pair. More...
 
MatrixBufferOptions opts_
 
size_t preferred_length_
 

Detailed Description

A buffer for caching (utterance-key, feature-matrix) pairs.

Typically, it reads 'matrix_buffer_size' megabytes of data and returns records with a similar number of speech frames through the standard Key(), Value(), Next(), Done() interface.

The preferred length is reset by ResetLength(). The buffer is refilled once it holds less than 50% of 'matrix_buffer_size'.

Definition at line 61 of file nnet-matrix-buffer.h.

Member Typedef Documentation

◆ BufferType

typedef std::map<size_t, ListType> BufferType
private

Definition at line 116 of file nnet-matrix-buffer.h.

◆ ListType

typedef std::list<PairType> ListType
private

Definition at line 115 of file nnet-matrix-buffer.h.

◆ PairType

typedef std::pair<std::string,Matrix<BaseFloat> > PairType
private

Definition at line 114 of file nnet-matrix-buffer.h.

Constructor & Destructor Documentation

◆ MatrixBuffer()

MatrixBuffer ( )
inline

Definition at line 63 of file nnet-matrix-buffer.h.

63  MatrixBuffer():
64  reader_(NULL),
65  current_(NULL),
66  preferred_length_(0)
67  { }
PairType * current_
The currently active (key,value) pair,.
SequentialBaseFloatMatrixReader * reader_

◆ ~MatrixBuffer()

~MatrixBuffer ( )
inline

Definition at line 69 of file nnet-matrix-buffer.h.

70  { }

Member Function Documentation

◆ DisposeValue()

void DisposeValue ( )
private

removes 'current_' from data structure,

Definition at line 215 of file nnet-matrix-buffer.h.

References KALDI_ASSERT.

215  {
216  // remove old 'Value()' matrix,
217  if (current_ != NULL) {
218  size_t r = current_->second.NumRows();
219  KALDI_ASSERT(current_ == &(buffer_[r].front()));
220  // remove the (key,value) pair,
221  buffer_[r].pop_front();
222  // eventually remove the 'NumRows()' key,
223  if (buffer_[r].empty()) { buffer_.erase(r); }
224  current_ = NULL;
225  }
226 }
PairType * current_
The currently active (key,value) pair,.
BufferType buffer_
Buffer indexed by 'NumRows()'.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Done()

bool Done ( )
inline

Definition at line 81 of file nnet-matrix-buffer.h.

81  {
82  return (reader_->Done() && NumPairs() <= 1);
83  }
SequentialBaseFloatMatrixReader * reader_
size_t NumPairs() const
Total number of (key,matrix) pairs in the buffer,.

◆ Init()

void Init ( SequentialBaseFloatMatrixReader * reader,
MatrixBufferOptions  opts = MatrixBufferOptions() 
)
inline

Definition at line 72 of file nnet-matrix-buffer.h.

References KALDI_ASSERT.

Referenced by main().

73  {
74  KALDI_ASSERT(SizeInBytes() == 0);
75  reader_ = reader;
76  opts_ = opts;
77 
78  Read();
79  }
SequentialBaseFloatMatrixReader * reader_
void Read()
fills the buffer,
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
size_t SizeInBytes() const
Total amount of features in the buffer (bytes),.

◆ Key()

std::string Key ( )
inline

Definition at line 91 of file nnet-matrix-buffer.h.

91  {
92  return current_->first;
93  }
PairType * current_
The currently active (key,value) pair,.

◆ Next()

void Next ( )

Definition at line 126 of file nnet-matrix-buffer.h.

References rnnlm::i, and KALDI_ASSERT.

126  {
127  KALDI_ASSERT(!buffer_.empty());
128 
129  // remove old 'Value()' matrix,
130  DisposeValue();
131 
132  // start re-filling,
133  if (SizeInMegaBytes() < 0.5 * opts_.matrix_buffer_size) {
134  Read();
135  }
136 
137  KALDI_ASSERT(!buffer_.empty());
138 
139  // randomly select 'length' present in the 'map',
140  // (weighted by total #frames in the bin),
141  if (preferred_length_ == 0) {
142  int32 longest = (--buffer_.end())->first;
143  // pre-fill the vector of 'keys',
144  std::vector<int32> keys;
145  BufferType::iterator it;
146  for (it = buffer_.begin(); it != buffer_.end(); ++it) {
147  int32 key = it->first; // i.e. NumRows() of matrices in the bin,
148  int32 frames_in_bin = it->second.size() * key;
149  for (int32 i = 0; i < frames_in_bin; i += longest) {
150  keys.push_back(key); // keys are repeated,
151  }
152  }
153  // choose the key,
154  std::vector<int32>::iterator it2 = keys.begin();
155  std::advance(it2, rand() % keys.size());
156  preferred_length_ = (*it2); // NumRows(), key of the 'map',
157  }
158 
159  // select list by 'preferred_length_',
160  BufferType::iterator it = buffer_.lower_bound(preferred_length_);
161  if (it == buffer_.end()) { --it; } // or the last one,
162 
163  // take a front element 'ptr' from that list,
164  current_ = &(it->second.front());
165 }
PairType * current_
The currently active (key,value) pair,.
BufferType buffer_
Buffer indexed by 'NumRows()'.
kaldi::int32 int32
void DisposeValue()
removes 'current_' from data structure,
void Read()
fills the buffer,
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
size_t SizeInMegaBytes() const
Total amount of features in the buffer (Mega-bytes),.

◆ NumPairs()

size_t NumPairs ( ) const

Total number of (key,matrix) pairs in the buffer,.

Definition at line 181 of file nnet-matrix-buffer.h.

181  {
182  size_t ans = 0;
183  for (BufferType::const_iterator it = buffer_.begin(); it != buffer_.end(); ++it) {
184  ans += it->second.size();
185  }
186  return ans;
187 }
BufferType buffer_
Buffer indexed by 'NumRows()'.

◆ Read()

void Read ( )
private

fills the buffer,

Definition at line 189 of file nnet-matrix-buffer.h.

References KALDI_LOG, and MatrixBase< Real >::NumRows().

189  {
190  if (!reader_->Done())
191  KALDI_LOG << "Read() started... Buffer size in MB: "
192  << SizeInMegaBytes() << ", max " << opts_.matrix_buffer_size
193  << ", having " << NumPairs() << " utterances.";
194  for ( ; !reader_->Done(); reader_->Next()) {
195  // see if we are full,
196  if (SizeInMegaBytes() > opts_.matrix_buffer_size) {
197  KALDI_LOG << "Read() finished... Buffer size in MB: "
198  << SizeInMegaBytes() << ", max " << opts_.matrix_buffer_size
199  << ", having " << NumPairs() << " utterances.";
200  break;
201  }
202  // get matrix,
203  const std::string& key = reader_->Key();
204  const Matrix<BaseFloat>& mat = reader_->Value();
205  size_t num_rows = mat.NumRows();
206  // see if 'num_rows' already in keys,
207  if (buffer_.find(num_rows) == buffer_.end()) {
208  buffer_[num_rows] = ListType(); // add empty list,
209  }
210  // add matrix to the buffer,
211  buffer_[num_rows].push_back(PairType(key, mat));
212  }
213 }
std::list< PairType > ListType
BufferType buffer_
Buffer indexed by 'NumRows()'.
std::pair< std::string, Matrix< BaseFloat > > PairType
SequentialBaseFloatMatrixReader * reader_
size_t NumPairs() const
Total number of (key,matrix) pairs in the buffer,.
size_t SizeInMegaBytes() const
Total amount of features in the buffer (Mega-bytes),.
#define KALDI_LOG
Definition: kaldi-error.h:153

◆ ResetLength()

void ResetLength ( )
inline

Definition at line 87 of file nnet-matrix-buffer.h.

87  {
88  preferred_length_ = 0;
89  }

◆ SizeInBytes()

size_t SizeInBytes ( ) const

Total amount of features in the buffer (bytes),.

Definition at line 167 of file nnet-matrix-buffer.h.

167  {
168  size_t ans = 0;
169  for (BufferType::const_iterator it = buffer_.begin(); it != buffer_.end(); ++it) {
170  for (ListType::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
171  ans += it2->second.SizeInBytes();
172  }
173  }
174  return ans;
175 }
BufferType buffer_
Buffer indexed by 'NumRows()'.

◆ SizeInMegaBytes()

size_t SizeInMegaBytes ( ) const

Total amount of features in the buffer (Mega-bytes),.

Definition at line 177 of file nnet-matrix-buffer.h.

177  {
178  return (SizeInBytes() / (1024 * 1024));
179 }
size_t SizeInBytes() const
Total amount of features in the buffer (bytes),.

◆ Value()

Matrix<BaseFloat> Value ( )
inline

Definition at line 94 of file nnet-matrix-buffer.h.

94  {
95  return current_->second;
96  }
PairType * current_
The currently active (key,value) pair,.

Member Data Documentation

◆ buffer_

BufferType buffer_
private

Buffer indexed by 'NumRows()',.

Definition at line 117 of file nnet-matrix-buffer.h.

◆ current_

PairType* current_
private

The currently active (key,value) pair,.

Definition at line 119 of file nnet-matrix-buffer.h.

◆ opts_

MatrixBufferOptions opts_
private

Definition at line 121 of file nnet-matrix-buffer.h.

◆ preferred_length_

size_t preferred_length_
private

Definition at line 123 of file nnet-matrix-buffer.h.

◆ reader_

SequentialBaseFloatMatrixReader* reader_
private

Definition at line 112 of file nnet-matrix-buffer.h.


The documentation for this class was generated from the following file: