nnet-example.cc
Go to the documentation of this file.
1 // nnet3/nnet-example.cc
2 
3 // Copyright 2012-2015 Johns Hopkins University (author: Daniel Povey)
4 // 2014 Vimal Manohar
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include "nnet3/nnet-example.h"
22 #include "lat/lattice-functions.h"
23 #include "hmm/posterior.h"
24 
25 namespace kaldi {
26 namespace nnet3 {
27 
28 void NnetIo::Write(std::ostream &os, bool binary) const {
29  KALDI_ASSERT(features.NumRows() == static_cast<int32>(indexes.size()));
30  WriteToken(os, binary, "<NnetIo>");
31  WriteToken(os, binary, name);
32  WriteIndexVector(os, binary, indexes);
33  features.Write(os, binary);
34  WriteToken(os, binary, "</NnetIo>");
35  KALDI_ASSERT(static_cast<size_t>(features.NumRows()) == indexes.size());
36 }
37 
38 void NnetIo::Read(std::istream &is, bool binary) {
39  ExpectToken(is, binary, "<NnetIo>");
40  ReadToken(is, binary, &name);
41  ReadIndexVector(is, binary, &indexes);
42  features.Read(is, binary);
43  ExpectToken(is, binary, "</NnetIo>");
44 }
45 
46 bool NnetIo::operator == (const NnetIo &other) const {
47  if (name != other.name) return false;
48  if (indexes != other.indexes) return false;
49  if (features.NumRows() != other.features.NumRows() ||
50  features.NumCols() != other.features.NumCols())
51  return false;
52  Matrix<BaseFloat> this_mat, other_mat;
53  features.GetMatrix(&this_mat);
54  other.features.GetMatrix(&other_mat);
55  return ApproxEqual(this_mat, other_mat);
56 }
57 
58 NnetIo::NnetIo(const std::string &name,
59  int32 t_begin, const MatrixBase<BaseFloat> &feats,
60  int32 t_stride):
61  name(name), features(feats) {
62  int32 num_rows = feats.NumRows();
63  KALDI_ASSERT(num_rows > 0);
64  indexes.resize(num_rows); // sets all n,t,x to zeros.
65  for (int32 i = 0; i < num_rows; i++)
66  indexes[i].t = t_begin + i * t_stride;
67 }
68 
69 NnetIo::NnetIo(const std::string &name,
70  int32 t_begin, const GeneralMatrix &feats,
71  int32 t_stride):
72  name(name), features(feats) {
73  int32 num_rows = feats.NumRows();
74  KALDI_ASSERT(num_rows > 0);
75  indexes.resize(num_rows); // sets all n,t,x to zeros.
76  for (int32 i = 0; i < num_rows; i++)
77  indexes[i].t = t_begin + i * t_stride;
78 }
79 
80 void NnetIo::Swap(NnetIo *other) {
81  name.swap(other->name);
82  indexes.swap(other->indexes);
83  features.Swap(&(other->features));
84 }
85 
86 NnetIo::NnetIo(const std::string &name,
87  int32 dim,
88  int32 t_begin,
89  const Posterior &labels,
90  int32 t_stride):
91  name(name) {
92  int32 num_rows = labels.size();
93  KALDI_ASSERT(num_rows > 0);
94  SparseMatrix<BaseFloat> sparse_feats(dim, labels);
95  features = sparse_feats;
96  indexes.resize(num_rows); // sets all n,t,x to zeros.
97  for (int32 i = 0; i < num_rows; i++)
98  indexes[i].t = t_begin + i * t_stride;
99 }
100 
101 
102 
103 void NnetExample::Write(std::ostream &os, bool binary) const {
104  // Note: weight, label, input_frames and spk_info are members. This is a
105  // struct.
106  WriteToken(os, binary, "<Nnet3Eg>");
107  WriteToken(os, binary, "<NumIo>");
108  int32 size = io.size();
109  KALDI_ASSERT(size > 0 && "Writing empty nnet example");
110  WriteBasicType(os, binary, size);
111  for (int32 i = 0; i < size; i++)
112  io[i].Write(os, binary);
113  WriteToken(os, binary, "</Nnet3Eg>");
114 }
115 
116 void NnetExample::Read(std::istream &is, bool binary) {
117  ExpectToken(is, binary, "<Nnet3Eg>");
118  ExpectToken(is, binary, "<NumIo>");
119  int32 size;
120  ReadBasicType(is, binary, &size);
121  if (size <= 0 || size > 1000000)
122  KALDI_ERR << "Invalid size " << size;
123  io.resize(size);
124  for (int32 i = 0; i < size; i++)
125  io[i].Read(is, binary);
126  ExpectToken(is, binary, "</Nnet3Eg>");
127 }
128 
129 
// NOTE(review): the signature line that opens this body is absent from this
// listing (the embedded numbering skips 130); confirm it against the header.
// The statements below visit every element of 'io' in order and call
// Compress() on that element's 'features'.
131  std::vector<NnetIo>::iterator iter = io.begin(), end = io.end();
132  // calling features.Compress() will do nothing if they are sparse or already
133  // compressed.
134  for (; iter != end; ++iter)
135  iter->features.Compress();
136 }
137 
138 
// NOTE(review): the first line of this signature is absent from this listing
// (the embedded numbering skips 139); confirm it against the header.
// The body returns a size_t built from four terms added together: a hash of
// io.name, a hash of io.indexes, and the row and column counts of io.features
// each scaled by a fixed constant. The feature values are never read.
140  const NnetIo &io) const noexcept {
141  StringHasher string_hasher;
142  IndexVectorHasher indexes_hasher;
143 
144  // numbers appearing here were taken at random from a list of primes.
145  size_t ans = string_hasher(io.name) +
146  indexes_hasher(io.indexes) +
147  19249 * io.features.NumRows() +
148  14731 * io.features.NumCols();
149  return ans;
150 }
151 
152 
// NOTE(review): the first line of this signature is absent from this listing
// (the embedded numbering skips 153); confirm it against the header.
// The body returns true only when 'a' and 'b' have the same name, the same
// number of feature rows and columns, and equal indexes. The feature values
// are not compared.
154  const NnetIo &a, const NnetIo &b) const {
155  return a.name == b.name &&
156  a.features.NumRows() == b.features.NumRows() &&
157  a.features.NumCols() == b.features.NumCols() &&
158  a.indexes == b.indexes;
159 }
160 
161 
// NOTE(review): the first line of this signature is absent from this listing
// (the embedded numbering skips 162); confirm it against the header.
// The body starts from the element count of eg.io times a constant, then for
// each element in order multiplies the running value by a second constant and
// adds that element's NnetIoStructureHasher result.
163  const NnetExample &eg) const noexcept {
164  // these numbers were chosen at random from a list of primes.
165  NnetIoStructureHasher io_hasher;
166  size_t size = eg.io.size(), ans = size * 35099;
167  for (size_t i = 0; i < size; i++)
168  ans = ans * 19157 + io_hasher(eg.io[i]);
169  return ans;
170 }
171 
// NOTE(review): the first line of this signature (which would declare 'a') is
// absent from this listing (the embedded numbering skips 172); confirm it
// against the header.
// The body returns false when a.io and b.io differ in length, or when any pair
// of elements at the same position fails NnetIoStructureCompare; otherwise it
// returns true.
173  const NnetExample &b) const {
174  NnetIoStructureCompare io_compare;
175  if (a.io.size() != b.io.size())
176  return false;
177  size_t size = a.io.size();
178  for (size_t i = 0; i < size; i++)
179  if (!io_compare(a.io[i], b.io[i]))
180  return false;
181  return true;
182 }
183 
184 
185 
186 } // namespace nnet3
187 } // namespace kaldi
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
Definition: nnet-example.h:111
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void WriteIndexVector(std::ostream &os, bool binary, const std::vector< Index > &vec)
Definition: nnet-common.cc:126
This class is a wrapper that enables you to store a matrix in one of three forms: either as a Matrix<...
void GetMatrix(Matrix< BaseFloat > *mat) const
Outputs the contents as a matrix.
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
bool operator==(const NnetIo &other) const
Definition: nnet-example.cc:46
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
GeneralMatrix features
The features or labels.
Definition: nnet-example.h:46
bool operator()(const NnetExample &a, const NnetExample &b) const
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of th...
Definition: nnet-example.h:42
A hashing function object for strings.
Definition: stl-utils.h:248
void Read(std::istream &is, bool binary)
Note: if you write a compressed matrix in text form, it will be read as a regular full matrix...
void Swap(NnetIo *other)
Definition: nnet-example.cc:80
void Swap(GeneralMatrix *other)
MatrixIndexT NumCols() const
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
void Write(std::ostream &os, bool binary) const
bool operator()(const NnetIo &a, const NnetIo &b) const
#define KALDI_ERR
Definition: kaldi-error.h:147
size_t operator()(const NnetIo &a) const noexcept
void Compress()
Compresses any (input) features that are not sparse.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
void ReadIndexVector(std::istream &is, bool binary, std::vector< Index > *vec)
Definition: nnet-common.cc:143
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
This comparison object compares just the structural aspects of the NnetIo object (name, indexes, feature dimension) without looking at the value of features.
Definition: nnet-example.h:101
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
MatrixIndexT NumRows() const
void Read(std::istream &is, bool binary)
void Read(std::istream &is, bool binary)
Definition: nnet-example.cc:38
This hashing object hashes just the structural aspects of the NnetIo object (name, indexes, feature dimension) without looking at the value of features.
Definition: nnet-example.h:94
size_t operator()(const NnetExample &eg) const noexcept
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
std::string name
the name of the input in the neural net; in simple setups it will just be "input".
Definition: nnet-example.h:36
std::vector< NnetIo > io
"io" contains the input and output.
Definition: nnet-example.h:116
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
void Write(std::ostream &os, bool binary) const
Definition: nnet-example.cc:28
void Write(std::ostream &os, bool binary) const