30 WriteToken(os, binary,
"<NnetDiscriminativeSup>");
36 WriteToken(os, binary,
"</NnetDiscriminativeSup>");
52 ExpectToken(is, binary,
"</NnetDiscriminativeSup>");
71 for (
int32 i = 0;
i < frames_per_sequence;
i++) {
72 for (
int32 j = 0;
j < num_sequences;
j++,k++) {
73 int32 n =
j, t =
i * frame_skip + first_frame, x = 0;
92 const std::string &
name,
98 supervision(supervision),
99 deriv_weights(deriv_weights) {
105 for (
int32 i = 0;
i < frames_per_sequence;
i++) {
106 for (
int32 j = 0;
j < num_sequences;
j++,k++) {
108 indexes[k].t =
i * frame_skip + first_frame;
128 WriteToken(os, binary,
"<Nnet3DiscriminativeEg>");
130 int32 size = inputs.size();
132 KALDI_ASSERT(size > 0 &&
"Attempting to write NnetDiscriminativeExample with no inputs");
133 if (!binary) os <<
'\n';
135 inputs[
i].Write(os, binary);
136 if (!binary) os <<
'\n';
139 size = outputs.size();
141 KALDI_ASSERT(size > 0 &&
"Attempting to write NnetDiscriminativeExample with no outputs");
142 if (!binary) os <<
'\n';
144 outputs[
i].Write(os, binary);
145 if (!binary) os <<
'\n';
147 WriteToken(os, binary,
"</Nnet3DiscriminativeEg>");
151 ExpectToken(is, binary,
"<Nnet3DiscriminativeEg>");
155 if (size < 1 || size > 1000000)
159 inputs[
i].
Read(is, binary);
162 if (size < 1 || size > 1000000)
164 outputs.resize(size);
166 outputs[
i].
Read(is, binary);
167 ExpectToken(is, binary,
"</Nnet3DiscriminativeEg>");
171 inputs.swap(other->
inputs);
176 std::vector<NnetIo>::iterator iter = inputs.begin(), end = inputs.end();
179 for (; iter != end; ++iter) iter->features.Compress();
183 inputs(other.inputs), outputs(other.outputs) { }
186 const std::vector<const NnetDiscriminativeSupervision*> &
inputs,
188 int32 num_inputs = inputs.size(),
190 for (
int32 n = 0;
n < num_inputs;
n++) {
192 num_indexes += inputs[
n]->indexes.size();
194 output->
name = inputs[0]->name;
195 std::vector<const discriminative::DiscriminativeSupervision*> input_supervision;
196 input_supervision.reserve(inputs.size());
197 for (
int32 n = 0;
n < num_inputs;
n++)
198 input_supervision.push_back(&(inputs[
n]->supervision));
201 &output_supervision);
205 output->
indexes.reserve(num_indexes);
206 for (
int32 n = 0; n < num_inputs; n++) {
207 const std::vector<Index> &src_indexes = inputs[
n]->indexes;
210 src_indexes.begin(), src_indexes.end());
211 std::vector<Index>::iterator iter = output->
indexes.begin() + cur_size,
215 for (; iter != end; ++iter) {
216 KALDI_ASSERT(iter->n == 0 &&
"Merging already-merged discriminative egs");
228 if (inputs[0]->deriv_weights.Dim() != 0) {
229 int32 frames_per_sequence = inputs[0]->deriv_weights.Dim();
232 frames_per_sequence * num_inputs);
233 for (
int32 n = 0; n < num_inputs; n++) {
238 for (
int32 t = 0; t < frames_per_sequence; t++) {
239 output->
deriv_weights(t * num_inputs + n) = src_deriv_weights(t);
249 std::vector<NnetDiscriminativeExample> *input,
251 int32 num_examples = input->size();
256 std::vector<NnetExample> eg_inputs(num_examples);
257 for (
int32 i = 0;
i < num_examples;
i++)
258 eg_inputs[
i].io.swap((*input)[
i].inputs);
262 for (
int32 i = 0; i < num_examples; i++)
263 eg_inputs[i].io.swap((*input)[i].inputs);
270 int32 num_output_names = (*input)[0].outputs.size();
271 output->
outputs.resize(num_output_names);
272 for (
int32 i = 0; i < num_output_names; i++) {
273 std::vector<const NnetDiscriminativeSupervision*> to_merge(num_examples);
274 for (
int32 j = 0;
j < num_examples;
j++) {
276 to_merge[
j] = &((*input)[
j].outputs[
i]);
286 bool need_model_derivative,
287 bool store_component_stats,
288 bool use_xent_regularization,
289 bool use_xent_derivative,
297 for (
size_t i = 0;
i < eg.
inputs.size();
i++) {
299 const std::string &name = io.
name;
301 if (node_index == -1 &&
303 KALDI_ERR <<
"Nnet example has input named '" << name
304 <<
"', but no such input node is in the network.";
312 for (
size_t i = 0;
i < eg.
outputs.size();
i++) {
315 const std::string &name = sup.
name;
317 if (node_index == -1 &&
319 KALDI_ERR <<
"Nnet example has output named '" << name
320 <<
"', but no such output node is in the network.";
325 io_spec.
has_deriv = need_model_derivative;
327 if (use_xent_regularization) {
328 size_t cur_size = request->
outputs.size();
329 request->
outputs.resize(cur_size + 1);
331 &io_spec_xent = request->
outputs[cur_size];
335 io_spec_xent = io_spec;
336 io_spec_xent.
name = name +
"-xent";
337 io_spec_xent.has_deriv = use_xent_derivative;
341 if (request->
inputs.empty())
342 KALDI_ERR <<
"No inputs in computation request.";
344 KALDI_ERR <<
"No outputs in computation request.";
348 const std::vector<std::string> &exclude_names,
350 std::vector<NnetIo>::iterator input_iter = eg->
inputs.begin(),
351 input_end = eg->
inputs.end();
352 for (; input_iter != input_end; ++input_iter) {
353 bool must_exclude =
false;
354 std::vector<string>::const_iterator exclude_iter = exclude_names.begin(),
355 exclude_end = exclude_names.end();
356 for (; exclude_iter != exclude_end; ++exclude_iter)
357 if (input_iter->name == *exclude_iter)
360 std::vector<Index>::iterator indexes_iter = input_iter->indexes.begin(),
361 indexes_end = input_iter->indexes.end();
362 for (; indexes_iter != indexes_end; ++indexes_iter)
363 indexes_iter->t += frame_shift;
369 std::vector<NnetDiscriminativeSupervision>::iterator
370 sup_iter = eg->
outputs.begin(),
372 for (; sup_iter != sup_end; ++sup_iter) {
373 std::vector<Index> &indexes = sup_iter->indexes;
374 KALDI_ASSERT(indexes.size() >= 2 && indexes[0].n == indexes[1].n &&
375 indexes[0].x == indexes[1].x);
376 int32 frame_subsampling_factor = indexes[1].t - indexes[0].t;
381 int32 supervision_frame_shift =
382 frame_subsampling_factor *
383 std::floor(0.5 + (frame_shift * 1.0 / frame_subsampling_factor));
384 if (supervision_frame_shift == 0)
386 std::vector<Index>::iterator indexes_iter = indexes.begin(),
387 indexes_end = indexes.end();
388 for (; indexes_iter != indexes_end; ++indexes_iter)
389 indexes_iter->t += supervision_frame_shift;
397 size_t size = eg.inputs.size(), ans = size * 35099;
398 for (
size_t i = 0;
i < size;
i++)
399 ans = ans * 19157 + io_hasher(eg.inputs[
i]);
400 for (
size_t i = 0;
i < eg.outputs.size();
i++) {
405 string_hasher(sup.
name) + indexes_hasher(sup.
indexes);
417 size_t size = a.
inputs.size();
418 for (
size_t i = 0;
i < size;
i++)
422 for (
size_t i = 0;
i < size;
i++)
432 for (
size_t i = 0;
i < a.
inputs.size();
i++) {
437 for (
size_t i = 0;
i < a.
outputs.size();
i++) {
448 finished_(false), num_egs_written_(0),
449 config_(config), writer_(writer) { }
459 std::vector<NnetDiscriminativeExample*> &vec =
eg_to_egs_[eg];
462 num_available = vec.size();
463 bool input_ended =
false;
466 if (minibatch_size != 0) {
469 std::vector<NnetDiscriminativeExample*> vec_copy(vec);
474 std::vector<NnetDiscriminativeExample> egs_to_merge(minibatch_size);
475 for (
int32 i = 0;
i < minibatch_size;
i++) {
476 egs_to_merge[
i].Swap(vec_copy[
i]);
484 std::vector<NnetDiscriminativeExample> *egs) {
488 size_t structure_hash = eg_hasher((*egs)[0]);
489 int32 minibatch_size = egs->size();
493 std::ostringstream key;
504 std::vector<std::vector<NnetDiscriminativeExample*> > all_egs;
508 for (; iter != end; ++iter)
509 all_egs.push_back(iter->second);
512 for (
size_t i = 0;
i < all_egs.size();
i++) {
513 int32 minibatch_size;
514 std::vector<NnetDiscriminativeExample*> &vec = all_egs[
i];
517 bool input_ended =
true;
518 while (!vec.empty() &&
520 input_ended)) != 0) {
524 std::vector<NnetDiscriminativeExample> egs_to_merge(minibatch_size);
525 for (
int32 i = 0;
i < minibatch_size;
i++) {
526 egs_to_merge[
i].Swap(vec[
i]);
529 vec.erase(vec.begin(), vec.begin() + minibatch_size);
535 size_t structure_hash = eg_hasher(*(vec[0]));
536 int32 num_discarded = vec.size();
538 for (
int32 i = 0;
i < num_discarded;
i++)
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
void ShiftDiscriminativeExampleTimes(int32 frame_shift, const std::vector< std::string > &exclude_names, NnetDiscriminativeExample *eg)
Shifts the time-index t of everything in the input of "eg" by adding "frame_shift" to all "t" values– b...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void WriteIndexVector(std::ostream &os, bool binary, const std::vector< Index > &vec)
void DiscardedExamples(int32 example_size, size_t structure_hash, int32 num_discarded)
Users call this function to inform this class that after processing all the data, for examples of ori...
void AcceptExample(NnetDiscriminativeExample *a)
bool store_component_stats
you should set store_component_stats to true if you need the average-activation and average-derivative...
bool need_model_derivative
if need_model_derivative is true, then we'll be doing either model training or model-derivative compu...
NnetDiscriminativeExampleWriter * writer_
void Write(std::ostream &os, bool binary) const
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
bool IsInputNode(int32 node) const
Returns true if this is an input node, meaning that it is of type kInput.
static void MergeSupervision(const std::vector< const NnetChainSupervision *> &inputs, NnetChainSupervision *output)
int32 MinibatchSize(int32 size_of_eg, int32 num_available_egs, bool input_ended) const
This function tells you what minibatch size should be used for this eg.
A templated class for writing objects to an archive or script file; see The Table concept...
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
std::vector< IoSpecification > inputs
void MergeSupervision(const std::vector< const DiscriminativeSupervision *> &input, DiscriminativeSupervision *output_supervision)
This function appends a list of supervision objects to create what will usually be a single such obje...
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of th...
A hashing function object for strings.
bool operator()(const NnetDiscriminativeExample &a, const NnetDiscriminativeExample &b) const
This hashing object hashes just the structural aspects of the NnetExample without looking at the valu...
void Write(const std::string &key, const T &value) const
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
void Swap(DiscriminativeSupervision *other)
void PrintStats() const
Calling this will cause a log message with information about the examples to be printed.
void WriteVectorAsChar(std::ostream &os, bool binary, const VectorBase< BaseFloat > &vec)
void Read(std::istream &is, bool binary)
void WriteMinibatch(std::vector< NnetDiscriminativeExample > *egs)
NnetDiscriminativeSupervision()
bool IsOutputNode(int32 node) const
Returns true if this is an output node, meaning that it is of type kDescriptor and is not directly fo...
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
size_t operator()(const NnetDiscriminativeExample &eg) const noexcept
void Write(std::ostream &os, bool binary) const
void Read(std::istream &is, bool binary)
int32 frames_per_sequence
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Swap(NnetDiscriminativeSupervision *other)
void ReadIndexVector(std::istream &is, bool binary, std::vector< Index > *vec)
void WroteExample(int32 example_size, size_t structure_hash, int32 minibatch_size)
Users call this function to inform this class that one minibatch has been written aggregating 'miniba...
ExampleMergingStats stats_
DiscriminativeExampleMerger(const ExampleMergingConfig &config, NnetDiscriminativeExampleWriter *writer)
NnetDiscriminativeExample()
std::vector< Index > indexes
std::vector< Index > indexes
A class representing a vector.
#define KALDI_ASSERT(cond)
void Swap(NnetDiscriminativeExample *other)
std::vector< IoSpecification > outputs
This comparison object compares just the structural aspects of the NnetIo object (name, indexes, feature dimension) without looking at the value of features.
Vector< BaseFloat > deriv_weights
std::vector< NnetIo > inputs
'inputs' contains the input to the network – normally it has just one element called "input"...
std::vector< NnetDiscriminativeSupervision > outputs
'outputs' contains the sequence output supervision.
This hashing object hashes just the structural aspects of the NnetIo object (name, indexes, feature dimension) without looking at the value of features.
void ReadVectorAsChar(std::istream &is, bool binary, Vector< BaseFloat > *vec)
void MergeDiscriminativeExamples(bool compress, std::vector< NnetDiscriminativeExample > *input, NnetDiscriminativeExample *output)
int32 GetNnetDiscriminativeExampleSize(const NnetDiscriminativeExample &a)
void GetDiscriminativeComputationRequest(const Nnet &nnet, const NnetDiscriminativeExample &eg, bool need_model_derivative, bool store_component_stats, bool use_xent_regularization, bool use_xent_derivative, ComputationRequest *request)
This function takes a NnetDiscriminativeExample and produces a ComputationRequest.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
std::string name
the name of the input in the neural net; in simple setups it will just be "input".
bool operator==(const NnetDiscriminativeSupervision &other) const
const ExampleMergingConfig & config_
int32 GetNodeIndex(const std::string &node_name) const
returns index associated with this node name, or -1 if no such index.
Provides a vector abstraction class.
discriminative::DiscriminativeSupervision supervision
std::vector< NnetIo > io
"io" contains the input and output.
void Write(std::ostream &os, bool binary) const
NnetDiscriminativeExample is like NnetExample, but specialized for sequence training.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
void Read(std::istream &is, bool binary)
void MergeExamples(const std::vector< NnetExample > &src, bool compress, NnetExample *merged_eg)
Merge a set of input examples into a single example (typically the size of "src" will be the minibatc...