Combines two or more streams of NN-training targets into a single stream.
This is handy when training an NN with more than one output layer (softmax). The NN targets are in 'posterior' format, and the dimensionality of the output stream is the sum of the input-stream dimensions.
33 using namespace kaldi;
38 "Combine 2 or more streams with NN-training targets into single stream.\n" 39 "As the posterior streams are pasted, the output dimension is the sum\n" 40 "of the input dimensions. This is used when training NN with\n" 41 "multiple softmaxes on its output. This is used in multi-task, \n" 42 "multi-lingual or multi-database training. Depending on the context,\n" 43 "an utterance is not required to be in all the input streams.\n" 44 "For a multi-database training only 1 output layer will be active.\n" 46 "The lengths of utterances are provided as 1st argument.\n" 47 "The dimensions of input stream are set as 2nd in argument.\n" 48 "Follow the input and output streams which are in 'posterior' format.\n" 50 "Usage: paste-post <featlen-rspecifier> <dims-csl> <post1-rspecifier> " 51 "... <postN-rspecifier> <post-wspecifier>\n" 52 "e.g.: paste-post 'ark:feat-to-len $feats ark,t:-|' 1029:1124 " 53 "ark:post1.ark ark:post2.ark ark:pasted.ark\n";
57 bool allow_partial =
false;
58 po.Register(
"allow-partial", &allow_partial,
59 "Produce output also when the utterance is not in all input streams.");
63 if (po.NumArgs() < 5) {
68 std::string featlen_rspecifier = po.GetArg(1),
69 stream_dims_str = po.GetArg(2),
70 post_wspecifier = po.GetArg(po.NumArgs());
71 int32 stream_count = po.NumArgs() - 3;
74 std::vector<int32> stream_dims;
76 KALDI_ERR <<
"Invalid stream-dims string " << stream_dims_str;
78 if (stream_count != stream_dims.size()) {
79 KALDI_ERR <<
"Mismatch in input posterior-stream count " << stream_count
80 <<
" and --stream-dims count" << stream_dims.size()
81 <<
", " << stream_dims_str;
85 std::vector<int32> stream_offset(stream_dims.size()+1, 0);
86 for (int32 s = 0; s < stream_dims.size(); s++) {
87 stream_offset[s+1] = stream_offset[s] + stream_dims[s];
91 std::vector<RandomAccessPosteriorReader> posterior_reader(po.NumArgs()-3);
92 for (int32 s = 0; s < stream_count; s++) {
93 posterior_reader[s].Open(po.GetArg(s+3));
96 int32 num_done = 0, num_err = 0, num_empty = 0;
101 for (; !featlen_reader.Done(); featlen_reader.Next()) {
102 bool ok =
true, empty =
true;
103 std::string utt = featlen_reader.Key();
104 int32 num_frames = featlen_reader.Value();
108 std::string nonempty_streams;
109 for (int32 s = 0; s < stream_count; s++) {
110 if (posterior_reader[s].HasKey(utt)) {
111 nonempty_streams +=
" " +
ToString(s);
115 <<
", frames " << num_frames
116 <<
", pasted-from streams " << nonempty_streams;
123 for (int32 s = 0; s < stream_count; s++) {
124 if (!posterior_reader[s].HasKey(utt)) {
125 if (!allow_partial) {
127 <<
" in set " << (s+1) <<
" of posteriors.";
132 const Posterior& post_s = posterior_reader[s].Value(utt);
134 for (int32 f = 0; f < num_frames; f++) {
135 for (int32
i = 0;
i < post_s[f].size();
i++) {
136 int32
id = post_s[f][
i].first;
139 post[f].push_back(std::make_pair(stream_offset[s] +
id, val));
146 KALDI_WARN <<
"Uttenrace with no posteriors " << utt <<
", discarding";
151 posterior_writer.Write(featlen_reader.Key(), post);
157 KALDI_LOG <<
"Pasted posteriors for " << num_done <<
" sentences, " 158 <<
"missing sentences " << num_empty <<
", " 159 <<
"failed for " << num_err;
160 return (num_done != 0 ? 0 : 1);
161 }
catch(
const std::exception &e) {
162 std::cerr << e.what();
std::string ToString(const T &t)
Convert basic type to a string (please don't overuse).
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
int32 GetVerboseLevel()
Get verbosity level, usually set via the command-line '--verbose=' switch.
A templated class for writing objects to an archive or script file; see The Table concept...
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
#define KALDI_ASSERT(cond)