32 int main(
int argc,
char *argv[]) {
33 using namespace kaldi;
38 "Combine 2 or more streams with NN-training targets into single stream.\n" 39 "As the posterior streams are pasted, the output dimension is the sum\n" 40 "of the input dimensions. This is used when training NN with\n" 41 "multiple softmaxes on its output. This is used in multi-task, \n" 42 "multi-lingual or multi-database training. Depending on the context,\n" 43 "an utterance is not required to be in all the input streams.\n" 44 "For a multi-database training only 1 output layer will be active.\n" 46 "The lengths of utterances are provided as 1st argument.\n" 47 "The dimensions of input stream are set as 2nd in argument.\n" 48 "Follow the input and output streams which are in 'posterior' format.\n" 50 "Usage: paste-post <featlen-rspecifier> <dims-csl> <post1-rspecifier> " 51 "... <postN-rspecifier> <post-wspecifier>\n" 52 "e.g.: paste-post 'ark:feat-to-len $feats ark,t:-|' 1029:1124 " 53 "ark:post1.ark ark:post2.ark ark:pasted.ark\n";
57 bool allow_partial =
false;
58 po.
Register(
"allow-partial", &allow_partial,
59 "Produce output also when the utterance is not in all input streams.");
68 std::string featlen_rspecifier = po.
GetArg(1),
69 stream_dims_str = po.
GetArg(2),
71 int32 stream_count = po.
NumArgs() - 3;
74 std::vector<int32> stream_dims;
76 KALDI_ERR <<
"Invalid stream-dims string " << stream_dims_str;
78 if (stream_count != stream_dims.size()) {
79 KALDI_ERR <<
"Mismatch in input posterior-stream count " << stream_count
80 <<
" and --stream-dims count" << stream_dims.size()
81 <<
", " << stream_dims_str;
85 std::vector<int32> stream_offset(stream_dims.size()+1, 0);
86 for (int32 s = 0; s < stream_dims.size(); s++) {
87 stream_offset[s+1] = stream_offset[s] + stream_dims[s];
91 std::vector<RandomAccessPosteriorReader> posterior_reader(po.
NumArgs()-3);
92 for (int32 s = 0; s < stream_count; s++) {
93 posterior_reader[s].Open(po.
GetArg(s+3));
96 int32 num_done = 0, num_err = 0, num_empty = 0;
101 for (; !featlen_reader.
Done(); featlen_reader.
Next()) {
102 bool ok =
true, empty =
true;
103 std::string utt = featlen_reader.
Key();
104 int32 num_frames = featlen_reader.
Value();
108 std::string nonempty_streams;
109 for (int32 s = 0; s < stream_count; s++) {
110 if (posterior_reader[s].HasKey(utt)) {
111 nonempty_streams +=
" " +
ToString(s);
115 <<
", frames " << num_frames
116 <<
", pasted-from streams " << nonempty_streams;
123 for (int32 s = 0; s < stream_count; s++) {
124 if (!posterior_reader[s].HasKey(utt)) {
125 if (!allow_partial) {
127 <<
" in set " << (s+1) <<
" of posteriors.";
132 const Posterior& post_s = posterior_reader[s].Value(utt);
134 for (int32 f = 0; f < num_frames; f++) {
135 for (int32
i = 0;
i < post_s[f].size();
i++) {
136 int32
id = post_s[f][
i].first;
139 post[f].push_back(std::make_pair(stream_offset[s] +
id, val));
146 KALDI_WARN <<
"Uttenrace with no posteriors " << utt <<
", discarding";
151 posterior_writer.
Write(featlen_reader.
Key(), post);
157 KALDI_LOG <<
"Pasted posteriors for " << num_done <<
" sentences, " 158 <<
"missing sentences " << num_empty <<
", " 159 <<
"failed for " << num_err;
160 return (num_done != 0 ? 0 : 1);
161 }
catch(
const std::exception &e) {
162 std::cerr << e.what();
std::string ToString(const T &t)
Convert basic type to a string (please don't overuse).
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
A templated class for writing objects to an archive or script file; see The Table concept...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int main(int argc, char *argv[])
Combines 2 or more streams with NN-training targets into single one.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)