// Entry point of the post-to-phone-post tool. Per the usage text below, it
// converts posteriors (or pdf-level posteriors) into phone-level posteriors.
// NOTE(review): this listing has gaps (several original lines are not shown),
// so comments below describe only what the visible lines establish.
27 int main(
int argc,
char *argv[]) {
29 using namespace kaldi;
// Usage/help text: documents the two modes (transition-id posteriors, and
// pdf posteriors together with --transition-id-counts).
33 "Convert posteriors (or pdf-level posteriors) to phone-level posteriors\n" 34 "See also: post-to-pdf-post, post-to-weights, get-post-on-ali\n" 36 "First, the usage when your posteriors are on transition-ids (the normal case):\n" 37 "Usage: post-to-phone-post [options] <model> <post-rspecifier> <phone-post-wspecifier>\n" 38 " e.g.: post-to-phone-post --binary=false 1.mdl \"ark:ali-to-post 1.ali|\" ark,t:-\n" 40 "Next, the usage when your posteriors are on pdfs (e.g. if they are neural-net\n" 42 "post-to-phone-post --transition-id-counts=final.tacc 1.mdl ark:pdf_post.ark ark,t:-\n" 43 "See documentation of --transition-id-counts option for more details.";
// Filled in by the --transition-id-counts option; an empty value selects the
// transition-id code path further down.
45 std::string tacc_rxfilename;
// Register the optional --transition-id-counts option with the option parser.
49 po.
Register(
"transition-id-counts", &tacc_rxfilename,
"Rxfilename where vector of counts\n" 50 "for transition-ids can be read (would normally come from training data\n" 51 "alignments, e.g. from ali-to-post and then post-to-tacc with --per-pdf=false)\n");
// The three positional arguments: model, input posteriors, output phone posteriors.
60 std::string model_rxfilename = po.
GetArg(1),
61 post_rspecifier = po.
GetArg(2),
62 phone_post_wspecifier = po.
GetArg(3);
// Open the model file; binary_in receives the binary/text mode flag.
70 Input ki(model_rxfilename, &binary_in);
// Mode 1: no transition-id counts were supplied.
76 if (tacc_rxfilename.empty()) {
// Iterate over every input posterior and write one phone posterior per key.
// NOTE(review): the line that fills phone_posterior is not visible in this
// listing; presumably ConvertPosteriorToPhones -- confirm against the full file.
78 for (; !posterior_reader.
Done(); posterior_reader.
Next()) {
82 posterior_writer.
Write(posterior_reader.
Key(), phone_posterior);
// Mode 2 (the opening of this branch is not visible in this listing):
// input posteriors are indexed by pdf-id and are mapped to phones using the
// supplied transition-id counts.
88 int32 num_pdfs = trans_model.
NumPdfs(),
// Sanity check: the counts vector must have exactly num_tids + 1 entries.
90 if (transition_counts.
Dim() != num_tids + 1) {
91 KALDI_ERR <<
"Wrong size for transition counts in " << tacc_rxfilename
92 <<
", expected " << num_tids <<
" + 1, got " 93 << transition_counts.
Dim();
// One map per pdf-id, keyed by phone, holding a BaseFloat weight.
97 std::vector<std::unordered_map<int32, BaseFloat> > pdf_to_phones(num_pdfs);
// Accumulate counts into pdf_to_phones for indices 1..num_tids inclusive.
// NOTE(review): pdf_id, phone and count are defined on lines not shown here.
99 for (int32
i = 1;
i <= num_tids;
i++) {
105 pdf_to_phones[pdf_id][phone] +=
count;
// Normalize each pdf's map: first total the stored values for that pdf.
108 for (int32
i = 0;
i < num_pdfs;
i++) {
110 for (
auto p: pdf_to_phones[
i])
111 denominator += p.second;
// Then rescale each entry: divide by the total when it is nonzero; the
// 1.0 / size() assignment below is the alternative (its `else` line is not
// visible in this listing), giving every phone of that pdf an equal weight.
112 for (
auto iter = pdf_to_phones[i].begin(); iter != pdf_to_phones[
i].end();
114 if (denominator != 0.0)
115 iter->second /= denominator;
117 iter->second = 1.0 / pdf_to_phones[
i].size();
// Main conversion loop for mode 2: process every input posterior.
122 for (; !posterior_reader.
Done(); posterior_reader.
Next()) {
// T is the number of entries (indexed by t below) in this posterior.
124 int32 T = posterior.size();
// Scratch accumulator, cleared and reused for each t.
126 std::unordered_map<int32, BaseFloat> phone_to_count;
127 for (int32 t = 0; t < T; t++) {
128 phone_to_count.clear();
// Each element of posterior[t] is a pair whose first member is a pdf-id.
129 for (
auto p : posterior[t]) {
130 int32 pdf_id = p.first;
// Reject pdf-ids outside [0, num_pdfs - 1].
132 if (pdf_id < 0 || pdf_id >= num_pdfs)
133 KALDI_ERR <<
"pdf-id on input out of range, expected [0.." << (num_pdfs-1)
134 <<
", got: " << pdf_id;
// Spread this entry over the phones recorded for its pdf-id.
// NOTE(review): count and prob are defined on lines not shown here.
135 for (
auto q: pdf_to_phones[pdf_id]) {
136 int32 phone = q.first;
139 phone_to_count[phone] += count * prob;
// Copy the accumulated (phone, value) pairs into the output for index t.
142 for (
auto p : phone_to_count) {
143 phone_posterior[t].push_back(
144 std::pair<int32, BaseFloat>(p.first, p.second));
// Write the converted posterior under the same key as the input.
147 posterior_writer.
Write(posterior_reader.
Key(), phone_posterior);
// Report how many utterances were processed.
151 KALDI_LOG <<
"Done converting posteriors to phone posteriors for " 152 << num_done <<
" utterances.";
// Exit status 0 if at least one utterance was converted, 1 otherwise.
153 return (num_done != 0 ? 0 : 1);
154 }
// Any std::exception is reported on standard error.
catch(
const std::exception &e) {
155 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept.
int32 TransitionIdToPdf(int32 trans_id) const
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int main(int argc, char *argv[])
MatrixIndexT Dim() const
Returns the dimension of the vector.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
void ConvertPosteriorToPhones(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over phones.
int32 TransitionIdToPhone(int32 trans_id) const