28 std::vector<int32> seen_phones;
29 for (
size_t i = 0;
i < questions.size();
i++)
30 for (
size_t j= 0;
j < questions[
i].size();
j++) seen_phones.push_back(questions[
i][
j]);
33 const std::vector<int32> &topo_phones = topo.
GetPhones();
34 if (seen_phones != topo_phones) {
35 std::ostringstream ss_seen, ss_topo;
38 KALDI_WARN <<
"ProcessTopo: phones seen in questions differ from those in topology: " 39 << ss_seen.str() <<
" vs. " << ss_topo.str();
40 if (seen_phones.size() > topo_phones.size()) {
41 KALDI_ERR <<
"ProcessTopo: phones are asked about that are undefined in the topology.";
45 int32 max_num_pdf_classes = 0;
46 for (
size_t i = 0;
i < topo_phones.size();
i++) {
49 max_num_pdf_classes = std::max(num_pdf_classes, max_num_pdf_classes);
51 KALDI_LOG <<
"Max # pdf classes is " << max_num_pdf_classes;
52 return max_num_pdf_classes;
57 int main(
int argc,
char *argv[]) {
58 using namespace kaldi;
60 using namespace kaldi;
65 "Usage: compile-questions [options] <topo> <questions-text-file> <questions-out>\n" 67 " compile-questions questions.txt questions.qst\n";
70 int32 num_iters_refine = 0;
75 "Write output in binary mode");
77 "Context window size [must match acc-tree-stats].");
79 "Central position in phone context window [must match acc-tree-stats]");
80 po.
Register(
"num-iters-refine", &num_iters_refine,
81 "Number of iters of refining questions at each node. >0 --> questions " 92 topo_filename = po.
GetArg(1),
93 questions_rxfilename = po.
GetArg(2),
94 questions_out_filename = po.
GetArg(3);
100 std::vector<std::vector<int32> > questions;
102 KALDI_ERR <<
"Could not read questions from " 104 for (
size_t i = 0;
i < questions.size();
i++) {
105 std::sort(questions[
i].begin(), questions[
i].end());
107 KALDI_ERR <<
"Questions contain duplicate phones";
109 size_t nq =
static_cast<int32
>(questions.size());
111 if (questions.size() != nq)
113 <<
" duplicate questions present in " << questions_rxfilename;
119 int32 max_num_pdfclasses =
ProcessTopo(topo, questions);
126 for (int32
n = 0;
n < N;
n++) {
127 KALDI_LOG <<
"Setting questions for phonetic-context position "<<
n;
133 std::vector<std::vector<int32> > pdfclass_questions(max_num_pdfclasses-1);
134 for (int32
i = 0;
i < max_num_pdfclasses - 1;
i++)
135 for (int32
j = 0;
j <=
i;
j++)
136 pdfclass_questions[
i].push_back(
j);
139 KALDI_LOG <<
"Setting questions for hmm-position [hmm-position ranges from 0 to "<< (max_num_pdfclasses-1) <<
"]";
143 KALDI_LOG <<
"Wrote questions to "<<questions_out_filename;
144 }
catch(
const std::exception &e) {
145 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
This class defines, for each EventKeyType, a set of initial questions that it tries and also a number...
A class for storing topology information for phones.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int32 ProcessTopo(const HmmTopology &topo, const std::vector< std::vector< int32 > > &questions)
void SetQuestionsOf(EventKeyType key, const QuestionsForKey &options_of_key)
void SortAndUniq(std::vector< T > *vec)
Sorts a vector and removes duplicate elements from it ("uniq").
void Register(const std::string &name, bool *ptr, const std::string &doc)
int32 NumPdfClasses(int32 phone) const
Returns the number of pdf-classes for this phone; throws an exception if the phone is not covered by this topology.
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
static const EventKeyType kPdfClass
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, std::vector< std::vector< int32 > > *list)
QuestionsForKey is a class used to define the questions for a key, and also options that allow us to ...
std::vector< std::vector< EventValueType > > initial_questions
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
const std::vector< int32 > & GetPhones() const
Returns a reference to a sorted, unique list of phones covered by the topology (these phones will be ...
int NumArgs() const
Number of positional parameters (cf. argc-1).
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
int main(int argc, char *argv[])
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.