cluster-phones.cc File Reference
Include dependency graph for cluster-phones.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 31 of file cluster-phones.cc.

References kaldi::AutomaticallyObtainQuestions(), kaldi::DeleteBuildTreeStats(), ParseOptions::GetArg(), KALDI_ERR, KALDI_LOG, kaldi::KMeansClusterPhones(), ParseOptions::NumArgs(), kaldi::PrintableRxfilename(), kaldi::PrintableWxfilename(), ParseOptions::PrintUsage(), ParseOptions::Read(), kaldi::ReadBuildTreeStats(), kaldi::ReadIntegerVectorVectorSimple(), ParseOptions::Register(), kaldi::SplitStringToIntegers(), Input::Stream(), and kaldi::WriteIntegerVectorVectorSimple().

31  {
32  using namespace kaldi;
33  try {
34  using namespace kaldi;
35  typedef kaldi::int32 int32;
36 
37  const char *usage =
38  "Cluster phones (or sets of phones) into sets for various purposes\n"
39  "Usage: cluster-phones [options] <tree-stats-in> <phone-sets-in> <clustered-phones-out>\n"
40  "e.g.: \n"
41  " cluster-phones 1.tacc phonesets.txt questions.txt\n";
42  // Format of phonesets.txt is e.g.
43  // 1
44  // 2 3 4
45  // 5 6
46  // ...
47  // Format of questions.txt output is similar, but with more lines (and the same phone
48  // may appear on multiple lines).
49 
50  // bool binary = true;
51  int32 P = 1, N = 3; // Note: N does not matter.
52  std::string pdf_class_list_str = "1"; // 1 is just the central position of 3.
53  std::string mode = "questions";
54  int32 num_classes = -1;
55 
56  ParseOptions po(usage);
57  // po.Register("binary", &binary, "Write output in binary mode");
58  po.Register("central-position", &P, "Central position in context window [must match acc-tree-stats]");
59  po.Register("context-width", &N, "Does not have any effect-- included for scripting convenience.");
60  po.Register("pdf-class-list", &pdf_class_list_str, "Colon-separated list of HMM positions to consider [Default = 1: just central position for 3-state models].");
61  po.Register("mode", &mode, "Mode of operation: \"questions\"->sets suitable for decision trees; \"k-means\"->k-means algorithm, output k classes (set num-classes options)\n");
62  po.Register("num-classes", &num_classes, "For k-means mode, number of classes.");
63 
64 
65  po.Read(argc, argv);
66 
67  if (po.NumArgs() != 3) {
68  po.PrintUsage();
69  exit(1);
70  }
71 
72 
73  std::string stats_rxfilename = po.GetArg(1),
74  phone_sets_rxfilename = po.GetArg(2),
75  phone_sets_wxfilename = po.GetArg(3);
76 
77 
78  BuildTreeStatsType stats;
79  { // Read tree stats.
80  bool binary_in;
81  GaussClusterable gc; // dummy needed to provide type.
82  Input ki(stats_rxfilename, &binary_in);
83  ReadBuildTreeStats(ki.Stream(), binary_in, gc, &stats);
84  }
85 
86  std::vector<int32> pdf_class_list;
87  if (!SplitStringToIntegers(pdf_class_list_str, ":", false, &pdf_class_list)
88  || pdf_class_list.empty()) {
89  KALDI_ERR << "Invalid pdf-class-list string [expecting colon-separated list of integers]: "
90  << pdf_class_list_str;
91  }
92 
93  std::vector<std::vector< int32> > phone_sets;
94  if (!ReadIntegerVectorVectorSimple(phone_sets_rxfilename, &phone_sets))
95  KALDI_ERR << "Could not read phone sets from "
96  << PrintableRxfilename(phone_sets_rxfilename);
97 
98  if (phone_sets.size() == 0)
99  KALDI_ERR << "No phone sets in phone sets file ";
100 
101  std::vector<std::vector<int32> > phone_sets_out;
102 
103  if (mode == "questions") {
104  if (num_classes != -1)
105  KALDI_ERR << "num-classes option is not (currently) compatible "
106  "with \"questions\" mode.";
107  AutomaticallyObtainQuestions(stats,
108  phone_sets,
109  pdf_class_list,
110  P,
111  &phone_sets_out);
112  } else if (mode == "k-means") {
113  if (num_classes <= 1 ||
114  static_cast<size_t>(num_classes) > phone_sets.size())
115  KALDI_ERR << "num-classes invalid: num_classes is " << num_classes
116  << ", number of phone sets is " << phone_sets.size();
117  KMeansClusterPhones(stats,
118  phone_sets,
119  pdf_class_list,
120  P,
121  num_classes,
122  &phone_sets_out);
123  }
124 
125  if (!WriteIntegerVectorVectorSimple(phone_sets_wxfilename, phone_sets_out))
126  KALDI_ERR << "Error writing questions to "
127  << PrintableWxfilename(phone_sets_wxfilename);
128  else
129  KALDI_LOG << "Wrote questions to "<<phone_sets_wxfilename;
130 
131  DeleteBuildTreeStats(&stats);
132  } catch(const std::exception &e) {
133  std::cerr << e.what();
134  return -1;
135  }
136 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
Definition: text-utils.h:68
void AutomaticallyObtainQuestions(BuildTreeStatsType &stats, const std::vector< std::vector< int32 > > &phone_sets_in, const std::vector< int32 > &all_pdf_classes_in, int32 P, std::vector< std::vector< int32 > > *questions_out)
Outputs sets of phones that are reasonable for questions to ask in the tree-building algorithm...
Definition: build-tree.cc:615
kaldi::int32 int32
void DeleteBuildTreeStats(BuildTreeStatsType *stats)
This frees the Clusterable* pointers in "stats", where non-NULL, and sets them to NULL...
void ReadBuildTreeStats(std::istream &is, bool binary, const Clusterable &example, BuildTreeStatsType *stats)
Reads BuildTreeStats object.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, std::vector< std::vector< int32 > > *list)
#define KALDI_ERR
Definition: kaldi-error.h:147
bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, const std::vector< std::vector< int32 > > &list)
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:61
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
std::vector< std::pair< EventType, Clusterable * > > BuildTreeStatsType
GaussClusterable wraps Gaussian statistics in a form accessible to generic clustering algorithms...
#define KALDI_LOG
Definition: kaldi-error.h:153
void KMeansClusterPhones(BuildTreeStatsType &stats, const std::vector< std::vector< int32 > > &phone_sets_in, const std::vector< int32 > &all_pdf_classes_in, int32 P, int32 num_classes, std::vector< std::vector< int32 > > *sets_out)
This function clusters the phones (or some initially specified sets of phones) into sets of phones...
Definition: build-tree.cc:748