37 Input input(rxfilename);
38 std::istream &is = input.
Stream();
40 while (std::getline(is, line)) {
41 list_out->push_back(std::vector<int32>());
43 KALDI_ERR <<
"Bad line in shared phones list: " << line <<
" (reading " 45 std::sort(list_out->rbegin()->begin(), list_out->rbegin()->end());
47 KALDI_ERR <<
"Bad line in shared phones list (repeated phone): " << line
54 const std::vector<int32> &phone2num_pdf_classes,
55 const std::vector<int32> &ci_phones_list,
56 const std::vector<std::vector<int32> > &bi_counts,
57 int32 biphone_min_count,
58 const std::vector<int32> &mono_counts,
59 int32 mono_min_count) {
63 std::set<int32> all_phones;
64 for (
size_t i = 0;
i < phone_sets.size();
i++) {
67 for (
size_t j = 0;
j < phone_sets[
i].size();
j++) {
69 all_phones.insert(phone_sets[i][j]);
75 int32 numpdfs_per_phone = phone2num_pdf_classes[1];
76 int32 current_pdfid = 0;
77 std::map<EventValueType, EventMap*> level1_map;
79 for (
size_t i = 0;
i < ci_phones_list.size();
i++) {
80 std::map<EventValueType, EventAnswerType> level2_map;
81 level2_map[0] = current_pdfid++;
82 if (numpdfs_per_phone == 2) level2_map[1] = current_pdfid++;
89 int32 zerophone_pdf = -1;
92 std::vector<int32> monophone_pdf(phone_sets.size(), -1);
94 for (
size_t i = 0;
i < phone_sets.size();
i++) {
96 if (numpdfs_per_phone == 1) {
98 std::map<EventValueType, EventAnswerType> level2_map;
99 level2_map[0] = current_pdfid++;
100 for (
size_t j = 0;
j < phone_sets.size();
j++) {
101 int32 pdfid = current_pdfid++;
102 std::vector<int32> pset = phone_sets[
j];
104 for (
size_t k = 0; k < pset.size(); k++)
105 level2_map[pset[k]] = pdfid;
107 std::vector<int32> pset = phone_sets[
i];
109 for (
size_t k = 0; k < pset.size(); k++)
113 std::vector<int32> right_phoneset = phone_sets[
i];
116 std::map<EventValueType, EventMap*> level2_map;
118 std::map<EventValueType, EventAnswerType> level3_map;
119 level3_map[0] = current_pdfid++;
120 level3_map[1] = current_pdfid++;
122 for (
size_t i = 0;
i < ci_phones_list.size();
i++)
125 for (
size_t j = 0;
j < phone_sets.size();
j++) {
126 std::vector<int32> left_phoneset = phone_sets[
j];
128 std::map<EventValueType, EventAnswerType> level3_map;
129 if (bi_counts.empty() ||
130 bi_counts[left_phoneset[0]][right_phoneset[0]] >= biphone_min_count) {
131 level3_map[0] = current_pdfid++;
132 level3_map[1] = current_pdfid++;
133 }
else if (mono_counts.empty() ||
134 mono_counts[right_phoneset[0]] > mono_min_count) {
136 KALDI_VLOG(2) <<
"Reverting to mono for biphone (" << left_phoneset[0]
137 <<
"," << right_phoneset[0] <<
")";
138 if (monophone_pdf[
i] == -1) {
139 KALDI_VLOG(1) <<
"Reserving mono PDFs for phone-set " <<
i;
140 monophone_pdf[
i] = current_pdfid++;
143 level3_map[0] = monophone_pdf[
i];
144 level3_map[1] = monophone_pdf[
i] + 1;
146 KALDI_VLOG(2) <<
"Reverting to zerophone for biphone (" 148 <<
"," << right_phoneset[0] <<
")";
150 if (zerophone_pdf == -1) {
152 zerophone_pdf = current_pdfid++;
155 level3_map[0] = zerophone_pdf;
156 level3_map[1] = zerophone_pdf + 1;
159 for (
size_t k = 0; k < left_phoneset.size(); k++) {
160 int32 left_phone = left_phoneset[k];
164 for (
size_t k = 0; k < right_phoneset.size(); k++) {
165 std::map<EventValueType, EventMap*> level2_copy;
166 for (
auto const& kv: level2_map)
167 level2_copy[kv.first] = kv.second->Copy(std::vector<EventMap*>());
168 int32 right_phone = right_phoneset[k];
174 KALDI_LOG <<
"Num PDFs: " << current_pdfid;
181 const std::vector<int32> phone2num_pdf_classes,
182 const std::vector<int32> &ci_phones_list,
183 const std::vector<std::vector<int32> > &bi_counts,
184 int32 biphone_min_count,
185 const std::vector<int32> &mono_counts,
186 int32 mono_min_count) {
188 std::set<int32> ci_phones;
189 for (
size_t i = 0;
i < ci_phones_list.size();
i++)
190 ci_phones.insert(ci_phones_list[
i]);
191 for (
int32 i = phone_sets.size() - 1; i >= 0; i--) {
192 for (
int32 j = phone_sets[i].size() - 1;
j >= 0;
j--) {
193 if (ci_phones.find(phone_sets[i][
j]) != ci_phones.end()) {
194 phone_sets[
i].erase(phone_sets[i].begin() + j);
195 if (phone_sets[i].empty())
196 phone_sets.erase(phone_sets.begin() +
i);
201 std::vector<bool> share_roots(phone_sets.size(),
false);
205 phone2num_pdf_classes,
206 ci_phones_list, bi_counts,
207 biphone_min_count, mono_counts,
226 std::vector<int32> *mono_counts,
227 std::vector<std::vector<int32> > *bi_counts) {
229 mono_counts->resize(num_phones + 1, 0);
230 bi_counts->resize(num_phones + 1, std::vector<int>(num_phones + 1, 0));
231 std::ifstream infile(filename);
233 while (std::getline(infile, line)) {
234 std::istringstream iss(line);
237 if ((std::istringstream(line) >> a >> b >> c)) {
242 (*bi_counts)[a][b] = c;
243 }
else if ((std::istringstream(line) >> b >> c)) {
247 (*mono_counts)[b] = c;
249 KALDI_ERR <<
"Bad line in phone stats file: " << line;
254 int main(
int argc,
char *argv[]) {
256 using namespace kaldi;
260 "Initialize a biphone context-dependency tree with all the\n" 261 "leaves (i.e. a full tree). Intended for end-to-end tree-free models.\n" 262 "Usage: gmm-init-biphone <topology-in> <dim> <model-out> <tree-out> \n" 264 " gmm-init-biphone topo 39 bi.mdl bi.tree\n";
267 std::string shared_phones_rxfilename, phone_counts_rxfilename;
268 int32 min_biphone_count = 100, min_mono_count = 20;
269 std::string ci_phones_str;
270 std::vector<int32> ci_phones;
274 po.
Register(
"binary", &binary,
"Write output in binary mode");
275 po.
Register(
"shared-phones", &shared_phones_rxfilename,
276 "rxfilename containing, on each line, a list of phones " 277 "whose pdfs should be shared.");
278 po.
Register(
"ci-phones", &ci_phones_str,
"Colon-separated list of " 279 "integer indices of context-independent phones.");
280 po.
Register(
"phone-counts", &phone_counts_rxfilename,
281 "rxfilename containing, on each line, a biphone/phone and " 282 "its count in the training data.");
283 po.
Register(
"min-biphone-count", &min_biphone_count,
"Minimum number of " 284 "occurences of a biphone in training data to reserve pdfs " 286 po.
Register(
"min-monophone-count", &min_mono_count,
"Minimum number of " 287 "occurences of a monophone in training data to reserve pdfs " 297 std::string topo_filename = po.
GetArg(1);
301 <<
". It should be a positive integer.";
302 std::string model_filename = po.
GetArg(3);
303 std::string tree_filename = po.
GetArg(4);
305 if (!ci_phones_str.empty()) {
307 std::sort(ci_phones.begin(), ci_phones.end());
308 if (!
IsSortedAndUniq(ci_phones) || ci_phones.empty() || ci_phones[0] == 0)
309 KALDI_ERR <<
"Invalid --ci-phones option: " << ci_phones_str;
313 glob_inv_var.
Set(1.0);
319 Input ki(topo_filename, &binary_in);
322 const std::vector<int32> &phones = topo.
GetPhones();
324 std::vector<int32> phone2num_pdf_classes(1 + phones.back());
325 for (
size_t i = 0;
i < phones.size();
i++) {
329 phone2num_pdf_classes[phones[i]] == 2);
332 std::vector<int32> mono_counts;
333 std::vector<std::vector<int32> > bi_counts;
334 if (!phone_counts_rxfilename.empty()) {
336 &mono_counts, &bi_counts);
337 KALDI_LOG <<
"Loaded mono/bi phone counts.";
343 std::vector<std::vector<int32> > shared_phones;
344 if (shared_phones_rxfilename ==
"") {
345 shared_phones.resize(phones.size());
346 for (
size_t i = 0;
i < phones.size();
i++)
347 shared_phones[
i].push_back(phones[
i]);
353 ci_phones, bi_counts,
355 mono_counts, min_mono_count);
364 inv_var.
Row(0).CopyFromVec(glob_inv_var);
366 mu.
Row(0).CopyFromVec(glob_mean);
374 for (
int i = 0;
i < num_pdfs;
i++)
381 Output ko(model_filename, binary);
387 ctx_dep->
Write(
Output(tree_filename, binary).Stream(),
392 }
catch(
const std::exception &e) {
393 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void AddPdf(const DiagGmm &gmm)
Adds a GMM to the model, and increments the total number of PDFs.
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
void SetInvVarsAndMeans(const MatrixBase< Real > &invvars, const MatrixBase< Real > &means)
Use SetInvVarsAndMeans if updating both means and (inverse) variances.
A class for storing topology information for phones.
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
int32 ComputeGconsts()
Sets the gconsts.
void Read(std::istream &is, bool binary)
void Register(const std::string &name, bool *ptr, const std::string &doc)
int32 NumPdfClasses(int32 phone) const
Returns the number of pdf-classes for this phone; throws exception if phone not covered by this topol...
void ReadSharedPhonesList(std::string rxfilename, std::vector< std::vector< int32 > > *list_out)
static const EventKeyType kPdfClass
virtual int32 NumPdfs() const
NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Write(std::ostream &os, bool binary) const
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
int main(int argc, char *argv[])
static void ReadPhoneCounts(std::string &filename, int32 num_phones, std::vector< int32 > *mono_counts, std::vector< std::vector< int32 > > *bi_counts)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
const std::vector< int32 > & GetPhones() const
Returns a reference to a sorted, unique list of phones covered by the topology (these phones will be ...
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class that is capable of representing a generic mapping from EventType (which is a vector of (key...
void Write(std::ostream &os, bool binary) const
A class representing a vector.
#define KALDI_ASSERT(cond)
void Set(Real f)
Set all members of a vector to a specified value.
void Write(std::ostream &out_stream, bool binary) const
Definition for Gaussian Mixture Model with diagonal covariances.
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
void SetWeights(const VectorBase< Real > &w)
Mutators for both float or double.
EventMap * GetFullBiphoneStubMap(const std::vector< std::vector< int32 > > &phone_sets, const std::vector< int32 > &phone2num_pdf_classes, const std::vector< int32 > &ci_phones_list, const std::vector< std::vector< int32 > > &bi_counts, int32 biphone_min_count, const std::vector< int32 > &mono_counts, int32 mono_min_count)
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
ContextDependency * BiphoneContextDependencyFull(std::vector< std::vector< int32 > > phone_sets, const std::vector< int32 > phone2num_pdf_classes, const std::vector< int32 > &ci_phones_list, const std::vector< std::vector< int32 > > &bi_counts, int32 biphone_min_count, const std::vector< int32 > &mono_counts, int32 mono_min_count)