256 using namespace kaldi;
260 "Initialize a biphone context-dependency tree with all the\n" 261 "leaves (i.e. a full tree). Intended for end-to-end tree-free models.\n" 262 "Usage: gmm-init-biphone <topology-in> <dim> <model-out> <tree-out> \n" 264 " gmm-init-biphone topo 39 bi.mdl bi.tree\n";
267 std::string shared_phones_rxfilename, phone_counts_rxfilename;
268 int32 min_biphone_count = 100, min_mono_count = 20;
269 std::string ci_phones_str;
270 std::vector<int32> ci_phones;
274 po.Register(
"binary", &binary,
"Write output in binary mode");
275 po.Register(
"shared-phones", &shared_phones_rxfilename,
276 "rxfilename containing, on each line, a list of phones " 277 "whose pdfs should be shared.");
278 po.Register(
"ci-phones", &ci_phones_str,
"Colon-separated list of " 279 "integer indices of context-independent phones.");
280 po.Register(
"phone-counts", &phone_counts_rxfilename,
281 "rxfilename containing, on each line, a biphone/phone and " 282 "its count in the training data.");
283 po.Register(
"min-biphone-count", &min_biphone_count,
"Minimum number of " 284 "occurences of a biphone in training data to reserve pdfs " 286 po.Register(
"min-monophone-count", &min_mono_count,
"Minimum number of " 287 "occurences of a monophone in training data to reserve pdfs " 291 if (po.NumArgs() != 4) {
297 std::string topo_filename = po.GetArg(1);
300 KALDI_ERR <<
"Bad dimension:" << po.GetArg(2)
301 <<
". It should be a positive integer.";
302 std::string model_filename = po.GetArg(3);
303 std::string tree_filename = po.GetArg(4);
305 if (!ci_phones_str.empty()) {
307 std::sort(ci_phones.begin(), ci_phones.end());
308 if (!
IsSortedAndUniq(ci_phones) || ci_phones.empty() || ci_phones[0] == 0)
309 KALDI_ERR <<
"Invalid --ci-phones option: " << ci_phones_str;
313 glob_inv_var.Set(1.0);
319 Input ki(topo_filename, &binary_in);
320 topo.
Read(ki.Stream(), binary_in);
322 const std::vector<int32> &phones = topo.
GetPhones();
324 std::vector<int32> phone2num_pdf_classes(1 + phones.back());
325 for (
size_t i = 0;
i < phones.size();
i++) {
329 phone2num_pdf_classes[phones[i]] == 2);
332 std::vector<int32> mono_counts;
333 std::vector<std::vector<int32> > bi_counts;
334 if (!phone_counts_rxfilename.empty()) {
336 &mono_counts, &bi_counts);
337 KALDI_LOG <<
"Loaded mono/bi phone counts.";
343 std::vector<std::vector<int32> > shared_phones;
344 if (shared_phones_rxfilename ==
"") {
345 shared_phones.resize(phones.size());
346 for (
size_t i = 0;
i < phones.size();
i++)
347 shared_phones[
i].push_back(phones[
i]);
353 ci_phones, bi_counts,
355 mono_counts, min_mono_count);
364 inv_var.Row(0).CopyFromVec(glob_inv_var);
366 mu.Row(0).CopyFromVec(glob_mean);
374 for (
int i = 0;
i < num_pdfs;
i++)
381 Output ko(model_filename, binary);
382 trans_model.Write(ko.Stream(), binary);
383 am_gmm.
Write(ko.Stream(), binary);
387 ctx_dep->
Write(
Output(tree_filename, binary).Stream(),
392 }
catch(
const std::exception &e) {
393 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void AddPdf(const DiagGmm &gmm)
Adds a GMM to the model, and increments the total number of PDFs.
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
void SetInvVarsAndMeans(const MatrixBase< Real > &invvars, const MatrixBase< Real > &means)
Use SetInvVarsAndMeans if updating both means and (inverse) variances.
A class for storing topology information for phones.
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
int32 ComputeGconsts()
Sets the gconsts.
void Read(std::istream &is, bool binary)
int32 NumPdfClasses(int32 phone) const
Returns the number of pdf-classes for this phone; throws exception if phone not covered by this topology.
void ReadSharedPhonesList(std::string rxfilename, std::vector< std::vector< int32 > > *list_out)
virtual int32 NumPdfs() const
NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
void Write(std::ostream &os, bool binary) const
static void ReadPhoneCounts(std::string &filename, int32 num_phones, std::vector< int32 > *mono_counts, std::vector< std::vector< int32 > > *bi_counts)
const std::vector< int32 > & GetPhones() const
Returns a reference to a sorted, unique list of phones covered by the topology (these phones will be positive integers).
A class representing a vector.
#define KALDI_ASSERT(cond)
void Write(std::ostream &out_stream, bool binary) const
Definition for Gaussian Mixture Model with diagonal covariances.
void SetWeights(const VectorBase< Real > &w)
Mutators for both float or double.
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
ContextDependency * BiphoneContextDependencyFull(std::vector< std::vector< int32 > > phone_sets, const std::vector< int32 > phone2num_pdf_classes, const std::vector< int32 > &ci_phones_list, const std::vector< std::vector< int32 > > &bi_counts, int32 biphone_min_count, const std::vector< int32 > &mono_counts, int32 mono_min_count)