gmm-init-biphone.cc File Reference
Include dependency graph for gmm-init-biphone.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks, the reference:
 

Functions

void ReadSharedPhonesList (std::string rxfilename, std::vector< std::vector< int32 > > *list_out)
 
EventMap * GetFullBiphoneStubMap (const std::vector< std::vector< int32 > > &phone_sets, const std::vector< int32 > &phone2num_pdf_classes, const std::vector< int32 > &ci_phones_list, const std::vector< std::vector< int32 > > &bi_counts, int32 biphone_min_count, const std::vector< int32 > &mono_counts, int32 mono_min_count)
 
ContextDependency * BiphoneContextDependencyFull (std::vector< std::vector< int32 > > phone_sets, const std::vector< int32 > phone2num_pdf_classes, const std::vector< int32 > &ci_phones_list, const std::vector< std::vector< int32 > > &bi_counts, int32 biphone_min_count, const std::vector< int32 > &mono_counts, int32 mono_min_count)
 
static void ReadPhoneCounts (std::string &filename, int32 num_phones, std::vector< int32 > *mono_counts, std::vector< std::vector< int32 > > *bi_counts)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 254 of file gmm-init-biphone.cc.

References AmDiagGmm::AddPdf(), kaldi::BiphoneContextDependencyFull(), DiagGmm::ComputeGconsts(), kaldi::ConvertStringToInteger(), ParseOptions::GetArg(), HmmTopology::GetPhones(), rnnlm::i, kaldi::IsSortedAndUniq(), KALDI_ASSERT, KALDI_ERR, KALDI_LOG, ParseOptions::NumArgs(), HmmTopology::NumPdfClasses(), ContextDependency::NumPdfs(), ParseOptions::PrintUsage(), ParseOptions::Read(), HmmTopology::Read(), ReadPhoneCounts(), kaldi::ReadSharedPhonesList(), ParseOptions::Register(), DiagGmm::Resize(), MatrixBase< Real >::Row(), VectorBase< Real >::Set(), DiagGmm::SetInvVarsAndMeans(), DiagGmm::SetWeights(), kaldi::SplitStringToIntegers(), Output::Stream(), Input::Stream(), AmDiagGmm::Write(), ContextDependency::Write(), and TransitionModel::Write().

254  {
     // Entry point of gmm-init-biphone.
     // Takes four positional arguments: <topology-in> <dim> <model-out> <tree-out>.
     // Reads the HMM topology, builds a biphone context-dependency tree via
     // BiphoneContextDependencyFull(), adds one identical single-component
     // diagonal GMM per pdf of that tree, then writes the transition model and
     // the acoustic model to <model-out> and the tree to <tree-out>.
     // Returns 0 on success and -1 if a std::exception is caught; calls exit(1)
     // after printing usage when the number of positional arguments is not 4.
255  try {
256  using namespace kaldi;
257  using kaldi::int32;
258 
259  const char *usage =
260  "Initialize a biphone context-dependency tree with all the\n"
261  "leaves (i.e. a full tree). Intended for end-to-end tree-free models.\n"
262  "Usage: gmm-init-biphone <topology-in> <dim> <model-out> <tree-out> \n"
263  "e.g.: \n"
264  " gmm-init-biphone topo 39 bi.mdl bi.tree\n";
265 
     // Option defaults; each may be overridden by the options registered below.
266  bool binary = true;
267  std::string shared_phones_rxfilename, phone_counts_rxfilename;
268  int32 min_biphone_count = 100, min_mono_count = 20;
269  std::string ci_phones_str;
270  std::vector<int32> ci_phones; // Sorted, unique vector of
271  // context-independent phones.
272 
273  ParseOptions po(usage);
274  po.Register("binary", &binary, "Write output in binary mode");
275  po.Register("shared-phones", &shared_phones_rxfilename,
276  "rxfilename containing, on each line, a list of phones "
277  "whose pdfs should be shared.");
278  po.Register("ci-phones", &ci_phones_str, "Colon-separated list of "
279  "integer indices of context-independent phones.");
280  po.Register("phone-counts", &phone_counts_rxfilename,
281  "rxfilename containing, on each line, a biphone/phone and "
282  "its count in the training data.");
283  po.Register("min-biphone-count", &min_biphone_count, "Minimum number of "
284  "occurences of a biphone in training data to reserve pdfs "
285  "for it.");
286  po.Register("min-monophone-count", &min_mono_count, "Minimum number of "
287  "occurences of a monophone in training data to reserve pdfs "
288  "for it.");
289  po.Read(argc, argv);
290 
291  if (po.NumArgs() != 4) {
292  po.PrintUsage();
293  exit(1);
294  }
295 
296 
297  std::string topo_filename = po.GetArg(1);
     // The dimension must parse as an integer and lie in the range 1..10000.
298  int dim = 0;
299  if (!ConvertStringToInteger(po.GetArg(2), &dim) || dim <= 0 || dim > 10000)
300  KALDI_ERR << "Bad dimension:" << po.GetArg(2)
301  << ". It should be a positive integer.";
302  std::string model_filename = po.GetArg(3);
303  std::string tree_filename = po.GetArg(4);
304 
     // Parse --ci-phones: after sorting, the list is rejected if it contains
     // duplicates, is empty, or its smallest entry is 0.
     // NOTE(review): the return value of SplitStringToIntegers is not checked.
305  if (!ci_phones_str.empty()) {
306  SplitStringToIntegers(ci_phones_str, ":", false, &ci_phones);
307  std::sort(ci_phones.begin(), ci_phones.end());
308  if (!IsSortedAndUniq(ci_phones) || ci_phones.empty() || ci_phones[0] == 0)
309  KALDI_ERR << "Invalid --ci-phones option: " << ci_phones_str;
310  }
311 
     // Initial statistics shared by every pdf: inverse variance and mean are
     // both set to 1.0 in every dimension.
312  Vector<BaseFloat> glob_inv_var(dim);
313  glob_inv_var.Set(1.0);
314  Vector<BaseFloat> glob_mean(dim);
315  glob_mean.Set(1.0);
316 
317  HmmTopology topo;
318  bool binary_in;
319  Input ki(topo_filename, &binary_in);
320  topo.Read(ki.Stream(), binary_in);
321 
322  const std::vector<int32> &phones = topo.GetPhones();
323 
     // Table indexed by phone id, sized from the largest id (phones.back()).
     // NOTE(review): phones.back() assumes the topology covers at least one
     // phone; an empty list is not guarded against here.
324  std::vector<int32> phone2num_pdf_classes(1 + phones.back());
325  for (size_t i = 0; i < phones.size(); i++) {
326  phone2num_pdf_classes[phones[i]] = topo.NumPdfClasses(phones[i]);
327  // For now we only support 1 or 2 pdf's per phone
328  KALDI_ASSERT(phone2num_pdf_classes[phones[i]] == 1 ||
329  phone2num_pdf_classes[phones[i]] == 2);
330  }
331 
     // Counts stay empty unless --phone-counts was given.
     // NOTE(review): phones.size() is passed as num_phones, which
     // ReadPhoneCounts uses as the largest valid phone id; if phone ids are
     // not contiguous from 1, phones.back() can exceed phones.size() - confirm.
332  std::vector<int32> mono_counts;
333  std::vector<std::vector<int32> > bi_counts;
334  if (!phone_counts_rxfilename.empty()) {
335  ReadPhoneCounts(phone_counts_rxfilename, phones.size(),
336  &mono_counts, &bi_counts);
337  KALDI_LOG << "Loaded mono/bi phone counts.";
338  }
339 
340 
341  // Now the tree:
     // Without --shared-phones every phone forms its own singleton set;
     // otherwise the sets are read from the given file.
342  ContextDependency *ctx_dep = NULL;
343  std::vector<std::vector<int32> > shared_phones;
344  if (shared_phones_rxfilename == "") {
345  shared_phones.resize(phones.size());
346  for (size_t i = 0; i < phones.size(); i++)
347  shared_phones[i].push_back(phones[i]);
348  } else {
349  ReadSharedPhonesList(shared_phones_rxfilename, &shared_phones);
350  // ReadSharedPhonesList crashes on error.
351  }
     // ctx_dep is a raw pointer released by the delete at the end of the try
     // block. NOTE(review): if anything in between throws, the catch handler
     // returns without deleting it.
352  ctx_dep = BiphoneContextDependencyFull(shared_phones, phone2num_pdf_classes,
353  ci_phones, bi_counts,
354  min_biphone_count,
355  mono_counts, min_mono_count);
356 
357  int32 num_pdfs = ctx_dep->NumPdfs();
358 
     // Build a single one-component GMM from the global mean / inverse
     // variance with weight 1.0; it is used as the template for every pdf.
359  AmDiagGmm am_gmm;
360  DiagGmm gmm;
361  gmm.Resize(1, dim);
362  { // Initialize the gmm.
363  Matrix<BaseFloat> inv_var(1, dim);
364  inv_var.Row(0).CopyFromVec(glob_inv_var);
365  Matrix<BaseFloat> mu(1, dim);
366  mu.Row(0).CopyFromVec(glob_mean);
367  Vector<BaseFloat> weights(1);
368  weights.Set(1.0);
369  gmm.SetInvVarsAndMeans(inv_var, mu);
370  gmm.SetWeights(weights);
371  gmm.ComputeGconsts();
372  }
373 
     // Add the same GMM once per pdf of the tree.
374  for (int i = 0; i < num_pdfs; i++)
375  am_gmm.AddPdf(gmm);
376 
377  // Now the transition model:
378  TransitionModel trans_model(*ctx_dep, topo);
379 
     // The transition model is written first, then the acoustic model, both
     // to the same output stream. The braces limit the lifetime of `ko`.
380  {
381  Output ko(model_filename, binary);
382  trans_model.Write(ko.Stream(), binary);
383  am_gmm.Write(ko.Stream(), binary);
384  }
385 
386  // Now write the tree.
     // The tree goes through a temporary Output that lives only for this
     // statement.
387  ctx_dep->Write(Output(tree_filename, binary).Stream(),
388  binary);
389 
390  delete ctx_dep;
391  return 0;
392  } catch(const std::exception &e) {
     // The exception message is printed as-is (no newline is appended) and
     // the program reports failure with -1.
393  std::cerr << e.what();
394  return -1;
395  }
396 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void AddPdf(const DiagGmm &gmm)
Adds a GMM to the model, and increments the total number of PDFs.
Definition: am-diag-gmm.cc:57
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
void SetInvVarsAndMeans(const MatrixBase< Real > &invvars, const MatrixBase< Real > &means)
Use SetInvVarsAndMeans if updating both means and (inverse) variances.
Definition: diag-gmm-inl.h:63
A class for storing topology information for phones.
Definition: hmm-topology.h:93
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: diag-gmm.cc:66
int32 ComputeGconsts()
Sets the gconsts.
Definition: diag-gmm.cc:114
kaldi::int32 int32
void Read(std::istream &is, bool binary)
Definition: hmm-topology.cc:39
int32 NumPdfClasses(int32 phone) const
Returns the number of pdf-classes for this phone; throws exception if phone not covered by this topol...
void ReadSharedPhonesList(std::string rxfilename, std::vector< std::vector< int32 > > *list_out)
virtual int32 NumPdfs() const
NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
Definition: context-dep.h:71
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void Write(std::ostream &os, bool binary) const
Definition: context-dep.cc:145
static void ReadPhoneCounts(std::string &filename, int32 num_phones, std::vector< int32 > *mono_counts, std::vector< std::vector< int32 > > *bi_counts)
#define KALDI_ERR
Definition: kaldi-error.h:147
const std::vector< int32 > & GetPhones() const
Returns a reference to a sorted, unique list of phones covered by the topology (these phones will be ...
Definition: hmm-topology.h:163
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Write(std::ostream &out_stream, bool binary) const
Definition: am-diag-gmm.cc:163
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void SetWeights(const VectorBase< Real > &w)
Mutators for both float or double.
Definition: diag-gmm-inl.h:28
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
#define KALDI_LOG
Definition: kaldi-error.h:153
ContextDependency * BiphoneContextDependencyFull(std::vector< std::vector< int32 > > phone_sets, const std::vector< int32 > phone2num_pdf_classes, const std::vector< int32 > &ci_phones_list, const std::vector< std::vector< int32 > > &bi_counts, int32 biphone_min_count, const std::vector< int32 > &mono_counts, int32 mono_min_count)

◆ ReadPhoneCounts()

static void ReadPhoneCounts ( std::string &  filename,
int32  num_phones,
std::vector< int32 > *  mono_counts,
std::vector< std::vector< int32 > > *  bi_counts 
)
static

Definition at line 225 of file gmm-init-biphone.cc.

References KALDI_ASSERT, and KALDI_ERR.

Referenced by main().

227  {
     // Reads phone statistics from `filename`, one record per line.
     //   "a b c" (three integers): biphone count, stored as (*bi_counts)[a][b] = c,
     //                             where a == 0 means no left context.
     //   "b c"   (two integers):   monophone count, stored as (*mono_counts)[b] = c.
     // Any line matching neither form triggers KALDI_ERR. Out-of-range phone
     // ids or negative counts trip KALDI_ASSERT.
     // Both outputs are resized to num_phones + 1 entries with new elements set
     // to 0, so ids 0..num_phones are valid indices.
228  // The actual phones start from id = 1 (so the last phone has id = num_phones).
229  mono_counts->resize(num_phones + 1, 0);
230  bi_counts->resize(num_phones + 1, std::vector<int>(num_phones + 1, 0));
     // NOTE(review): the file is opened with std::ifstream and the open is not
     // checked; if it fails, the loop below never runs and all counts stay 0.
     // The caller passes the --phone-counts value, which its help text calls an
     // rxfilename - confirm that only plain file paths are expected here.
231  std::ifstream infile(filename);
232  std::string line;
233  while (std::getline(infile, line)) {
     // NOTE(review): `iss` is never used; each parse attempt below builds its
     // own temporary stream from `line`.
234  std::istringstream iss(line);
235  int a, b;
236  long c;
     // The three-integer form is tried first; only the first three fields are
     // extracted, so anything after them on the line is ignored.
237  if ((std::istringstream(line) >> a >> b >> c)) {
238  // It's a biphone count.
239  KALDI_ASSERT(a >= 0 && a <= num_phones); // 0 means no-left-context
240  KALDI_ASSERT(b > 0 && b <= num_phones);
241  KALDI_ASSERT(c >= 0);
     // NOTE(review): c is a long stored into an int32 element; a count that
     // does not fit in int32 would be converted without any check.
242  (*bi_counts)[a][b] = c;
243  } else if ((std::istringstream(line) >> b >> c)) {
244  // It's a monophone count.
245  KALDI_ASSERT(b > 0 && b <= num_phones);
246  KALDI_ASSERT(c >= 0);
247  (*mono_counts)[b] = c;
248  } else {
249  KALDI_ERR << "Bad line in phone stats file: " << line;
250  }
251  }
252 }
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185