interpolate-pitch.cc File Reference
Include dependency graph for interpolate-pitch.cc:

Go to the source code of this file.

Classes

struct  PitchInterpolatorOptions
 
struct  PitchInterpolatorStats
 
class  PitchInterpolator
 

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks, the reference:
 

Functions

void LinearlyInterpolatePitch (Matrix< BaseFloat > *mat)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 292 of file interpolate-pitch.cc.

References SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), KALDI_ERR, KALDI_LOG, KALDI_VLOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), kaldi::LinearlyInterpolatePitch(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), TableWriter< Holder >::Open(), PitchInterpolatorStats::Print(), ParseOptions::PrintUsage(), ParseOptions::Read(), PitchInterpolatorOptions::Register(), ParseOptions::Register(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

292  {
293  try {
294  using namespace kaldi;
295  const char *usage =
296  "This is a rather special-purpose program which processes 2-dimensional\n"
297  "features consisting of (prob-of-voicing, pitch). By default we do model-based\n"
298  "pitch smoothing and interpolation (see code), or if --linear-interpolation=true,\n"
299  "just linear interpolation across gaps where pitch == 0 (not predicted).\n"
300  "Usage: interpolate-pitch [options...] <feats-rspecifier> <feats-wspecifier>\n";
301 
302 
303  // construct all the global objects
304  ParseOptions opts(usage);
305 
306  bool linear_interpolation = false;
307  PitchInterpolatorOptions interpolate_opts;
308 
309  opts.Register("linear-interpolation",
310  &linear_interpolation, "If true, just do simple linear "
311  "interpolation across gaps (else, model-based)");
312  interpolate_opts.Register(&opts);
313 
314  // parse options (+filling the registered variables)
315  opts.Read(argc, argv);
316 
317  if (opts.NumArgs() != 2) {
318  opts.PrintUsage();
319  exit(1);
320  }
321 
322  std::string input_rspecifier = opts.GetArg(1);
323  std::string output_wspecifier = opts.GetArg(2);
324 
325  SequentialBaseFloatMatrixReader reader(input_rspecifier);
326  BaseFloatMatrixWriter kaldi_writer; // typedef to TableWriter<something>.
327 
328  if (!kaldi_writer.Open(output_wspecifier))
329  KALDI_ERR << "Could not initialize output with wspecifier "
330  << output_wspecifier;
331 
332  int32 num_done = 0, num_err = 0;
334 
335  for (; !reader.Done(); reader.Next()) {
336  std::string utt = reader.Key();
337  Matrix<BaseFloat> features = reader.Value();
338  int num_frames = features.NumRows();
339 
340  if (num_frames == 0 && features.NumCols() != 2) {
341  KALDI_WARN << "Feature file has bad size "
342  << features.NumRows() << " by " << features.NumCols();
343  num_err++;
344  continue;
345  }
346 
347  if (linear_interpolation) LinearlyInterpolatePitch(&features);
348  else {
349  // work happens in constructor of this class.
350  PitchInterpolator pi(interpolate_opts, &features, &stats);
351  }
352  kaldi_writer.Write(utt, features);
353  num_done++;
354 
355  if (num_done % 10 == 0)
356  KALDI_LOG << "Processed " << num_done << " utterances";
357  KALDI_VLOG(2) << "Processed features for key " << utt;
358  }
359  if (!linear_interpolation) stats.Print();
360  KALDI_LOG << "Done " << num_done << " utterances, " << num_err
361  << " with errors.";
362  return (num_done != 0 ? 0 : 1);
363  } catch(const std::exception &e) {
364  std::cerr << e.what();
365  return -1;
366  }
367 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool Open(const std::string &wspecifier)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Register(OptionsItf *opts)
void LinearlyInterpolatePitch(Matrix< BaseFloat > *mat)
void Write(const std::string &key, const T &value) const
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
#define KALDI_LOG
Definition: kaldi-error.h:153