process-kaldi-pitch-feats.cc
Go to the documentation of this file.
1 // featbin/process-kaldi-pitch-feats.cc
2 
3 // Copyright 2013 Pegah Ghahremani
4 // Johns Hopkins University (author: Daniel Povey)
5 // 2014 IMSL, PKU-HKUST (author: Wei Shi)
6 //
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABILITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "feat/pitch-functions.h"
25 #include "feat/wave-reader.h"
26 
27 
28 int main(int argc, char *argv[]) {
29  try {
30  using namespace kaldi;
31  const char *usage =
32  "Post-process Kaldi pitch features, consisting of pitch and NCCF, into\n"
33  "features suitable for input to ASR system. Default setup produces\n"
34  "3-dimensional features consisting of (pov-feature, pitch-feature,\n"
35  "delta-pitch-feature), where pov-feature is warped NCCF, pitch-feature\n"
36  "is log-pitch with POV-weighted mean subtraction over 1.5 second window,\n"
37  "and delta-pitch-feature is delta feature computed on raw log pitch.\n"
38  "In general, you can select from four features: (pov-feature, \n"
39  "pitch-feature, delta-pitch-feature, raw-log-pitch), produced in that \n"
40  "order, by setting the boolean options (--add-pov-feature, \n"
41  "--add-normalized-log-pitch, --add-delta-pitch and --add-raw-log-pitch)\n"
42  "\n"
43  "Usage: process-kaldi-pitch-feats [options...] <feat-rspecifier> <feats-wspecifier>\n"
44  "\n"
45  "e.g.: compute-kaldi-pitch-feats [args] ark:- | process-kaldi-pitch-feats ark:- ark:feats.ark\n"
46  "\n"
47  "See also: compute-kaldi-pitch-feats, compute-and-process-kaldi-pitch-feats\n";
48 
49  ParseOptions po(usage);
50 
51  int32 srand_seed = 0;
52 
53  ProcessPitchOptions process_opts;
54  process_opts.Register(&po);
55 
56  po.Register("srand", &srand_seed, "Seed for random number generator, used to "
57  "add noise to delta-log-pitch features");
58 
59  po.Read(argc, argv);
60 
61  if (po.NumArgs() != 2) {
62  po.PrintUsage();
63  exit(1);
64  }
65 
66  srand(srand_seed);
67 
68  std::string feat_rspecifier = po.GetArg(1),
69  feat_wspecifier = po.GetArg(2);
70 
71  SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
72  BaseFloatMatrixWriter feat_writer(feat_wspecifier);
73 
74  int32 num_done = 0;
75  for (; !feat_reader.Done(); feat_reader.Next()) {
76  std::string utt = feat_reader.Key();
77  const Matrix<BaseFloat> &features = feat_reader.Value();
78 
79  Matrix<BaseFloat> processed_feats(features);
80  ProcessPitch(process_opts, features, &processed_feats);
81 
82  feat_writer.Write(utt, processed_feats);
83  num_done++;
84  }
85  KALDI_LOG << "Post-processed pitch for " << num_done << " utterances.";
86  return (num_done != 0 ? 0 : 1);
87  } catch(const std::exception &e) {
88  std::cerr << e.what();
89  return -1;
90  }
91 }
92 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int main(int argc, char *argv[])
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void ProcessPitch(const ProcessPitchOptions &opts, const MatrixBase< BaseFloat > &input, Matrix< BaseFloat > *output)
This function processes the raw (NCCF, pitch) quantities computed by ComputeKaldiPitch, and processes them into features.
#define KALDI_LOG
Definition: kaldi-error.h:153
void Register(ParseOptions *opts)