32 for (
int32 i = 0;
i < num_frames;
i++) {
35 (*mat)(
i, 0) =
Log((p + 0.0001) / (1.0001 - p));
41 for (
int32 i = 0;
i < num_frames;
i++) {
43 (*mat)(
i, 1) =
Log((*mat)(
i, 1));
55 for (i = 0; i < num_frames; i++)
56 temp_pitch(i) = features(i, 1);
60 int32 mid_win = (normalization_window_size - 1) / 2;
61 for (i = 0; (i < num_frames) && (i < normalization_window_size); i++) {
62 mean += features(i, 1);
66 if (num_frames <= normalization_window_size) {
67 for (i = 0; i < num_frames; i++) {
68 features(i, 1) -= mean;
71 for (i = 0; i <= mid_win; i++) {
72 features(i, 1) -= mean;
74 for (i = (mid_win + 1); i < num_frames; i++) {
75 if (i + (mid_win + 1) < num_frames)
76 mean -= (temp_pitch(i - (mid_win + 1)) -
77 temp_pitch(i + (mid_win + 1))) / normalization_window_size;
78 features(i,1) -= mean;
89 int32 width = (average_window_size - 1) / 2,
i;
92 for (
i = width;
i < num_frames - width ;
i++) {
93 temp_pitch(
i) = features(
i, 1);
94 for(
int j = 1;
j <= width; ++
j) {
95 temp_pitch(
i) += (features(
i -
j, 1) + features(
i +
j, 1));
97 temp_pitch(
i) /= (2 * width + 1);
99 for (
i = width;
i < num_frames - width;
i++)
100 features(
i, 1) = temp_pitch(
i);
105 int main(
int argc,
char *argv[]) {
107 using namespace kaldi;
109 "This is a rather special-purpose program which processes 2-dimensional\n" 110 "features consisting of (prob-of-voicing, pitch) into something suitable\n" 111 "to put into a speech recognizer. First use interpolate-feats\n" 112 "Usage: process-pitch-feats [options...] <feats-rspecifier> <feats-wspecifier>\n";
118 int32 normalization_window_size = 151;
119 int32 average_window_size = 5;
122 po.
Register(
"normalization-window-size",
123 &normalization_window_size,
"Size of window used for " 124 "moving window nomalization (must be odd).");
126 &average_window_size,
127 "Size of moving average window (must be odd).");
137 KALDI_ASSERT(average_window_size > 0 && average_window_size % 2 == 1 &&
138 "--average-window-size option must be an odd positive number.");
139 KALDI_ASSERT(normalization_window_size > 0 && normalization_window_size % 2 == 1 &&
140 "--normalization-window-size option must be an odd positive number.");
142 std::string input_rspecifier = po.
GetArg(1);
143 std::string output_wspecifier = po.
GetArg(2);
148 if (!kaldi_writer.
Open(output_wspecifier))
149 KALDI_ERR <<
"Could not initialize output with wspecifier " 150 << output_wspecifier;
152 int32 num_done = 0, num_err = 0;
154 for (; !reader.
Done(); reader.
Next()) {
155 std::string utt = reader.
Key();
157 int num_frames = features.
NumRows();
159 if (num_frames == 0 && features.
NumCols() != 2) {
170 kaldi_writer.
Write(utt, features);
173 if (num_done % 10 == 0)
174 KALDI_LOG <<
"Processed " << num_done <<
" utterances";
175 KALDI_VLOG(2) <<
"Processed features for key " << utt;
177 KALDI_LOG <<
"Done " << num_done <<
" utterances, " << num_err
179 return (num_done != 0 ? 0 : 1);
180 }
catch(
const std::exception &e) {
181 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool Open(const std::string &wspecifier)
void SubtractMovingAverage(int32 normalization_window_size, Matrix< BaseFloat > *mat)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int main(int argc, char *argv[])
A templated class for writing objects to an archive or script file; see The Table concept...
void TakeLogOfPitch(Matrix< BaseFloat > *mat)
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
void ProcessPovFeatures(Matrix< BaseFloat > *mat)
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void SetToMovingAverage(int32 average_window_size, Matrix< BaseFloat > *mat)