28 using namespace kaldi;
30 "Create spectrogram feature files.\n" 31 "Usage: compute-spectrogram-feats [options...] <wav-rspecifier> " 32 "<feats-wspecifier>\n";
38 bool subtract_mean =
false;
41 std::string output_format =
"kaldi";
42 std::string utt2dur_wspecifier;
47 po.Register(
"output-format", &output_format,
48 "Format of the output files [kaldi, htk]");
49 po.Register(
"subtract-mean", &subtract_mean,
"Subtract mean of each " 50 "feature file [CMS]; not recommended to do it this way. ");
51 po.Register(
"channel", &channel,
"Channel to extract (-1 -> expect mono, " 52 "0 -> left, 1 -> right)");
53 po.Register(
"min-duration", &min_duration,
"Minimum duration of segments " 54 "to process (in seconds).");
55 po.Register(
"write-utt2dur", &utt2dur_wspecifier,
"Wspecifier to write " 56 "duration of each utterance in seconds, e.g. 'ark,t:utt2dur'.");
60 if (po.NumArgs() != 2) {
65 std::string wav_rspecifier = po.GetArg(1);
67 std::string output_wspecifier = po.GetArg(2);
75 if (output_format ==
"kaldi") {
76 if (!kaldi_writer.
Open(output_wspecifier))
77 KALDI_ERR <<
"Could not initialize output with wspecifier " 79 }
else if (output_format ==
"htk") {
80 if (!htk_writer.
Open(output_wspecifier))
81 KALDI_ERR <<
"Could not initialize output with wspecifier " 84 KALDI_ERR <<
"Invalid output_format string " << output_format;
89 int32 num_utts = 0, num_success = 0;
90 for (; !reader.Done(); reader.Next()) {
92 std::string utt = reader.Key();
93 const WaveData &wave_data = reader.Value();
94 if (wave_data.
Duration() < min_duration) {
95 KALDI_WARN <<
"File: " << utt <<
" is too short (" 96 << wave_data.
Duration() <<
" sec): producing no output.";
106 KALDI_WARN <<
"Channel not specified but you have data with " 107 << num_chan <<
" channels; defaulting to zero";
109 if (this_chan >= num_chan) {
110 KALDI_WARN <<
"File with id " << utt <<
" has " 111 << num_chan <<
" channels but you specified channel " 112 << channel <<
", producing no output.";
121 spec.ComputeFeatures(waveform, wave_data.
SampFreq(), 1.0, &features);
123 KALDI_WARN <<
"Failed to compute features for utterance " << utt;
129 mean.Scale(1.0 / features.NumRows());
130 for (
int32 i = 0;
i < features.NumRows();
i++)
131 features.Row(
i).
AddVec(-1.0, mean);
133 if (output_format ==
"kaldi") {
134 kaldi_writer.
Write(utt, features);
136 std::pair<Matrix<BaseFloat>,
HtkHeader> p;
137 p.first.Resize(features.NumRows(), features.NumCols());
138 p.first.CopyFromMat(features);
143 static_cast<int16
>(
sizeof(
float)*features.NumCols()),
147 htk_writer.
Write(utt, p);
149 if (utt2dur_writer.IsOpen()) {
150 utt2dur_writer.Write(utt, wave_data.
Duration());
152 if(num_utts % 10 == 0)
153 KALDI_LOG <<
"Processed " << num_utts <<
" utterances";
154 KALDI_VLOG(2) <<
"Processed features for key " << utt;
157 KALDI_LOG <<
" Done " << num_success <<
" out of " << num_utts
159 return (num_success != 0 ? 0 : 1);
160 }
catch(
const std::exception& e) {
161 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool Open(const std::string &wspecifier)
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
A templated class for writing objects to an archive or script file; see The Table concept...
BaseFloat SampFreq() const
void Register(OptionsItf *opts)
const Matrix< BaseFloat > & Data() const
void Write(const std::string &key, const T &value) const
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
This class's purpose is to read in Wave files.
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
BaseFloat Duration() const
This templated class is intended for offline feature extraction, i.e. where you have access to the entire signal at the start.
SpectrogramOptions contains basic options for computing spectrogram features.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
FrameExtractionOptions frame_opts