26 int main(
int argc, 
char *argv[]) {
    28     using namespace kaldi;
    30         "Create MFCC feature files.\n"    31         "Usage:  compute-mfcc-feats [options...] <wav-rspecifier> "    32         "<feats-wspecifier>\n";
    38     bool subtract_mean = 
false;
    40     std::string vtln_map_rspecifier;
    41     std::string utt2spk_rspecifier;
    44     std::string output_format = 
"kaldi";
    45     std::string utt2dur_wspecifier;
    51     po.
Register(
"output-format", &output_format, 
"Format of the output "    52                 "files [kaldi, htk]");
    53     po.
Register(
"subtract-mean", &subtract_mean, 
"Subtract mean of each "    54                 "feature file [CMS]; not recommended to do it this way. ");
    55     po.
Register(
"vtln-warp", &vtln_warp, 
"Vtln warp factor (only applicable "    56                 "if vtln-map not specified)");
    57     po.
Register(
"vtln-map", &vtln_map_rspecifier, 
"Map from utterance or "    58                 "speaker-id to vtln warp factor (rspecifier)");
    59     po.
Register(
"utt2spk", &utt2spk_rspecifier, 
"Utterance to speaker-id map "    60                 "rspecifier (if doing VTLN and you have warps per speaker)");
    61     po.
Register(
"channel", &channel, 
"Channel to extract (-1 -> expect mono, "    62                 "0 -> left, 1 -> right)");
    63     po.
Register(
"min-duration", &min_duration, 
"Minimum duration of segments "    64                 "to process (in seconds).");
    65     po.
Register(
"write-utt2dur", &utt2dur_wspecifier, 
"Wspecifier to write "    66                 "duration of each utterance in seconds, e.g. 'ark,t:utt2dur'.");
    75     std::string wav_rspecifier = po.
GetArg(1);
    77     std::string output_wspecifier = po.
GetArg(2);
    81     if (utt2spk_rspecifier != 
"" && vtln_map_rspecifier == 
"")
    82       KALDI_ERR << (
"The --utt2spk option is only needed if "    83                     "the --vtln-map option is used.");
    91     if (output_format == 
"kaldi") {
    92       if (!kaldi_writer.
Open(output_wspecifier))
    93         KALDI_ERR << 
"Could not initialize output with wspecifier "    95     } 
else if (output_format == 
"htk") {
    96       if (!htk_writer.
Open(output_wspecifier))
    97         KALDI_ERR << 
"Could not initialize output with wspecifier "   100       KALDI_ERR << 
"Invalid output_format string " << output_format;
   105     int32 num_utts = 0, num_success = 0;
   106     for (; !reader.
Done(); reader.
Next()) {
   108       std::string utt = reader.
Key();
   110       if (wave_data.
Duration() < min_duration) {
   111         KALDI_WARN << 
"File: " << utt << 
" is too short ("   112                    << wave_data.
Duration() << 
" sec): producing no output.";
   122             KALDI_WARN << 
"Channel not specified but you have data with "   123                        << num_chan  << 
" channels; defaulting to zero";
   125           if (this_chan >= num_chan) {
   126             KALDI_WARN << 
"File with id " << utt << 
" has "   127                        << num_chan << 
" channels but you specified channel "   128                        << channel << 
", producing no output.";
   134       if (vtln_map_rspecifier != 
"") {
   135         if (!vtln_map_reader.HasKey(utt)) {
   136           KALDI_WARN << 
"No vtln-map entry for utterance-id (or speaker-id) "   140         vtln_warp_local = vtln_map_reader.Value(utt);
   142         vtln_warp_local = vtln_warp;
   149                              vtln_warp_local, &features);
   151         KALDI_WARN << 
"Failed to compute features for utterance " << utt;
   157         mean.Scale(1.0 / features.NumRows());
   158         for (
int32 i = 0; 
i < features.NumRows(); 
i++)
   159           features.Row(
i).
AddVec(-1.0, mean);
   161       if (output_format == 
"kaldi") {
   162         kaldi_writer.
Write(utt, features);
   164         std::pair<Matrix<BaseFloat>, 
HtkHeader> p;
   165         p.first.Resize(features.NumRows(), features.NumCols());
   166         p.first.CopyFromMat(features);
   170           static_cast<int16
>(
sizeof(
float)*(features.NumCols())),
   171           static_cast<uint16
>( 006 | 
   175         htk_writer.
Write(utt, p);
   177       if (utt2dur_writer.
IsOpen()) {
   180       if (num_utts % 10 == 0)
   181         KALDI_LOG << 
"Processed " << num_utts << 
" utterances";
   182       KALDI_VLOG(2) << 
"Processed features for key " << utt;
   185     KALDI_LOG << 
" Done " << num_success << 
" out of " << num_utts
   187     return (num_success != 0 ? 0 : 1);
   188   } 
catch(
const std::exception &e) {
   189     std::cerr << e.what();
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
bool Open(const std::string &wspecifier)
 
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this. 
 
void ComputeFeatures(const VectorBase< BaseFloat > &wave, BaseFloat sample_freq, BaseFloat vtln_warp, Matrix< BaseFloat > *output)
Computes the features for one file (one sequence of features). 
 
MfccOptions contains basic options for computing MFCC features. 
 
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor]. 
 
This class is for when you are reading something in random access, but it may actually be stored per-...
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
BaseFloat SampFreq() const
 
const Matrix< BaseFloat > & Data() const
 
void Write(const std::string &key, const T &value) const
 
void Register(const std::string &name, bool *ptr, const std::string &doc)
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables. 
 
int main(int argc, char *argv[])
 
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility. 
 
This class's purpose is to read in Wave files. 
 
int NumArgs() const
Number of positional parameters (c.f. argc-1). 
 
A class representing a vector. 
 
#define KALDI_ASSERT(cond)
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
void Register(OptionsItf *opts)
 
BaseFloat Duration() const
 
This templated class is intended for offline feature extraction, i.e. 
 
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
 
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...