feature-mfcc.h
Go to the documentation of this file.
1 // feat/feature-mfcc.h
2 
3 // Copyright 2009-2011 Karel Vesely; Petr Motlicek; Saarland University
4 // 2014-2016 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_FEAT_FEATURE_MFCC_H_
22 #define KALDI_FEAT_FEATURE_MFCC_H_
23 
24 #include <map>
25 #include <string>
26 
27 #include "feat/feature-common.h"
28 #include "feat/feature-functions.h"
29 #include "feat/feature-window.h"
30 #include "feat/mel-computations.h"
31 
32 namespace kaldi {
35 
36 
38 struct MfccOptions {
41  int32 num_ceps; // e.g. 13: num cepstral coeffs, counting zero.
42  bool use_energy; // use energy; else C0
43  BaseFloat energy_floor; // 0 by default; set to a value like 1.0 or 0.1 if
44  // you disable dithering.
45  bool raw_energy; // If true, compute energy before preemphasis and windowing
46  BaseFloat cepstral_lifter; // Scaling factor on cepstra for HTK compatibility.
47  // if 0.0, no liftering is done.
48  bool htk_compat; // if true, put energy/C0 last and introduce a factor of
49  // sqrt(2) on C0 to be the same as HTK.
50 
51  MfccOptions() : mel_opts(23),
52  // defaults the #mel-banks to 23 for the MFCC computations.
53  // this seems to be common for 16khz-sampled data,
54  // but for 8khz-sampled data, 15 may be better.
55  num_ceps(13),
56  use_energy(true),
57  energy_floor(0.0),
58  raw_energy(true),
59  cepstral_lifter(22.0),
60  htk_compat(false) {}
61 
62  void Register(OptionsItf *opts) {
63  frame_opts.Register(opts);
64  mel_opts.Register(opts);
65  opts->Register("num-ceps", &num_ceps,
66  "Number of cepstra in MFCC computation (including C0)");
67  opts->Register("use-energy", &use_energy,
68  "Use energy (not C0) in MFCC computation");
69  opts->Register("energy-floor", &energy_floor,
70  "Floor on energy (absolute, not relative) in MFCC computation. "
71  "Only makes a difference if --use-energy=true; only necessary if "
72  "--dither=0.0. Suggested values: 0.1 or 1.0");
73  opts->Register("raw-energy", &raw_energy,
74  "If true, compute energy before preemphasis and windowing");
75  opts->Register("cepstral-lifter", &cepstral_lifter,
76  "Constant that controls scaling of MFCCs");
77  opts->Register("htk-compat", &htk_compat,
78  "If true, put energy or C0 last and use a factor of sqrt(2) on "
79  "C0. Warning: not sufficient to get HTK compatible features "
80  "(need to change other parameters).");
81  }
82 };
83 
84 
85 
86 // This is the new-style interface to the MFCC computation.
87 class MfccComputer {
88  public:
90  explicit MfccComputer(const MfccOptions &opts);
91  MfccComputer(const MfccComputer &other);
92 
94  return opts_.frame_opts;
95  }
96 
97  int32 Dim() const { return opts_.num_ceps; }
98 
99  bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
100 
122  void Compute(BaseFloat signal_raw_log_energy,
123  BaseFloat vtln_warp,
124  VectorBase<BaseFloat> *signal_frame,
125  VectorBase<BaseFloat> *feature);
126 
127  ~MfccComputer();
128  private:
129  // disallow assignment.
130  MfccComputer &operator = (const MfccComputer &in);
131 
132  protected:
133  const MelBanks *GetMelBanks(BaseFloat vtln_warp);
134 
137  Matrix<BaseFloat> dct_matrix_; // matrix we left-multiply by to perform DCT.
139  std::map<BaseFloat, MelBanks*> mel_banks_; // BaseFloat is VTLN coefficient.
141 
142  // note: mel_energies_ is specific to the frame we're processing, it's
143  // just a temporary workspace.
145 };
146 
148 
149 
151 } // namespace kaldi
152 
153 
154 #endif // KALDI_FEAT_FEATURE_MFCC_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
OfflineFeatureTpl< MfccComputer > Mfcc
Definition: feature-mfcc.h:147
void Register(OptionsItf *opts)
MfccOptions contains basic options for computing MFCC features.
Definition: feature-mfcc.h:38
kaldi::int32 int32
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
Vector< BaseFloat > mel_energies_
Definition: feature-mfcc.h:144
BaseFloat energy_floor
Definition: feature-mfcc.h:43
BaseFloat cepstral_lifter
Definition: feature-mfcc.h:46
BaseFloat log_energy_floor_
Definition: feature-mfcc.h:138
std::map< BaseFloat, MelBanks * > mel_banks_
Definition: feature-mfcc.h:139
MelBanksOptions mel_opts
Definition: feature-mfcc.h:40
FrameExtractionOptions frame_opts
Definition: feature-mfcc.h:39
Matrix< BaseFloat > dct_matrix_
Definition: feature-mfcc.h:137
const FrameExtractionOptions & GetFrameOptions() const
Definition: feature-mfcc.h:93
void Register(OptionsItf *opts)
MfccOptions Options
Definition: feature-mfcc.h:89
A class representing a vector.
Definition: kaldi-vector.h:406
Vector< BaseFloat > lifter_coeffs_
Definition: feature-mfcc.h:136
bool NeedRawLogEnergy() const
Definition: feature-mfcc.h:99
void Register(OptionsItf *opts)
Definition: feature-mfcc.h:62
This templated class is intended for offline feature extraction, i.e.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
SplitRadixRealFft< BaseFloat > * srfft_
Definition: feature-mfcc.h:140
int32 Dim() const
Definition: feature-mfcc.h:97