online-feature-pipeline.h
Go to the documentation of this file.
1 // online2/online-feature-pipeline.h
2 
3 // Copyright 2013-2014 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_ONLINE2_ONLINE_FEATURE_PIPELINE_H_
22 #define KALDI_ONLINE2_ONLINE_FEATURE_PIPELINE_H_
23 
24 #include <string>
25 #include <vector>
26 #include <deque>
27 
28 #include "matrix/matrix-lib.h"
29 #include "util/common-utils.h"
30 #include "base/kaldi-error.h"
31 #include "feat/online-feature.h"
32 #include "feat/pitch-functions.h"
33 
34 namespace kaldi {
37 
42 
50  std::string feature_type;
51  std::string mfcc_config;
52  std::string plp_config;
53  std::string fbank_config;
54  bool add_pitch;
55  std::string pitch_config;
56  std::string pitch_process_config;
57  std::string cmvn_config;
59  bool add_deltas;
60  std::string delta_config;
62  std::string splice_config;
63  std::string lda_rxfilename;
64 
66  feature_type("mfcc"), add_pitch(false), add_deltas(false),
67  splice_feats(false) { }
68 
// Registers this configuration's command-line options with `opts`.  Each call
// passes the option name, a pointer to the member that holds its value, and
// the documentation string for that option.
// NOTE(review): `global_cmvn_stats_rxfilename` and `splice_feats` are
// registered below, but their member declarations are not visible in this
// listing -- confirm they are declared in the enclosing struct.
69  void Register(OptionsItf *opts) {
70  opts->Register("feature-type", &feature_type,
71  "Base feature type [mfcc, plp, fbank]");
72  opts->Register("mfcc-config", &mfcc_config, "Configuration file for "
73  "MFCC features (e.g. conf/mfcc.conf)");
74  opts->Register("plp-config", &plp_config, "Configuration file for "
75  "PLP features (e.g. conf/plp.conf)");
76  opts->Register("fbank-config", &fbank_config, "Configuration file for "
77  "filterbank features (e.g. conf/fbank.conf)");
78  opts->Register("add-pitch", &add_pitch, "Append pitch features to raw "
79  "MFCC/PLP features.");
80  opts->Register("pitch-config", &pitch_config, "Configuration file for "
81  "pitch features (e.g. conf/pitch.conf)");
82  opts->Register("pitch-process-config", &pitch_process_config,
83  "Configuration file for post-processing pitch features "
84  "(e.g. conf/pitch_process.conf)");
85  opts->Register("cmvn-config", &cmvn_config, "Configuration class "
86  "file for online CMVN features (e.g. conf/online_cmvn.conf)");
87  opts->Register("global-cmvn-stats", &global_cmvn_stats_rxfilename,
88  "(Extended) filename for global CMVN stats, e.g. obtained "
89  "from 'matrix-sum scp:data/train/cmvn.scp -'");
90  opts->Register("add-deltas", &add_deltas,
91  "Append delta features.");
92  opts->Register("delta-config", &delta_config, "Configuration file for "
93  "delta feature computation (if not supplied, will not apply "
94  "delta features; supply empty config to use defaults.)");
95  opts->Register("splice-feats", &splice_feats, "Splice features with left and "
96  "right context.");
97  opts->Register("splice-config", &splice_config, "Configuration file "
98  "for frame splicing, if done (e.g. prior to LDA)");
99  opts->Register("lda-matrix", &lda_rxfilename, "Filename of LDA matrix (if "
100  "using LDA), e.g. exp/foo/final.mat");
101  }
102 };
103 
104 
105 
114  feature_type("mfcc"), add_pitch(false), add_deltas(true),
115  splice_feats(false) { }
116 
118  const OnlineFeaturePipelineCommandLineConfig &cmdline_config);
119 
120  BaseFloat FrameShiftInSeconds() const;
121 
122  std::string feature_type; // "mfcc" or "plp" or "fbank"
123 
124  MfccOptions mfcc_opts; // options for MFCC computation,
125  // if feature_type == "mfcc"
126  PlpOptions plp_opts; // Options for PLP computation, if feature_type == "plp"
127  FbankOptions fbank_opts; // Options for filterbank computation, if
128  // feature_type == "fbank"
129 
130  bool add_pitch;
131  PitchExtractionOptions pitch_opts; // Options for pitch extraction, if done.
133  // processing
134 
135  OnlineCmvnOptions cmvn_opts; // Options for online CMN/CMVN computation.
136 
138  DeltaFeaturesOptions delta_opts; // Options for delta computation, if done.
139 
141  OnlineSpliceOptions splice_opts; // Options for frame splicing, if done.
142 
143  std::string lda_rxfilename; // Filename for reading LDA or LDA+MLLT matrix,
144  // if used.
145  std::string global_cmvn_stats_rxfilename; // Filename used for reading global
146  // CMVN stats
147 };
148 
149 
150 
158  public:
160 
162  virtual int32 Dim() const;
163  virtual bool IsLastFrame(int32 frame) const;
164  virtual int32 NumFramesReady() const;
165  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
166 
167  // This is supplied for debug purposes.
168  void GetAsMatrix(Matrix<BaseFloat> *feats);
169 
170  void FreezeCmvn(); // stop it from moving further (do this when you start
171  // using fMLLR). This will crash if NumFramesReady() == 0.
172 
175  void SetCmvnState(const OnlineCmvnState &cmvn_state);
176  void GetCmvnState(OnlineCmvnState *cmvn_state);
177 
181  void AcceptWaveform(BaseFloat sampling_rate,
182  const VectorBase<BaseFloat> &waveform);
183 
185  return config_.FrameShiftInSeconds();
186  }
187 
188  // InputFinished() tells the class you won't be providing any
189  // more waveform. This will help flush out the last few frames
190  // of delta or LDA features, and finalize the pitch features
191  // (making them more accurate).
192  void InputFinished();
193 
194  // This function is used to set the fMLLR transform. Call it with
195  // the empty matrix if you want to stop it using any transform.
196  void SetTransform(const MatrixBase<BaseFloat> &transform);
197 
198 
199  // Returns true if an fMLLR transform has been set using
200  // SetTransform(); implemented as a check that fmllr_ is non-NULL.
201  bool HaveFmllrTransform() { return fmllr_ != NULL; }
202 
208  OnlineFeaturePipeline *New() const;
209 
210  virtual ~OnlineFeaturePipeline();
211 
212  private:
217  const Matrix<BaseFloat> &lda_mat,
218  const Matrix<BaseFloat> &global_cmvn_stats);
219 
223  void Init();
224 
226  Matrix<BaseFloat> lda_mat_; // LDA matrix, if supplied.
227  Matrix<BaseFloat> global_cmvn_stats_; // Global CMVN stats.
228 
229  OnlineBaseFeature *base_feature_; // MFCC/PLP/Fbank
230  OnlinePitchFeature *pitch_; // Raw pitch
231  OnlineProcessPitch *pitch_feature_; // Processed pitch
232  OnlineFeatureInterface *feature_; // CMVN (+ processed pitch)
233 
235  OnlineFeatureInterface *splice_or_delta_; // This may be NULL if we're not
236  // doing splicing or deltas.
237 
238  OnlineFeatureInterface *lda_; // If non-NULL, the LDA or LDA+MLLT transform.
239 
243  OnlineFeatureInterface* UnadaptedFeature() const;
244 
245  OnlineFeatureInterface *fmllr_; // non-NULL if we currently have an fMLLR
246  // transform.
247 
251  OnlineFeatureInterface* AdaptedFeature() const;
252 };
253 
254 
255 
256 
258 } // namespace kaldi
259 
260 
261 
262 #endif // KALDI_ONLINE2_ONLINE_FEATURE_PIPELINE_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
OnlineFeatureInterface * feature_
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
MfccOptions contains basic options for computing MFCC features.
Definition: feature-mfcc.h:38
This class does an online version of the cepstral mean and [optionally] variance, but note that this ...
This online-feature class implements post processing of pitch features.
kaldi::int32 int32
This configuration class is to set up OnlineFeaturePipelineConfig, which in turn is the configuration...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
OnlineFeaturePipeline is a class that's responsible for putting together the various stages of the fe...
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
Add a virtual class for "source" features such as MFCC or PLP or pitch features.
OnlineFeatureInterface * splice_or_delta_
This configuration class is responsible for storing the configuration options for OnlineFeaturePipeli...
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
OnlineFeaturePipelineConfig config_
PlpOptions contains basic options for computing PLP features.
Definition: feature-plp.h:42
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
FbankOptions contains basic options for computing filterbank features.
Definition: feature-fbank.h:41