online-nnet2-feature-pipeline.h
Go to the documentation of this file.
1 // online2/online-nnet2-feature-pipeline.h
2 
3 // Copyright 2013-2014 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_ONLINE2_ONLINE_NNET2_FEATURE_PIPELINE_H_
22 #define KALDI_ONLINE2_ONLINE_NNET2_FEATURE_PIPELINE_H_
23 
24 #include <string>
25 #include <vector>
26 #include <deque>
27 
28 #include "matrix/matrix-lib.h"
29 #include "util/common-utils.h"
30 #include "base/kaldi-error.h"
31 #include "feat/online-feature.h"
32 #include "feat/pitch-functions.h"
34 
35 namespace kaldi {
38 
63 
64 
70  std::string feature_type; // "plp" or "mfcc" or "fbank"
71  std::string mfcc_config;
72  std::string plp_config;
73  std::string fbank_config;
74  std::string cmvn_config;
76 
77  // Note: if we do add pitch, it will not be added to the features we give to
78  // the iVector extractor but only to the features we give to the neural
79  // network, after the base features but before the iVector. We don't think
80  // the iVector will be particularly helpful in normalizing the pitch features.
81  bool add_pitch;
82 
83  // the following contains the type of options that you could give to
84  // compute-and-process-kaldi-pitch-feats.
85  std::string online_pitch_config;
86 
87  // The configuration variables in ivector_extraction_config relate to the
88  // iVector extractor and options related to it, see type
89  // OnlineIvectorExtractionConfig.
91 
92  // Config that relates to how we weight silence for (ivector) adaptation.
93  // This is registered directly on the command line, as you might want to
94  // play with it at test time.
96 
98  feature_type("mfcc"), add_pitch(false) { }
99 
100 
// Registers the fields of this config with the options object `opts`, one
// Register() call per field, under the option names given below. Each call
// passes the option name, a pointer to the member that receives the value,
// and a help string. silence_weighting_config registers its own options,
// passing the prefix "ivector-silence-weighting".
101  void Register(OptionsItf *opts) {
102  opts->Register("feature-type", &feature_type,
103  "Base feature type [mfcc, plp, fbank]");
104  opts->Register("mfcc-config", &mfcc_config, "Configuration file for "
105  "MFCC features (e.g. conf/mfcc.conf)");
106  opts->Register("plp-config", &plp_config, "Configuration file for "
107  "PLP features (e.g. conf/plp.conf)");
108  opts->Register("fbank-config", &fbank_config, "Configuration file for "
109  "filterbank features (e.g. conf/fbank.conf)");
110  opts->Register("cmvn-config", &cmvn_config, "Configuration file for "
111  "online cmvn features (e.g. conf/online_cmvn.conf). "
112  "Controls features on nnet3 input (not ivector features). "
113  "If not set, the OnlineCmvn is disabled.");
114  opts->Register("global-cmvn-stats", &global_cmvn_stats_rxfilename,
115  "filename with global stats for OnlineCmvn for features "
116  "on nnet3 input (not ivector features)");
117  opts->Register("add-pitch", &add_pitch, "Append pitch features to raw "
118  "MFCC/PLP/filterbank features [but not for iVector extraction]");
119  opts->Register("online-pitch-config", &online_pitch_config, "Configuration "
120  "file for online pitch features, if --add-pitch=true (e.g. "
121  "conf/online_pitch.conf)");
122  opts->Register("ivector-extraction-config", &ivector_extraction_config,
123  "Configuration file for online iVector extraction, "
124  "see class OnlineIvectorExtractionConfig in the code");
125  silence_weighting_config.RegisterWithPrefix("ivector-silence-weighting", opts);
126  }
127 };
128 
129 
140  feature_type("mfcc"), add_pitch(false), use_cmvn(false) { }
141 
143  const OnlineNnet2FeaturePipelineConfig &config);
144 
145  BaseFloat FrameShiftInSeconds() const;
146 
147  std::string feature_type;
148 
150  PlpOptions plp_opts;
153 
155  bool add_pitch;
158 
161  bool use_cmvn;
164 
171 
178 
// Returns the iVector dimension as reported by
// ivector_extractor_info.extractor.IvectorDim().
// NOTE(review): this accessor is not declared const; it could be made const
// if the extractor's IvectorDim() is itself const -- confirm before changing.
179  int32 IvectorDim() { return ivector_extractor_info.extractor.IvectorDim(); }
180  private:
182 };
183 
184 
185 
199  public:
204  const OnlineNnet2FeaturePipelineInfo &info);
205 
207 
212  virtual int32 Dim() const;
213 
214  virtual bool IsLastFrame(int32 frame) const;
215  virtual int32 NumFramesReady() const;
216  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);
217 
228  void UpdateFrameWeights(
229  const std::vector<std::pair<int32, BaseFloat> > &delta_weights);
230 
234  void SetAdaptationState(
235  const OnlineIvectorExtractorAdaptationState &adaptation_state);
236 
242  void GetAdaptationState(
243  OnlineIvectorExtractorAdaptationState *adaptation_state) const;
244 
247  void SetCmvnState(const OnlineCmvnState &cmvn_state);
248  void GetCmvnState(OnlineCmvnState *cmvn_state);
249 
254  void AcceptWaveform(BaseFloat sampling_rate,
255  const VectorBase<BaseFloat> &waveform);
256 
// Returns the frame shift in seconds, forwarded unchanged from
// info_.FrameShiftInSeconds().
257  BaseFloat FrameShiftInSeconds() const { return info_.FrameShiftInSeconds(); }
258 
264  void InputFinished();
265 
272  return ivector_feature_;
273  }
274 
278  return ivector_feature_;
279  }
280 
285  return nnet3_feature_;
286  }
287 
288  virtual ~OnlineNnet2FeaturePipeline();
289 
290  private:
292 
294 
297 
301 
306 
311 
313 
318 
323 
326 };
327 
328 
330 } // namespace kaldi
331 
332 
333 
334 #endif // KALDI_ONLINE2_ONLINE_NNET2_FEATURE_PIPELINE_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 dim_
we cache the feature dimension, to save time when calling Dim().
bool use_cmvn
Options for pitch post-processing.
bool add_pitch
Options for filterbank computation, if feature_type == "fbank".
FbankOptions fbank_opts
Options for PLP computation, if feature_type == "plp".
This configuration class is to set up OnlineNnet2FeaturePipelineInfo, which in turn is the configurat...
This struct contains various things that are needed (as const references) by class OnlineIvectorExtra...
MfccOptions contains basic options for computing MFCC features.
Definition: feature-mfcc.h:38
OnlineFeatureInterface * feature_plus_optional_pitch_
Global CMVN stats.
OnlineFeatureInterface * nnet3_feature_
iVector feature, if used.
This class does an online version of the cepstral mean and [optionally] variance, but note that this ...
const OnlineIvectorFeature * IvectorFeature() const
A const accessor for the iVector extractor.
ProcessPitchOptions pitch_process_opts
Options for pitch extraction, if done.
This class stores the adaptation state from the online iVector extractor, which can help you to initi...
This online-feature class implements post processing of pitch features.
OnlineCmvn * cmvn_feature_
Processed pitch, if pitch used.
OnlineFeatureInterface * final_feature_
final_feature_ is feature_plus_optional_cmvn_ appended (OnlineAppendFeature) with ivector_feature_...
kaldi::int32 int32
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)
Definition: kaldi-utils.h:121
OnlineIvectorFeature * IvectorFeature()
This function returns the iVector-extracting part of the feature pipeline (or NULL if iVectors are no...
This class is responsible for storing configuration variables, objects and options for OnlineNnet2Fea...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
OnlineFeatureInterface * InputFeature()
This function returns the part of the feature pipeline that would be given as the primary (non-iVecto...
void RegisterWithPrefix(std::string prefix, OptionsItf *opts)
const OnlineNnet2FeaturePipelineInfo & info_
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
OnlineFeatureInterface * feature_plus_optional_cmvn_
feature_plus_optional_cmvn_ is the feature_plus_optional_pitch_ transformed with OnlineCmvn if cmvn i...
This file contains code for online iVector extraction in a form compatible with OnlineFeatureInterfac...
Add a virtual class for "source" features such as MFCC or PLP or pitch features.
std::string global_cmvn_stats_rxfilename
Options for online cmvn, read from config file.
Matrix< double > global_cmvn_stats_
LDA matrix, if supplied.
OnlineNnet2FeaturePipeline is a class that's responsible for putting together the various parts of th...
OnlineSilenceWeightingConfig silence_weighting_config
Config for weighting silence in iVector adaptation.
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
PlpOptions contains basic options for computing PLP features.
Definition: feature-plp.h:42
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
FbankOptions contains basic options for computing filterbank features.
Definition: feature-fbank.h:41
MfccOptions mfcc_opts
"mfcc" or "plp" or "fbank"
OnlineProcessPitch * pitch_feature_
Raw pitch, if used.
bool use_ivectors
Filename used for reading global cmvn stats in OnlineCmvn.
OnlineIvectorFeature is an online feature-extraction class that's responsible for extracting iVectors...
OnlinePitchFeature * pitch_
MFCC/PLP/filterbank.