online-feature-pipeline.cc
Go to the documentation of this file.
1 // online2/online-feature-pipeline.cc
2 
3 // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
21 #include "transform/cmvn.h"
22 
23 namespace kaldi {
24 
25 
// Validates the incoming `config` and copies its settings into this object's
// members (feature_type, add_pitch, add_deltas, splice_feats).
// NOTE(review): the signature (listing lines 26-27) and the statements at
// listing lines 37, 44, 51, 59, 66, 73, 76-77, 82, 90 and 100 are missing
// from this extract. Presumably the missing line inside each
// `*_config != ""` branch reads that config file (ReadConfigFromFile is
// listed in this page's references) -- confirm against the original source.
28  if (config.feature_type == "mfcc" || config.feature_type == "plp" ||
29  config.feature_type == "fbank") {
30  feature_type = config.feature_type;
31  } else {
32  KALDI_ERR << "Invalid feature type: " << config.feature_type << ". "
33  << "Supported feature types: mfcc, plp, fbank.";
34  }
35 
 // For each per-feature config file: warn if it was supplied but does not
 // match the selected feature type, since it then has no effect.
36  if (config.mfcc_config != "") {
38  if (feature_type != "mfcc")
39  KALDI_WARN << "--mfcc-config option has no effect "
40  << "since feature type is set to " << feature_type << ".";
41  } // else use the defaults.
42 
43  if (config.plp_config != "") {
45  if (feature_type != "plp")
46  KALDI_WARN << "--plp-config option has no effect "
47  << "since feature type is set to " << feature_type << ".";
48  } // else use the defaults.
49 
50  if (config.fbank_config != "") {
52  if (feature_type != "fbank")
53  KALDI_WARN << "--fbank-config option has no effect "
54  << "since feature type is set to " << feature_type << ".";
55  } // else use the defaults.
56 
 // Pitch-related config files only matter when add_pitch is set.
57  add_pitch = config.add_pitch;
58  if (config.pitch_config != "") {
60  if (!add_pitch)
61  KALDI_WARN << "--pitch-config option has no effect "
62  << "since you did not supply --add-pitch option.";
63  } // else use the defaults.
64 
65  if (config.pitch_process_config != "") {
67  if (!add_pitch)
68  KALDI_WARN << "--pitch-process-config option has no effect "
69  << "since you did not supply --add-pitch option.";
70  } // else use the defaults.
71 
72  if (config.cmvn_config != "") {
74  } // else use the defaults.
75 
 // NOTE(review): the condition guarding this error (listing lines 76-77) is
 // missing from the extract; presumably it checks that a global CMVN stats
 // file name was supplied -- confirm.
78  KALDI_ERR << "--global-cmvn-stats option is required.";
79 
80  add_deltas = config.add_deltas;
81  if (config.delta_config != "") {
83  if (!add_deltas)
84  KALDI_WARN << "--delta-config option has no effect "
85  << "since you did not supply --add-deltas option.";
86  } // else use the defaults.
87 
88  splice_feats = config.splice_feats;
89  if (config.splice_config != "") {
91  if (!splice_feats)
92  KALDI_WARN << "--splice-config option has no effect "
93  << "since you did not supply --splice-feats option.";
94  } // else use the defaults.
95 
 // Delta features and frame splicing are mutually exclusive.
96  if (config.add_deltas && config.splice_feats)
97  KALDI_ERR << "You cannot supply both --add-deltas "
98  << "and --splice-feats options";
99 
101 }
102 
103 
105  const OnlineFeaturePipelineConfig &config,
106  const Matrix<BaseFloat> &lda_mat,
107  const Matrix<BaseFloat> &global_cmvn_stats):
108  config_(config), lda_mat_(lda_mat), global_cmvn_stats_(global_cmvn_stats) {
 // Constructor taking the LDA matrix and global CMVN stats directly: the
 // three arguments are copied into members above, then Init() builds the
 // feature-processing objects from them.
109  Init();
110 }
111 
112 
114  const OnlineFeaturePipelineConfig &config):
115  config_(config) {
 // Constructor taking only the config; Init() is called at the end.
 // NOTE(review): the statements guarded by the two `if`s below (listing
 // lines 117 and 119-120) are missing from this extract. Presumably they
 // load lda_mat_ and global_cmvn_stats_ from the given rxfilenames
 // (ReadKaldiObject is listed in this page's references) -- confirm.
116  if (config.lda_rxfilename != "")
118  if (config.global_cmvn_stats_rxfilename != "")
121  Init();
122 }
123 
127 }
128 
// Returns the last stage of the pipeline before any fMLLR transform:
// lda_ if it exists, else splice_or_delta_, else feature_ (which must then
// be non-NULL).
130  if (lda_) return lda_;
131  else if (splice_or_delta_) return splice_or_delta_;
132  else {
133  KALDI_ASSERT(feature_ != NULL);
134  return feature_;
135  }
136 }
137 
// Returns fmllr_ when a transform has been set, otherwise falls back to
// UnadaptedFeature().
139  if (fmllr_) return fmllr_;
140  else
141  return UnadaptedFeature();
142 }
143 
144 
// Forwards the supplied state to the CMVN object.
146  cmvn_->SetState(cmvn_state);
147 }
148 
// Retrieves the CMVN state as of the most recent ready frame.
// `frame` is the index of the last ready frame (-1 when none are ready).
150  int32 frame = cmvn_->NumFramesReady() - 1;
151  // the following call will crash if no frames are ready.
152  cmvn_->GetState(frame, cmvn_state);
153 }
154 
155 
156 // Init() is to be called from the constructor; it assumes the pointer
157 // members are all uninitialized but config_, lda_mat_ and
158 // global_cmvn_stats_ are initialized.
// NOTE(review): the signature (listing line 159) and the statements that
// create the individual feature objects (listing lines 161, 163, 165, 171,
// 178, 189-190, 192, 199, 203, 206, 213, 215) are missing from this
// extract; the comments below describe only what is visible.
 // Choose the base feature according to config_.feature_type; any other
 // value is treated as a code error.
160  if (config_.feature_type == "mfcc") {
162  } else if (config_.feature_type == "plp") {
164  } else if (config_.feature_type == "fbank") {
166  } else {
167  KALDI_ERR << "Code error: invalid feature type " << config_.feature_type;
168  }
169 
170  {
 // Set up online CMVN on top of base_feature_, seeded with the global stats.
 // With pitch enabled the global stats may have more columns than the base
 // feature has dimensions; in that case the final column is saved and
 // written back at column index `dim`.
 // NOTE(review): the statement ending in `kCopyData);` is truncated here;
 // presumably it resizes global_cmvn_stats_ to dim + 1 columns while keeping
 // the data -- confirm.
172  if (config_.add_pitch) {
173  int32 global_dim = global_cmvn_stats_.NumCols() - 1;
174  int32 dim = base_feature_->Dim();
175  KALDI_ASSERT(global_dim >= dim);
176  if (global_dim > dim) {
177  Matrix<BaseFloat> last_col(global_cmvn_stats_.ColRange(global_dim, 1));
179  kCopyData);
180  global_cmvn_stats_.ColRange(dim, 1).CopyFromMat(last_col);
181  }
182  }
 // OnlineCmvnState is built from a double-precision copy of the stats.
183  Matrix<double> global_cmvn_stats_dbl(global_cmvn_stats_);
184  OnlineCmvnState initial_state(global_cmvn_stats_dbl);
185  cmvn_ = new OnlineCmvn(config_.cmvn_opts, initial_state, base_feature_);
186  }
187 
 // Without pitch, the CMVN output is the feature stream and both pitch
 // pointers stay NULL. (The pitch branch's statements are mostly missing.)
188  if (config_.add_pitch) {
191  pitch_);
193  } else {
194  pitch_ = NULL;
195  pitch_feature_ = NULL;
196  feature_ = cmvn_;
197  }
198 
 // At most one of splicing / deltas is applied on top of feature_.
200  KALDI_ERR << "You cannot supply both --add-deltas and "
201  << "--splice-feats options.";
202  } else if (config_.splice_feats) {
204  feature_);
205  } else if (config_.add_deltas) {
207  feature_);
208  } else {
209  splice_or_delta_ = NULL;
210  }
211 
 // An LDA stage is only created when a non-empty LDA matrix was provided.
212  if (lda_mat_.NumRows() != 0) {
214  (splice_or_delta_ != NULL ?
216  } else {
217  lda_ = NULL;
218  }
219 
220  fmllr_ = NULL; // This will be set up if the user calls SetTransform().
221 }
222 
224  const MatrixBase<BaseFloat> &transform) {
 // Replaces the current fMLLR transform. Any existing OnlineTransform is
 // deleted first; a transform with zero rows leaves fmllr_ as NULL, i.e.
 // removes the transform.
225  if (fmllr_ != NULL) { // we already had a transform; delete this
226  // object.
227  delete fmllr_;
228  fmllr_ = NULL;
229  }
230  if (transform.NumRows() != 0) {
 // NOTE(review): the definition of `feat` (listing line 231) is missing
 // from this extract; presumably it is the unadapted feature stream --
 // confirm.
232  fmllr_ = new OnlineTransform(transform, feat);
233  }
234 }
235 
236 
// Freezes the CMVN object at the index of the last ready frame.
// NOTE(review): the index passed is -1 when no frames are ready -- confirm
// that callers only invoke this after at least one frame is available.
238  cmvn_->Freeze(cmvn_->NumFramesReady() - 1);
239 }
240 
// Forwards to the final stage of the pipeline (see AdaptedFeature()).
242  return AdaptedFeature()->Dim();
243 }
// Forwards to the final stage of the pipeline (see AdaptedFeature()).
245  return AdaptedFeature()->IsLastFrame(frame);
246 }
// Forwards to the final stage of the pipeline (see AdaptedFeature()).
248  return AdaptedFeature()->NumFramesReady();
249 }
250 
252  VectorBase<BaseFloat> *feat) {
 // Writes the requested frame into *feat using the final stage of the
 // pipeline (see AdaptedFeature()).
253  AdaptedFeature()->GetFrame(frame, feat);
254 }
255 
257  // Note: the delete command only deletes pointers that are non-NULL. Not all
258  // of the pointers below will be non-NULL.
 // Objects are released from the last pipeline stage back to the first.
259  delete fmllr_;
260  delete lda_;
261  delete splice_or_delta_;
262  // Guard against double deleting the cmvn_ ptr
 // Init() sets feature_ = cmvn_ when pitch is not added, so feature_ is only
 // deleted here, in the pitch case; cmvn_ itself is deleted once, below.
263  if (pitch_feature_) {
264  delete feature_; // equal to cmvn_ if pitch feats are not appended
265  delete pitch_feature_;
266  delete pitch_;
267  }
268  delete cmvn_;
269  delete base_feature_;
270 }
271 
273  BaseFloat sampling_rate,
274  const VectorBase<BaseFloat> &waveform) {
 // Passes the new audio to the base feature and, when pitch extraction is
 // enabled (pitch_ non-NULL), to the pitch object as well.
275  base_feature_->AcceptWaveform(sampling_rate, waveform);
276  if (pitch_)
277  pitch_->AcceptWaveform(sampling_rate, waveform);
278 }
279 
// NOTE(review): the signature and the statements at listing lines 281 and
// 283 are missing from this extract. Presumably they call InputFinished()
// on base_feature_ and, under the guard below, on pitch_ (mirroring
// AcceptWaveform) -- confirm against the original source.
282  if (pitch_)
284 }
285 
// Returns the frame shift, in seconds, of the configured feature type by
// converting that type's frame_opts.frame_shift_ms from milliseconds.
287  if (feature_type == "mfcc") {
288  return mfcc_opts.frame_opts.frame_shift_ms / 1000.0f;
289  } else if (feature_type == "plp") {
290  return plp_opts.frame_opts.frame_shift_ms / 1000.0f;
291  } else if (feature_type == "fbank") {
292  return fbank_opts.frame_opts.frame_shift_ms / 1000.0f;
293  } else {
294  KALDI_ERR << "Unknown feature type " << feature_type;
 // Presumably never reached if KALDI_ERR does not return; gives this
 // branch a return value -- confirm.
295  return 0.0;
296  }
297 }
298 
// Copies all currently ready frames into *feats, one frame per row.
// Only does anything when pitch_ is non-NULL; otherwise *feats is left
// untouched.
// NOTE(review): the row count comes from this class's NumFramesReady()
// while the frames and column count come from pitch_feature_ -- confirm
// this is the intended stream.
300  if (pitch_) {
301  feats->Resize(NumFramesReady(), pitch_feature_->Dim());
302  for (int32 i = 0; i < NumFramesReady(); i++) {
303  SubVector<BaseFloat> row(*feats, i);
304  pitch_feature_->GetFrame(i, &row);
305  }
306  }
307 }
308 
309 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual int32 NumFramesReady() const
returns the number of frames that are ready.
virtual int32 Dim() const
Member functions from OnlineFeatureInterface:
OnlineFeatureInterface * AdaptedFeature() const
returns adapted feature if fmllr_ exists, else UnadaptedFeature().
void ReadConfigFromFile(const std::string &config_filename, C *c)
This template is provided for convenience in reading config classes from files; this is not the stand...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
OnlineGenericBaseFeature< PlpComputer > OnlinePlp
OnlineGenericBaseFeature< MfccComputer > OnlineMfcc
OnlineGenericBaseFeature< FbankComputer > OnlineFbank
virtual int32 Dim() const
OnlineFeatureInterface * feature_
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
virtual void InputFinished()
InputFinished() tells the class you won't be providing any more waveform.
This class does an online version of the cepstral mean and [optionally] variance, but note that this ...
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
This would be called from the application, when you get more wave data.
This online-feature class implements post processing of pitch features.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)=0
Gets the feature vector for this frame.
This file contains a class OnlineFeaturePipeline for online feature extraction, which puts together v...
kaldi::int32 int32
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
OnlineFeatureInterface * UnadaptedFeature() const
returns lda_ if it exists, else splice_or_delta_, else feature_ (which is cmvn_ when pitch features are not appended).
This configuration class is to set up OnlineFeaturePipelineConfig, which in turn is the configuration...
OnlineFeaturePipeline is a class that's responsible for putting together the various stages of the fe...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
void GetState(int32 cur_frame, OnlineCmvnState *cmvn_state)
OnlineFeaturePipeline(const OnlineFeaturePipelineConfig &cfg)
void GetCmvnState(OnlineCmvnState *cmvn_state)
SubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: kaldi-matrix.h:213
void Freeze(int32 cur_frame)
This online-feature class implements combination of two feature streams (such as pitch, plp) into one stream.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
void SetState(const OnlineCmvnState &cmvn_state)
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
virtual bool IsLastFrame(int32 frame) const =0
Returns true if this is the last frame.
#define KALDI_ERR
Definition: kaldi-error.h:147
This online-feature class implements any affine or linear transform.
#define KALDI_WARN
Definition: kaldi-error.h:150
void GetAsMatrix(Matrix< BaseFloat > *feats)
void Init()
Init() is to be called from the constructor; it assumes the pointer members are all uninitialized but...
OnlineFeaturePipeline * New() const
returns a newly initialized copy of *this -- this does not duplicate all the internal state or the spe...
void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
Accept more data to process (won't actually process it, will just copy it).
OnlineFeatureInterface * splice_or_delta_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void SetTransform(const MatrixBase< BaseFloat > &transform)
This configuration class is responsible for storing the configuration options for OnlineFeaturePipeli...
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
OnlineFeatureInterface is an interface for online feature processing (it is also usable in the offlin...
OnlineFeaturePipelineConfig config_
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)=0
This would be called from the application, when you get more wave data.
virtual void InputFinished()=0
InputFinished() tells the class you won't be providing any more waveform.
virtual int32 NumFramesReady() const
returns the number of frames that are ready.
void SetCmvnState(const OnlineCmvnState &cmvn_state)
Set the CMVN state to a particular value (will generally be called after Copy()).
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
virtual int32 NumFramesReady() const =0
returns the number of frames that are ready.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
virtual int32 Dim() const =0