// Entry point of online2-wav-dump-features (name taken from the usage string
// below). The visible statements iterate over spk2utt entries, feed each
// utterance's waveform to feature_pipeline in chunks, collect the frames it
// produces into a matrix, and write that matrix under the utterance id.
// NOTE(review): this listing is a lossy extract -- the leading integers are the
// original file's line numbers and a number of original lines are absent -- so
// the comments below describe only the statements that are actually visible.
30 int main(
int argc,
char *argv[]) {
32 using namespace kaldi;
36 typedef kaldi::int64 int64;
39 "Reads in wav file(s) and processes them as in online2-wav-nnet2-latgen-faster,\n" 40 "but instead of decoding, dumps the features. Most of the parameters\n" 41 "are set via configuration variables.\n" 43 "Usage: online2-wav-dump-features [options] <spk2utt-rspecifier> <wav-rspecifier> <feature-wspecifier>\n" 44 "The spk2utt-rspecifier can just be <utterance-id> <utterance-id> if\n" 45 "you want to generate features utterance by utterance.\n" 46 "Alternate usage: online2-wav-dump-features [options] --print-ivector-dim=true\n" 47 "See steps/online/nnet2/{dump_nnet_activations,get_egs.sh} for examples.\n";
55 bool print_ivector_dim =
false;
// Command-line options registered here: --chunk-length and --print-ivector-dim.
57 po.
Register(
"chunk-length", &chunk_length_secs,
58 "Length of chunk size in seconds, that we process.");
59 po.
Register(
"print-ivector-dim", &print_ivector_dim,
60 "If true, print iVector dimension (possibly zero) and exit. This " 61 "version requires no arguments.");
// Argument-count check: exactly three positional arguments are expected unless
// print_ivector_dim is set. (The body of this branch is not visible here.)
67 if (!print_ivector_dim && po.
NumArgs() != 3) {
// With print_ivector_dim set, the iVector dimension is written to stdout.
74 if (print_ivector_dim) {
75 std::cout << feature_info.
IvectorDim() << std::endl;
// Positional arguments, in order: spk2utt rspecifier, wav rspecifier,
// feature wspecifier (matching the usage string above).
79 std::string spk2utt_rspecifier = po.
GetArg(1),
80 wav_rspecifier = po.
GetArg(2),
81 feats_wspecifier = po.
GetArg(3);
// Counters reported in the final log message.
84 int32 num_done = 0, num_err = 0;
85 int64 num_frames_tot = 0;
// Outer loop: one iteration per spk2utt entry; the key is read into `spk` and
// the value is the list of utterance ids for that entry.
91 for (; !spk2utt_reader.
Done(); spk2utt_reader.
Next()) {
92 std::string spk = spk2utt_reader.
Key();
93 const std::vector<std::string> &uttlist = spk2utt_reader.
Value();
// Inner loop: one iteration per utterance id in the list.
96 for (
size_t i = 0;
i < uttlist.size();
i++) {
97 std::string utt = uttlist[
i];
// An utterance with no entry in wav_reader produces a warning.
// NOTE(review): the rest of this branch is not visible -- presumably it counts
// an error and skips the utterance; confirm against the full source.
98 if (!wav_reader.
HasKey(utt)) {
99 KALDI_WARN <<
"Did not find audio for utterance " << utt;
109 feature_pipeline.SetAdaptationState(adaptation_state);
// Pointers to per-frame vectors; each one is deleted further down, right after
// it has been copied into `feats`.
111 std::vector<Vector<BaseFloat> *> feature_data;
// Chunk length converted from seconds to samples; a result of zero is bumped
// to one sample.
119 int32 chunk_length =
int32(samp_freq * chunk_length_secs);
120 if (chunk_length == 0) chunk_length = 1;
// Feed the waveform to the pipeline piece by piece until samp_offset reaches
// data.Dim().
122 int32 samp_offset = 0;
123 while (samp_offset < data.Dim()) {
124 int32 samp_remaining = data.Dim() - samp_offset;
125 int32 num_samp = chunk_length < samp_remaining ? chunk_length
129 feature_pipeline.AcceptWaveform(samp_freq, wave_part);
130 samp_offset += num_samp;
// Once every sample has been consumed, signal the pipeline that input is
// finished.
131 if (samp_offset == data.Dim())
132 feature_pipeline.InputFinished();
// Fetch frames until feature_data holds as many entries as the pipeline
// reports ready; t is the index of the next frame to fetch.
134 while (static_cast<int32>(feature_data.size()) <
135 feature_pipeline.NumFramesReady()) {
136 int32 t =
static_cast<int32
>(feature_data.size());
139 feature_pipeline.GetFrame(t, feature_data.back());
// T is the number of frames collected for this utterance.
142 int32 T =
static_cast<int32
>(feature_data.size());
144 KALDI_WARN <<
"Got no frames of data for utterance " << utt;
// Copy frame t into row t of feats, then free the per-frame vector.
149 for (int32 t = 0; t < T; t++) {
150 feats.
Row(t).CopyFromVec(*(feature_data[t]));
151 delete feature_data[t];
154 feats_writer.
Write(utt, feats);
// Read the adaptation state back out of the pipeline into adaptation_state,
// the same variable passed to SetAdaptationState above.
// NOTE(review): presumably this carries the state to the next utterance of the
// same speaker -- confirm against the full source.
155 feature_pipeline.GetAdaptationState(&adaptation_state);
159 KALDI_LOG <<
"Processed " << num_done <<
" utterances, " 160 << num_err <<
" with errors; " << num_frames_tot
161 <<
" frames in total.";
// Exit status is 0 only when at least one utterance was processed, 1 otherwise.
162 return (num_done != 0 ? 0 : 1);
163 }
// A std::exception reaching this handler has its message written to stderr.
catch(
const std::exception& e) {
164 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
This configuration class is to set up OnlineNnet2FeaturePipelineInfo, which in turn is the configuration class for OnlineNnet2FeaturePipeline.
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
BaseFloat SampFreq() const
const Matrix< BaseFloat > & Data() const
This file contains a different version of the feature-extraction pipeline in online-feature-pipeline...
void Write(const std::string &key, const T &value) const
This class is responsible for storing configuration variables, objects and options for OnlineNnet2Fea...
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
void Register(OptionsItf *opts)
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
bool HasKey(const std::string &key)
This class's purpose is to read in Wave files.
int NumArgs() const
Number of positional parameters (cf. argc-1).
A class representing a vector.
OnlineNnet2FeaturePipeline is a class that's responsible for putting together the various parts of th...
int main(int argc, char *argv[])
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
OnlineIvectorExtractionInfo ivector_extractor_info