30 int main(
int argc,
char *argv[]) {
31 using namespace kaldi;
35 "Perform forward pass through Neural Network.\n" 36 "Usage: nnet-forward [options] <nnet1-in> <feature-rspecifier> <feature-wspecifier>\n" 37 "e.g.: nnet-forward final.nnet ark:input.ark ark:output.ark\n";
44 std::string feature_transform;
45 po.
Register(
"feature-transform", &feature_transform,
46 "Feature transform in front of main network (in nnet format)");
48 bool no_softmax =
false;
49 po.
Register(
"no-softmax", &no_softmax,
50 "Removes the last component with Softmax, if found. The pre-softmax " 51 "activations are the output of the network. Decoding them leads to " 52 "the same lattices as if we had used 'log-posteriors'.");
54 bool apply_log =
false;
55 po.
Register(
"apply-log", &apply_log,
"Transform NN output by log()");
57 std::string use_gpu=
"no";
59 "yes|no|optional, only has effect if compiled with CUDA");
61 using namespace kaldi;
72 std::string model_filename = po.
GetArg(1),
73 feature_rspecifier = po.
GetArg(2),
74 feature_wspecifier = po.
GetArg(3);
78 CuDevice::Instantiate().SelectGpuId(use_gpu);
82 if (feature_transform !=
"") {
83 nnet_transf.
Read(feature_transform);
87 nnet.
Read(model_filename);
94 <<
" from the nnet " << model_filename;
97 KALDI_WARN <<
"Last component 'NOT-REMOVED' by --no-softmax=true, " 103 if (apply_log && no_softmax) {
104 KALDI_ERR <<
"Cannot use both --apply-log=true --no-softmax=true, " 105 <<
"use only one of the two!";
115 kaldi::int64 tot_t = 0;
128 for (; !feature_reader.
Done(); feature_reader.
Next()) {
131 std::string utt = feature_reader.
Key();
132 KALDI_VLOG(2) <<
"Processing utterance " << num_done+1
134 <<
", " << mat.
NumRows() <<
"frm";
138 KALDI_ERR <<
"NaN or inf found in features for " << utt;
147 KALDI_ERR <<
"NaN or inf found in transformed-features for " << utt;
153 KALDI_ERR <<
"NaN or inf found in nn-output for " << utt;
158 if (!(nnet_out.
Min() >= 0.0 && nnet_out.
Max() <= 1.0)) {
159 KALDI_WARN <<
"Applying 'log()' to data which don't seem to be " 160 <<
"probabilities," << utt;
176 KALDI_ERR <<
"NaN or inf found in final output nn-output for " << utt;
178 feature_writer.
Write(feature_reader.
Key(), nnet_out_host);
181 if (num_done % 100 == 0) {
183 KALDI_VLOG(1) <<
"After " << num_done <<
" utterances: time elapsed = " 184 << time_now/60 <<
" min; processed " << tot_t/time_now
185 <<
" frames per second.";
192 KALDI_LOG <<
"Done " << num_done <<
" files" 193 <<
" in " << time.
Elapsed()/60 <<
"min," 194 <<
" (fps " << tot_t/time.
Elapsed() <<
")";
198 CuDevice::Instantiate().PrintProfile();
202 if (num_done == 0)
return -1;
204 }
catch(
const std::exception &e) {
205 std::cerr << e.what();
void RemoveLastComponent()
Remove the last of the Components.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
std::string class_frame_counts
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
#define KALDI_ISFINITE(x)
A templated class for writing objects to an archive or script file; see The Table concept...
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void Min(const CuMatrixBase< Real > &A)
Do, elementwise, *this = min(*this, A).
ComponentType
Component type identification mechanism.
void SubtractOnLogpost(CuMatrixBase< BaseFloat > *llk)
Subtract pdf priors from log-posteriors to get pseudo log-likelihoods.
void Write(const std::string &key, const T &value) const
const Component & GetLastComponent() const
LastComponent accessor.
void Register(const std::string &name, bool *ptr, const std::string &doc)
static const char * TypeToMarker(ComponentType t)
Converts component type to marker.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Max(const CuMatrixBase< Real > &A)
Do, elementwise, *this = max(*this, A).
int main(int argc, char *argv[])
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
void Read(const std::string &rxfilename)
Read Nnet from 'rxfilename'.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
Real Sum() const
Returns sum of all elements in matrix.
void Feedforward(const CuMatrixBase< BaseFloat > &in, CuMatrix< BaseFloat > *out)
Perform forward pass through the network (with 2 swapping buffers).
int NumArgs() const
Number of positional parameters (c.f. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void SetDropoutRate(BaseFloat r)
Set the dropout rate.
virtual ComponentType GetType() const =0
Get Type Identification of the component.
double Elapsed() const
Returns time in seconds.
void Register(OptionsItf *opts)