27 int main(
int argc,
char *argv[]) {
29 using namespace kaldi;
32 typedef kaldi::int64 int64;
35 "Does the neural net computation for each file of input features, and\n" 36 "outputs as a matrix the result. Used mostly for debugging.\n" 37 "Note: if you want it to apply a log (e.g. for log-likelihoods), use\n" 38 "--apply-log=true. Unlike nnet-am-compute, this version reads a 'raw'\n" 41 "Usage: nnet-compute [options] <raw-nnet-in> <feature-rspecifier> " 42 "<feature-or-loglikes-wspecifier>\n";
44 bool apply_log =
false;
45 bool pad_input =
true;
47 po.
Register(
"apply-log", &apply_log,
"Apply a log to the result of the computation " 48 "before outputting.");
49 po.
Register(
"pad-input", &pad_input,
"If true, duplicate the first and last frames " 50 "of input features as required for temporal context, to prevent #frames " 51 "of output being less than those of input.");
60 std::string raw_nnet_rxfilename = po.
GetArg(1),
61 features_rspecifier = po.
GetArg(2),
62 features_or_loglikes_wspecifier = po.
GetArg(3);
67 int64 num_done = 0, num_frames = 0;
71 for (; !feature_reader.
Done(); feature_reader.
Next()) {
72 std::string utt = feature_reader.
Key();
78 if (output_frames <= 0) {
79 KALDI_WARN <<
"Skipping utterance " << utt <<
" because output " 90 writer.
Write(utt, output);
95 KALDI_LOG <<
"Processed " << num_done <<
" feature files, " 96 << num_frames <<
" frames of input were processed.";
98 return (num_done == 0 ? 1 : 0);
99 }
catch(
const std::exception &e) {
100 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
int32 LeftContext() const
Returns the left-context summed over all the Components...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int main(int argc, char *argv[])
void ApplyFloor(Real floor_val)
int32 OutputDim() const
The output dimension of the network – typically the number of pdfs.
A templated class for writing objects to an archive or script file; see The Table concept...
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g. an utterance).
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
int32 RightContext() const
Returns the right-context summed over all the Components...
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
MatrixIndexT NumRows() const
Dimensions.