27 int main(
int argc,
char *argv[]) {
29 using namespace kaldi;
32 typedef kaldi::int64 int64;
35 "Does the neural net computation, taking as input the nnet-training examples\n" 36 "(typically an archive with the extension .egs), ignoring the labels; it\n" 37 "outputs as a matrix the result. Used mostly for debugging.\n" 39 "Usage: nnet-compute-from-egs [options] <raw-nnet-in> <egs-rspecifier> " 40 "<feature-wspecifier>\n" 41 "e.g.: nnet-compute-from-egs 'nnet-to-raw-nnet final.mdl -|' egs.10.1.ark ark:-\n";
52 std::string raw_nnet_rxfilename = po.
GetArg(1),
53 examples_rspecifier = po.
GetArg(2),
54 features_or_loglikes_wspecifier = po.
GetArg(3);
67 for (; !example_reader.
Done(); example_reader.
Next()) {
71 spk_dim = eg.
spk_info.Dim(), dim = basic_dim + spk_dim;
73 input_block(context, dim);
74 input_block.
Range(0, context, 0, basic_dim).CopyFromMat(
75 input_frames.Range(start_offset, context, 0, basic_dim));
77 input_block.
Range(0, context, basic_dim, spk_dim).CopyRowsFromVec(
81 gpu_input_block.
Swap(&input_block);
84 bool pad_input =
false;
90 KALDI_LOG <<
"Processed " << num_egs <<
" examples.";
92 return (num_egs == 0 ? 1 : 0);
93 }
catch(
const std::exception &e) {
94 std::cerr << e.what() <<
'\n';
CompressedMatrix input_frames
The input data, with NumRows() >= labels.size() + left_context; it includes features to the left and ...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 LeftContext() const
Returns the left-context summed over all the Components...
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int32 left_context
The number of frames of left context (we can work out the #frames of right context from input_frames...
int32 OutputDim() const
The output dimension of the network – typically the number of pdfs.
A templated class for writing objects to an archive or script file; see The Table concept...
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
void Write(const std::string &key, const T &value) const
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Swap(Matrix< Real > *mat)
int32 RightContext() const
Returns the right-context summed over all the Components...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int main(int argc, char *argv[])
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Vector< BaseFloat > spk_info
The speaker-specific input, if any, or an empty vector if we're not using this feature.