32 int main(
int argc, 
char *argv[]) {
    34     using namespace kaldi;
    37     using fst::SymbolTable;
    42         "Align features given neural-net-based model\n"    43         "Usage:   nnet-align-compiled [options] <model-in> <graphs-rspecifier> "    44         "<feature-rspecifier> <alignments-wspecifier>\n"    46         " nnet-align-compiled 1.mdl ark:graphs.fsts scp:train.scp ark:1.ali\n"    48         " compile-train-graphs tree 1.mdl lex.fst 'ark:sym2int.pl -f 2- words.txt text|' \\\n"    49         "   ark:- | nnet-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n";
    53     std::string use_gpu = 
"yes";
    57     std::string per_frame_acwt_wspecifier;
    60     po.
Register(
"transition-scale", &transition_scale,
    61                 "Transition-probability scale [relative to acoustics]");
    62     po.
Register(
"acoustic-scale", &acoustic_scale,
    63                 "Scaling factor for acoustic likelihoods");
    64     po.
Register(
"self-loop-scale", &self_loop_scale,
    65                 "Scale of self-loop versus non-self-loop "    66                 "log probs [relative to acoustics]");
    67     po.
Register(
"write-per-frame-acoustic-loglikes", &per_frame_acwt_wspecifier,
    68                 "Wspecifier for table of vectors containing the acoustic log-likelihoods "    69                 "per frame for each utterance. E.g. ark:foo/per_frame_logprobs.1.ark");
    71                 "yes|no|optional|wait, only has effect if compiled with CUDA");
    80     CuDevice::Instantiate().SelectGpuId(use_gpu);
    83     std::string model_in_filename = po.
GetArg(1),
    84         fst_rspecifier = po.
GetArg(2),
    85         feature_rspecifier = po.
GetArg(3),
    86         alignment_wspecifier = po.
GetArg(4),
    89     int num_done = 0, num_err = 0, num_retry = 0;
    90     double tot_like = 0.0;
    91     kaldi::int64 frame_count = 0;
    98         Input ki(model_in_filename, &binary);
   109       for (; !fst_reader.
Done(); fst_reader.
Next()) {
   110         std::string utt = fst_reader.
Key();
   111         if (!feature_reader.
HasKey(utt)) {
   112           KALDI_WARN << 
"No features for utterance " << utt;
   117         VectorFst<StdArc> decode_fst(fst_reader.
Value());
   123           KALDI_WARN << 
"Zero-length utterance: " << utt;
   129           std::vector<int32> disambig_syms;  
   131                              transition_scale, self_loop_scale,
   135         bool pad_input = 
true;
   137                                        pad_input, acoustic_scale);
   140                               acoustic_scale, &decode_fst, &nnet_decodable,
   141                               &alignment_writer, &scores_writer,
   142                               &num_done, &num_err, &num_retry,
   143                               &tot_like, &frame_count, &per_frame_acwt_writer);
   145       KALDI_LOG << 
"Overall log-likelihood per frame is " << (tot_like/frame_count)
   146                 << 
" over " << frame_count<< 
" frames.";
   147       KALDI_LOG << 
"Retried " << num_retry << 
" out of "   148                 << (num_done + num_err) << 
" utterances.";
   149       KALDI_LOG << 
"Done " << num_done << 
", errors on " << num_err;
   152     CuDevice::Instantiate().PrintProfile();
   154     return (num_done != 0 ? 0 : 1);
   155   } 
catch(
const std::exception &e) {
   156     std::cerr << e.what();
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
void Register(OptionsItf *opts)
 
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor]. 
 
void Read(std::istream &is, bool binary)
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
 
void Register(const std::string &name, bool *ptr, const std::string &doc)
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
void AddTransitionProbs(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat transition_scale, BaseFloat self_loop_scale, fst::VectorFst< fst::StdArc > *fst)
Adds transition-probs, with the supplied scales (see Scaling of transition and acoustic probabilities...
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
const T & Value(const std::string &key)
 
void Read(std::istream &is, bool binary)
 
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
 
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables. 
 
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility. 
 
bool HasKey(const std::string &key)
 
int NumArgs() const
Number of positional parameters (c.f. argc-1). 
 
DecodableAmNnet is a decodable object that decodes with a neural net acoustic model of type AmNnet...
 
int main(int argc, char *argv[])
 
MatrixIndexT NumRows() const
Dimensions. 
 
void AlignUtteranceWrapper(const AlignConfig &config, const std::string &utt, BaseFloat acoustic_scale, fst::VectorFst< fst::StdArc > *fst, DecodableInterface *decodable, Int32VectorWriter *alignment_writer, BaseFloatWriter *scores_writer, int32 *num_done, int32 *num_error, int32 *num_retried, double *tot_like, int64 *frame_count, BaseFloatVectorWriter *per_frame_acwt_writer)
AlignUtteranceWrapper is a wrapper for alignment code used in training, that is called from many different...
 
std::string GetOptArg(int param) const