33 using namespace kaldi;
35 using fst::SymbolTable;
40 "Generate alignments, reading log-likelihoods as matrices.\n" 41 " (model is needed only for the integer mappings in its transition-model)\n" 42 "Usage: align-mapped [options] <tree-in> <trans-model-in> <lexicon-fst-in> " 43 "<feature-rspecifier> <transcriptions-rspecifier> <alignments-wspecifier>\n" 45 " align-mapped tree trans.mdl lex.fst scp:train.scp ark:train.tra ark:nnet.ali\n";
49 std::string disambig_rxfilename;
54 po.Register(
"acoustic-scale", &acoustic_scale,
"Scaling factor for acoustic likelihoods");
55 po.Register(
"read-disambig-syms", &disambig_rxfilename,
"File containing " 56 "list of disambiguation symbols in phone symbol table");
60 if (po.NumArgs() != 6) {
65 std::string tree_in_filename = po.GetArg(1),
66 model_in_filename = po.GetArg(2),
67 lex_in_filename = po.GetArg(3),
68 feature_rspecifier = po.GetArg(4),
69 transcript_rspecifier = po.GetArg(5),
70 alignment_wspecifier = po.GetArg(6);
81 std::vector<int32> disambig_syms;
82 if (disambig_rxfilename !=
"")
84 KALDI_ERR <<
"fstcomposecontext: Could not read disambiguation symbols from " 85 << disambig_rxfilename;
96 int num_done = 0, num_err = 0, num_retry = 0;
97 double tot_like = 0.0;
98 kaldi::int64 frame_count = 0;
100 for (; !loglikes_reader.Done(); loglikes_reader.Next()) {
101 std::string utt = loglikes_reader.Key();
102 if (!transcript_reader.HasKey(utt)) {
103 KALDI_WARN <<
"No transcript for utterance " << utt;
108 const std::vector<int32> &transcript = transcript_reader.Value(utt);
110 VectorFst<StdArc> decode_fst;
111 if (!gc.CompileGraphFromText(transcript, &decode_fst)) {
112 KALDI_WARN <<
"Problem creating decoding graph for utterance " <<
113 utt <<
" [serious error]";
118 KALDI_WARN <<
"Empty loglikes matrix for utterance: " << utt;
126 acoustic_scale, &decode_fst, &decodable,
127 &alignment_writer, NULL,
128 &num_done, &num_err, &num_retry,
129 &tot_like, &frame_count);
131 KALDI_LOG <<
"Overall log-likelihood per frame is " << (tot_like/frame_count)
132 <<
" over " << frame_count<<
" frames.";
133 KALDI_LOG <<
"Retried " << num_retry <<
" out of " 134 << (num_done + num_err) <<
" utterances.";
135 KALDI_LOG <<
"Done " << num_done <<
", errors on " << num_err;
136 return (num_done != 0 ? 0 : 1);
137 }
catch(
const std::exception &e) {
138 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void Register(OptionsItf *opts)
A templated class for writing objects to an archive or script file; see The Table concept...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
void Register(OptionsItf *opts)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
bool ReadIntegerVectorSimple(const std::string &rxfilename, std::vector< int32 > *list)
ReadIntegerVectorSimple attempts to read a list of integers, one per line, from the given file...
void AlignUtteranceWrapper(const AlignConfig &config, const std::string &utt, BaseFloat acoustic_scale, fst::VectorFst< fst::StdArc > *fst, DecodableInterface *decodable, Int32VectorWriter *alignment_writer, BaseFloatWriter *scores_writer, int32 *num_done, int32 *num_error, int32 *num_retried, double *tot_like, int64 *frame_count, BaseFloatVectorWriter *per_frame_acwt_writer)
AlignUtteranceWrapper is a wrapper for alignment code used in training, that is called from many diffe...