34 using namespace kaldi;
36 using fst::SymbolTable;
41 "Align features given [GMM-based] models.\n" 42 "Usage: gmm-align [options] tree-in model-in lexicon-fst-in feature-rspecifier " 43 "transcriptions-rspecifier alignments-wspecifier\n" 45 " gmm-align tree 1.mdl lex.fst scp:train.scp " 46 "'ark:sym2int.pl -f 2- words.txt text|' ark:1.ali\n";
50 std::string disambig_rxfilename;
54 po.Register(
"acoustic-scale", &acoustic_scale,
"Scaling factor for acoustic likelihoods");
55 po.Register(
"read-disambig-syms", &disambig_rxfilename,
"File containing " 56 "list of disambiguation symbols in phone symbol table");
61 if (po.NumArgs() != 6) {
66 std::string tree_in_filename = po.GetArg(1);
67 std::string model_in_filename = po.GetArg(2);
68 std::string lex_in_filename = po.GetArg(3);
69 std::string feature_rspecifier = po.GetArg(4);
70 std::string transcript_rspecifier = po.GetArg(5);
71 std::string alignment_wspecifier = po.GetArg(6);
80 Input ki(model_in_filename, &binary);
81 trans_model.
Read(ki.Stream(), binary);
82 am_gmm.
Read(ki.Stream(), binary);
88 std::vector<int32> disambig_syms;
89 if (disambig_rxfilename !=
"")
91 KALDI_ERR <<
"fstcomposecontext: Could not read disambiguation symbols from " 92 << disambig_rxfilename;
103 int32 num_done = 0, num_err = 0, num_retry = 0;
104 double tot_like = 0.0;
105 kaldi::int64 frame_count = 0;
106 for (; !feature_reader.Done(); feature_reader.Next()) {
107 std::string utt = feature_reader.Key();
108 if (!transcript_reader.HasKey(utt)) {
109 KALDI_WARN <<
"No transcript found for utterance " << utt;
115 const std::vector<int32> &transcript = transcript_reader.Value(utt);
117 VectorFst<StdArc> decode_fst;
118 if (!gc.CompileGraphFromText(transcript, &decode_fst)) {
119 KALDI_WARN <<
"Problem creating decoding graph for utterance " 120 << utt <<
" [serious error]";
125 KALDI_WARN <<
"Zero-length features for utterance: " << utt;
135 acoustic_scale, &decode_fst, &gmm_decodable,
136 &alignment_writer, NULL,
137 &num_done, &num_err, &num_retry,
138 &tot_like, &frame_count);
140 KALDI_LOG <<
"Overall log-likelihood per frame is " << (tot_like/frame_count)
141 <<
" over " << frame_count<<
" frames.";
142 KALDI_LOG <<
"Retried " << num_retry <<
" out of " 143 << (num_done + num_err) <<
" utterances.";
144 KALDI_LOG <<
"Done " << num_done <<
", errors on " << num_err;
145 return (num_done != 0 ? 0 : 1);
146 }
catch(
const std::exception &e) {
147 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void Register(OptionsItf *opts)
A templated class for writing objects to an archive or script file; see The Table concept...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
void Register(OptionsItf *opts)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
bool ReadIntegerVectorSimple(const std::string &rxfilename, std::vector< int32 > *list)
ReadIntegerVectorSimple attempts to read this list of integers, one per line, from the given file...
void AlignUtteranceWrapper(const AlignConfig &config, const std::string &utt, BaseFloat acoustic_scale, fst::VectorFst< fst::StdArc > *fst, DecodableInterface *decodable, Int32VectorWriter *alignment_writer, BaseFloatWriter *scores_writer, int32 *num_done, int32 *num_error, int32 *num_retried, double *tot_like, int64 *frame_count, BaseFloatVectorWriter *per_frame_acwt_writer)
AlignUtteranceWrapper is a wrapper for alignment code used in training, that is called from many different...
void Read(std::istream &in_stream, bool binary)