31 using namespace kaldi;
33 using fst::SymbolTable;
38 "Creates training graphs (without transition-probabilities, by default)\n" 40 "Usage: compile-train-graphs [options] <tree-in> <model-in> " 41 "<lexicon-fst-in> <transcriptions-rspecifier> <graphs-wspecifier>\n" 43 " compile-train-graphs tree 1.mdl lex.fst " 44 "'ark:sym2int.pl -f 2- words.txt text|' ark:graphs.fsts\n";
48 int32 batch_size = 250;
52 std::string disambig_rxfilename;
55 po.Register(
"batch-size", &batch_size,
56 "Number of FSTs to compile at a time (more -> faster but uses " 57 "more memory. E.g. 500");
58 po.Register(
"read-disambig-syms", &disambig_rxfilename,
"File containing " 59 "list of disambiguation symbols in phone symbol table");
63 if (po.NumArgs() != 5) {
68 std::string tree_rxfilename = po.GetArg(1);
69 std::string model_rxfilename = po.GetArg(2);
70 std::string lex_rxfilename = po.GetArg(3);
71 std::string transcript_rspecifier = po.GetArg(4);
72 std::string fsts_wspecifier = po.GetArg(5);
83 std::vector<int32> disambig_syms;
84 if (disambig_rxfilename !=
"")
86 KALDI_ERR <<
"fstcomposecontext: Could not read disambiguation symbols from " 87 << disambig_rxfilename;
96 int num_succeed = 0, num_fail = 0;
98 if (batch_size == 1) {
100 for (; !transcript_reader.Done(); transcript_reader.Next()) {
101 std::string key = transcript_reader.Key();
102 const std::vector<int32> &transcript = transcript_reader.Value();
103 VectorFst<StdArc> decode_fst;
105 if (!gc.CompileGraphFromText(transcript, &decode_fst)) {
106 decode_fst.DeleteStates();
108 if (decode_fst.Start() != fst::kNoStateId) {
110 fst_writer.Write(key, decode_fst);
112 KALDI_WARN <<
"Empty decoding graph for utterance " 118 std::vector<std::string> keys;
119 std::vector<std::vector<int32> > transcripts;
120 while (!transcript_reader.Done()) {
123 for (; !transcript_reader.Done() &&
124 static_cast<int32
>(transcripts.size()) < batch_size;
125 transcript_reader.Next()) {
126 keys.push_back(transcript_reader.Key());
127 transcripts.push_back(transcript_reader.Value());
129 std::vector<fst::VectorFst<fst::StdArc>* > fsts;
130 if (!gc.CompileGraphsFromText(transcripts, &fsts)) {
131 KALDI_ERR <<
"Not expecting CompileGraphs to fail.";
134 for (
size_t i = 0;
i < fsts.size();
i++) {
135 if (fsts[
i]->Start() != fst::kNoStateId) {
137 fst_writer.Write(keys[
i], *(fsts[i]));
139 KALDI_WARN <<
"Empty decoding graph for utterance " 147 KALDI_LOG <<
"compile-train-graphs: succeeded for " << num_succeed
148 <<
" graphs, failed for " << num_fail;
149 return (num_succeed != 0 ? 0 : 1);
150 }
catch(
const std::exception &e) {
151 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
BaseFloat self_loop_scale
A templated class for writing objects to an archive or script file; see The Table concept...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
void Register(OptionsItf *opts)
BaseFloat transition_scale
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
#define KALDI_ASSERT(cond)
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
bool ReadIntegerVectorSimple(const std::string &rxfilename, std::vector< int32 > *list)
ReadIntegerVectorSimple attempts to read this list of integers, one per line, from the given file...