compile-graph.cc
Go to the documentation of this file.
1 // bin/compile-graph.cc
2 
3 // Copyright 2018 Johns Hopkins University (Author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
21 #include "util/common-utils.h"
22 #include "tree/context-dep.h"
23 #include "hmm/transition-model.h"
24 #include "hmm/hmm-utils.h"
25 #include "fstext/fstext-lib.h"
26 #include "fstext/push-special.h"
28 #include "decoder/grammar-fst.h"
29 
30 
31 
32 int main(int argc, char *argv[]) {
33  try {
34  using namespace kaldi;
35  typedef kaldi::int32 int32;
36  using fst::SymbolTable;
37  using fst::VectorFst;
38  using fst::StdArc;
39 
40 
41  const char *usage =
42  "Creates HCLG decoding graph. Similar to mkgraph.sh but done in code.\n"
43  "\n"
44  "Usage: compile-graph [options] <tree-in> <model-in> <lexicon-fst-in> "
45  " <gammar-rspecifier> <hclg-wspecifier>\n"
46  "e.g.: \n"
47  " compile-train-graphs-fsts tree 1.mdl L_disambig.fst G.fst HCLG.fst\n";
48  ParseOptions po(usage);
49 
50 
51  BaseFloat transition_scale = 1.0;
52  BaseFloat self_loop_scale = 1.0; // Caution: the script default is 0.1.
53  int32 nonterm_phones_offset = -1;
54  std::string disambig_rxfilename;
55 
56 
57  po.Register("read-disambig-syms", &disambig_rxfilename, "File containing "
58  "list of disambiguation symbols in phone symbol table");
59  po.Register("transition-scale", &transition_scale, "Scale of transition "
60  "probabilities (excluding self-loops).");
61  po.Register("self-loop-scale", &self_loop_scale, "Scale of self-loop vs. "
62  "non-self-loop probability mass. Caution: the default of "
63  "mkgraph.sh is 0.1, but this defaults to 1.0.");
64  po.Register("nonterm-phones-offset", &nonterm_phones_offset, "Integer "
65  "value of symbol #nonterm_bos in phones.txt, if present. "
66  "(Only relevant for grammar decoding).");
67 
68  po.Read(argc, argv);
69 
70  if (po.NumArgs() != 5) {
71  po.PrintUsage();
72  exit(1);
73  }
74 
75  std::string tree_rxfilename = po.GetArg(1),
76  model_rxfilename = po.GetArg(2),
77  lex_rxfilename = po.GetArg(3),
78  grammar_rxfilename = po.GetArg(4),
79  hclg_wxfilename = po.GetArg(5);
80 
81  ContextDependency ctx_dep; // the tree.
82  ReadKaldiObject(tree_rxfilename, &ctx_dep);
83 
84  TransitionModel trans_model;
85  ReadKaldiObject(model_rxfilename, &trans_model);
86 
87  VectorFst<StdArc> *lex_fst = fst::ReadFstKaldi(lex_rxfilename),
88  *grammar_fst = fst::ReadFstKaldi(grammar_rxfilename);
89 
90  std::vector<int32> disambig_syms;
91  if (disambig_rxfilename != "")
92  if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_syms))
93  KALDI_ERR << "Could not read disambiguation symbols from "
94  << disambig_rxfilename;
95  if (disambig_syms.empty())
96  KALDI_WARN << "You supplied no disambiguation symbols; note, these are "
97  << "typically necessary when compiling graphs from FSTs (i.e. "
98  << "supply L_disambig.fst and the list of disambig syms with\n"
99  << "--read-disambig-syms)";
100 
101  const std::vector<int32> &phone_syms = trans_model.GetPhones();
102  SortAndUniq(&disambig_syms);
103  for (int32 i = 0; i < disambig_syms.size(); i++)
104  if (std::binary_search(phone_syms.begin(), phone_syms.end(),
105  disambig_syms[i]))
106  KALDI_ERR << "Disambiguation symbol " << disambig_syms[i]
107  << " is also a phone.";
108 
109  VectorFst<StdArc> lg_fst;
110  TableCompose(*lex_fst, *grammar_fst, &lg_fst);
111 
112  DeterminizeStarInLog(&lg_fst, fst::kDelta);
113 
114  MinimizeEncoded(&lg_fst, fst::kDelta);
115 
116  fst::PushSpecial(&lg_fst, fst::kDelta);
117 
118  delete grammar_fst;
119  delete lex_fst;
120 
121  VectorFst<StdArc> clg_fst;
122 
123  std::vector<std::vector<int32> > ilabels;
124 
125  int32 context_width = ctx_dep.ContextWidth(),
126  central_position = ctx_dep.CentralPosition();
127 
128  if (nonterm_phones_offset < 0) {
129  // The normal case.
130  ComposeContext(disambig_syms, context_width, central_position,
131  &lg_fst, &clg_fst, &ilabels);
132  } else {
133  // The grammar-FST case. See ../doc/grammar.dox for an intro.
134  if (context_width != 2 || central_position != 1) {
135  KALDI_ERR << "Grammar-fst graph creation only supports models with left-"
136  "biphone context. (--nonterm-phones-offset option was supplied).";
137  }
138  ComposeContextLeftBiphone(nonterm_phones_offset, disambig_syms,
139  lg_fst, &clg_fst, &ilabels);
140  }
141  lg_fst.DeleteStates();
142 
143  HTransducerConfig h_cfg;
144  h_cfg.transition_scale = transition_scale;
145  h_cfg.nonterm_phones_offset = nonterm_phones_offset;
146  std::vector<int32> disambig_syms_h; // disambiguation symbols on
147  // input side of H.
148  VectorFst<StdArc> *h_fst = GetHTransducer(ilabels,
149  ctx_dep,
150  trans_model,
151  h_cfg,
152  &disambig_syms_h);
153 
154  VectorFst<StdArc> hclg_fst; // transition-id to word.
155  TableCompose(*h_fst, clg_fst, &hclg_fst);
156  clg_fst.DeleteStates();
157  delete h_fst;
158 
159  KALDI_ASSERT(hclg_fst.Start() != fst::kNoStateId);
160 
161  // Epsilon-removal and determinization combined. This will fail if not determinizable.
162  DeterminizeStarInLog(&hclg_fst);
163 
164  if (!disambig_syms_h.empty()) {
165  RemoveSomeInputSymbols(disambig_syms_h, &hclg_fst);
166  RemoveEpsLocal(&hclg_fst);
167  }
168 
169  // Encoded minimization.
170  MinimizeEncoded(&hclg_fst);
171 
172  std::vector<int32> disambig;
173  bool check_no_self_loops = true,
174  reorder = true;
175  AddSelfLoops(trans_model,
176  disambig,
177  self_loop_scale,
178  reorder,
179  check_no_self_loops,
180  &hclg_fst);
181 
182  if (nonterm_phones_offset >= 0)
183  PrepareForGrammarFst(nonterm_phones_offset, &hclg_fst);
184 
185  { // convert 'hclg' to ConstFst and write.
186  fst::ConstFst<StdArc> const_hclg(hclg_fst);
187  bool binary = true, write_binary_header = false; // suppress the ^@B
188  Output ko(hclg_wxfilename, binary, write_binary_header);
189  fst::FstWriteOptions wopts(PrintableWxfilename(hclg_wxfilename));
190  const_hclg.Write(ko.Stream(), wopts);
191  }
192 
193  KALDI_LOG << "Wrote graph with " << hclg_fst.NumStates()
194  << " states to " << hclg_wxfilename;
195  return 0;
196  } catch(const std::exception &e) {
197  std::cerr << e.what();
198  return -1;
199  }
200 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual int32 ContextWidth() const
ContextWidth() returns the value N (e.g.
Definition: context-dep.h:61
void TableCompose(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const TableComposeOptions &opts=TableComposeOptions())
const std::vector< int32 > & GetPhones() const
Returns a sorted, unique list of phones.
void RemoveEpsLocal(MutableFst< Arc > *fst)
RemoveEpsLocal remove some (but not necessarily all) epsilons in an FST, using an algorithm that is g...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
fst::StdArc StdArc
void AddSelfLoops(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool reorder, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
For context, see AddSelfLoops().
Definition: hmm-utils.cc:602
void MinimizeEncoded(VectorFst< Arc > *fst, float delta=kDelta)
Definition: fstext-utils.h:114
kaldi::int32 int32
int main(int argc, char *argv[])
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq's (removes duplicates) from a vector.
Definition: stl-utils.h:39
void PrepareForGrammarFst(int32 nonterm_phones_offset, VectorFst< StdArc > *fst)
This function prepares 'ifst' for use in GrammarFst: it ensures that it has the expected properties...
Definition: grammar-fst.cc:982
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
void DeterminizeStarInLog(VectorFst< StdArc > *fst, float delta, bool *debug_ptr, int max_states)
Configuration class for the GetHTransducer() function; see The HTransducerConfig configuration class ...
Definition: hmm-utils.h:36
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
void ComposeContextLeftBiphone(int32 nonterm_phones_offset, const vector< int32 > &disambig_syms_in, const VectorFst< StdArc > &ifst, VectorFst< StdArc > *ofst, std::vector< std::vector< int32 > > *ilabels)
This is a variant of the function ComposeContext() which is to be used with our "grammar FST" framewo...
virtual int32 CentralPosition() const
Central position P of the phone context, in 0-based numbering, e.g.
Definition: context-dep.h:62
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
fst::VectorFst< fst::StdArc > * GetHTransducer(const std::vector< std::vector< int32 > > &ilabel_info, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const HTransducerConfig &config, std::vector< int32 > *disambig_syms_left)
Returns the H transducer; result owned by caller.
Definition: hmm-utils.cc:254
BaseFloat transition_scale
Transition log-prob scale, see Scaling of transition and acoustic probabilities.
Definition: hmm-utils.h:40
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void ComposeContext(const vector< int32 > &disambig_syms_in, int32 context_width, int32 central_position, VectorFst< StdArc > *ifst, VectorFst< StdArc > *ofst, vector< vector< int32 > > *ilabels_out, bool project_ifst)
Used in the command-line tool fstcomposecontext.
Definition: context-fst.cc:246
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void PushSpecial(VectorFst< StdArc > *fst, float delta)
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
bool ReadIntegerVectorSimple(const std::string &rxfilename, std::vector< int32 > *list)
ReadFromList attempts to read this list of integers, one per line, from the given file...
#define KALDI_LOG
Definition: kaldi-error.h:153
void RemoveSomeInputSymbols(const std::vector< I > &to_remove, MutableFst< Arc > *fst)
RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from the input side of the FST...