make-grammar-fst.cc File Reference
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "fst/fstlib.h"
#include "fstext/table-matcher.h"
#include "fstext/kaldi-fst-io.h"
#include "decoder/grammar-fst.h"
Include dependency graph for make-grammar-fst.cc:

Go to the source code of this file.

Namespaces

 fst
 For an extended explanation of the framework of which grammar-fsts are a part, please see "Support for grammars and graphs with on-the-fly parts".
 

Functions

ConstFst< StdArc > * ReadAsConstFst (std::string rxfilename)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 45 of file make-grammar-fst.cc.

References kaldi::ConvertStringToInteger(), fst::CopyToVectorFst(), ParseOptions::GetArg(), rnnlm::i, KALDI_ERR, KALDI_LOG, ParseOptions::NumArgs(), fst::PrepareForGrammarFst(), kaldi::PrintableWxfilename(), ParseOptions::PrintUsage(), ParseOptions::Read(), fst::ReadAsConstFst(), fst::ReadFstKaldi(), ParseOptions::Register(), Output::Stream(), fst::WriteFstKaldi(), and kaldi::WriteKaldiObject().

45  {
46  try {
47  using namespace kaldi;
48  using namespace fst;
49  using kaldi::int32;
50 
51  const char *usage =
52  "Construct GrammarFst and write it to disk (or convert it to ConstFst\n"
53  "and write that to disk instead). Mostly intended for demonstration\n"
54  "and testing purposes (since it may be more convenient to construct\n"
55  "GrammarFst from code). See kaldi-asr.org/doc/grammar.html\n"
56  "Can also be used to prepares FSTs for this use, by calling\n"
57  "PrepareForGrammarFst(), which does things like adding final-probs and\n"
58  "making small structural tweaks to the FST\n"
59  "\n"
60  "Usage (1): make-grammar-fst [options] <top-level-fst> <symbol1> <fst1> \\\n"
61  " [<symbol2> <fst2> ...]] <fst-out>\n"
62  "\n"
63  "<symbol1>, <symbol2> are the integer ids of the corresponding\n"
64  " user-defined nonterminal symbols (e.g. #nonterm:contact_list) in the\n"
65  " phones.txt file.\n"
66  "e.g.: make-grammar-fst --nonterm-phones-offset=317 HCLG.fst \\\n"
67  " 320 HCLG1.fst HCLG_grammar.fst\n"
68  "\n"
69  "Usage (2): make-grammar-fst <fst-in> <fst-out>\n"
70  " Prepare individual FST for compilation into GrammarFst.\n"
71  " E.g. make-grammar-fst HCLG.fst HCLGmod.fst. The outputs of this\n"
72  " will then become the arguments <top-level-fst>, <fst1>, ... for usage\n"
73  " pattern (1).\n"
74  "\n"
75  "The --nonterm-phones-offset option is required for both usage patterns.\n";
76 
77 
78  ParseOptions po(usage);
79 
80 
81  int32 nonterm_phones_offset = -1;
82  bool write_as_grammar = true;
83 
84  po.Register("nonterm-phones-offset", &nonterm_phones_offset,
85  "Integer id of #nonterm_bos in phones.txt");
86  po.Register("write-as-grammar", &write_as_grammar, "If true, "
87  "write as GrammarFst object; if false, convert to "
88  "ConstFst<StdArc> (readable by standard decoders) "
89  "and write that.");
90 
91  po.Read(argc, argv);
92 
93 
94  if (po.NumArgs() < 2 || po.NumArgs() % 2 != 0) {
95  po.PrintUsage();
96  exit(1);
97  }
98 
99  if (nonterm_phones_offset < 0)
100  KALDI_ERR << "The --nonterm-phones-offset option must be supplied "
101  "and positive.";
102 
103  if (po.NumArgs() == 2) {
104  // this usage pattern calls PrepareForGrammarFst().
105  VectorFst<StdArc> *fst = ReadFstKaldi(po.GetArg(1));
106  PrepareForGrammarFst(nonterm_phones_offset, fst);
107  // This will write it as VectorFst; to avoid it having to be converted to
108  // ConstFst when read again by make-grammar-fst, you may want to pipe
109  // through fstconvert --fst_type=const.
110  WriteFstKaldi(*fst, po.GetArg(2));
111  exit(0);
112  }
113 
114  std::string top_fst_str = po.GetArg(1),
115  fst_out_str = po.GetArg(po.NumArgs());
116 
117  std::shared_ptr<const ConstFst<StdArc> > top_fst(
118  ReadAsConstFst(top_fst_str));
119  std::vector<std::pair<int32, std::shared_ptr<const ConstFst<StdArc> > > > pairs;
120 
121  int32 num_pairs = (po.NumArgs() - 2) / 2;
122  for (int32 i = 1; i <= num_pairs; i++) {
123  int32 nonterminal;
124  std::string nonterm_str = po.GetArg(2*i);
125  if (!ConvertStringToInteger(nonterm_str, &nonterminal) ||
126  nonterminal <= 0)
127  KALDI_ERR << "Expected positive integer as nonterminal, got: "
128  << nonterm_str;
129  std::string fst_str = po.GetArg(2*i + 1);
130  std::shared_ptr<const ConstFst<StdArc> > this_fst(ReadAsConstFst(fst_str));
131  pairs.push_back(std::pair<int32, std::shared_ptr<const ConstFst<StdArc> > >(
132  nonterminal, this_fst));
133  }
134 
135  GrammarFst *grammar_fst = new GrammarFst(nonterm_phones_offset,
136  top_fst,
137  pairs);
138 
139  if (write_as_grammar) {
140  bool binary = true; // GrammarFst does not support non-binary write.
141  WriteKaldiObject(*grammar_fst, fst_out_str, binary);
142  delete grammar_fst;
143  } else {
144  VectorFst<StdArc> vfst;
145  CopyToVectorFst(grammar_fst, &vfst);
146  delete grammar_fst;
147  ConstFst<StdArc> cfst(vfst);
148  // We don't have a wrapper in kaldi-fst-io.h for writing type
149  // ConstFst<StdArc>, so do it manually.
150  bool binary = true, write_binary_header = false; // suppress the ^@B
151  Output ko(fst_out_str, binary, write_binary_header);
152  FstWriteOptions wopts(kaldi::PrintableWxfilename(fst_out_str));
153  cfst.Write(ko.Stream(), wopts);
154  }
155 
156  KALDI_LOG << "Created grammar FST and wrote it to "
157  << fst_out_str;
158  } catch(const std::exception &e) {
159  std::cerr << e.what();
160  return -1;
161  }
162 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
kaldi::int32 int32
void PrepareForGrammarFst(int32 nonterm_phones_offset, VectorFst< StdArc > *fst)
This function prepares 'ifst' for use in GrammarFst: it ensures that it has the expected properties...
Definition: grammar-fst.cc:982
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
ConstFst< StdArc > * ReadAsConstFst(std::string rxfilename)
#define KALDI_ERR
Definition: kaldi-error.h:147
GrammarFst is an FST that is 'stitched together' from multiple FSTs, that can recursively incorporate...
Definition: grammar-fst.h:96
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst< Arc > &t)
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
void CopyToVectorFst(GrammarFst *grammar_fst, VectorFst< StdArc > *vector_fst)
This function copies a GrammarFst to a VectorFst (intended mostly for testing and comparison purposes...
Definition: grammar-fst.cc:988
#define KALDI_LOG
Definition: kaldi-error.h:153