All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
fstmakecontextfst.cc File Reference
#include "base/kaldi-common.h"
#include "util/kaldi-io.h"
#include "util/common-utils.h"
#include "fst/fstlib.h"
#include "fstext/fstext-utils.h"
#include "fstext/context-fst.h"
#include "fstext/kaldi-fst-io.h"
Include dependency graph for fstmakecontextfst.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

int main ( int  argc,
char *  argv[] 
)

Definition at line 32 of file fstmakecontextfst.cc.

References kaldi::ConvertStringToInteger(), ParseOptions::GetArg(), ParseOptions::GetOptArg(), fst::GetSymbols(), rnnlm::i, KALDI_ERR, ParseOptions::NumArgs(), kaldi::PrintableRxfilename(), kaldi::PrintableWxfilename(), ParseOptions::PrintUsage(), ParseOptions::Read(), kaldi::ReadIntegerVectorSimple(), ParseOptions::Register(), fst::WriteFstKaldi(), fst::WriteILabelInfo(), and kaldi::WriteIntegerVectorSimple().

32  {
33  try {
34  using namespace kaldi;
35  using namespace fst;
36  using kaldi::int32;
37 
38  const char *usage =
39  "Constructs a context FST with a specified context-width and context-position.\n"
40  "Outputs the context FST, and a file in Kaldi format that describes what the\n"
41  "input labels mean. Note: this is very inefficient if there are a lot of phones,\n"
42  "better to use fstcomposecontext instead\n"
43  "\n"
44  "Usage: fstmakecontextfst <phones-symbol-table> <subsequential-symbol> <ilabels-output-file> [<out-fst>]\n"
45  "E.g.: fstmakecontextfst phones.txt 42 ilabels.sym > C.fst\n";
46 
47  bool binary = true; // binary output to ilabels_output_file.
48  std::string disambig_rxfilename, disambig_wxfilename;
49  int32 N = 3, P = 1;
50 
51  ParseOptions po(usage);
52  po.Register("read-disambig-syms", &disambig_rxfilename,
53  "List of disambiguation symbols to read");
54  po.Register("write-disambig-syms", &disambig_wxfilename,
55  "List of disambiguation symbols to write");
56  po.Register("context-size", &N, "Size of phonetic context window");
57  po.Register("central-position", &P,
58  "Designated central position in context window");
59  po.Register("binary", &binary,
60  "Write ilabels output file in binary Kaldi format");
61 
62  po.Read(argc, argv);
63 
64  if (po.NumArgs() < 3 || po.NumArgs() > 4) {
65  po.PrintUsage();
66  exit(1);
67  }
68 
69  std::string phones_symtab_filename = po.GetArg(1);
70  int32 subseq_sym;
71  if (!ConvertStringToInteger(po.GetArg(2), &subseq_sym))
72  KALDI_ERR << "Invalid subsequential symbol " << po.GetArg(2);
73  std::string ilabels_out_filename = po.GetArg(3);
74  std::string fst_out_filename = po.GetOptArg(4);
75 
76 
77  std::vector<kaldi::int32> phone_syms;
78  {
79  fst::SymbolTable *phones_symtab = NULL;
80  { // read phone symbol table.
81  std::ifstream is(phones_symtab_filename.c_str());
82  phones_symtab = fst::SymbolTable::ReadText(is, phones_symtab_filename);
83  if (!phones_symtab) KALDI_ERR << "Could not read phones symbol-table file "<<phones_symtab_filename;
84  }
85  GetSymbols(*phones_symtab,
86  false, // don't include eps,
87  &phone_syms);
88  delete phones_symtab;
89  }
90 
91  if ( (disambig_wxfilename != "") && (disambig_rxfilename == "") )
92  KALDI_ERR << "fstmakecontextfst: cannot specify --write-disambig-syms if "
93  "not specifying --read-disambig-syms\n";
94 
95  std::vector<int32> disambig_in;
96  if (disambig_rxfilename != "") {
97  if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_in))
98  KALDI_ERR << "fstcomposecontext: Could not read disambiguation symbols from "
99  << PrintableRxfilename(disambig_rxfilename);
100  }
101 
102  if (std::binary_search(phone_syms.begin(), phone_syms.end(), subseq_sym)
103  ||std::binary_search(disambig_in.begin(), disambig_in.end(), subseq_sym))
104  KALDI_ERR << "Invalid subsequential symbol "<<(subseq_sym)<<", already a phone or disambiguation symbol.";
105 
106 
107  ContextFst<StdArc, int32> cfst(subseq_sym,
108  phone_syms,
109  disambig_in,
110  N,
111  P);
112 
113  VectorFst<StdArc> vfst(cfst); // Copy the fst to a VectorFst.
114 
115  WriteFstKaldi(vfst, fst_out_filename);
116 
117  const std::vector<std::vector<int32> > &ilabels = cfst.ILabelInfo();
118  WriteILabelInfo(Output(ilabels_out_filename, binary).Stream(),
119  binary, ilabels);
120 
121  if (disambig_wxfilename != "") {
122  std::vector<int32> disambig_out;
123  for (size_t i = 0; i < ilabels.size(); i++)
124  if (ilabels[i].size() == 1 && ilabels[i][0] <= 0)
125  disambig_out.push_back(static_cast<int32>(i));
126  if (!WriteIntegerVectorSimple(disambig_wxfilename, disambig_out))
127  KALDI_ERR << "fstcomposecontext: Could not write disambiguation symbols to "
128  << PrintableWxfilename(disambig_wxfilename);
129  }
130  return 0;
131  } catch(const std::exception &e) {
132  std::cerr << e.what();
133  return -1;
134  }
135 }
Relabels neural network egs with the read pdf-id alignments.
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
void GetSymbols(const SymbolTable &symtab, bool include_eps, vector< I > *syms_out)
Definition: graph.dox:21
bool WriteIntegerVectorSimple(std::string wxfilename, const std::vector< int32 > &list)
WriteToList attempts to write this list of integers, one per line, to the given file, in text format.
void WriteILabelInfo(std::ostream &os, bool binary, const vector< vector< I > > &info)
Useful utility function for writing these vectors to disk.
std::string PrintableRxfilename(std::string rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:58
std::string PrintableWxfilename(std::string wxfilename)
PrintableWxfilename turns the filename into a more human-readable form for error reporting, i.e.
Definition: kaldi-io.cc:70
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
#define KALDI_ERR
Definition: kaldi-error.h:127
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst< Arc > &t)
bool ReadIntegerVectorSimple(std::string rxfilename, std::vector< int32 > *list)
ReadFromList attempts to read this list of integers, one per line, from the given file...