All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
fstcomposecontext.cc File Reference
#include <memory>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "fst/fstlib.h"
#include "fstext/context-fst.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
Include dependency graph for fstcomposecontext.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

int main ( int  argc,
char *  argv[] 
)

Definition at line 61 of file fstcomposecontext.cc.

References fst::ComposeContext(), ParseOptions::GetArg(), ParseOptions::GetOptArg(), rnnlm::i, KALDI_ERR, KALDI_WARN, ParseOptions::NumArgs(), kaldi::PrintableRxfilename(), kaldi::PrintableWxfilename(), ParseOptions::PrintUsage(), ParseOptions::Read(), fst::ReadFstKaldi(), kaldi::ReadIntegerVectorSimple(), ParseOptions::Register(), fst::WriteFstKaldi(), fst::WriteILabelInfo(), and kaldi::WriteIntegerVectorSimple().

61  {
62  try {
63  using namespace kaldi;
64  using namespace fst;
65  using kaldi::int32;
66  /*
67  # fstcomposecontext composes efficiently with a context fst
68  # that it generates. Without --disambig-syms specified, it
69  # assumes that all input symbols of in.fst are phones.
70  # It adds the subsequential symbol itself (it does not
71  # appear in the output so doesn't need to be specified by the user).
72  # the disambig.list is a list of disambiguation symbols on the LHS
73  # of in.fst. The symbols on the LHS of out.fst are indexes into
74  # the ilabels.list file, which is a kaldi-format file containing a
75  # vector<vector<int32> >, which specifies what the labels mean in
76  # terms of windows of symbols.
77  fstcomposecontext ilabels.sym [ in.fst [ out.fst ] ]
78  --disambig-syms=disambig.list
79  --context-size=3
80  --central-position=1
81  --binary=false
82  */
83 
84  const char *usage =
85  "Composes on the left with a dynamically created context FST\n"
86  "\n"
87  "Usage: fstcomposecontext <ilabels-output-file> [<in.fst> [<out.fst>] ]\n"
88  "E.g: fstcomposecontext ilabels.sym < LG.fst > CLG.fst\n";
89 
90 
91  ParseOptions po(usage);
92  bool binary = true;
93  std::string disambig_rxfilename,
94  disambig_wxfilename;
95  int32 N = 3, P = 1;
96  po.Register("binary", &binary,
97  "If true, output ilabels-output-file in binary format");
98  po.Register("read-disambig-syms", &disambig_rxfilename,
99  "List of disambiguation symbols on input of in.fst");
100  po.Register("write-disambig-syms", &disambig_wxfilename,
101  "List of disambiguation symbols on input of out.fst");
102  po.Register("context-size", &N, "Size of phone context window");
103  po.Register("central-position", &P,
104  "Designated central position in context window");
105 
106  po.Read(argc, argv);
107 
108  if (po.NumArgs() < 1 || po.NumArgs() > 3) {
109  po.PrintUsage();
110  exit(1);
111  }
112 
113  std::string ilabels_out_filename = po.GetArg(1),
114  fst_in_filename = po.GetOptArg(2),
115  fst_out_filename = po.GetOptArg(3);
116 
117  VectorFst<StdArc> *fst = ReadFstKaldi(fst_in_filename);
118 
119  if ( (disambig_wxfilename != "") && (disambig_rxfilename == "") )
120  KALDI_ERR << "fstcomposecontext: cannot specify --write-disambig-syms if "
121  "not specifying --read-disambig-syms\n";
122 
123  std::vector<int32> disambig_in;
124  if (disambig_rxfilename != "")
125  if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_in))
126  KALDI_ERR << "fstcomposecontext: Could not read disambiguation symbols from "
127  << PrintableRxfilename(disambig_rxfilename);
128 
129  if (disambig_in.empty()) {
130  KALDI_WARN << "Disambiguation symbols list is empty; this likely "
131  << "indicates an error in data preparation.";
132  }
133 
134  std::vector<std::vector<int32> > ilabels;
135  VectorFst<StdArc> composed_fst;
136 
137  // Work gets done here (see context-fst.h)
138  ComposeContext(disambig_in, N, P, fst, &composed_fst, &ilabels);
139 
140  WriteILabelInfo(Output(ilabels_out_filename, binary).Stream(),
141  binary, ilabels);
142 
143  if (disambig_wxfilename != "") {
144  std::vector<int32> disambig_out;
145  for (size_t i = 0; i < ilabels.size(); i++)
146  if (ilabels[i].size() == 1 && ilabels[i][0] <= 0)
147  disambig_out.push_back(static_cast<int32>(i));
148  if (!WriteIntegerVectorSimple(disambig_wxfilename, disambig_out)) {
149  std::cerr << "fstcomposecontext: Could not write disambiguation symbols to "
150  << PrintableWxfilename(disambig_wxfilename) << '\n';
151  return 1;
152  }
153  }
154 
155  WriteFstKaldi(composed_fst, fst_out_filename);
156  delete fst;
157  return 0;
158  } catch(const std::exception &e) {
159  std::cerr << e.what();
160  return -1;
161  }
162 }
Relabels neural network egs with the read pdf-id alignments.
Definition: chain.dox:20
Definition: graph.dox:21
bool WriteIntegerVectorSimple(std::string wxfilename, const std::vector< int32 > &list)
WriteIntegerVectorSimple attempts to write this list of integers, one per line, to the given file, in text format.
void WriteILabelInfo(std::ostream &os, bool binary, const vector< vector< I > > &info)
Useful utility function for writing these vectors to disk.
std::string PrintableRxfilename(std::string rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:58
std::string PrintableWxfilename(std::string wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:70
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void ComposeContext(const vector< int32 > &disambig_syms_in, int N, int P, VectorFst< StdArc > *ifst, VectorFst< StdArc > *ofst, vector< vector< int32 > > *ilabels_out)
Used in the command-line tool fstcomposecontext.
#define KALDI_ERR
Definition: kaldi-error.h:127
#define KALDI_WARN
Definition: kaldi-error.h:130
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst< Arc > &t)
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
bool ReadIntegerVectorSimple(std::string rxfilename, std::vector< int32 > *list)
ReadIntegerVectorSimple attempts to read this list of integers, one per line, from the given file...