All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
make-ilabel-transducer.cc File Reference
#include "hmm/transition-model.h"
#include "hmm/hmm-utils.h"
#include "tree/context-dep.h"
#include "util/common-utils.h"
#include "fst/fstlib.h"
#include "fstext/table-matcher.h"
#include "fstext/fstext-utils.h"
#include "fstext/context-fst.h"
Include dependency graph for make-ilabel-transducer.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

int main (int argc, char *argv[])

Definition at line 29 of file make-ilabel-transducer.cc.

References fst::CreateMapFst(), ParseOptions::GetArg(), kaldi::GetIlabelMapping(), ParseOptions::GetOptArg(), KALDI_ASSERT, KALDI_ERR, ParseOptions::NumArgs(), kaldi::PrintableWxfilename(), ParseOptions::PrintUsage(), ParseOptions::Read(), fst::ReadILabelInfo(), kaldi::ReadKaldiObject(), ParseOptions::Register(), Input::Stream(), fst::WriteILabelInfo(), and kaldi::WriteIntegerVectorSimple().

29  {
30  try {
31  using namespace kaldi;
32  typedef kaldi::int32 int32;
33  using fst::SymbolTable;
34  using fst::VectorFst;
35  using fst::StdArc;
36 
37  const char *usage =
38  "Make transducer that de-duplicates context-dependent ilabels that map to the same state\n"
39  "Usage: make-ilabel-transducer ilabel-info-right tree-file transition-gmm/model ilabel-info-left [mapping-fst-out]\n"
40  "e.g.: \n"
41  " make-ilabel-transducer old_ilabel_info 1.tree 1.mdl new_ilabel_info > convert.fst\n";
42  ParseOptions po(usage);
43 
44  bool binary = true;
45  std::string disambig_wxfilename;
46  std::string old2new_map_wxfilename;
47  po.Register("write-disambig-syms", &disambig_wxfilename, "List of disambiguation symbols after the remapping");
48  po.Register("old-to-new-mapping", &old2new_map_wxfilename, "Mapping from old to new symbols (wxfilename)");
49  po.Register("binary", &binary, "Write output ilabels in binary format");
50  po.Read(argc, argv);
51 
52  if (po.NumArgs() < 4 || po.NumArgs() > 5) {
53  po.PrintUsage();
54  exit(1);
55  }
56 
57  std::string ilabel_info_rxfilename = po.GetArg(1),
58  tree_filename = po.GetArg(2),
59  model_filename = po.GetArg(3),
60  ilabel_info_wxfilename = po.GetArg(4),
61  fst_out_filename = po.GetOptArg(5);
62  if (fst_out_filename == "-") fst_out_filename = "";
63 
64  std::vector<std::vector<int32> > old_ilabels;
65  {
66  bool binary_in;
67  Input ki(ilabel_info_rxfilename, &binary_in);
68  fst::ReadILabelInfo(ki.Stream(), binary_in, &old_ilabels);
69  }
70 
71  ContextDependency ctx_dep;
72  ReadKaldiObject(tree_filename, &ctx_dep);
73 
74  TransitionModel trans_model;
75  ReadKaldiObject(model_filename, &trans_model);
76 
77 
78  std::vector<int32> old2new_mapping;
79 
80  // Most of the work gets done here.
81  GetIlabelMapping(old_ilabels,
82  ctx_dep,
83  trans_model,
84  &old2new_mapping);
85 
86  if (old2new_map_wxfilename != "")
87  if (!WriteIntegerVectorSimple(old2new_map_wxfilename, old2new_mapping))
88  KALDI_ERR << "Error writing map from old to new symbols to "
89  << PrintableWxfilename(old2new_map_wxfilename);
90 
91  std::vector<std::vector<int32> > new_ilabels;
92  KALDI_ASSERT(old2new_mapping.size() != 0);
93  new_ilabels.resize(1 + *std::max_element(old2new_mapping.begin(),
94  old2new_mapping.end()));
95  for (size_t old_idx = 0; old_idx < old2new_mapping.size(); old_idx++) {
96  int32 new_idx = old2new_mapping[old_idx];
97  if (new_ilabels[new_idx].empty()) // select the 1st one we come across..
98  new_ilabels[new_idx] = old_ilabels[old_idx];
99  }
100 
101  // Output the ilabels.
102  fst::WriteILabelInfo(Output(ilabel_info_wxfilename, binary).Stream(),
103  binary, new_ilabels);
104 
105  // Output the disambig symbols, if requested.
106  if (disambig_wxfilename != "") {
107  std::vector<int32> new_disambig;
108  for (size_t new_idx = 0; new_idx < new_ilabels.size(); new_idx++) {
109  if (new_ilabels[new_idx].size() == 1 && new_ilabels[new_idx][0] <= 0) {
110  new_disambig.push_back(new_idx);
111  }
112  }
113  if (! WriteIntegerVectorSimple(disambig_wxfilename, new_disambig)) {
114  KALDI_ERR << "Could not write disambiguation symbols to "
115  << kaldi::PrintableWxfilename(disambig_wxfilename);
116  }
117  }
118 
119  // Create the mapping FST.
120  VectorFst<StdArc> map_fst;
121  CreateMapFst(old2new_mapping, &map_fst);
122 
123 #if _MSC_VER
124  if (fst_out_filename == "")
125  _setmode(_fileno(stdout), _O_BINARY);
126 #endif
127 
128  if (!map_fst.Write(fst_out_filename)) {
129  KALDI_ERR << "Error writing output fst to "
130  << (fst_out_filename == "" ? " standard output "
131  : fst_out_filename);
132  }
133  return 0;
134  } catch(const std::exception &e) {
135  std::cerr << e.what();
136  return -1;
137  }
138 }
Relabels neural network egs with the read pdf-id alignments.
Definition: chain.dox:20
fst::StdArc StdArc
void GetIlabelMapping(const std::vector< std::vector< int32 > > &ilabel_info_old, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, std::vector< int32 > *old2new_map)
GetIlabelMapping produces a mapping that's similar to HTK's logical-to-physical model mapping (i...
Definition: hmm-utils.cc:301
bool WriteIntegerVectorSimple(std::string wxfilename, const std::vector< int32 > &list)
WriteIntegerVectorSimple attempts to write this list of integers, one per line, to the given file, in text format.
void WriteILabelInfo(std::ostream &os, bool binary, const vector< vector< I > > &info)
Useful utility function for writing these vectors to disk.
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:818
std::string PrintableWxfilename(std::string wxfilename)
PrintableWxfilename turns the filename into a more human-readable form for error reporting.
Definition: kaldi-io.cc:70
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
#define KALDI_ERR
Definition: kaldi-error.h:127
void CreateMapFst(const vector< I > &symbol_map, MutableFst< Arc > *fst)
CreateMapFst will create an FST representing this symbol_map.
Definition: factor-inl.h:285
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void ReadILabelInfo(std::istream &is, bool binary, vector< vector< I > > *info)
Useful utility function for reading these vectors from disk.