fstrmsymbols.cc File Reference
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "fst/fstlib.h"
#include "fstext/determinize-star.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
Include dependency graph for fstrmsymbols.cc:

Go to the source code of this file.

Namespaces

 fst
 For an extended explanation of the framework of which grammar-fsts are a part, please see Support for grammars and graphs with on-the-fly parts.
 

Functions

template<class Arc , class I >
void RemoveArcsWithSomeInputSymbols (const std::vector< I > &symbols_in, VectorFst< Arc > *fst)
 
template<class Arc , class I >
void PenalizeArcsWithSomeInputSymbols (const std::vector< I > &symbols_in, float penalty, VectorFst< Arc > *fst)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 85 of file fstrmsymbols.cc.

References fst::CastOrConvertToVectorFst(), ParseOptions::GetArg(), ParseOptions::GetOptArg(), KALDI_ERR, ParseOptions::NumArgs(), fst::PenalizeArcsWithSomeInputSymbols(), ParseOptions::PrintUsage(), ParseOptions::Read(), fst::ReadFstKaldiGeneric(), kaldi::ReadIntegerVectorSimple(), ParseOptions::Register(), fst::RemoveArcsWithSomeInputSymbols(), fst::RemoveSomeInputSymbols(), and fst::WriteFstKaldi().

85  {
86  try {
87  using namespace kaldi;
88  using namespace fst;
89  using kaldi::int32;
90 
91  bool apply_to_output = false;
92  bool remove_arcs = false;
93  float penalty = -std::numeric_limits<BaseFloat>::infinity();
94 
95  const char *usage =
96  "With no options, replaces a subset of symbols with epsilon, wherever\n"
97  "they appear on the input side of an FST."
98  "With --remove-arcs=true, will remove arcs that contain these symbols\n"
99  "on the input\n"
100  "With --penalty=<float>, will add the specified penalty to the\n"
101  "cost of any arc that has one of the given symbols on its input side\n"
102  "In all cases, the option --apply-to-output=true (or for\n"
103  "back-compatibility, --remove-from-output=true) makes this apply\n"
104  "to the output side.\n"
105  "\n"
106  "Usage: fstrmsymbols [options] <in-disambig-list> [<in.fst> [<out.fst>]]\n"
107  "E.g: fstrmsymbols in.list < in.fst > out.fst\n"
108  "<in-disambig-list> is an rxfilename specifying a file containing list of integers\n"
109  "representing symbols, in text form, one per line.\n";
110 
111  ParseOptions po(usage);
112  po.Register("remove-from-output", &apply_to_output, "If true, this applies to symbols "
113  "on the output, not the input, side. (For back compatibility; use "
114  "--apply-to-output insead)");
115  po.Register("apply-to-output", &apply_to_output, "If true, this applies to symbols "
116  "on the output, not the input, side.");
117  po.Register("remove-arcs", &remove_arcs, "If true, instead of converting the symbol "
118  "to <eps>, remove the arcs.");
119  po.Register("penalty", &penalty, "If specified, instead of converting "
120  "the symbol to <eps>, penalize the arc it is on by adding this "
121  "value to its cost.");
122 
123 
124  po.Read(argc, argv);
125 
126  if (remove_arcs &&
127  penalty != -std::numeric_limits<BaseFloat>::infinity())
128  KALDI_ERR << "--remove-arc and --penalty options are mutually exclusive";
129 
130  if (po.NumArgs() < 1 || po.NumArgs() > 3) {
131  po.PrintUsage();
132  exit(1);
133  }
134 
135  std::string disambig_rxfilename = po.GetArg(1),
136  fst_rxfilename = po.GetOptArg(2),
137  fst_wxfilename = po.GetOptArg(3);
138 
139  VectorFst<StdArc> *fst = CastOrConvertToVectorFst(
140  ReadFstKaldiGeneric(fst_rxfilename));
141 
142  std::vector<int32> disambig_in;
143  if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_in))
144  KALDI_ERR << "fstrmsymbols: Could not read disambiguation symbols from "
145  << (disambig_rxfilename == "" ? "standard input" : disambig_rxfilename);
146 
147  if (apply_to_output) Invert(fst);
148  if (remove_arcs) {
149  RemoveArcsWithSomeInputSymbols(disambig_in, fst);
150  } else if (penalty != -std::numeric_limits<BaseFloat>::infinity()) {
151  PenalizeArcsWithSomeInputSymbols(disambig_in, penalty, fst);
152  } else {
153  RemoveSomeInputSymbols(disambig_in, fst);
154  }
155  if (apply_to_output) Invert(fst);
156 
157  WriteFstKaldi(*fst, fst_wxfilename);
158 
159  delete fst;
160  return 0;
161  } catch(const std::exception &e) {
162  std::cerr << e.what();
163  return -1;
164  }
165 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Fst< StdArc > * ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err)
Definition: kaldi-fst-io.cc:45
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
kaldi::int32 int32
void PenalizeArcsWithSomeInputSymbols(const std::vector< I > &symbols_in, float penalty, VectorFst< Arc > *fst)
Definition: fstrmsymbols.cc:58
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void RemoveArcsWithSomeInputSymbols(const std::vector< I > &symbols_in, VectorFst< Arc > *fst)
Definition: fstrmsymbols.cc:33
#define KALDI_ERR
Definition: kaldi-error.h:147
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst< Arc > &t)
VectorFst< StdArc > * CastOrConvertToVectorFst(Fst< StdArc > *fst)
Definition: kaldi-fst-io.cc:94
bool ReadIntegerVectorSimple(const std::string &rxfilename, std::vector< int32 > *list)
ReadFromList attempts to read this list of integers, one per line, from the given file...
void RemoveSomeInputSymbols(const std::vector< I > &to_remove, MutableFst< Arc > *fst)
RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from the input side of the FST...