23 #include "fst/fstlib.h" 32 template<
class Arc,
class I>
34 VectorFst<Arc> *
fst) {
39 StateId num_states = fst->NumStates();
40 StateId dead_state = fst->AddState();
41 for (StateId s = 0; s < num_states; s++) {
42 for (MutableArcIterator<VectorFst<Arc> > iter(fst, s);
43 !iter.Done(); iter.Next()) {
44 if (symbol_set.
count(iter.Value().ilabel) != 0) {
45 Arc arc = iter.Value();
46 arc.nextstate = dead_state;
53 if (fst->NumStates() == 0)
54 KALDI_WARN <<
"After Connect(), fst was empty.";
57 template<
class Arc,
class I>
60 VectorFst<Arc> *
fst) {
65 Weight penalty_weight(penalty);
69 StateId num_states = fst->NumStates();
70 for (StateId s = 0; s < num_states; s++) {
71 for (MutableArcIterator<VectorFst<Arc> > iter(fst, s);
72 !iter.Done(); iter.Next()) {
73 if (symbol_set.
count(iter.Value().ilabel) != 0) {
74 Arc arc = iter.Value();
75 arc.weight =
Times(arc.weight, penalty_weight);
85 int main(
int argc,
char *argv[]) {
87 using namespace kaldi;
91 bool apply_to_output =
false;
92 bool remove_arcs =
false;
93 float penalty = -std::numeric_limits<BaseFloat>::infinity();
96 "With no options, replaces a subset of symbols with epsilon, wherever\n" 97 "they appear on the input side of an FST." 98 "With --remove-arcs=true, will remove arcs that contain these symbols\n" 100 "With --penalty=<float>, will add the specified penalty to the\n" 101 "cost of any arc that has one of the given symbols on its input side\n" 102 "In all cases, the option --apply-to-output=true (or for\n" 103 "back-compatibility, --remove-from-output=true) makes this apply\n" 104 "to the output side.\n" 106 "Usage: fstrmsymbols [options] <in-disambig-list> [<in.fst> [<out.fst>]]\n" 107 "E.g: fstrmsymbols in.list < in.fst > out.fst\n" 108 "<in-disambig-list> is an rxfilename specifying a file containing list of integers\n" 109 "representing symbols, in text form, one per line.\n";
112 po.
Register(
"remove-from-output", &apply_to_output,
"If true, this applies to symbols " 113 "on the output, not the input, side. (For back compatibility; use " 114 "--apply-to-output insead)");
115 po.
Register(
"apply-to-output", &apply_to_output,
"If true, this applies to symbols " 116 "on the output, not the input, side.");
117 po.
Register(
"remove-arcs", &remove_arcs,
"If true, instead of converting the symbol " 118 "to <eps>, remove the arcs.");
119 po.
Register(
"penalty", &penalty,
"If specified, instead of converting " 120 "the symbol to <eps>, penalize the arc it is on by adding this " 121 "value to its cost.");
127 penalty != -std::numeric_limits<BaseFloat>::infinity())
128 KALDI_ERR <<
"--remove-arc and --penalty options are mutually exclusive";
135 std::string disambig_rxfilename = po.
GetArg(1),
142 std::vector<int32> disambig_in;
144 KALDI_ERR <<
"fstrmsymbols: Could not read disambiguation symbols from " 145 << (disambig_rxfilename ==
"" ?
"standard input" : disambig_rxfilename);
147 if (apply_to_output) Invert(fst);
150 }
else if (penalty != -std::numeric_limits<BaseFloat>::infinity()) {
155 if (apply_to_output) Invert(fst);
161 }
catch(
const std::exception &e) {
162 std::cerr << e.what();
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
Fst< StdArc > * ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int main(int argc, char *argv[])
void Register(const std::string &name, bool *ptr, const std::string &doc)
void PenalizeArcsWithSomeInputSymbols(const std::vector< I > &symbols_in, float penalty, VectorFst< Arc > *fst)
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void RemoveArcsWithSomeInputSymbols(const std::vector< I > &symbols_in, VectorFst< Arc > *fst)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
fst::StdArc::Weight Weight
int NumArgs() const
Number of positional parameters (cf. argc-1).
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst< Arc > &t)
VectorFst< StdArc > * CastOrConvertToVectorFst(Fst< StdArc > *fst)
bool ReadIntegerVectorSimple(const std::string &rxfilename, std::vector< int32 > *list)
ReadIntegerVectorSimple attempts to read this list of integers, one per line, from the given file...
std::string GetOptArg(int param) const
void RemoveSomeInputSymbols(const std::vector< I > &to_remove, MutableFst< Arc > *fst)
RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from the input side of the FST...