fstdeterminizestart.cc
Go to the documentation of this file.
1 // fstbin/fstdeterminizestart.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/kaldi-io.h"
23 #include "util/parse-options.h"
24 #include "util/text-utils.h"
25 #include "fst/fstlib.h"
27 #include "fstext/fstext-utils.h"
28 #include "fstext/kaldi-fst-io.h"
29 
30 namespace fst {
32  VectorFst<StdArc> *fst) {
33  bool was_input_deterministic = true;
34  typedef StdArc Arc;
35  typedef Arc::StateId StateId;
36  typedef Arc::Label Label;
37  typedef Arc::Weight Weight;
38 
39  struct InfoForIlabel {
40  std::vector<size_t> arc_indexes; // indexes of all arcs with this ilabel
41  float tot_cost; // total cost of all arcs leaving state s for this
42  // ilabel, summed as if they were negative log-probs.
43  StateId new_state; // state-id of new state, if any, that we have created
44  // to remove duplicate symbols with this ilabel.
45  InfoForIlabel(): new_state(-1) { }
46  };
47 
48  std::unordered_map<Label, InfoForIlabel> label_map;
49 
50  size_t arc_index = 0;
51  for (ArcIterator<VectorFst<Arc> > aiter(*fst, s);
52  !aiter.Done(); aiter.Next(), ++arc_index) {
53  const Arc &arc = aiter.Value();
54  InfoForIlabel &info = label_map[arc.ilabel];
55  if (info.arc_indexes.empty()) {
56  info.tot_cost = arc.weight.Value();
57  } else {
58  info.tot_cost = -kaldi::LogAdd(-info.tot_cost, -arc.weight.Value());
59  was_input_deterministic = false;
60  }
61  info.arc_indexes.push_back(arc_index);
62  }
63 
64  if (was_input_deterministic)
65  return; // Nothing to do.
66 
67  // 'new_arcs' will contain the modified list of arcs
68  // leaving state s
69  std::vector<Arc> new_arcs;
70  new_arcs.reserve(arc_index);
71  arc_index = 0;
72  for (ArcIterator<VectorFst<Arc> > aiter(*fst, s);
73  !aiter.Done(); aiter.Next(), ++arc_index) {
74  const Arc &arc = aiter.Value();
75  Label ilabel = arc.ilabel;
76  InfoForIlabel &info = label_map[ilabel];
77  if (info.arc_indexes.size() == 1) {
78  new_arcs.push_back(arc); // no changes needed
79  } else {
80  if (info.new_state < 0) {
81  info.new_state = fst->AddState();
82  // add arc from state 's' to newly created state.
83  new_arcs.push_back(Arc(ilabel, 0, Weight(info.tot_cost),
84  info.new_state));
85  }
86  // add arc from new state to original destination of this arc.
87  fst->AddArc(info.new_state, Arc(0, arc.olabel,
88  Weight(arc.weight.Value() - info.tot_cost),
89  arc.nextstate));
90  }
91  }
92  fst->DeleteArcs(s);
93  for (size_t i = 0; i < new_arcs.size(); i++)
94  fst->AddArc(s, new_arcs[i]);
95 }
96 
97 }
98 
99 
100 /*
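101  A test example.  NOTE(review): the commands and expected outputs below appear to have been carried over from fstrmepslocal and may not describe the output of fstdeterminizestart -- TODO confirm: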
102  ( echo "0 1 1 0 0.69"; echo "1 2 0 2 0.69"; echo "2 0"; ) | fstcompile | fstdeterminizestart | fstprint
103 # prints:
104 # 0 1 1 2
105 # 1
106  ( echo "0 1 0 0"; echo "0 0"; echo "1 0" ) | fstcompile | fstrmepslocal | fstprint
107 # 0
108  ( echo "0 1 0 0"; echo "0 0"; echo "1 0" ) | fstcompile | fstrmepslocal | fstprint
109  ( echo "0 1 0 0"; echo "0 0"; echo "1 0" ) | fstcompile | fstrmepslocal --use-log=true | fstprint
110 # 0 -0.693147182
111 
112 */
113 
114 
115 
116 int main(int argc, char *argv[]) {
117  try {
118  using namespace kaldi;
119  using namespace fst;
120  using kaldi::int32;
121 
122  const char *usage =
123  "Removes some (but not all) epsilons in an algorithm that will always reduce the number of\n"
124  "arcs+states. Option to preserves equivalence in tropical or log semiring, and\n"
125  "if in tropical, stochasticit in either log or tropical.\n"
126  "\n"
127  "Usage: fstrmepslocal [in.fst [out.fst] ]\n";
128 
129  ParseOptions po(usage);
130  bool use_log = false;
131  bool stochastic_in_log = true;
132  po.Register("use-log", &use_log,
133  "Preserve equivalence in log semiring [false->tropical]\n");
134  po.Register("stochastic-in-log", &stochastic_in_log,
135  "Preserve stochasticity in log semiring [false->tropical]\n");
136  po.Read(argc, argv);
137 
138  if (po.NumArgs() > 2) {
139  po.PrintUsage();
140  exit(1);
141  }
142 
143  std::string fst_in_filename = po.GetOptArg(1),
144  fst_out_filename = po.GetOptArg(2);
145 
146  VectorFst<StdArc> *fst = ReadFstKaldi(fst_in_filename);
147 
148  fst::InputDeterminizeSingleState(fst->Start(), fst);
149  WriteFstKaldi(*fst, fst_out_filename);
150  delete fst;
151  return 0;
152  } catch(const std::exception &e) {
153  std::cerr << e.what();
154  return -1;
155  }
156 }
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
fst::StdArc StdArc
kaldi::int32 int32
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
static void InputDeterminizeSingleState(StdArc::StateId s, VectorFst< StdArc > *fst)
This utility function input-determinizes a specified state s of the FST 'fst'.
Definition: grammar-fst.cc:472
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
fst::StdArc::Label Label
fst::StdArc::Weight Weight
double LogAdd(double x, double y)
Definition: kaldi-math.h:184
int main(int argc, char *argv[])
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst< Arc > &t)
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
std::string GetOptArg(int param) const