pre-determinize-test.cc
Go to the documentation of this file.
1 // fstext/pre-determinize-test.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-math.h"
21 #include "fstext/pre-determinize.h"
22 #include "fstext/fst-test-utils.h"
23 #include "fstext/fstext-utils.h"
24 
25 // Just check that it compiles, for now.
26 
27 namespace fst
28 {
29  using std::vector;
30  using std::cout;
31 
32 // Don't instantiate with log semiring, as RandEquivalent may fail.
33 template<class Arc> void TestPreDeterminize() {
34  typedef typename Arc::Label Label;
35  typedef typename Arc::StateId StateId;
36  typedef typename Arc::Weight Weight;
37 
38  VectorFst<Arc> *fst = new VectorFst<Arc>();
39  int n_syms = 2 + kaldi::Rand() % 5, n_states = 3 + kaldi::Rand() % 10, n_arcs = 5 + kaldi::Rand() % 30, n_final = 1 + kaldi::Rand()%3; // Up to 2 unique symbols.
40  cout << "Testing pre-determinize with "<<n_syms<<" symbols, "<<n_states<<" states and "<<n_arcs<<" arcs and "<<n_final<<" final states.\n";
41  SymbolTable *sptr = NULL;
42 
43  vector<Label> all_syms; // including epsilon.
44  // Put symbols in the symbol table from 1..n_syms-1.
45  for (size_t i = 0;i < (size_t)n_syms;i++)
46  all_syms.push_back(i);
47 
48  // Create states.
49  vector<StateId> all_states;
50  for (size_t i = 0;i < (size_t)n_states;i++) {
51  StateId this_state = fst->AddState();
52  if (i == 0) fst->SetStart(i);
53  all_states.push_back(this_state);
54  }
55  // Set final states.
56  for (size_t j = 0;j < (size_t)n_final;j++) {
57  StateId id = all_states[kaldi::Rand() % n_states];
58  Weight weight = (Weight)(0.33*(kaldi::Rand() % 5) );
59  printf("calling SetFinal with %d and %f\n", id, weight.Value());
60  fst->SetFinal(id, weight);
61  }
62  // Create arcs.
63  for (size_t i = 0;i < (size_t)n_arcs;i++) {
64  Arc a;
65  a.nextstate = all_states[kaldi::Rand() % n_states];
66  a.ilabel = all_syms[kaldi::Rand() % n_syms];
67  a.olabel = all_syms[kaldi::Rand() % n_syms]; // same input+output vocab.
68  a.weight = (Weight) (0.33*(kaldi::Rand() % 2));
69  StateId start_state = all_states[kaldi::Rand() % n_states];
70  fst->AddArc(start_state, a);
71  }
72 
73  std::cout <<" printing before trimming\n";
74  {
75  FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
76  fstprinter.Print(&std::cout, "standard output");
77  }
78  // Trim resulting FST.
79  Connect(fst);
80 
81  std::cout <<" printing after trimming\n";
82  {
83  FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
84  fstprinter.Print(&std::cout, "standard output");
85  }
86 
87  VectorFst<Arc> *fst_copy_orig = new VectorFst<Arc>(*fst);
88 
89  vector<Label> extra_syms;
90  if (fst->Start() != kNoStateId) { // "Connect" did not make it empty....
91  typename Arc::Label highest_sym = HighestNumberedInputSymbol(*fst);
92  PreDeterminize(fst, highest_sym+1, &extra_syms);
93  }
94 
95  std::cout <<" printing after predeterminization\n";
96  {
97  FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
98  fstprinter.Print(&std::cout, "standard output");
99  }
100 
101 
102  { // Remove epsilon. All default args.
103  bool connect = true;
104  Weight weight_threshold = Weight::Zero();
105  int64 nstate = -1; // Relates to pruning.
106  double delta = kDelta; // I think a small weight value. Relates to some kind of pruning,
107  // I guess. But with no epsilon cycles, probably doensn't matter.
108  RmEpsilon(fst, connect, weight_threshold, nstate, delta);
109  }
110 
111  std::cout <<" printing after epsilon removal\n";
112  {
113  FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
114  fstprinter.Print(&std::cout, "standard output");
115  }
116 
117 
118  VectorFst<Arc> ofst;
119  DeterminizeOptions<Arc> opts; // Default options.
120  Determinize(*fst, &ofst, opts);
121  std::cout <<" printing after determinization\n";
122  {
123  FstPrinter<Arc> fstprinter(ofst, sptr, sptr, NULL, false, true, "\t");
124  fstprinter.Print(&std::cout, "standard output");
125  }
126 
127  int64 num_removed = DeleteISymbols(&ofst, extra_syms);
128  std::cout <<" printing after removing "<<num_removed<<" instances of extra symbols\n";
129  {
130  FstPrinter<Arc> fstprinter(ofst, sptr, sptr, NULL, false, true, "\t");
131  fstprinter.Print(&std::cout, "standard output");
132  }
133 
134  std::cout <<" Checking equivalent to original FST.\n";
135  // giving Rand() as a seed stops the random number generator from always being reset to
136  // the same point each time, while maintaining determinism of the test.
137  assert(RandEquivalent(ofst, *fst_copy_orig, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/));
138 
139  delete fst;
140  delete fst_copy_orig;
141 }
142 
143 template<class Arc> void TestAddSelfLoops() {
144  typedef typename Arc::Label Label;
145  typedef typename Arc::StateId StateId;
146  typedef typename Arc::Weight Weight;
147 
148  VectorFst<Arc> *fst = new VectorFst<Arc>();
149  SymbolTable *ilabels = new SymbolTable("my-symbol-table");
150  SymbolTable *olabels = new SymbolTable("my-symbol-table-2");
151  Label i0 = ilabels->AddSymbol("<eps>");
152  Label i1 = ilabels->AddSymbol("1");
153  Label i2 = ilabels->AddSymbol("2");
154 
155  Label o0 = olabels->AddSymbol("<eps>");
156  Label o1 = olabels->AddSymbol("1");
157 
158  assert(i0 == 0 && o0 == 0);
159  StateId s0 = fst->AddState(), s1 = fst->AddState(), s2 = fst->AddState();
160  fst->SetStart(s0);
161  assert(s0 == 0);
162 
163  fst->SetFinal(s2, (Weight)2); // state 2 is final.
164  {
165  Arc arc;
166  arc.ilabel = i1;
167  arc.olabel = o0;
168  arc.nextstate = 1;
169  arc.weight = (Weight)1;
170  fst->AddArc(s0, arc); // arc from 0 to 1 with epsilon out.
171  }
172  {
173  Arc arc;
174  arc.ilabel = i2;
175  arc.olabel = o1;
176  arc.nextstate = 2;
177  arc.weight = (Weight)2;
178  fst->AddArc(s1, arc); // arc from 1 to 2 with "1" out.
179  }
180  std::cout <<" printing before adding self-loops\n";
181  {
182  FstPrinter<Arc> fstprinter(*fst, ilabels, olabels, NULL, false, true, "\t");
183  fstprinter.Print(&std::cout, "standard output");
184  }
185 
186 
187  // So states 1 and 2 should have self-loops on.
188  size_t num_extra = kaldi::Rand() % 5;
189  vector<Label> extra_ilabels, extra_olabels;
190  CreateNewSymbols(ilabels, num_extra, "in#", &extra_ilabels);
191  CreateNewSymbols(olabels, num_extra, "out#", &extra_olabels);
192 
193  AddSelfLoops(fst, extra_ilabels, extra_olabels);
194 
195  assert(fst->NumArcs(0) == 1);
196  assert(fst->NumArcs(1) == 1 + num_extra);
197  assert(fst->NumArcs(2) == num_extra);
198 
199  std::cout <<" printing after adding self-loops\n";
200  {
201  FstPrinter<Arc> fstprinter(*fst, ilabels, olabels, NULL, false, true, "\t");
202  fstprinter.Print(&std::cout, "standard output");
203  }
204 
205  delete fst;
206  delete ilabels;
207  delete olabels;
208 }
209 
210 } // end namespace fst.
211 
212 
213 int main() {
214  for (int i = 0;i < 10;i++) { // run it multiple times; it's a randomized testing algorithm.
215  fst::TestPreDeterminize<fst::StdArc>();
216  }
217  for (int i = 0;i < 5;i++) {
218  fst::TestAddSelfLoops<fst::StdArc>();
219  }
220 }
fst::StdArc::StateId StateId
void PreDeterminize(MutableFst< Arc > *fst, typename Arc::Label first_new_sym, std::vector< Int > *symsOut)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void CreateNewSymbols(SymbolTable *input_sym_table, int nSym, std::string prefix, std::vector< Label > *symsOut)
int main()
void TestPreDeterminize()
void AddSelfLoops(MutableFst< Arc > *fst, std::vector< typename Arc::Label > &isyms, std::vector< typename Arc::Label > &osyms)
AddSelfLoops is a function you will probably want to use alongside PreDeterminize, to add self-loops to any FSTs that you compose on the left hand side of the one modified by PreDeterminize.
fst::StdArc::Label Label
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
fst::StdArc::Weight Weight
Arc::Label HighestNumberedInputSymbol(const Fst< Arc > &fst)
Returns the highest numbered input symbol id of the FST (or zero for an empty FST.
void TestAddSelfLoops()
int64 DeleteISymbols(MutableFst< Arc > *fst, std::vector< typename Arc::Label > isyms)