trivial-factor-weight-test.cc
Go to the documentation of this file.
1 // fstext/trivial-factor-weight-test.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>

#include "base/kaldi-math.h"
#include "fstext/pre-determinize.h"
#include "fstext/determinize-star.h"
#include "fstext/trivial-factor-weight.h"
#include "fstext/fst-test-utils.h"
25 // Just check that it compiles, for now.
26 
27 namespace fst
28 {
29  using std::cout;
30  using std::vector;
31 
32 // Don't instantiate with log semiring, as RandEquivalent may fail.
33 template<class Arc> void TestFactor() {
34  typedef typename Arc::Label Label;
35  typedef typename Arc::StateId StateId;
36  typedef typename Arc::Weight Weight;
37 
38  VectorFst<Arc> *fst = new VectorFst<Arc>();
39  int n_syms = 2 + kaldi::Rand() % 5, n_states = 3 + kaldi::Rand() % 10, n_arcs = 5 + kaldi::Rand() % 30, n_final = 1 + kaldi::Rand()%3; // Up to 2 unique symbols.
40  cout << "Testing pre-determinize with "<<n_syms<<" symbols, "<<n_states<<" states and "<<n_arcs<<" arcs and "<<n_final<<" final states.\n";
41  SymbolTable *sptr = NULL;
42 
43  vector<Label> all_syms; // including epsilon.
44  // Put symbols in the symbol table from 1..n_syms-1.
45  for (size_t i = 0;i < (size_t)n_syms;i++)
46  all_syms.push_back(i);
47 
48  // Create states.
49  vector<StateId> all_states;
50  for (size_t i = 0;i < (size_t)n_states;i++) {
51  StateId this_state = fst->AddState();
52  if (i == 0) fst->SetStart(i);
53  all_states.push_back(this_state);
54  }
55  // Set final states.
56  for (size_t j = 0;j < (size_t)n_final;j++) {
57  StateId id = all_states[kaldi::Rand() % n_states];
58  Weight weight = (Weight)(0.33*(kaldi::Rand() % 5) );
59  printf("calling SetFinal with %d and %f\n", id, weight.Value());
60  fst->SetFinal(id, weight);
61  }
62  // Create arcs.
63  for (size_t i = 0;i < (size_t)n_arcs;i++) {
64  Arc a;
65  a.nextstate = all_states[kaldi::Rand() % n_states];
66  a.ilabel = all_syms[kaldi::Rand() % n_syms];
67  a.olabel = all_syms[kaldi::Rand() % n_syms]; // same input+output vocab.
68  a.weight = (Weight) (0.33*(kaldi::Rand() % 2));
69  StateId start_state = all_states[kaldi::Rand() % n_states];
70  fst->AddArc(start_state, a);
71  }
72 
73  std::cout <<" printing before trimming\n";
74  {
75  FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
76  fstprinter.Print(&std::cout, "standard output");
77  }
78  // Trim resulting FST.
79  Connect(fst);
80 
81  std::cout <<" printing after trimming\n";
82  {
83  FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
84  fstprinter.Print(&std::cout, "standard output");
85  }
86 
87  vector<Label> extra_syms;
88  if (fst->Start() != kNoStateId) { // "Connect" did not make it empty....
89  PreDeterminize(fst, 1000, &extra_syms);
90  }
91 
92  std::cout <<" printing after predeterminization\n";
93  {
94  FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
95  fstprinter.Print(&std::cout, "standard output");
96  }
97 
98 
99  { // Remove epsilon. All default args.
100  bool connect = true;
101  Weight weight_threshold = Weight::Zero();
102  int64 nstate = -1; // Relates to pruning.
103  double delta = kDelta; // I think a small weight value. Relates to some kind of pruning,
104  // I guess. But with no epsilon cycles, probably doensn't matter.
105  RmEpsilon(fst, connect, weight_threshold, nstate, delta);
106  }
107 
108  std::cout <<" printing after double-epsilon removal\n";
109  {
110  FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
111  fstprinter.Print(&std::cout, "standard output");
112  }
113  VectorFst<Arc> ofst_star;
114 
115  {
116  printf("Converting to Gallic semiring");
117  VectorFst<GallicArc<Arc> > gallic_fst;
118  VectorFst<GallicArc<Arc> > gallic_fst_noeps;
119  VectorFst<GallicArc<Arc> > gallic_fst_det;
120 
121 
122  {
123  printf("Determinizing with DeterminizeStar, converting to Gallic\n");
124  DeterminizeStar(*fst, &gallic_fst);
125  }
126 
127  {
128  std::cout <<" printing gallic FST\n";
129  FstPrinter<GallicArc<Arc> > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t");
130  fstprinter.Print(&std::cout, "standard output");
131  }
132 
133 
134  // Map(ofst_star, &gallic_fst, ToGallicMapper<Arc, STRING_LEFT>());
135 
136  printf("Converting gallic back to regular\n");
138  typename Arc::Weight, GALLIC_LEFT> > fwfst(gallic_fst);
139  {
140  std::cout <<" printing factor-weight FST\n";
141  FstPrinter<GallicArc<Arc> > fstprinter(fwfst, sptr, sptr, NULL, false, true, "\t");
142  fstprinter.Print(&std::cout, "standard output");
143  }
144 
145  Map(fwfst, &ofst_star, FromGallicMapper<Arc, GALLIC_LEFT>());
146 
147  {
148  std::cout <<" printing after converting back to regular FST\n";
149  FstPrinter<Arc> fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t");
150  fstprinter.Print(&std::cout, "standard output");
151  }
152 
153 
154  VectorFst<GallicArc<Arc> > new_gallic_fst;
155  Map(ofst_star, &new_gallic_fst, ToGallicMapper<Arc, GALLIC_LEFT>());
156 
157  assert(RandEquivalent(gallic_fst, new_gallic_fst, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/));
158 
159  }
160 
161  delete fst;
162 }
163 
164 
165 template<class Arc, class inttype> void TestStringRepository() {
166  typedef typename Arc::Label Label;
167 
169 
170  int N = 1000;
171  if (sizeof(inttype) == 1) N = 64;
172  vector<vector<Label> > strings(N);
173  vector<inttype> ids(N);
174 
175  for (size_t i = 0;i < N;i++) {
176  size_t len = kaldi::Rand() % 4;
177  vector<Label> vec;
178  for (size_t j = 0;j < len;j++) vec.push_back( (kaldi::Rand()%10) + 150*(kaldi::Rand()%2)); // make it have reasonable range.
179  if (i < 500 && vec.size() == 0) ids[i] = sr.IdOfEmpty();
180  else if (i < 500 && vec.size() == 1) ids[i] = sr.IdOfLabel(vec[0]);
181  else ids[i] = sr.IdOfSeq(vec);
182 
183  strings[i] = vec;
184  }
185 
186  for (size_t i = 0;i < N;i++) {
187  vector<Label> tmpv;
188  tmpv.push_back(10); // just put in garbage.
189  sr.SeqOfId(ids[i], &tmpv);
190  assert(tmpv == strings[i]);
191  assert(sr.IdOfSeq(strings[i]) == ids[i]);
192  if (strings[i].size() == 0) assert(ids[i] == sr.IdOfEmpty());
193  if (strings[i].size() == 1) assert(ids[i] == sr.IdOfLabel(strings[i][0]));
194 
195  if (sizeof(inttype) != 1) {
196  size_t prefix_len = kaldi::Rand() % (strings[i].size() + 1);
197  inttype s2 = sr.RemovePrefix(ids[i], prefix_len);
198  vector<Label> vec2;
199  sr.SeqOfId(s2, &vec2);
200  for (size_t j = 0;j < strings[i].size()-prefix_len;j++) {
201  assert(vec2[j] == strings[i][j+prefix_len]);
202  }
203  }
204 
205  }
206 }
207 
208 } // end namespace fst
209 
210 int main() {
211  for (int i = 0;i < 25;i++) {
212  fst::TestFactor<fst::StdArc>();
213  }
214 }
fst::StdArc::StateId StateId
TrivialFactorWeightFst takes as template parameter a FactorIterator as defined above.
void PreDeterminize(MutableFst< Arc > *fst, typename Arc::Label first_new_sym, std::vector< Int > *symsOut)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
StringId IdOfSeq(const std::vector< Label > &v)
StringId RemovePrefix(StringId id, size_t prefix_len)
static void TestFactor()
Definition: factor-test.cc:32
void SeqOfId(StringId id, std::vector< Label > *v)
StringId IdOfLabel(Label l)
fst::StdArc::Label Label
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
fst::StdArc::Weight Weight
void TestStringRepository()
bool DeterminizeStar(F &ifst, MutableFst< typename F::Arc > *ofst, float delta, bool *debug_ptr, int max_states, bool allow_partial)
This function implements the normal version of DeterminizeStar, in which the output strings are repre...