determinize-star-inl.h
Go to the documentation of this file.
1 // fstext/determinize-star-inl.h
2 
3 // Copyright 2009-2011 Microsoft Corporation; Jan Silovsky
4 // 2015 Hainan Xu
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_FSTEXT_DETERMINIZE_STAR_INL_H_
22 #define KALDI_FSTEXT_DETERMINIZE_STAR_INL_H_
23 // Do not include this file directly. It is included by determinize-star.h
24 
25 #include "base/kaldi-error.h"
26 
27 #include <unordered_map>
28 using std::unordered_map;
29 
30 #include <vector>
31 #include <climits>
32 
33 namespace fst {
34 
35 // This class maps back and forth from/to integer id's to sequences of strings.
36 // used in determinization algorithm.
37 
38 template<class Label, class StringId> class StringRepository {
39  // Label and StringId are both integer types, possibly the same.
40  // This is a utility that maps back and forth between a vector<Label> and StringId
41  // representation of sequences of Labels. It is to save memory, and to save compute.
42  // We treat sequences of length zero and one separately, for efficiency.
43 
44  public:
45  class VectorKey { // Hash function object.
46  public:
47  size_t operator()(const std::vector<Label> *vec) const {
48  assert(vec != NULL);
49  size_t hash = 0, factor = 1;
50  for (typename std::vector<Label>::const_iterator it = vec->begin();
51  it != vec->end(); it++) {
52  hash += factor*(*it);
53  factor *= 103333; // just an arbitrary prime number.
54  }
55  return hash;
56  }
57  };
58  class VectorEqual { // Equality-operator function object.
59  public:
60  size_t operator()(const std::vector<Label> *vec1, const std::vector<Label> *vec2) const {
61  return (*vec1 == *vec2);
62  }
63  };
64 
65  typedef unordered_map<const std::vector<Label>*, StringId, VectorKey, VectorEqual> MapType;
66 
67  StringId IdOfEmpty() { return no_symbol; }
68 
69  StringId IdOfLabel(Label l) {
70  if (l>= 0 && l <= (Label) single_symbol_range) {
71  return l + single_symbol_start;
72  } else {
73  // l is out of the allowed range so we have to treat it as a sequence of length one. Should be v. rare.
74  std::vector<Label> v; v.push_back(l);
75  return IdOfSeqInternal(v);
76  }
77  }
78 
79  StringId IdOfSeq(const std::vector<Label> &v) { // also works for sizes 0 and 1.
80  size_t sz = v.size();
81  if (sz == 0) return no_symbol;
82  else if (v.size() == 1) return IdOfLabel(v[0]);
83  else return IdOfSeqInternal(v);
84  }
85 
86  inline bool IsEmptyString(StringId id) {
87  return id == no_symbol;
88  }
89  void SeqOfId(StringId id, std::vector<Label> *v) {
90  if (id == no_symbol) v->clear();
91  else if (id>=single_symbol_start) {
92  v->resize(1); (*v)[0] = id - single_symbol_start;
93  } else {
94  assert(static_cast<size_t>(id) < vec_.size());
95  *v = *(vec_[id]);
96  }
97  }
98  StringId RemovePrefix(StringId id, size_t prefix_len) {
99  if (prefix_len == 0) return id;
100  else {
101  std::vector<Label> v;
102  SeqOfId(id, &v);
103  size_t sz = v.size();
104  assert(sz >= prefix_len);
105  std::vector<Label> v_noprefix(sz - prefix_len);
106  for (size_t i = 0;i < sz-prefix_len;i++) v_noprefix[i] = v[i+prefix_len];
107  return IdOfSeq(v_noprefix);
108  }
109  }
110 
112  // The following are really just constants but don't want to complicate compilation so make them
113  // class variables. Due to the brokenness of <limits>, they can't be accessed as constants.
114  string_end = (std::numeric_limits<StringId>::max() / 2) - 1; // all hash values must be <= this.
115  no_symbol = (std::numeric_limits<StringId>::max() / 2); // reserved for empty sequence.
116  single_symbol_start = (std::numeric_limits<StringId>::max() / 2) + 1;
117  single_symbol_range = std::numeric_limits<StringId>::max() - single_symbol_start;
118  }
119  void Destroy() {
120  for (typename std::vector<std::vector<Label>* >::iterator iter = vec_.begin(); iter != vec_.end(); ++iter)
121  delete *iter;
122  std::vector<std::vector<Label>* > tmp_vec;
123  tmp_vec.swap(vec_);
124  MapType tmp_map;
125  tmp_map.swap(map_);
126  }
128  Destroy();
129  }
130 
131  private:
133 
134  StringId IdOfSeqInternal(const std::vector<Label> &v) {
135  typename MapType::iterator iter = map_.find(&v);
136  if (iter != map_.end()) {
137  return iter->second;
138  } else { // must add it to map.
139  StringId this_id = (StringId) vec_.size();
140  std::vector<Label> *v_new = new std::vector<Label> (v);
141  vec_.push_back(v_new);
142  map_[v_new] = this_id;
143  assert(this_id < string_end); // or we used up the labels.
144  return this_id;
145  }
146  }
147 
148  std::vector<std::vector<Label>* > vec_;
149  MapType map_;
150 
151  static const StringId string_start = (StringId) 0; // This must not change. It's assumed.
152  StringId string_end; // = (numeric_limits<StringId>::max() / 2) - 1; // all hash values must be <= this.
153  StringId no_symbol; // = (numeric_limits<StringId>::max() / 2); // reserved for empty sequence.
154  StringId single_symbol_start; // = (numeric_limits<StringId>::max() / 2) + 1;
155  StringId single_symbol_range; // = numeric_limits<StringId>::max() - single_symbol_start;
156 };
157 
158 
159 template<class F> class DeterminizerStar {
160  typedef typename F::Arc Arc;
161  public:
162  // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1 correspondence
163  // between our states and the states in ofst. If destroy == true, release memory as we go
164  // (but we cannot output again).
165  void Output(MutableFst<GallicArc<Arc> > *ofst, bool destroy = true);
166 
167  // Output to standard FST. We will create extra states to handle sequences of symbols
168  // on the output. If destroy == true, release memory as we go
169  // (but we cannot output again).
170 
171  void Output(MutableFst<Arc> *ofst, bool destroy = true);
172 
173 
174  // Initializer. After initializing the object you will typically call
175  // Determinize() and then one of the Output functions.
176  DeterminizerStar(const Fst<Arc> &ifst, float delta = kDelta,
177  int max_states = -1, bool allow_partial = false):
178  ifst_(ifst.Copy()), delta_(delta), max_states_(max_states),
179  determinized_(false), allow_partial_(allow_partial),
180  is_partial_(false), equal_(delta),
181  hash_(ifst.Properties(kExpanded, false) ?
182  down_cast<const ExpandedFst<Arc>*,
183  const Fst<Arc> >(&ifst)->NumStates()/2 + 3 : 20,
184  hasher_, equal_),
185  epsilon_closure_(ifst_, max_states, &repository_, delta) { }
186 
187  void Determinize(bool *debug_ptr) {
188  assert(!determinized_);
189  // This determinizes the input fst but leaves it in the "special format"
190  // in "output_arcs_".
191  InputStateId start_id = ifst_->Start();
192  if (start_id == kNoStateId) { determinized_ = true; return; } // Nothing to do.
193  else { // Insert start state into hash and queue.
194  Element elem;
195  elem.state = start_id;
196  elem.weight = Weight::One();
197  elem.string = repository_.IdOfEmpty(); // Id of empty sequence.
198  std::vector<Element> vec;
199  vec.push_back(elem);
200  OutputStateId cur_id = SubsetToStateId(vec);
201  assert(cur_id == 0 && "Do not call Determinize twice.");
202  }
203  while (!Q_.empty()) {
204  std::pair<std::vector<Element>*, OutputStateId> cur_pair = Q_.front();
205  Q_.pop_front();
206  ProcessSubset(cur_pair);
207  if (debug_ptr && *debug_ptr) Debug(); // will exit.
208  if (max_states_ > 0 && output_arcs_.size() > max_states_) {
209  if (allow_partial_ == false) {
210  KALDI_ERR << "Determinization aborted since passed " << max_states_
211  << " states";
212  } else {
213  KALDI_WARN << "Determinization terminated since passed " << max_states_
214  << " states, partial results will be generated";
215  is_partial_ = true;
216  break;
217  }
218  }
219  }
220  determinized_ = true;
221  }
222 
223  bool IsPartial() {
224  return is_partial_;
225  }
226 
227  // frees all except output_arcs_, which contains the important info
228  // we need to output.
229  void FreeMostMemory() {
230  if (ifst_) {
231  delete ifst_;
232  ifst_ = NULL;
233  }
234  for (typename SubsetHash::iterator iter = hash_.begin();
235  iter != hash_.end(); ++iter)
236  delete iter->first;
237  SubsetHash tmp;
238  tmp.swap(hash_);
239  }
240 
242  FreeMostMemory();
243  }
244  private:
245  typedef typename Arc::Label Label;
246  typedef typename Arc::Weight Weight;
247  typedef typename Arc::StateId InputStateId;
248  typedef typename Arc::StateId OutputStateId; // same as above but distinguish states in output Fst.
249  typedef typename Arc::Label StringId; // Id type used in the StringRepository
251 
252 
253  // Element of a subset [of original states]
254 
255  struct Element {
256  InputStateId state;
257  StringId string;
258  Weight weight;
259  bool operator != (const Element &other) const {
260  return (state != other.state || string != other.string ||
261  weight != other.weight);
262  }
263  };
264 
265  // Arcs in the format we temporarily create in this class (a representation, essentially of
266  // a Gallic Fst).
267  struct TempArc {
268  Label ilabel;
269  StringId ostring; // Look it up in the StringRepository, it's a sequence of Labels.
270  OutputStateId nextstate; // or kNoState for final weights.
271  Weight weight;
272  };
273 
274 
275  // Hashing function used in hash of subsets.
276  // A subset is a pointer to vector<Element>.
277  // The Elements are in sorted order on state id, and without repeated states.
278  // Because the order of Elements is fixed, we can use a hashing function that is
279  // order-dependent. However the weights are not included in the hashing function--
280  // we hash subsets that differ only in weight to the same key. This is not optimal
281  // in terms of the O(N) performance but typically if we have a lot of determinized
282  // states that differ only in weight then the input probably was pathological in some way,
283  // or even non-determinizable.
284  // We don't quantize the weights, in order to avoid inexactness in simple cases.
285  // Instead we apply the delta when comparing subsets for equality, and allow a small
286  // difference.
287 
288  class SubsetKey {
289  public:
290  size_t operator ()(const std::vector<Element> * subset) const { // hashes only the state and string.
291  size_t hash = 0, factor = 1;
292  for (typename std::vector<Element>::const_iterator iter = subset->begin();
293  iter != subset->end(); ++iter) {
294  hash *= factor;
295  hash += iter->state + 103333 * iter->string;
296  factor *= 23531; // these numbers are primes.
297  }
298  return hash;
299  }
300  };
301 
302  // This is the equality operator on subsets. It checks for exact match on state-id
303  // and string, and approximate match on weights.
304  class SubsetEqual {
305  public:
306  bool operator ()(const std::vector<Element> *s1,
307  const std::vector<Element> *s2) const {
308  size_t sz = s1->size();
309  assert(sz >= 0);
310  if (sz != s2->size()) return false;
311  typename std::vector<Element>::const_iterator iter1 = s1->begin(),
312  iter1_end = s1->end(), iter2 = s2->begin();
313  for (; iter1 < iter1_end; ++iter1, ++iter2) {
314  if (iter1->state != iter2->state ||
315  iter1->string != iter2->string ||
316  ! ApproxEqual(iter1->weight, iter2->weight, delta_))
317  return false;
318  }
319  return true;
320  }
321  float delta_;
322  SubsetEqual(float delta): delta_(delta) {}
323  SubsetEqual(): delta_(kDelta) {}
324  };
325 
326  // Operator that says whether two Elements have the same states.
327  // Used only for debug.
329  public:
330  bool operator ()(const std::vector<Element> *s1, const std::vector<Element> *s2) const {
331  size_t sz = s1->size();
332  assert(sz>=0);
333  if (sz != s2->size()) return false;
334  typename std::vector<Element>::const_iterator iter1 = s1->begin(),
335  iter1_end = s1->end(), iter2=s2->begin();
336  for (; iter1 < iter1_end; ++iter1, ++iter2) {
337  if (iter1->state != iter2->state) return false;
338  }
339  return true;
340  }
341  };
342 
343  // Define the hash type we use to store subsets.
344  typedef unordered_map<const std::vector<Element>*, OutputStateId, SubsetKey, SubsetEqual> SubsetHash;
345 
347  public:
348  EpsilonClosure(const Fst<Arc> *ifst, int max_states,
349  StringRepository<Label, StringId> *repository, float delta):
350  ifst_(ifst), max_states_(max_states), repository_(repository),
351  delta_(delta) {
352 
353  }
354 
355  // This function computes epsilon closure of subset of states by following epsilon links.
356  // Called by ProcessSubset.
357  // Has no side effects except on the repository.
358  void GetEpsilonClosure(const std::vector<Element> &input_subset,
359  std::vector<Element> *output_subset);
360 
361  private:
364  EpsilonClosureInfo(const Element &e, const Weight &w, bool i) :
365  element(e), weight_to_process(w), in_queue(i) {}
366  // the weight in the Element struct is the total current weight
367  // that has been processed already
369  // this stores the weight that we haven't processed (propagated)
371  // whether "this" struct is in the queue
372  // we store the info here so that we don't have to look it up every time
373  bool in_queue;
374  bool operator<(const EpsilonClosureInfo &other) const {
375  return this->element.state < other.element.state;
376  }
377  };
378 
379  // to further speed up EpsilonClosure() computation, we have 2 queues
380  // the 2nd queue is used when we first iterate over the input set -
381  // if queue_2_.empty() then we directly set output_set equal to input_set
382  // and return immediately
383  // Since Epsilon arcs are relatively rare, this way we could efficiently
384  // detect the epsilon-free case, without having to waste our computation e.g.
385  // allocating the EpsilonClosureInfo structure; this also lets us do a
386  // level-by-level traversal, which could avoid some (unfortunately not all)
387  // duplicate computation if epsilons form a DAG that is not a tree
388  //
389  // We put the queues here for better efficiency for memory allocation
390  std::deque<typename Arc::StateId> queue_;
391  std::vector<Element> queue_2_;
392 
393  // the following 2 structures together form our *virtual "map"*
394  // basically we need a map from state_id to EpsilonClosureInfo that operates
395  // in O(1) time, while still takes relatively small mem, and this does it well
396  // for efficiency we don't clear id_to_index_ of its outdated information
397  // As a result each time we do a look-up, we need to check
398  // if (ecinfo_[id_to_index_[id]].element.state == id)
399  // Yet this is still faster than using a std::map<StateId, EpsilonClosureInfo>
400  std::vector<int> id_to_index_;
401  // unlike id_to_index_, we clear the content of ecinfo_ each time we call
402  // EpsilonClosure(). This needed because we need an efficient way to
403  // traverse the virtual map - it is just too costly to traverse the
404  // id_to_index_ vector.
405  std::vector<EpsilonClosureInfo> ecinfo_;
406 
407  // Add one element (elem) into cur_subset
408  // it also adds the necessary stuff to queue_, set the correct weight
409  void AddOneElement(const Element &elem, const Weight &unprocessed_weight);
410 
411  // Sub-routine that we call in EpsilonClosure()
412  // It takes the current "unprocessed_weight" and propagate it to the
413  // states accessible from elem.state by an epsilon arc
414  // and add the results to cur_subset.
415  // save_to_queue_2 is set true when we iterate over the initial subset
416  // - then we save it to queue_2 s.t. if it's empty, we directly return
417  // the input set
418  void ExpandOneElement(const Element &elem,
419  bool sorted,
420  const Weight &unprocessed_weight,
421  bool save_to_queue_2 = false);
422 
423  // no pointers below would take the ownership
424  const Fst<Arc> *ifst_;
427  float delta_;
428  };
429 
430 
431  // This function works out the final-weight of the determinized state.
432  // called by ProcessSubset.
433  // Has no side effects except on the variable repository_, and output_arcs_.
434 
435  void ProcessFinal(const std::vector<Element> &closed_subset, OutputStateId state) {
436  // processes final-weights for this subset.
437  bool is_final = false;
438  StringId final_string = 0; // = 0 to keep compiler happy.
439  Weight final_weight = Weight::One(); // This value will never be accessed, and
440  // we just set it to avoid spurious compiler warnings. We avoid setting it
441  // to Zero() because floating-point infinities can sometimes generate
442  // interrupts and slow things down.
443  typename std::vector<Element>::const_iterator iter = closed_subset.begin(),
444  end = closed_subset.end();
445  for (; iter != end; ++iter) {
446  const Element &elem = *iter;
447  Weight this_final_weight = ifst_->Final(elem.state);
448  if (this_final_weight != Weight::Zero()) {
449  if (!is_final) { // first final-weight
450  final_string = elem.string;
451  final_weight = Times(elem.weight, this_final_weight);
452  is_final = true;
453  } else { // already have one.
454  if (final_string != elem.string) {
455  KALDI_ERR << "FST was not functional -> not determinizable";
456  }
457  final_weight = Plus(final_weight, Times(elem.weight, this_final_weight));
458  }
459  }
460  }
461  if (is_final) {
462  // store final weights in TempArc structure, just like a transition.
463  TempArc temp_arc;
464  temp_arc.ilabel = 0;
465  temp_arc.nextstate = kNoStateId; // special marker meaning "final weight".
466  temp_arc.ostring = final_string;
467  temp_arc.weight = final_weight;
468  output_arcs_[state].push_back(temp_arc);
469  }
470  }
471 
472  // ProcessTransition is called from "ProcessTransitions". Broken out for
473  // clarity. Has side effects on output_arcs_, and (via SubsetToStateId), Q_
474  // and hash_.
475  void ProcessTransition(OutputStateId state, Label ilabel, std::vector<Element> *subset);
476 
477  // "less than" operator for pair<Label, Element>. Used in ProcessTransitions.
478  // Lexicographical order, with comparing the state only for "Element".
479 
481  public:
482  inline bool operator () (const std::pair<Label, Element> &p1, const std::pair<Label, Element> &p2) {
483  if (p1.first < p2.first) return true;
484  else if (p1.first > p2.first) return false;
485  else {
486  return p1.second.state < p2.second.state;
487  }
488  }
489  };
490 
491 
492  // ProcessTransitions handles transitions out of this subset of states.
493  // Ignores epsilon transitions (epsilon closure already handled that).
494  // Does not consider final states. Breaks the transitions up by ilabel,
495  // and creates a new transition in determinized FST, for each ilabel.
496  // Does this by creating a big vector of pairs <Label, Element> and then sorting them
497  // using a lexicographical ordering, and calling ProcessTransition for each range
498  // with the same ilabel.
499  // Side effects on repository, and (via ProcessTransition) on Q_, hash_,
500  // and output_arcs_.
501  void ProcessTransitions(const std::vector<Element> &closed_subset, OutputStateId state) {
502  std::vector<std::pair<Label, Element> > all_elems;
503  { // Push back into "all_elems", elements corresponding to all non-epsilon-input transitions
504  // out of all states in "closed_subset".
505  typename std::vector<Element>::const_iterator iter = closed_subset.begin(),
506  end = closed_subset.end();
507  for (; iter != end; ++iter) {
508  const Element &elem = *iter;
509  for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state);
510  !aiter.Done(); aiter.Next()) {
511  const Arc &arc = aiter.Value();
512  if (arc.ilabel != 0) { // Non-epsilon transition -- ignore epsilons here.
513  std::pair<Label, Element> this_pr;
514  this_pr.first = arc.ilabel;
515  Element &next_elem(this_pr.second);
516  next_elem.state = arc.nextstate;
517  next_elem.weight = Times(elem.weight, arc.weight);
518  if (arc.olabel == 0) // output epsilon-- this is simple case so
519  // handle separately for efficiency
520  next_elem.string = elem.string;
521  else {
522  std::vector<Label> seq;
523  repository_.SeqOfId(elem.string, &seq);
524  seq.push_back(arc.olabel);
525  next_elem.string = repository_.IdOfSeq(seq);
526  }
527  all_elems.push_back(this_pr);
528  }
529  }
530  }
531  }
532  PairComparator pc;
533  std::sort(all_elems.begin(), all_elems.end(), pc);
534  // now sorted first on input label, then on state.
535  typedef typename std::vector<std::pair<Label, Element> >::const_iterator PairIter;
536  PairIter cur = all_elems.begin(), end = all_elems.end();
537  std::vector<Element> this_subset;
538  while (cur != end) {
539  // Process ranges that share the same input symbol.
540  Label ilabel = cur->first;
541  this_subset.clear();
542  while (cur != end && cur->first == ilabel) {
543  this_subset.push_back(cur->second);
544  cur++;
545  }
546  // We now have a subset for this ilabel.
547  ProcessTransition(state, ilabel, &this_subset);
548  }
549  }
550 
551  // SubsetToStateId converts a subset (vector of Elements) to a StateId in the output
552  // fst. This is a hash lookup; if no such state exists, it adds a new state to the hash
553  // and adds a new pair to the queue.
554  // Side effects on hash_ and Q_, and on output_arcs_ [just affects the size].
555  OutputStateId SubsetToStateId(const std::vector<Element> &subset) { // may add the subset to the queue.
556  typedef typename SubsetHash::iterator IterType;
557  IterType iter = hash_.find(&subset);
558  if (iter == hash_.end()) { // was not there.
559  std::vector<Element> *new_subset = new std::vector<Element>(subset);
560  OutputStateId new_state_id = (OutputStateId) output_arcs_.size();
561  bool ans = hash_.insert(std::pair<const std::vector<Element>*,
562  OutputStateId>(new_subset,
563  new_state_id)).second;
564  assert(ans);
565  output_arcs_.push_back(std::vector<TempArc>());
566  if (allow_partial_ == false) {
567  // If --allow-partial is not requested, we do the old way.
568  Q_.push_front(std::pair<std::vector<Element>*, OutputStateId>(new_subset, new_state_id));
569  } else {
570  // If --allow-partial is requested, we do breadth first search. This
571  // ensures that when we return partial results, we return the states
572  // that are reachable by the fewest steps from the start state.
573  Q_.push_back(std::pair<std::vector<Element>*, OutputStateId>(new_subset, new_state_id));
574  }
575  return new_state_id;
576  } else {
577  return iter->second; // the OutputStateId.
578  }
579  }
580 
581 
582  // ProcessSubset does the processing of a determinized state, i.e. it creates
583  // transitions out of it and adds new determinized states to the queue if necessary.
584  // The first stage is "EpsilonClosure" (follow epsilons to get a possibly larger set
585  // of (states, weights)). After that we ignore epsilons. We process the final-weight
586  // of the state, and then handle transitions out (this may add more determinized states
587  // to the queue).
588  void ProcessSubset(const std::pair<std::vector<Element>*, OutputStateId> & pair) {
589  const std::vector<Element> *subset = pair.first;
590  OutputStateId state = pair.second;
591 
592  std::vector<Element> closed_subset; // subset after epsilon closure.
593  epsilon_closure_.GetEpsilonClosure(*subset, &closed_subset);
594 
595  // Now follow non-epsilon arcs [and also process final states]
596  ProcessFinal(closed_subset, state);
597 
598  // Now handle transitions out of these states.
599  ProcessTransitions(closed_subset, state);
600  }
601 
602  void Debug();
603 
605  std::deque<std::pair<std::vector<Element>*, OutputStateId> > Q_; // queue of subsets to be processed.
606 
607  std::vector<std::vector<TempArc> > output_arcs_; // essentially an FST in our format.
608 
609  const Fst<Arc> *ifst_;
610  float delta_;
612  bool determinized_; // used to check usage.
613  bool allow_partial_; // output paritial results or not
614  bool is_partial_; // if we get partial results or not
615  SubsetKey hasher_; // object that computes keys-- has no data members.
616  SubsetEqual equal_; // object that compares subsets-- only data member is delta_.
617  SubsetHash hash_; // hash from Subset to StateId in final Fst.
618 
619  StringRepository<Label, StringId> repository_; // associate integer id's with sequences of labels.
621 };
622 
623 
624 template<class F>
625 bool DeterminizeStar(F &ifst, MutableFst<typename F::Arc> *ofst,
626  float delta, bool *debug_ptr, int max_states,
627  bool allow_partial) {
628  ofst->SetOutputSymbols(ifst.OutputSymbols());
629  ofst->SetInputSymbols(ifst.InputSymbols());
630  DeterminizerStar<F> det(ifst, delta, max_states, allow_partial);
631  det.Determinize(debug_ptr);
632  det.Output(ofst);
633  return det.IsPartial();
634 }
635 
636 
637 template<class F>
638 bool DeterminizeStar(F &ifst,
639  MutableFst<GallicArc<typename F::Arc> > *ofst, float delta,
640  bool *debug_ptr, int max_states,
641  bool allow_partial) {
642  ofst->SetOutputSymbols(ifst.InputSymbols());
643  ofst->SetInputSymbols(ifst.InputSymbols());
644  DeterminizerStar<F> det(ifst, delta, max_states, allow_partial);
645  det.Determinize(debug_ptr);
646  det.Output(ofst);
647  return det.IsPartial();
648 }
649 
650 template<class F>
652  GetEpsilonClosure(const std::vector<Element> &input_subset,
653  std::vector<Element> *output_subset) {
654  ecinfo_.resize(0);
655  size_t size = input_subset.size();
656  // find whether input fst is known to be sorted in input label.
657  bool sorted =
658  ((ifst_->Properties(kILabelSorted, false) & kILabelSorted) != 0);
659 
660  // size is still the input_subset.size()
661  for (size_t i = 0; i < size; i++) {
662  ExpandOneElement(input_subset[i], sorted, input_subset[i].weight, true);
663  }
664 
665  size_t s = queue_2_.size();
666  if (s == 0) {
667  *output_subset = input_subset;
668  return;
669  } else {
670  // queue_2 not empty. Need to create the vector<info>
671  for (size_t i = 0; i < size; i++) {
672  // the weight has not been processed yet,
673  // so put all of them in the "weight_to_process"
674  ecinfo_.push_back(EpsilonClosureInfo(input_subset[i],
675  input_subset[i].weight,
676  false));
677  ecinfo_.back().element.weight = Weight::Zero(); // clear the weight
678 
679  if (id_to_index_.size() < input_subset[i].state + 1) {
680  id_to_index_.resize(2 * input_subset[i].state + 1, -1);
681  }
682  id_to_index_[input_subset[i].state] = ecinfo_.size() - 1;
683  }
684  }
685 
686  {
687  Element elem;
688  elem.weight = Weight::Zero();
689  for (size_t i = 0; i < s; i++) {
690  elem.state = queue_2_[i].state;
691  elem.string = queue_2_[i].string;
692  AddOneElement(elem, queue_2_[i].weight);
693  }
694  queue_2_.resize(0);
695  }
696 
697  int counter = 0; // relates to max-states option, used for test.
698  while (!queue_.empty()) {
699  InputStateId id = queue_.front();
700 
701  // no need to check validity of the index
702  // since anything in the queue we are sure they're in the "virtual set"
703  int index = id_to_index_[id];
704  EpsilonClosureInfo &info = ecinfo_[index];
705  Element &elem = info.element;
706  Weight unprocessed_weight = info.weight_to_process;
707 
708  elem.weight = Plus(elem.weight, unprocessed_weight);
709  info.weight_to_process = Weight::Zero();
710 
711  info.in_queue = false;
712  queue_.pop_front();
713 
714  if (max_states_ > 0 && counter++ > max_states_) {
715  KALDI_ERR << "Determinization aborted since looped more than "
716  << max_states_ << " times during epsilon closure";
717  }
718 
719  // generally we need to be careful about iterator-invalidation problem
720  // here we pass a reference (elem), which could be an issue.
721  // In the beginning of ExpandOneElement, we make a copy of elem.string
722  // to avoid that issue
723  ExpandOneElement(elem, sorted, unprocessed_weight);
724  }
725 
726  {
727  // this sorting is based on StateId
728  sort(ecinfo_.begin(), ecinfo_.end());
729 
730  output_subset->clear();
731 
732  size = ecinfo_.size();
733  output_subset->reserve(size);
734  for (size_t i = 0; i < size; i++) {
735  EpsilonClosureInfo& info = ecinfo_[i];
736  if (info.weight_to_process != Weight::Zero()) {
737  info.element.weight = Plus(info.element.weight, info.weight_to_process);
738  }
739  output_subset->push_back(info.element);
740  }
741  }
742 }
743 
744 template<class F>
746  AddOneElement(const Element &elem, const Weight &unprocessed_weight) {
747  // first we try to find the element info in the ecinfo_ vector
748  int index = -1;
749  if (elem.state < id_to_index_.size()) {
750  index = id_to_index_[elem.state];
751  }
752  if (index != -1) {
753  if (index >= ecinfo_.size()) {
754  index = -1;
755  }
756  // since ecinfo_ might store outdated information, we need to check
757  else if (ecinfo_[index].element.state != elem.state) {
758  index = -1;
759  }
760  }
761 
762  if (index == -1) {
763  // was no such StateId: insert and add to queue.
764  ecinfo_.push_back(EpsilonClosureInfo(elem, unprocessed_weight, true));
765  size_t size = id_to_index_.size();
766  if (size < elem.state + 1) {
767  // double the size to reduce memory operations
768  id_to_index_.resize(2 * elem.state + 1, -1);
769  }
770  id_to_index_[elem.state] = ecinfo_.size() - 1;
771  queue_.push_back(elem.state);
772 
773  } else { // one is already there. Add weights.
774  EpsilonClosureInfo &info = ecinfo_[index];
775  if (info.element.string != elem.string) {
776  // Non-functional FST.
777  std::ostringstream ss;
778  ss << "FST was not functional -> not determinizable.";
779  { // Print some debugging information. Can be helpful to debug
780  // the inputs when FSTs are mysteriously non-functional.
781  std::vector<Label> tmp_seq;
782  repository_->SeqOfId(info.element.string, &tmp_seq);
783  ss << "\nFirst string:";
784  for (size_t i = 0; i < tmp_seq.size(); i++)
785  ss << ' ' << tmp_seq[i];
786  ss << "\nSecond string:";
787  repository_->SeqOfId(elem.string, &tmp_seq);
788  for (size_t i = 0; i < tmp_seq.size(); i++)
789  ss << ' ' << tmp_seq[i];
790  }
791  KALDI_ERR << ss.str();
792  }
793 
794  info.weight_to_process =
795  Plus(info.weight_to_process, unprocessed_weight);
796 
797  if (!info.in_queue) {
798  // this is because the code in "else" below: the
799  // iter->second.weight_to_process might not be Zero()
800  Weight weight = Plus(info.element.weight, info.weight_to_process);
801 
802  // What is done below is, we propagate the weight (by adding them
803  // to the queue only when the change is big enough;
804  // otherwise we just store the weight, until before returning
805  // we add the element.weight and weight_to_process together
806  if (! ApproxEqual(weight, info.element.weight, delta_)) {
807  // add extra part of weight to queue.
808  info.in_queue = true;
809  queue_.push_back(elem.state);
810  }
811  }
812  }
813 }
814 
815 template<class F>
817  const Element &elem,
818  bool sorted,
819  const Weight &unprocessed_weight,
820  bool save_to_queue_2) {
821  StringId str = elem.string; // copy it here because there is an iterator-
822  // - invalidation problem (it really happens for some FSTs)
823 
824  // now we are going to propagate the "unprocessed_weight"
825  for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state);
826  !aiter.Done(); aiter.Next()) {
827  const Arc &arc = aiter.Value();
828  if (sorted && arc.ilabel > 0) {
829  break;
830  // Break from the loop: due to sorting there will be no
831  // more transitions with epsilons as input labels.
832  }
833  if (arc.ilabel != 0) {
834  continue; // we only process epsilons here
835  }
836  Element next_elem;
837  next_elem.state = arc.nextstate;
838  next_elem.weight = Weight::Zero();
839  Weight next_unprocessed_weight
840  = Times(unprocessed_weight, arc.weight);
841 
842  // now must append strings
843  if (arc.olabel == 0) {
844  next_elem.string = str;
845  } else {
846  std::vector<Label> seq;
847  repository_->SeqOfId(str, &seq);
848  if (arc.olabel != 0)
849  seq.push_back(arc.olabel);
850  next_elem.string = repository_->IdOfSeq(seq);
851  }
852  if (save_to_queue_2) {
853  next_elem.weight = next_unprocessed_weight;
854  queue_2_.push_back(next_elem);
855  } else {
856  AddOneElement(next_elem, next_unprocessed_weight);
857  }
858  }
859 }
860 
861 template<class F>
862 void DeterminizerStar<F>::Output(MutableFst<GallicArc<Arc> > *ofst,
863  bool destroy) {
864  assert(determinized_);
865  if (destroy) determinized_ = false;
866  typedef GallicWeight<Label, Weight> ThisGallicWeight;
867  typedef typename Arc::StateId StateId;
868  if (destroy)
869  FreeMostMemory();
870  StateId nStates = static_cast<StateId>(output_arcs_.size());
871  ofst->DeleteStates();
872  ofst->SetStart(kNoStateId);
873  if (nStates == 0) {
874  return;
875  }
876  for (StateId s = 0;s < nStates;s++) {
877  OutputStateId news = ofst->AddState();
878  assert(news == s);
879  }
880  ofst->SetStart(0);
881  // now process transitions.
882  for (StateId this_state = 0; this_state < nStates; this_state++) {
883  std::vector<TempArc> &this_vec(output_arcs_[this_state]);
884  typename std::vector<TempArc>::const_iterator iter = this_vec.begin(),
885  end = this_vec.end();
886  for (; iter != end; ++iter) {
887  const TempArc &temp_arc(*iter);
888  GallicArc<Arc> new_arc;
889  std::vector<Label> seq;
890  repository_.SeqOfId(temp_arc.ostring, &seq);
891  StringWeight<Label, STRING_LEFT> string_weight;
892  for (size_t i = 0;i < seq.size();i++) string_weight.PushBack(seq[i]);
893  ThisGallicWeight gallic_weight(string_weight, temp_arc.weight);
894 
895  if (temp_arc.nextstate == kNoStateId) { // is really final weight.
896  ofst->SetFinal(this_state, gallic_weight);
897  } else { // is really an arc.
898  new_arc.nextstate = temp_arc.nextstate;
899  new_arc.ilabel = temp_arc.ilabel;
900  new_arc.olabel = temp_arc.ilabel; // acceptor. input == output.
901  new_arc.weight = gallic_weight; // includes string and weight.
902  ofst->AddArc(this_state, new_arc);
903  }
904  }
905  // Free up memory. Do this inside the loop as ofst is also allocating memory
906  if (destroy) { std::vector<TempArc> temp; temp.swap(this_vec); }
907  }
908  if (destroy) { std::vector<std::vector<TempArc> > temp; temp.swap(output_arcs_); }
909 }
910 
911 template<class F>
912 void DeterminizerStar<F>::Output(MutableFst<Arc> *ofst, bool destroy) {
913  assert(determinized_);
914  if (destroy) determinized_ = false;
915  // Outputs to standard fst.
916  OutputStateId num_states = static_cast<OutputStateId>(output_arcs_.size());
917  if (destroy)
918  FreeMostMemory();
919  ofst->DeleteStates();
920  if (num_states == 0) {
921  ofst->SetStart(kNoStateId);
922  return;
923  }
924  // Add basic states-- but will add extra ones to account for strings on output.
925  for (OutputStateId s = 0; s < num_states; s++) {
926  OutputStateId news = ofst->AddState();
927  assert(news == s);
928  }
929  ofst->SetStart(0);
930  for (OutputStateId this_state = 0; this_state < num_states; this_state++) {
931  std::vector<TempArc> &this_vec(output_arcs_[this_state]);
932 
933  typename std::vector<TempArc>::const_iterator iter = this_vec.begin(),
934  end = this_vec.end();
935  for (; iter != end; ++iter) {
936  const TempArc &temp_arc(*iter);
937  std::vector<Label> seq;
938  repository_.SeqOfId(temp_arc.ostring, &seq);
939  if (temp_arc.nextstate == kNoStateId) { // Really a final weight.
940  // Make a sequence of states going to a final state, with the strings as labels.
941  // Put the weight on the first arc.
942  OutputStateId cur_state = this_state;
943  for (size_t i = 0; i < seq.size();i++) {
944  OutputStateId next_state = ofst->AddState();
945  Arc arc;
946  arc.nextstate = next_state;
947  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
948  arc.ilabel = 0; // epsilon.
949  arc.olabel = seq[i];
950  ofst->AddArc(cur_state, arc);
951  cur_state = next_state;
952  }
953  ofst->SetFinal(cur_state, (seq.size() == 0 ? temp_arc.weight : Weight::One()));
954  } else { // Really an arc.
955  OutputStateId cur_state = this_state;
956  // Have to be careful with this integer comparison (i+1 < seq.size()) because unsigned.
957  // i < seq.size()-1 could fail for zero-length sequences.
958  for (size_t i = 0; i+1 < seq.size();i++) {
959  // for all but the last element of seq, create new state.
960  OutputStateId next_state = ofst->AddState();
961  Arc arc;
962  arc.nextstate = next_state;
963  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
964  arc.ilabel = (i == 0 ? temp_arc.ilabel : 0); // put ilabel on first element of seq.
965  arc.olabel = seq[i];
966  ofst->AddArc(cur_state, arc);
967  cur_state = next_state;
968  }
969  // Add the final arc in the sequence.
970  Arc arc;
971  arc.nextstate = temp_arc.nextstate;
972  arc.weight = (seq.size() <= 1 ? temp_arc.weight : Weight::One());
973  arc.ilabel = (seq.size() <= 1 ? temp_arc.ilabel : 0);
974  arc.olabel = (seq.size() > 0 ? seq.back() : 0);
975  ofst->AddArc(cur_state, arc);
976  }
977  }
978  // Free up memory. Do this inside the loop as ofst is also allocating memory
979  if (destroy) { std::vector<TempArc> temp; temp.swap(this_vec); }
980  }
981  if (destroy) {
982  std::vector<std::vector<TempArc> > temp;
983  temp.swap(output_arcs_);
984  repository_.Destroy();
985  }
986 }
987 
988 template<class F> void DeterminizerStar<F>::
989 ProcessTransition(OutputStateId state, Label ilabel, std::vector<Element> *subset) {
990  // At input, "subset" may contain duplicates for a given dest state (but in sorted
991  // order). This function removes duplicates from "subset", normalizes it, and adds
992  // a transition to the dest. state (possibly affecting Q_ and hash_, if state did not
993  // exist).
994 
995  typedef typename std::vector<Element>::iterator IterType;
996  { // This block makes the subset have one unique Element per state, adding the weights.
997  IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end();
998  size_t num_out = 0;
999  // Merge elements with same state-id
1000  while (cur_in != end) { // while we have more elements to process.
1001  // At this point, cur_out points to location of next place we want to put an element,
1002  // cur_in points to location of next element we want to process.
1003  if (cur_in != cur_out) *cur_out = *cur_in;
1004  cur_in++;
1005  while (cur_in != end && cur_in->state == cur_out->state) { // merge elements.
1006  if (cur_in->string != cur_out->string) {
1007  KALDI_ERR << "FST was not functional -> not determinizable";
1008  }
1009  cur_out->weight = Plus(cur_out->weight, cur_in->weight);
1010  cur_in++;
1011  }
1012  cur_out++;
1013  num_out++;
1014  }
1015  subset->resize(num_out);
1016  }
1017 
1018  StringId common_str;
1019  Weight tot_weight;
1020  { // This block computes common_str and tot_weight (essentially: the common divisor)
1021  // and removes them from the elements.
1022  std::vector<Label> seq;
1023 
1024  IterType begin = subset->begin(), iter, end = subset->end();
1025  { // This block computes "seq", which is the common prefix, and "common_str",
1026  // which is the StringId version of "seq".
1027  std::vector<Label> tmp_seq;
1028  for (iter = begin; iter != end; ++iter) {
1029  if (iter == begin) {
1030  repository_.SeqOfId(iter->string, &seq);
1031  } else {
1032  repository_.SeqOfId(iter->string, &tmp_seq);
1033  if (tmp_seq.size() < seq.size()) seq.resize(tmp_seq.size()); // size of shortest one.
1034  for (size_t i = 0; i < seq.size(); i++) // seq.size() is the shorter one at this point.
1035  if (tmp_seq[i] != seq[i]) seq.resize(i);
1036  }
1037  if (seq.size() == 0) break; // will not get any prefix.
1038  }
1039  common_str = repository_.IdOfSeq(seq);
1040  }
1041 
1042  { // This block computes "tot_weight".
1043  iter = begin;
1044  tot_weight = iter->weight;
1045  for (++iter; iter != end; ++iter)
1046  tot_weight = Plus(tot_weight, iter->weight);
1047  }
1048 
1049  // Now divide out common stuff from elements.
1050  size_t prefix_len = seq.size();
1051  for (iter = begin; iter != end; ++iter) {
1052  iter->weight = Divide(iter->weight, tot_weight);
1053  iter->string = repository_.RemovePrefix(iter->string, prefix_len);
1054  }
1055  }
1056 
1057  // Now add an arc to the state that the subset represents.
1058  // We may create a new state id for this (in SubsetToStateId).
1059  TempArc temp_arc;
1060  temp_arc.ilabel = ilabel;
1061  temp_arc.nextstate = SubsetToStateId(*subset); // may or may not really add the subset.
1062  temp_arc.ostring = common_str;
1063  temp_arc.weight = tot_weight;
1064  output_arcs_[state].push_back(temp_arc); // record the arc.
1065 }
1066 
1067 template<class F>
1069  // this function called if you send a signal
1070  // SIGUSR1 to the process (and it's caught by the handler in
1071  // fstdeterminizestar). It prints out some traceback
1072  // info and exits.
1073 
1074  KALDI_WARN << "Debug function called (probably SIGUSR1 caught)";
1075  // free up memory from the hash as we need a little memory
1076  { SubsetHash hash_tmp; std::swap(hash_tmp, hash_); }
1077 
1078  if (output_arcs_.size() <= 2) {
1079  KALDI_ERR << "Nothing to trace back";
1080  }
1081  size_t max_state = output_arcs_.size() - 2; // don't take the last
1082  // one as we might be halfway into constructing it.
1083 
1084  std::vector<OutputStateId> predecessor(max_state+1, kNoStateId);
1085  for (size_t i = 0; i < max_state; i++) {
1086  for (size_t j = 0; j < output_arcs_[i].size(); j++) {
1087  OutputStateId nextstate = output_arcs_[i][j].nextstate;
1088  // Always find an earlier-numbered predecessor; this
1089  // is always possible because of the way the algorithm
1090  // works.
1091  if (nextstate <= max_state && nextstate > i)
1092  predecessor[nextstate] = i;
1093  }
1094  }
1095  std::vector<std::pair<Label, StringId> > traceback;
1096  // 'traceback' is a pair of (ilabel, olabel-seq).
1097  OutputStateId cur_state = max_state; // A recently constructed state.
1098 
1099  while (cur_state != 0 && cur_state != kNoStateId) {
1100  OutputStateId last_state = predecessor[cur_state];
1101  std::pair<Label, StringId> p;
1102  size_t i;
1103  for (i = 0; i < output_arcs_[last_state].size(); i++) {
1104  if (output_arcs_[last_state][i].nextstate == cur_state) {
1105  p.first = output_arcs_[last_state][i].ilabel;
1106  p.second = output_arcs_[last_state][i].ostring;
1107  traceback.push_back(p);
1108  break;
1109  }
1110  }
1111  KALDI_ASSERT(i != output_arcs_[last_state].size()); // Or fell off loop.
1112  cur_state = last_state;
1113  }
1114  if (cur_state == kNoStateId)
1115  KALDI_WARN << "Traceback did not reach start state "
1116  << "(possibly debug-code error)";
1117 
1118  std::stringstream ss;
1119  ss << "Traceback follows in format "
1120  << "ilabel (olabel olabel) ilabel (olabel) ... :";
1121  for (ssize_t i = traceback.size() - 1; i >= 0; i--) {
1122  ss << ' ' << traceback[i].first << " ( ";
1123  std::vector<Label> seq;
1124  repository_.SeqOfId(traceback[i].second, &seq);
1125  for (size_t j = 0; j < seq.size(); j++)
1126  ss << seq[j] << ' ';
1127  ss << ')';
1128  }
1129  KALDI_ERR << ss.str();
1130 }
1131 
1132 } // namespace fst
1133 
1134 #endif // KALDI_FSTEXT_DETERMINIZE_STAR_INL_H_
EpsilonClosureInfo(const Element &e, const Weight &w, bool i)
fst::StdArc::StateId StateId
LatticeWeightTpl< FloatType > Divide(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, DivideType typ=DIVIDE_ANY)
OutputStateId SubsetToStateId(const std::vector< Element > &subset)
bool operator<(const EpsilonClosureInfo &other) const
void AddOneElement(const Element &elem, const Weight &unprocessed_weight)
bool operator!=(const LatticeWeightTpl< FloatType > &wa, const LatticeWeightTpl< FloatType > &wb)
unordered_map< const std::vector< Label > *, StringId, VectorKey, VectorEqual > MapType
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
const Fst< Arc > * ifst_
void GetEpsilonClosure(const std::vector< Element > &input_subset, std::vector< Element > *output_subset)
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
void Determinize(bool *debug_ptr)
std::deque< typename Arc::StateId > queue_
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
bool ApproxEqual(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, float delta=kDelta)
void ProcessTransition(OutputStateId state, Label ilabel, std::vector< Element > *subset)
void ProcessSubset(const std::pair< std::vector< Element > *, OutputStateId > &pair)
StringId IdOfSeq(const std::vector< Label > &v)
StringId IdOfSeqInternal(const std::vector< Label > &v)
StringId RemovePrefix(StringId id, size_t prefix_len)
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
std::vector< std::vector< Label > *> vec_
DeterminizerStar(const Fst< Arc > &ifst, float delta=kDelta, int max_states=-1, bool allow_partial=false)
void ExpandOneElement(const Element &elem, bool sorted, const Weight &unprocessed_weight, bool save_to_queue_2=false)
void Output(MutableFst< GallicArc< Arc > > *ofst, bool destroy=true)
StringRepository< Label, StringId > StringRepositoryType
void SeqOfId(StringId id, std::vector< Label > *v)
std::deque< std::pair< std::vector< Element > *, OutputStateId > > Q_
void ProcessTransitions(const std::vector< Element > &closed_subset, OutputStateId state)
void ProcessFinal(const std::vector< Element > &closed_subset, OutputStateId state)
StringId IdOfLabel(Label l)
size_t operator()(const std::vector< Label > *vec1, const std::vector< Label > *vec2) const
#define KALDI_ERR
Definition: kaldi-error.h:147
KALDI_DISALLOW_COPY_AND_ASSIGN(StringRepository)
std::vector< std::vector< TempArc > > output_arcs_
#define KALDI_WARN
Definition: kaldi-error.h:150
std::vector< EpsilonClosureInfo > ecinfo_
fst::StdArc::Label Label
StringRepository< Label, StringId > * repository_
size_t operator()(const std::vector< Label > *vec) const
fst::StdArc::Weight Weight
EpsilonClosure(const Fst< Arc > *ifst, int max_states, StringRepository< Label, StringId > *repository, float delta)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
bool IsEmptyString(StringId id)
static const StringId string_start
StringRepository< Label, StringId > repository_
bool DeterminizeStar(F &ifst, MutableFst< typename F::Arc > *ofst, float delta, bool *debug_ptr, int max_states, bool allow_partial)
This function implements the normal version of DeterminizeStar, in which the output strings are repre...
unordered_map< const std::vector< Element > *, OutputStateId, SubsetKey, SubsetEqual > SubsetHash
void Copy(const CuMatrixBase< Real > &src, const CuArray< int32 > &copy_from_indices, CuMatrixBase< Real > *tgt)
Copies elements from src into tgt as given by copy_from_indices.
Definition: cu-math.cc:173