All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
LatticeDeterminizerPruned< Weight, IntType > Class Template Reference
Collaboration diagram for LatticeDeterminizerPruned< Weight, IntType >:

Classes

struct  Element
 
struct  OutputState
 
class  PairComparator
 
class  SubsetEqual
 
class  SubsetEqualStates
 
class  SubsetKey
 
struct  Task
 
struct  TaskCompare
 
struct  TempArc
 

Public Types

typedef
CompactLatticeWeightTpl
< Weight, IntType > 
CompactWeight
 
typedef ArcTpl< CompactWeightCompactArc
 
typedef ArcTpl< WeightArc
 

Public Member Functions

void Output (MutableFst< CompactArc > *ofst, bool destroy=true)
 
void Output (MutableFst< Arc > *ofst, bool destroy=true)
 
 LatticeDeterminizerPruned (const ExpandedFst< Arc > &ifst, double beam, DeterminizeLatticePrunedOptions opts)
 
void FreeOutputStates ()
 
void FreeMostMemory ()
 
 ~LatticeDeterminizerPruned ()
 
void RebuildRepository ()
 
bool CheckMemoryUsage ()
 
bool Determinize (double *effective_beam)
 

Private Types

enum  IsymbolOrFinal { OSF_UNKNOWN = 0, OSF_NO = 1, OSF_YES = 2 }
 
typedef Arc::Label Label
 
typedef Arc::StateId StateId
 
typedef Arc::StateId InputStateId
 
typedef Arc::StateId OutputStateId
 
typedef
LatticeStringRepository
< IntType > 
StringRepositoryType
 
typedef const
StringRepositoryType::Entry * 
StringId
 
typedef unordered_map< const
vector< Element >
*, OutputStateId, SubsetKey,
SubsetEqual
MinimalSubsetHash
 
typedef unordered_map< const
vector< Element > *, Element,
SubsetKey, SubsetEqual
InitialSubsetHash
 

Private Member Functions

void ConvertToMinimal (vector< Element > *subset)
 
OutputStateId MinimalToStateId (const vector< Element > &subset, const double forward_cost)
 
OutputStateId InitialToStateId (const vector< Element > &subset_in, double forward_cost, Weight *remaining_weight, StringId *common_prefix)
 
int Compare (const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const
 
void EpsilonClosure (vector< Element > *subset)
 
void ProcessFinal (OutputStateId output_state_id)
 
void NormalizeSubset (vector< Element > *elems, Weight *tot_weight, StringId *common_str)
 
void MakeSubsetUnique (vector< Element > *subset)
 
void ProcessTransition (OutputStateId ostate_id, Label ilabel, vector< Element > *subset)
 
void ProcessTransitions (OutputStateId output_state_id)
 
bool IsIsymbolOrFinal (InputStateId state)
 
void ComputeBackwardWeight ()
 
void InitializeDeterminization ()
 
 KALDI_DISALLOW_COPY_AND_ASSIGN (LatticeDeterminizerPruned)
 
void AddStrings (const vector< Element > &vec, vector< StringId > *needed_strings)
 

Private Attributes

vector< OutputState * > output_states_
 
int num_arcs_
 
int num_elems_
 
const ExpandedFst< Arc > * ifst_
 
std::vector< double > backward_costs_
 
double beam_
 
double cutoff_
 
DeterminizeLatticePrunedOptions opts_
 
SubsetKey hasher_
 
SubsetEqual equal_
 
bool determinized_
 
MinimalSubsetHash minimal_hash_
 
InitialSubsetHash initial_hash_
 
std::priority_queue< Task
*, vector< Task * >
, TaskCompare
queue_
 
vector< pair< Label, Element > > all_elems_tmp_
 
vector< char > isymbol_or_final_
 
LatticeStringRepository< IntType > repository_
 

Detailed Description

template<class Weight, class IntType>
class fst::LatticeDeterminizerPruned< Weight, IntType >

Definition at line 43 of file determinize-lattice-pruned.cc.

Member Typedef Documentation

typedef ArcTpl<Weight> Arc

Definition at line 51 of file determinize-lattice-pruned.cc.

typedef ArcTpl<CompactWeight> CompactArc

Definition at line 50 of file determinize-lattice-pruned.cc.

Definition at line 49 of file determinize-lattice-pruned.cc.

typedef unordered_map<const vector<Element>*, Element, SubsetKey, SubsetEqual> InitialSubsetHash
private

Definition at line 495 of file determinize-lattice-pruned.cc.

typedef Arc::StateId InputStateId
private

Definition at line 382 of file determinize-lattice-pruned.cc.

typedef Arc::Label Label
private

Definition at line 380 of file determinize-lattice-pruned.cc.

typedef unordered_map<const vector<Element>*, OutputStateId, SubsetKey, SubsetEqual> MinimalSubsetHash
private

Definition at line 487 of file determinize-lattice-pruned.cc.

typedef Arc::StateId OutputStateId
private

Definition at line 383 of file determinize-lattice-pruned.cc.

typedef Arc::StateId StateId
private

Definition at line 381 of file determinize-lattice-pruned.cc.

typedef const StringRepositoryType::Entry* StringId
private

Definition at line 387 of file determinize-lattice-pruned.cc.

Definition at line 386 of file determinize-lattice-pruned.cc.

Member Enumeration Documentation

Constructor & Destructor Documentation

LatticeDeterminizerPruned ( const ExpandedFst< Arc > &  ifst,
double  beam,
DeterminizeLatticePrunedOptions  opts 
)
inline

Definition at line 187 of file determinize-lattice-pruned.cc.

References KALDI_ASSERT.

189  :
190  num_arcs_(0), num_elems_(0), ifst_(ifst.Copy()), beam_(beam), opts_(opts),
191  equal_(opts_.delta), determinized_(false),
193  KALDI_ASSERT(Weight::Properties() & kIdempotent); // this algorithm won't
194  // work correctly otherwise.
195  }
DeterminizeLatticePrunedOptions opts_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169

Definition at line 239 of file determinize-lattice-pruned.cc.

239  {
240  FreeMostMemory();
242  // rest is deleted by destructors.
243  }

Member Function Documentation

void AddStrings ( const vector< Element > &  vec,
vector< StringId > *  needed_strings 
)
inlineprivate

Definition at line 1179 of file determinize-lattice-pruned.cc.

1180  {
1181  for (typename std::vector<Element>::const_iterator iter = vec.begin();
1182  iter != vec.end(); ++iter)
1183  needed_strings->push_back(iter->string);
1184  }
bool CheckMemoryUsage ( )
inline

Definition at line 288 of file determinize-lattice-pruned.cc.

References KALDI_VLOG, KALDI_WARN, and LatticeDeterminizerPruned< Weight, IntType >::Task::priority_cost.

288  {
289  int32 repo_size = repository_.MemSize(),
290  arcs_size = num_arcs_ * sizeof(TempArc),
291  elems_size = num_elems_ * sizeof(Element),
292  total_size = repo_size + arcs_size + elems_size;
293  if (opts_.max_mem > 0 && total_size > opts_.max_mem) { // We passed the memory threshold.
294  // This is usually due to the repository getting large, so we
295  // clean this out.
297  int32 new_repo_size = repository_.MemSize(),
298  new_total_size = new_repo_size + arcs_size + elems_size;
299 
300  KALDI_VLOG(2) << "Rebuilt repository in determinize-lattice: repository shrank from "
301  << repo_size << " to " << new_repo_size << " bytes (approximately)";
302 
303  if (new_total_size > static_cast<int32>(opts_.max_mem * 0.8)) {
304  // Rebuilding didn't help enough-- we need a margin to stop
305  // having to rebuild too often. We'll just return to the user at
306  // this point, with a partial lattice that's pruned tighter than
307  // the specified beam. Here we figure out what the effective
308  // beam was.
309  double effective_beam = beam_;
310  if (!queue_.empty()) { // Note: queue should probably not be empty; we're
311  // just being paranoid here.
312  Task *task = queue_.top();
313  double total_weight = backward_costs_[ifst_->Start()]; // best weight of FST.
314  effective_beam = task->priority_cost - total_weight;
315  }
316  KALDI_WARN << "Did not reach requested beam in determinize-lattice: "
317  << "size exceeds maximum " << opts_.max_mem
318  << " bytes; (repo,arcs,elems) = (" << repo_size << ","
319  << arcs_size << "," << elems_size
320  << "), after rebuilding, repo size was " << new_repo_size
321  << ", effective beam was " << effective_beam
322  << " vs. requested beam " << beam_;
323  return false;
324  }
325  }
326  return true;
327  }
DeterminizeLatticePrunedOptions opts_
#define KALDI_WARN
Definition: kaldi-error.h:130
LatticeStringRepository< IntType > repository_
#define KALDI_VLOG(v)
Definition: kaldi-error.h:136
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_
int Compare ( const Weight a_w,
StringId  a_str,
const Weight b_w,
StringId  b_str 
) const
inlineprivate

Definition at line 605 of file determinize-lattice-pruned.cc.

References fst::Compare(), rnnlm::i, and KALDI_ASSERT.

606  {
607  int weight_comp = fst::Compare(a_w, b_w);
608  if (weight_comp != 0) return weight_comp;
609  // now comparing strings.
610  if (a_str == b_str) return 0;
611  vector<IntType> a_vec, b_vec;
612  repository_.ConvertToVector(a_str, &a_vec);
613  repository_.ConvertToVector(b_str, &b_vec);
614  // First compare their lengths.
615  int a_len = a_vec.size(), b_len = b_vec.size();
616  // use opposite order on the string lengths (c.f. Compare in
617  // lattice-weight.h)
618  if (a_len > b_len) return -1;
619  else if (a_len < b_len) return 1;
620  for(int i = 0; i < a_len; i++) {
621  if (a_vec[i] < b_vec[i]) return -1;
622  else if (a_vec[i] > b_vec[i]) return 1;
623  }
624  KALDI_ASSERT(0); // because we checked if a_str == b_str above, shouldn't reach here
625  return 0;
626  }
LatticeStringRepository< IntType > repository_
int Compare(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
Compare returns -1 if w1 < w2, +1 if w1 > w2, and 0 if w1 == w2.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void ComputeBackwardWeight ( )
inlineprivate

Definition at line 1013 of file determinize-lattice-pruned.cc.

References fst::ConvertToCost(), KALDI_ASSERT, and KALDI_WARN.

1013  {
1014  // Sets up the backward_costs_ array, and the cutoff_ variable.
1015  KALDI_ASSERT(beam_ > 0);
1016 
1017  // Only handle the toplogically sorted case.
1018  backward_costs_.resize(ifst_->NumStates());
1019  for (StateId s = ifst_->NumStates() - 1; s >= 0; s--) {
1020  double &cost = backward_costs_[s];
1021  cost = ConvertToCost(ifst_->Final(s));
1022  for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, s);
1023  !aiter.Done(); aiter.Next()) {
1024  const Arc &arc = aiter.Value();
1025  cost = std::min(cost,
1026  ConvertToCost(arc.weight) + backward_costs_[arc.nextstate]);
1027  }
1028  }
1029 
1030  if (ifst_->Start() == kNoStateId) return; // we'll be returning
1031  // an empty FST.
1032 
1033  double best_cost = backward_costs_[ifst_->Start()];
1034  if (best_cost == numeric_limits<double>::infinity())
1035  KALDI_WARN << "Total weight of input lattice is zero.";
1036  cutoff_ = best_cost + beam_;
1037  }
double ConvertToCost(const LatticeWeightTpl< Float > &w)
#define KALDI_WARN
Definition: kaldi-error.h:130
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void ConvertToMinimal ( vector< Element > *  subset)
inlineprivate

Definition at line 501 of file determinize-lattice-pruned.cc.

References KALDI_ASSERT.

501  {
502  KALDI_ASSERT(!subset->empty());
503  typename vector<Element>::iterator cur_in = subset->begin(),
504  cur_out = subset->begin(), end = subset->end();
505  while (cur_in != end) {
506  if(IsIsymbolOrFinal(cur_in->state)) { // keep it...
507  *cur_out = *cur_in;
508  cur_out++;
509  }
510  cur_in++;
511  }
512  subset->resize(cur_out - subset->begin());
513  }
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
bool Determinize ( double *  effective_beam)
inline

Definition at line 329 of file determinize-lattice-pruned.cc.

References KALDI_ASSERT, KALDI_VLOG, LatticeDeterminizerPruned< Weight, IntType >::Task::label, LatticeDeterminizerPruned< Weight, IntType >::Task::priority_cost, LatticeDeterminizerPruned< Weight, IntType >::Task::state, and LatticeDeterminizerPruned< Weight, IntType >::Task::subset.

Referenced by fst::DeterminizeLatticePruned().

329  {
331  // This determinizes the input fst but leaves it in the "special format"
332  // in "output_arcs_". Must be called after Initialize(). To get the
333  // output, call one of the Output routines.
334 
335  InitializeDeterminization(); // some start-up tasks.
336  while (!queue_.empty()) {
337  Task *task = queue_.top();
338  // Note: the queue contains only tasks that are "within the beam".
339  // We also have to check whether we have reached one of the user-specified
340  // maximums, of estimated memory, arcs, or states. The condition for
341  // ending is:
342  // num-states is more than user specified, OR
343  // num-arcs is more than user specified, OR
344  // memory passed a user-specified threshold and cleanup failed
345  // to get it below that threshold.
346  size_t num_states = output_states_.size();
347  if ((opts_.max_states > 0 && num_states > opts_.max_states) ||
348  (opts_.max_arcs > 0 && num_arcs_ > opts_.max_arcs) ||
349  (num_states % 10 == 0 && !CheckMemoryUsage())) { // note: at some point
350  // it was num_states % 100, not num_states % 10, but I encountered an example
351  // where memory was exhausted before we reached state #100.
352  KALDI_VLOG(1) << "Lattice determinization terminated but not "
353  << " because of lattice-beam. (#states, #arcs) is ( "
354  << output_states_.size() << ", " << num_arcs_
355  << " ), versus limits ( " << opts_.max_states << ", "
356  << opts_.max_arcs << " ) (else, may be memory limit).";
357  break;
358  // we terminate the determinization here-- whatever we already expanded is
359  // what we'll return... because we expanded stuff in order of total
360  // (forward-backward) weight, the stuff we returned first is the most
361  // important.
362  }
363  queue_.pop();
364  ProcessTransition(task->state, task->label, &(task->subset));
365  delete task;
366  }
367  determinized_ = true;
368  if (effective_beam != NULL) {
369  if (queue_.empty()) *effective_beam = beam_;
370  else
371  *effective_beam = queue_.top()->priority_cost -
372  backward_costs_[ifst_->Start()];
373  }
374  return (queue_.empty()); // return success if queue was empty, i.e. we processed
375  // all tasks and did not break out of the loop early due to reaching a memory,
376  // arc or state limit.
377  }
DeterminizeLatticePrunedOptions opts_
void ProcessTransition(OutputStateId ostate_id, Label ilabel, vector< Element > *subset)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
#define KALDI_VLOG(v)
Definition: kaldi-error.h:136
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_
void EpsilonClosure ( vector< Element > *  subset)
inlineprivate

Definition at line 633 of file determinize-lattice-pruned.cc.

References fst::Compare(), KALDI_ERR, LatticeDeterminizerPruned< Weight, IntType >::Element::state, LatticeDeterminizerPruned< Weight, IntType >::Element::string, fst::Times(), and LatticeDeterminizerPruned< Weight, IntType >::Element::weight.

633  {
634  // at input, subset must have only one example of each StateId. [will still
635  // be so at output]. This function follows input-epsilons, and augments the
636  // subset accordingly.
637 
638  std::priority_queue<Element, vector<Element>, greater<Element> > queue;
639  unordered_map<InputStateId, Element> cur_subset;
640  typedef typename unordered_map<InputStateId, Element>::iterator MapIter;
641  typedef typename vector<Element>::const_iterator VecIter;
642 
643  for (VecIter iter = subset->begin(); iter != subset->end(); ++iter) {
644  queue.push(*iter);
645  cur_subset[iter->state] = *iter;
646  }
647 
648  // find whether input fst is known to be sorted on input label.
649  bool sorted = ((ifst_->Properties(kILabelSorted, false) & kILabelSorted) != 0);
650  bool replaced_elems = false; // relates to an optimization, see below.
651  int counter = 0; // stops infinite loops here for non-lattice-determinizable input
652  // (e.g. input with negative-cost epsilon loops); useful in testing.
653  while (queue.size() != 0) {
654  Element elem = queue.top();
655  queue.pop();
656 
657  // The next if-statement is a kind of optimization. It's to prevent us
658  // unnecessarily repeating the processing of a state. "cur_subset" always
659  // contains only one Element with a particular state. The issue is that
660  // whenever we modify the Element corresponding to that state in "cur_subset",
661  // both the new (optimal) and old (less-optimal) Element will still be in
662  // "queue". The next if-statement stops us from wasting compute by
663  // processing the old Element.
664  if (replaced_elems && cur_subset[elem.state] != elem)
665  continue;
666  if (opts_.max_loop > 0 && counter++ > opts_.max_loop) {
667  KALDI_ERR << "Lattice determinization aborted since looped more than "
668  << opts_.max_loop << " times during epsilon closure.\n";
669  throw std::runtime_error("looped more than max-arcs times in lattice determinization");
670  }
671  for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, elem.state); !aiter.Done(); aiter.Next()) {
672  const Arc &arc = aiter.Value();
673  if (sorted && arc.ilabel != 0) break; // Break from the loop: due to sorting there will be no
674  // more transitions with epsilons as input labels.
675  if (arc.ilabel == 0
676  && arc.weight != Weight::Zero()) { // Epsilon transition.
677  Element next_elem;
678  next_elem.state = arc.nextstate;
679  next_elem.weight = Times(elem.weight, arc.weight);
680  // next_elem.string is not set up yet... create it only
681  // when we know we need it (this is an optimization)
682 
683  MapIter iter = cur_subset.find(next_elem.state);
684  if (iter == cur_subset.end()) {
685  // was no such StateId: insert and add to queue.
686  next_elem.string = (arc.olabel == 0 ? elem.string :
687  repository_.Successor(elem.string, arc.olabel));
688  cur_subset[next_elem.state] = next_elem;
689  queue.push(next_elem);
690  } else {
691  // was not inserted because one already there. In normal
692  // determinization we'd add the weights. Here, we find which one
693  // has the better weight, and keep its corresponding string.
694  int comp = fst::Compare(next_elem.weight, iter->second.weight);
695  if (comp == 0) { // A tie on weights. This should be a rare case;
696  // we don't optimize for it.
697  next_elem.string = (arc.olabel == 0 ? elem.string :
698  repository_.Successor(elem.string,
699  arc.olabel));
700  comp = Compare(next_elem.weight, next_elem.string,
701  iter->second.weight, iter->second.string);
702  }
703  if(comp == 1) { // next_elem is better, so use its (weight, string)
704  next_elem.string = (arc.olabel == 0 ? elem.string :
705  repository_.Successor(elem.string, arc.olabel));
706  iter->second.string = next_elem.string;
707  iter->second.weight = next_elem.weight;
708  queue.push(next_elem);
709  replaced_elems = true;
710  }
711  // else it is the same or worse, so use original one.
712  }
713  }
714  }
715  }
716 
717  { // copy cur_subset to subset.
718  subset->clear();
719  subset->reserve(cur_subset.size());
720  MapIter iter = cur_subset.begin(), end = cur_subset.end();
721  for (; iter != end; ++iter) subset->push_back(iter->second);
722  // sort by state ID, because the subset hash function is order-dependent(see SubsetKey)
723  std::sort(subset->begin(), subset->end());
724  }
725  }
DeterminizeLatticePrunedOptions opts_
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
int Compare(const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const
#define KALDI_ERR
Definition: kaldi-error.h:127
LatticeStringRepository< IntType > repository_
int Compare(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
Compare returns -1 if w1 < w2, +1 if w1 > w2, and 0 if w1 == w2.
void FreeMostMemory ( )
inline

Definition at line 206 of file determinize-lattice-pruned.cc.

References rnnlm::i, LatticeDeterminizerPruned< Weight, IntType >::ifst_, LatticeDeterminizerPruned< Weight, IntType >::initial_hash_, LatticeDeterminizerPruned< Weight, IntType >::minimal_hash_, and LatticeDeterminizerPruned< Weight, IntType >::output_states_.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::Output().

206  {
207  if (ifst_) {
208  delete ifst_;
209  ifst_ = NULL;
210  }
211  { MinimalSubsetHash tmp; tmp.swap(minimal_hash_); }
212 
213  for (size_t i = 0; i < output_states_.size(); i++) {
214  vector<Element> empty_subset;
215  empty_subset.swap(output_states_[i]->minimal_subset);
216  }
217 
218  for (typename InitialSubsetHash::iterator iter = initial_hash_.begin();
219  iter != initial_hash_.end(); ++iter)
220  delete iter->first;
221  { InitialSubsetHash tmp; tmp.swap(initial_hash_); }
222  for (size_t i = 0; i < output_states_.size(); i++) {
223  vector<Element> tmp;
224  tmp.swap(output_states_[i]->minimal_subset);
225  }
226  { vector<char> tmp; tmp.swap(isymbol_or_final_); }
227  { // Free up the queue. I'm not sure how to make sure all
228  // the memory is really freed (no swap() function)... doesn't really
229  // matter much though.
230  while (!queue_.empty()) {
231  Task *t = queue_.top();
232  delete t;
233  queue_.pop();
234  }
235  }
236  { vector<pair<Label, Element> > tmp; tmp.swap(all_elems_tmp_); }
237  }
unordered_map< const vector< Element > *, Element, SubsetKey, SubsetEqual > InitialSubsetHash
vector< pair< Label, Element > > all_elems_tmp_
unordered_map< const vector< Element > *, OutputStateId, SubsetKey, SubsetEqual > MinimalSubsetHash
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_
void FreeOutputStates ( )
inline

Definition at line 197 of file determinize-lattice-pruned.cc.

References rnnlm::i, and LatticeDeterminizerPruned< Weight, IntType >::output_states_.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::Output().

197  {
198  for (size_t i = 0; i < output_states_.size(); i++)
199  delete output_states_[i];
200  vector<OutputState*> temp;
201  temp.swap(output_states_);
202  }
void InitializeDeterminization ( )
inlineprivate

Definition at line 1039 of file determinize-lattice-pruned.cc.

References KALDI_ASSERT, and LatticeDeterminizerPruned< Weight, IntType >::OutputState::minimal_subset.

1039  {
1040  // We insist that the input lattice be topologically sorted. This is not a
1041  // fundamental limitation of the algorithm (which in principle should be
1042  // applicable to even cyclic FSTs), but it helps us more efficiently
1043  // compute the backward_costs_ array. There may be some other reason we
1044  // require this, that escapes me at the moment.
1045  KALDI_ASSERT(ifst_->Properties(kTopSorted, true) != 0);
1047 #if !(__GNUC__ == 4 && __GNUC_MINOR__ == 0)
1048  if(ifst_->Properties(kExpanded, false) != 0) { // if we know the number of
1049  // states in ifst_, it might be a bit more efficient
1050  // to pre-size the hashes so we're not constantly rebuilding them.
1051  StateId num_states =
1052  down_cast<const ExpandedFst<Arc>*, const Fst<Arc> >(ifst_)->NumStates();
1053  minimal_hash_.rehash(num_states/2 + 3);
1054  initial_hash_.rehash(num_states/2 + 3);
1055  }
1056 #endif
1057  InputStateId start_id = ifst_->Start();
1058  if (start_id != kNoStateId) {
1059  /* Create determinized-state corresponding to the start state....
1060  Unlike all the other states, we don't "normalize" the representation
1061  of this determinized-state before we put it into minimal_hash_. This is actually
1062  what we want, as otherwise we'd have problems dealing with any extra weight
1063  and string and might have to create a "super-initial" state which would make
1064  the output nondeterministic. Normalization is only needed to make the
1065  determinized output more minimal anyway, it's not needed for correctness.
1066  Note, we don't put anything in the initial_hash_. The initial_hash_ is only
1067  a lookaside buffer anyway, so this isn't a problem-- it will get populated
1068  later if it needs to be.
1069  */
1070  vector<Element> subset(1);
1071  subset[0].state = start_id;
1072  subset[0].weight = Weight::One();
1073  subset[0].string = repository_.EmptyString(); // Id of empty sequence.
1074  EpsilonClosure(&subset); // follow through epsilon-input links
1075  ConvertToMinimal(&subset); // remove all but final states and
1076  // states with input-labels on arcs out of them.
1077  // Weight::One() is the "forward-weight" of this determinized state...
1078  // i.e. the minimal cost from the start of the determinized FST to this
1079  // state [One() because it's the start state].
1080  OutputState *initial_state = new OutputState(subset, 0);
1081  KALDI_ASSERT(output_states_.empty());
1082  output_states_.push_back(initial_state);
1083  num_elems_ += subset.size();
1084  OutputStateId initial_state_id = 0;
1085  minimal_hash_[&(initial_state->minimal_subset)] = initial_state_id;
1086  ProcessFinal(initial_state_id);
1087  ProcessTransitions(initial_state_id); // this will add tasks to
1088  // the queue, which we'll start processing in Determinize().
1089  }
1090  }
void ProcessTransitions(OutputStateId output_state_id)
void ProcessFinal(OutputStateId output_state_id)
LatticeStringRepository< IntType > repository_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void EpsilonClosure(vector< Element > *subset)
void ConvertToMinimal(vector< Element > *subset)
OutputStateId InitialToStateId ( const vector< Element > &  subset_in,
double  forward_cost,
Weight remaining_weight,
StringId common_prefix 
)
inlineprivate

Definition at line 552 of file determinize-lattice-pruned.cc.

References fst::ConvertToCost(), KALDI_WARN, LatticeDeterminizerPruned< Weight, IntType >::Element::state, LatticeDeterminizerPruned< Weight, IntType >::Element::string, and LatticeDeterminizerPruned< Weight, IntType >::Element::weight.

555  {
556  typename InitialSubsetHash::const_iterator iter
557  = initial_hash_.find(&subset_in);
558  if (iter != initial_hash_.end()) { // Found a matching subset.
559  const Element &elem = iter->second;
560  *remaining_weight = elem.weight;
561  *common_prefix = elem.string;
562  if (elem.weight == Weight::Zero())
563  KALDI_WARN << "Zero weight!";
564  return elem.state;
565  }
566  // else no matching subset-- have to work it out.
567  vector<Element> subset(subset_in);
568  // Follow through epsilons. Will add no duplicate states. note: after
569  // EpsilonClosure, it is the same as "canonical" subset, except not
570  // normalized (actually we never compute the normalized canonical subset,
571  // only the normalized minimal one).
572  EpsilonClosure(&subset); // follow epsilons.
573  ConvertToMinimal(&subset); // remove all but emitting and final states.
574 
575  Element elem; // will be used to store remaining weight and string, and
576  // OutputStateId, in initial_hash_;
577  NormalizeSubset(&subset, &elem.weight, &elem.string); // normalize subset; put
578  // common string and weight in "elem". The subset is now a minimal,
579  // normalized subset.
580 
581  forward_cost += ConvertToCost(elem.weight);
582  OutputStateId ans = MinimalToStateId(subset, forward_cost);
583  *remaining_weight = elem.weight;
584  *common_prefix = elem.string;
585  if (elem.weight == Weight::Zero())
586  KALDI_WARN << "Zero weight!";
587 
588  // Before returning "ans", add the initial subset to the hash,
589  // so that we can bypass the epsilon-closure etc., next time
590  // we process the same initial subset.
591  vector<Element> *initial_subset_ptr = new vector<Element>(subset_in);
592  elem.state = ans;
593  initial_hash_[initial_subset_ptr] = elem;
594  num_elems_ += initial_subset_ptr->size(); // keep track of memory usage.
595  return ans;
596  }
OutputStateId MinimalToStateId(const vector< Element > &subset, const double forward_cost)
double ConvertToCost(const LatticeWeightTpl< Float > &w)
#define KALDI_WARN
Definition: kaldi-error.h:130
void NormalizeSubset(vector< Element > *elems, Weight *tot_weight, StringId *common_str)
void EpsilonClosure(vector< Element > *subset)
void ConvertToMinimal(vector< Element > *subset)
bool IsIsymbolOrFinal ( InputStateId  state)
inlineprivate

Definition at line 987 of file determinize-lattice-pruned.cc.

References KALDI_ASSERT.

987  { // returns true if this state
988  // of the input FST either is final or has an osymbol on an arc out of it.
989  // Uses the vector isymbol_or_final_ as a cache for this info.
990  KALDI_ASSERT(state >= 0);
991  if (isymbol_or_final_.size() <= state)
992  isymbol_or_final_.resize(state+1, static_cast<char>(OSF_UNKNOWN));
993  if (isymbol_or_final_[state] == static_cast<char>(OSF_NO))
994  return false;
995  else if (isymbol_or_final_[state] == static_cast<char>(OSF_YES))
996  return true;
997  // else work it out...
998  isymbol_or_final_[state] = static_cast<char>(OSF_NO);
999  if (ifst_->Final(state) != Weight::Zero())
1000  isymbol_or_final_[state] = static_cast<char>(OSF_YES);
1001  for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, state);
1002  !aiter.Done();
1003  aiter.Next()) {
1004  const Arc &arc = aiter.Value();
1005  if (arc.ilabel != 0 && arc.weight != Weight::Zero()) {
1006  isymbol_or_final_[state] = static_cast<char>(OSF_YES);
1007  return true;
1008  }
1009  }
1010  return IsIsymbolOrFinal(state); // will only recurse once.
1011  }
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
KALDI_DISALLOW_COPY_AND_ASSIGN ( LatticeDeterminizerPruned< Weight, IntType >  )
private
void MakeSubsetUnique ( vector< Element > *  subset)
inlineprivate

Definition at line 809 of file determinize-lattice-pruned.cc.

References fst::Compare(), and KALDI_ASSERT.

809  {
810  typedef typename vector<Element>::iterator IterType;
811 
812  // This KALDI_ASSERT is designed to fail (usually) if the subset is not sorted on
813  // state.
814  KALDI_ASSERT(subset->size() < 2 || (*subset)[0].state <= (*subset)[1].state);
815 
816  IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end();
817  size_t num_out = 0;
818  // Merge elements with same state-id
819  while (cur_in != end) { // while we have more elements to process.
820  // At this point, cur_out points to location of next place we want to put an element,
821  // cur_in points to location of next element we want to process.
822  if (cur_in != cur_out) *cur_out = *cur_in;
823  cur_in++;
824  while (cur_in != end && cur_in->state == cur_out->state) {
825  if (Compare(cur_in->weight, cur_in->string,
826  cur_out->weight, cur_out->string) == 1) {
827  // if *cur_in > *cur_out in semiring, then take *cur_in.
828  cur_out->string = cur_in->string;
829  cur_out->weight = cur_in->weight;
830  }
831  cur_in++;
832  }
833  cur_out++;
834  num_out++;
835  }
836  subset->resize(num_out);
837  }
int Compare(const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
OutputStateId MinimalToStateId ( const vector< Element > &  subset,
const double  forward_cost 
)
inlineprivate

Definition at line 520 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::OutputState::forward_cost, KALDI_WARN, and LatticeDeterminizerPruned< Weight, IntType >::OutputState::minimal_subset.

521  {
522  typename MinimalSubsetHash::const_iterator iter
523  = minimal_hash_.find(&subset);
524  if (iter != minimal_hash_.end()) { // Found a matching subset.
525  OutputStateId state_id = iter->second;
526  const OutputState &state = *(output_states_[state_id]);
527  // Below is just a check that the algorithm is working...
528  if (forward_cost < state.forward_cost - 0.1) {
529  // for large weights, this check could fail due to roundoff.
530  KALDI_WARN << "New cost is less (check the difference is small) "
531  << forward_cost << ", "
532  << state.forward_cost;
533  }
534  return state_id;
535  }
536  OutputStateId state_id = static_cast<OutputStateId>(output_states_.size());
537  OutputState *new_state = new OutputState(subset, forward_cost);
538  minimal_hash_[&(new_state->minimal_subset)] = state_id;
539  output_states_.push_back(new_state);
540  num_elems_ += subset.size();
541  // Note: in the previous algorithm, we pushed the new state-id onto the queue
542  // at this point. Here, the queue happens elsewhere, and we directly process
543  // the state (which result in stuff getting added to the queue).
544  ProcessFinal(state_id); // will work out the final-prob.
545  ProcessTransitions(state_id); // will process transitions and add stuff to the queue.
546  return state_id;
547  }
void ProcessTransitions(OutputStateId output_state_id)
#define KALDI_WARN
Definition: kaldi-error.h:130
void ProcessFinal(OutputStateId output_state_id)
void NormalizeSubset ( vector< Element > *  elems,
Weight tot_weight,
StringId common_str 
)
inlineprivate

Definition at line 776 of file determinize-lattice-pruned.cc.

References fst::Divide(), rnnlm::i, KALDI_ASSERT, KALDI_WARN, and fst::Plus().

778  {
779  if(elems->empty()) { // just set common_str, tot_weight
780  // to defaults and return...
781  KALDI_WARN << "empty subset";
782  *common_str = repository_.EmptyString();
783  *tot_weight = Weight::Zero();
784  return;
785  }
786  size_t size = elems->size();
787  vector<IntType> common_prefix;
788  repository_.ConvertToVector((*elems)[0].string, &common_prefix);
789  Weight weight = (*elems)[0].weight;
790  for(size_t i = 1; i < size; i++) {
791  weight = Plus(weight, (*elems)[i].weight);
792  repository_.ReduceToCommonPrefix((*elems)[i].string, &common_prefix);
793  }
794  KALDI_ASSERT(weight != Weight::Zero()); // we made sure to ignore arcs with zero
795  // weights on them, so we shouldn't have zero here.
796  size_t prefix_len = common_prefix.size();
797  for(size_t i = 0; i < size; i++) {
798  (*elems)[i].weight = Divide((*elems)[i].weight, weight, DIVIDE_LEFT);
799  (*elems)[i].string =
800  repository_.RemovePrefix((*elems)[i].string, prefix_len);
801  }
802  *common_str = repository_.ConvertFromVector(common_prefix);
803  *tot_weight = weight;
804  }
LatticeWeightTpl< FloatType > Divide(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, DivideType typ=DIVIDE_ANY)
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
#define KALDI_WARN
Definition: kaldi-error.h:130
fst::StdArc::Weight Weight
LatticeStringRepository< IntType > repository_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void Output ( MutableFst< CompactArc > *  ofst,
bool  destroy = true 
)
inline

Definition at line 56 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::OutputState::arcs, LatticeDeterminizerPruned< Weight, IntType >::determinized_, LatticeDeterminizerPruned< Weight, IntType >::FreeMostMemory(), LatticeDeterminizerPruned< Weight, IntType >::FreeOutputStates(), LatticeDeterminizerPruned< Weight, IntType >::TempArc::ilabel, KALDI_ASSERT, LatticeDeterminizerPruned< Weight, IntType >::TempArc::nextstate, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::TempArc::string, and LatticeDeterminizerPruned< Weight, IntType >::TempArc::weight.

Referenced by fst::DeterminizeLatticePruned().

56  {
58  typedef typename Arc::StateId StateId;
59  StateId nStates = static_cast<StateId>(output_states_.size());
60  if (destroy)
62  ofst->DeleteStates();
63  ofst->SetStart(kNoStateId);
64  if (nStates == 0) {
65  return;
66  }
67  for (StateId s = 0;s < nStates;s++) {
68  OutputStateId news = ofst->AddState();
69  KALDI_ASSERT(news == s);
70  }
71  ofst->SetStart(0);
72  // now process transitions.
73  for (StateId this_state_id = 0; this_state_id < nStates; this_state_id++) {
74  OutputState &this_state = *(output_states_[this_state_id]);
75  vector<TempArc> &this_vec(this_state.arcs);
76  typename vector<TempArc>::const_iterator iter = this_vec.begin(), end = this_vec.end();
77 
78  for (;iter != end; ++iter) {
79  const TempArc &temp_arc(*iter);
80  CompactArc new_arc;
81  vector<Label> olabel_seq;
82  repository_.ConvertToVector(temp_arc.string, &olabel_seq);
83  CompactWeight weight(temp_arc.weight, olabel_seq);
84  if (temp_arc.nextstate == kNoStateId) { // is really final weight.
85  ofst->SetFinal(this_state_id, weight);
86  } else { // is really an arc.
87  new_arc.nextstate = temp_arc.nextstate;
88  new_arc.ilabel = temp_arc.ilabel;
89  new_arc.olabel = temp_arc.ilabel; // acceptor. input == output.
90  new_arc.weight = weight; // includes string and weight.
91  ofst->AddArc(this_state_id, new_arc);
92  }
93  }
94  // Free up memory. Do this inside the loop as ofst is also allocating memory,
95  // and we want to reduce the maximum amount ever allocated.
96  if (destroy) { vector<TempArc> temp; temp.swap(this_vec); }
97  }
98  if (destroy) {
100  repository_.Destroy();
101  }
102  }
fst::StdArc::StateId StateId
CompactLatticeWeightTpl< Weight, IntType > CompactWeight
LatticeStringRepository< IntType > repository_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void Output ( MutableFst< Arc > *  ofst,
bool  destroy = true 
)
inline

Definition at line 107 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::OutputState::arcs, LatticeDeterminizerPruned< Weight, IntType >::FreeMostMemory(), LatticeDeterminizerPruned< Weight, IntType >::FreeOutputStates(), rnnlm::i, LatticeDeterminizerPruned< Weight, IntType >::TempArc::ilabel, KALDI_ASSERT, LatticeDeterminizerPruned< Weight, IntType >::TempArc::nextstate, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::TempArc::string, and LatticeDeterminizerPruned< Weight, IntType >::TempArc::weight.

107  {
108  // Outputs to standard fst.
109  OutputStateId nStates = static_cast<OutputStateId>(output_states_.size());
110  ofst->DeleteStates();
111  if (nStates == 0) {
112  ofst->SetStart(kNoStateId);
113  return;
114  }
115  if (destroy)
116  FreeMostMemory();
117  // Add basic states-- but we will add extra ones to account for strings on output.
118  for (OutputStateId s = 0; s< nStates;s++) {
119  OutputStateId news = ofst->AddState();
120  KALDI_ASSERT(news == s);
121  }
122  ofst->SetStart(0);
123  for (OutputStateId this_state_id = 0; this_state_id < nStates; this_state_id++) {
124  OutputState &this_state = *(output_states_[this_state_id]);
125  vector<TempArc> &this_vec(this_state.arcs);
126 
127  typename vector<TempArc>::const_iterator iter = this_vec.begin(), end = this_vec.end();
128  for (; iter != end; ++iter) {
129  const TempArc &temp_arc(*iter);
130  vector<Label> seq;
131  repository_.ConvertToVector(temp_arc.string, &seq);
132 
133  if (temp_arc.nextstate == kNoStateId) { // Really a final weight.
134  // Make a sequence of states going to a final state, with the strings
135  // as labels. Put the weight on the first arc.
136  OutputStateId cur_state = this_state_id;
137  for (size_t i = 0; i < seq.size(); i++) {
138  OutputStateId next_state = ofst->AddState();
139  Arc arc;
140  arc.nextstate = next_state;
141  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
142  arc.ilabel = 0; // epsilon.
143  arc.olabel = seq[i];
144  ofst->AddArc(cur_state, arc);
145  cur_state = next_state;
146  }
147  ofst->SetFinal(cur_state, (seq.size() == 0 ? temp_arc.weight : Weight::One()));
148  } else { // Really an arc.
149  OutputStateId cur_state = this_state_id;
150  // Have to be careful with this integer comparison (i+1 < seq.size()) because unsigned.
151  // i < seq.size()-1 could fail for zero-length sequences.
152  for (size_t i = 0; i+1 < seq.size();i++) {
153  // for all but the last element of seq, create new state.
154  OutputStateId next_state = ofst->AddState();
155  Arc arc;
156  arc.nextstate = next_state;
157  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
158  arc.ilabel = (i == 0 ? temp_arc.ilabel : 0); // put ilabel on first element of seq.
159  arc.olabel = seq[i];
160  ofst->AddArc(cur_state, arc);
161  cur_state = next_state;
162  }
163  // Add the final arc in the sequence.
164  Arc arc;
165  arc.nextstate = temp_arc.nextstate;
166  arc.weight = (seq.size() <= 1 ? temp_arc.weight : Weight::One());
167  arc.ilabel = (seq.size() <= 1 ? temp_arc.ilabel : 0);
168  arc.olabel = (seq.size() > 0 ? seq.back() : 0);
169  ofst->AddArc(cur_state, arc);
170  }
171  }
172  // Free up memory. Do this inside the loop as ofst is also allocating memory
173  if (destroy) { vector<TempArc> temp; temp.swap(this_vec); }
174  }
175  if (destroy) {
177  repository_.Destroy();
178  }
179  }
LatticeStringRepository< IntType > repository_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void ProcessFinal ( OutputStateId  output_state_id)
inlineprivate

Definition at line 733 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::OutputState::arcs, fst::Compare(), fst::ConvertToCost(), LatticeDeterminizerPruned< Weight, IntType >::OutputState::forward_cost, LatticeDeterminizerPruned< Weight, IntType >::TempArc::ilabel, LatticeDeterminizerPruned< Weight, IntType >::OutputState::minimal_subset, LatticeDeterminizerPruned< Weight, IntType >::TempArc::nextstate, LatticeDeterminizerPruned< Weight, IntType >::Element::state, LatticeDeterminizerPruned< Weight, IntType >::Element::string, LatticeDeterminizerPruned< Weight, IntType >::TempArc::string, fst::Times(), LatticeDeterminizerPruned< Weight, IntType >::Element::weight, and LatticeDeterminizerPruned< Weight, IntType >::TempArc::weight.

733  {
734  OutputState &state = *(output_states_[output_state_id]);
735  const vector<Element> &minimal_subset = state.minimal_subset;
736  // processes final-weights for this subset. state.minimal_subset_ may be
737  // empty if the graphs is not connected/trimmed, I think, do don't check
738  // that it's nonempty.
739  StringId final_string = repository_.EmptyString(); // set it to keep the
740  // compiler happy; if it doesn't get set in the loop, we won't use the value anyway.
741  Weight final_weight = Weight::Zero();
742  bool is_final = false;
743  typename vector<Element>::const_iterator iter = minimal_subset.begin(), end = minimal_subset.end();
744  for (; iter != end; ++iter) {
745  const Element &elem = *iter;
746  Weight this_final_weight = Times(elem.weight, ifst_->Final(elem.state));
747  StringId this_final_string = elem.string;
748  if (this_final_weight != Weight::Zero() &&
749  (!is_final || Compare(this_final_weight, this_final_string,
750  final_weight, final_string) == 1)) { // the new
751  // (weight, string) pair is more in semiring than our current
752  // one.
753  is_final = true;
754  final_weight = this_final_weight;
755  final_string = this_final_string;
756  }
757  }
758  if (is_final &&
759  ConvertToCost(final_weight) + state.forward_cost <= cutoff_) {
760  // store final weights in TempArc structure, just like a transition.
761  // Note: we only store the final-weight if it's inside the pruning beam, hence
762  // the stuff with Compare.
763  TempArc temp_arc;
764  temp_arc.ilabel = 0;
765  temp_arc.nextstate = kNoStateId; // special marker meaning "final weight".
766  temp_arc.string = final_string;
767  temp_arc.weight = final_weight;
768  state.arcs.push_back(temp_arc);
769  num_arcs_++;
770  }
771  }
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
int Compare(const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const
double ConvertToCost(const LatticeWeightTpl< Float > &w)
fst::StdArc::Weight Weight
LatticeStringRepository< IntType > repository_
const StringRepositoryType::Entry * StringId
void ProcessTransition ( OutputStateId  ostate_id,
Label  ilabel,
vector< Element > *  subset 
)
inlineprivate

Definition at line 846 of file determinize-lattice-pruned.cc.

References fst::ConvertToCost(), LatticeDeterminizerPruned< Weight, IntType >::TempArc::ilabel, LatticeDeterminizerPruned< Weight, IntType >::TempArc::nextstate, LatticeDeterminizerPruned< Weight, IntType >::TempArc::string, fst::Times(), and LatticeDeterminizerPruned< Weight, IntType >::TempArc::weight.

846  {
847 
848  double forward_cost = output_states_[ostate_id]->forward_cost;
849  StringId common_str;
850  Weight tot_weight;
851  NormalizeSubset(subset, &tot_weight, &common_str);
852  forward_cost += ConvertToCost(tot_weight);
853 
854  OutputStateId nextstate;
855  {
856  Weight next_tot_weight;
857  StringId next_common_str;
858  nextstate = InitialToStateId(*subset,
859  forward_cost,
860  &next_tot_weight,
861  &next_common_str);
862  common_str = repository_.Concatenate(common_str, next_common_str);
863  tot_weight = Times(tot_weight, next_tot_weight);
864  }
865 
866  // Now add an arc to the next state (would have been created if necessary by
867  // InitialToStateId).
868  TempArc temp_arc;
869  temp_arc.ilabel = ilabel;
870  temp_arc.nextstate = nextstate;
871  temp_arc.string = common_str;
872  temp_arc.weight = tot_weight;
873  output_states_[ostate_id]->arcs.push_back(temp_arc); // record the arc.
874  num_arcs_++;
875  }
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
double ConvertToCost(const LatticeWeightTpl< Float > &w)
fst::StdArc::Weight Weight
OutputStateId InitialToStateId(const vector< Element > &subset_in, double forward_cost, Weight *remaining_weight, StringId *common_prefix)
LatticeStringRepository< IntType > repository_
void NormalizeSubset(vector< Element > *elems, Weight *tot_weight, StringId *common_str)
const StringRepositoryType::Entry * StringId
void ProcessTransitions ( OutputStateId  output_state_id)
inlineprivate

Definition at line 902 of file determinize-lattice-pruned.cc.

References fst::ConvertToCost(), KALDI_WARN, LatticeDeterminizerPruned< Weight, IntType >::Task::label, LatticeDeterminizerPruned< Weight, IntType >::Task::priority_cost, LatticeDeterminizerPruned< Weight, IntType >::Element::state, LatticeDeterminizerPruned< Weight, IntType >::Task::state, LatticeDeterminizerPruned< Weight, IntType >::Element::string, LatticeDeterminizerPruned< Weight, IntType >::Task::subset, fst::Times(), and LatticeDeterminizerPruned< Weight, IntType >::Element::weight.

902  {
903  const vector<Element> &minimal_subset = output_states_[output_state_id]->minimal_subset;
904  // it's possible that minimal_subset could be empty if there are
905  // unreachable parts of the graph, so don't check that it's nonempty.
906  vector<pair<Label, Element> > &all_elems(all_elems_tmp_); // use class member
907  // to avoid memory allocation/deallocation.
908  {
909  // Push back into "all_elems", elements corresponding to all
910  // non-epsilon-input transitions out of all states in "minimal_subset".
911  typename vector<Element>::const_iterator iter = minimal_subset.begin(), end = minimal_subset.end();
912  for (;iter != end; ++iter) {
913  const Element &elem = *iter;
914  for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, elem.state); ! aiter.Done(); aiter.Next()) {
915  const Arc &arc = aiter.Value();
916  if (arc.ilabel != 0
917  && arc.weight != Weight::Zero()) { // Non-epsilon transition -- ignore epsilons here.
918  pair<Label, Element> this_pr;
919  this_pr.first = arc.ilabel;
920  Element &next_elem(this_pr.second);
921  next_elem.state = arc.nextstate;
922  next_elem.weight = Times(elem.weight, arc.weight);
923  if (arc.olabel == 0) // output epsilon
924  next_elem.string = elem.string;
925  else
926  next_elem.string = repository_.Successor(elem.string, arc.olabel);
927  all_elems.push_back(this_pr);
928  }
929  }
930  }
931  }
932  PairComparator pc;
933  std::sort(all_elems.begin(), all_elems.end(), pc);
934  // now sorted first on input label, then on state.
935  typedef typename vector<pair<Label, Element> >::const_iterator PairIter;
936  PairIter cur = all_elems.begin(), end = all_elems.end();
937  while (cur != end) {
938  // The old code (non-pruned) called ProcessTransition; here, instead,
939  // we'll put the calls into a priority queue.
940  Task *task = new Task;
941  // Process ranges that share the same input symbol.
942  Label ilabel = cur->first;
943  task->state = output_state_id;
944  task->priority_cost = std::numeric_limits<double>::infinity();
945  task->label = ilabel;
946  while (cur != end && cur->first == ilabel) {
947  task->subset.push_back(cur->second);
948  const Element &element = cur->second;
949  // Note: we'll later include the term "forward_cost" in the
950  // priority_cost.
951  task->priority_cost = std::min(task->priority_cost,
952  ConvertToCost(element.weight) +
953  backward_costs_[element.state]);
954  cur++;
955  }
956 
957  // After the command below, the "priority_cost" is a value comparable to
958  // the total-weight of the input FST, like a total-path weight... of
959  // course, it will typically be less (in the semiring) than that.
960  // note: we represent it just as a double.
961  task->priority_cost += output_states_[output_state_id]->forward_cost;
962 
963  if (task->priority_cost > cutoff_) {
964  // This task would never get done as it's past the pruning cutoff.
965  delete task;
966  } else {
967  MakeSubsetUnique(&(task->subset)); // remove duplicate Elements with the same state.
968  queue_.push(task); // Push the task onto the queue. The queue keeps it
969  // in prioritized order, so we always process the one with the "best"
970  // weight (highest in the semiring).
971 
972  { // this is a check.
973  double best_cost = backward_costs_[ifst_->Start()],
974  tolerance = 0.01 + 1.0e-04 * std::abs(best_cost);
975  if (task->priority_cost < best_cost - tolerance) {
976  KALDI_WARN << "Cost below best cost was encountered:"
977  << task->priority_cost << " < " << best_cost;
978  }
979  }
980  }
981  }
982  all_elems.clear(); // as it's a reference to a class variable; we want it to stay
983  // empty.
984  }
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
double ConvertToCost(const LatticeWeightTpl< Float > &w)
#define KALDI_WARN
Definition: kaldi-error.h:130
LatticeStringRepository< IntType > repository_
vector< pair< Label, Element > > all_elems_tmp_
void MakeSubsetUnique(vector< Element > *subset)
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_
void RebuildRepository ( )
inline

Definition at line 245 of file determinize-lattice-pruned.cc.

References rnnlm::i, rnnlm::j, KALDI_LOG, LatticeDeterminizerPruned< Weight, IntType >::Element::string, and LatticeDeterminizerPruned< Weight, IntType >::Task::subset.

245  { // rebuild the string repository,
246  // freeing stuff we don't need.. we call this when memory usage
247  // passes a supplied threshold. We need to accumulate all the
248  // strings we need the repository to "remember", then tell it
249  // to clean the repository.
250  std::vector<StringId> needed_strings;
251  for (size_t i = 0; i < output_states_.size(); i++) {
252  AddStrings(output_states_[i]->minimal_subset, &needed_strings);
253  for (size_t j = 0; j < output_states_[i]->arcs.size(); j++)
254  needed_strings.push_back(output_states_[i]->arcs[j].string);
255  }
256 
257  { // the queue doesn't allow us access to the underlying vector,
258  // so we have to resort to a temporary collection.
259  std::vector<Task*> tasks;
260  while (!queue_.empty()) {
261  Task *task = queue_.top();
262  queue_.pop();
263  tasks.push_back(task);
264  AddStrings(task->subset, &needed_strings);
265  }
266  for (size_t i = 0; i < tasks.size(); i++)
267  queue_.push(tasks[i]);
268  }
269 
270  // the following loop covers strings present in initial_hash_.
271  for (typename InitialSubsetHash::const_iterator
272  iter = initial_hash_.begin();
273  iter != initial_hash_.end(); ++iter) {
274  const vector<Element> &vec = *(iter->first);
275  Element elem = iter->second;
276  AddStrings(vec, &needed_strings);
277  needed_strings.push_back(elem.string);
278  }
279  std::sort(needed_strings.begin(), needed_strings.end());
280  needed_strings.erase(std::unique(needed_strings.begin(),
281  needed_strings.end()),
282  needed_strings.end()); // uniq the strings.
283  KALDI_LOG << "Rebuilding repository.";
284 
285  repository_.Rebuild(needed_strings);
286  }
void AddStrings(const vector< Element > &vec, vector< StringId > *needed_strings)
LatticeStringRepository< IntType > repository_
#define KALDI_LOG
Definition: kaldi-error.h:133
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_

Member Data Documentation

vector<pair<Label, Element> > all_elems_tmp_
private

Definition at line 1168 of file determinize-lattice-pruned.cc.

std::vector<double> backward_costs_
private

Definition at line 1117 of file determinize-lattice-pruned.cc.

double beam_
private

Definition at line 1122 of file determinize-lattice-pruned.cc.

double cutoff_
private

Definition at line 1123 of file determinize-lattice-pruned.cc.

bool determinized_
private
SubsetEqual equal_
private

Definition at line 1128 of file determinize-lattice-pruned.cc.

SubsetKey hasher_
private

Definition at line 1127 of file determinize-lattice-pruned.cc.

const ExpandedFst<Arc>* ifst_
private
vector<char> isymbol_or_final_
private

Definition at line 1172 of file determinize-lattice-pruned.cc.

int num_arcs_
private

Definition at line 1112 of file determinize-lattice-pruned.cc.

int num_elems_
private

Definition at line 1113 of file determinize-lattice-pruned.cc.

Definition at line 1126 of file determinize-lattice-pruned.cc.

std::priority_queue<Task*, vector<Task*>, TaskCompare> queue_
private

Definition at line 1166 of file determinize-lattice-pruned.cc.

LatticeStringRepository<IntType> repository_
private

The documentation for this class was generated from the following file: