LatticeDeterminizerPruned< Weight, IntType > Class Template Reference
Collaboration diagram for LatticeDeterminizerPruned< Weight, IntType >:

Classes

struct  Element
 
struct  OutputState
 
class  PairComparator
 
class  SubsetEqual
 
class  SubsetEqualStates
 
class  SubsetKey
 
struct  Task
 
struct  TaskCompare
 
struct  TempArc
 

Public Types

typedef CompactLatticeWeightTpl< Weight, IntType > CompactWeight
 
typedef ArcTpl< CompactWeightCompactArc
 
typedef ArcTpl< WeightArc
 

Public Member Functions

void Output (MutableFst< CompactArc > *ofst, bool destroy=true)
 
void Output (MutableFst< Arc > *ofst, bool destroy=true)
 
 LatticeDeterminizerPruned (const ExpandedFst< Arc > &ifst, double beam, DeterminizeLatticePrunedOptions opts)
 
void FreeOutputStates ()
 
void FreeMostMemory ()
 
 ~LatticeDeterminizerPruned ()
 
void RebuildRepository ()
 
bool CheckMemoryUsage ()
 
bool Determinize (double *effective_beam)
 

Private Types

enum  IsymbolOrFinal { OSF_UNKNOWN = 0, OSF_NO = 1, OSF_YES = 2 }
 
typedef Arc::Label Label
 
typedef Arc::StateId StateId
 
typedef Arc::StateId InputStateId
 
typedef Arc::StateId OutputStateId
 
typedef LatticeStringRepository< IntType > StringRepositoryType
 
typedef const StringRepositoryType::Entry * StringId
 
typedef unordered_map< const vector< Element > *, OutputStateId, SubsetKey, SubsetEqualMinimalSubsetHash
 
typedef unordered_map< const vector< Element > *, Element, SubsetKey, SubsetEqualInitialSubsetHash
 

Private Member Functions

void ConvertToMinimal (vector< Element > *subset)
 
OutputStateId MinimalToStateId (const vector< Element > &subset, const double forward_cost)
 
OutputStateId InitialToStateId (const vector< Element > &subset_in, double forward_cost, Weight *remaining_weight, StringId *common_prefix)
 
int Compare (const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const
 
void EpsilonClosure (vector< Element > *subset)
 
void ProcessFinal (OutputStateId output_state_id)
 
void NormalizeSubset (vector< Element > *elems, Weight *tot_weight, StringId *common_str)
 
void MakeSubsetUnique (vector< Element > *subset)
 
void ProcessTransition (OutputStateId ostate_id, Label ilabel, vector< Element > *subset)
 
void ProcessTransitions (OutputStateId output_state_id)
 
bool IsIsymbolOrFinal (InputStateId state)
 
void ComputeBackwardWeight ()
 
void InitializeDeterminization ()
 
 KALDI_DISALLOW_COPY_AND_ASSIGN (LatticeDeterminizerPruned)
 
void AddStrings (const vector< Element > &vec, vector< StringId > *needed_strings)
 

Private Attributes

vector< OutputState * > output_states_
 
int num_arcs_
 
int num_elems_
 
const ExpandedFst< Arc > * ifst_
 
std::vector< double > backward_costs_
 
double beam_
 
double cutoff_
 
DeterminizeLatticePrunedOptions opts_
 
SubsetKey hasher_
 
SubsetEqual equal_
 
bool determinized_
 
MinimalSubsetHash minimal_hash_
 
InitialSubsetHash initial_hash_
 
std::priority_queue< Task *, vector< Task * >, TaskComparequeue_
 
vector< pair< Label, Element > > all_elems_tmp_
 
vector< char > isymbol_or_final_
 
LatticeStringRepository< IntType > repository_
 

Detailed Description

template<class Weight, class IntType>
class fst::LatticeDeterminizerPruned< Weight, IntType >

Definition at line 47 of file determinize-lattice-pruned.cc.

Member Typedef Documentation

◆ Arc

typedef ArcTpl<Weight> Arc

Definition at line 55 of file determinize-lattice-pruned.cc.

◆ CompactArc

typedef ArcTpl<CompactWeight> CompactArc

Definition at line 54 of file determinize-lattice-pruned.cc.

◆ CompactWeight

Definition at line 53 of file determinize-lattice-pruned.cc.

◆ InitialSubsetHash

typedef unordered_map<const vector<Element>*, Element, SubsetKey, SubsetEqual> InitialSubsetHash
private

Definition at line 499 of file determinize-lattice-pruned.cc.

◆ InputStateId

typedef Arc::StateId InputStateId
private

Definition at line 386 of file determinize-lattice-pruned.cc.

◆ Label

typedef Arc::Label Label
private

Definition at line 384 of file determinize-lattice-pruned.cc.

◆ MinimalSubsetHash

typedef unordered_map<const vector<Element>*, OutputStateId, SubsetKey, SubsetEqual> MinimalSubsetHash
private

Definition at line 491 of file determinize-lattice-pruned.cc.

◆ OutputStateId

typedef Arc::StateId OutputStateId
private

Definition at line 387 of file determinize-lattice-pruned.cc.

◆ StateId

typedef Arc::StateId StateId
private

Definition at line 385 of file determinize-lattice-pruned.cc.

◆ StringId

typedef const StringRepositoryType::Entry* StringId
private

Definition at line 391 of file determinize-lattice-pruned.cc.

◆ StringRepositoryType

Definition at line 390 of file determinize-lattice-pruned.cc.

Member Enumeration Documentation

◆ IsymbolOrFinal

Constructor & Destructor Documentation

◆ LatticeDeterminizerPruned()

LatticeDeterminizerPruned ( const ExpandedFst< Arc > &  ifst,
double  beam,
DeterminizeLatticePrunedOptions  opts 
)
inline

Definition at line 191 of file determinize-lattice-pruned.cc.

References KALDI_ASSERT.

193  :
194  num_arcs_(0), num_elems_(0), ifst_(ifst.Copy()), beam_(beam), opts_(opts),
195  equal_(opts_.delta), determinized_(false),
197  KALDI_ASSERT(Weight::Properties() & kIdempotent); // this algorithm won't
198  // work correctly otherwise.
199  }
DeterminizeLatticePrunedOptions opts_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ ~LatticeDeterminizerPruned()

Member Function Documentation

◆ AddStrings()

void AddStrings ( const vector< Element > &  vec,
vector< StringId > *  needed_strings 
)
inlineprivate

Definition at line 1182 of file determinize-lattice-pruned.cc.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::RebuildRepository().

1183  {
1184  for (typename std::vector<Element>::const_iterator iter = vec.begin();
1185  iter != vec.end(); ++iter)
1186  needed_strings->push_back(iter->string);
1187  }

◆ CheckMemoryUsage()

bool CheckMemoryUsage ( )
inline

Definition at line 292 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::backward_costs_, LatticeDeterminizerPruned< Weight, IntType >::beam_, LatticeDeterminizerPruned< Weight, IntType >::ifst_, KALDI_VLOG, KALDI_WARN, DeterminizeLatticePrunedOptions::max_mem, LatticeDeterminizerPruned< Weight, IntType >::num_arcs_, LatticeDeterminizerPruned< Weight, IntType >::num_elems_, LatticeDeterminizerPruned< Weight, IntType >::opts_, LatticeDeterminizerPruned< Weight, IntType >::Task::priority_cost, LatticeDeterminizerPruned< Weight, IntType >::queue_, LatticeDeterminizerPruned< Weight, IntType >::RebuildRepository(), and LatticeDeterminizerPruned< Weight, IntType >::repository_.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::Determinize().

292  {
293  int32 repo_size = repository_.MemSize(),
294  arcs_size = num_arcs_ * sizeof(TempArc),
295  elems_size = num_elems_ * sizeof(Element),
296  total_size = repo_size + arcs_size + elems_size;
297  if (opts_.max_mem > 0 && total_size > opts_.max_mem) { // We passed the memory threshold.
298  // This is usually due to the repository getting large, so we
299  // clean this out.
301  int32 new_repo_size = repository_.MemSize(),
302  new_total_size = new_repo_size + arcs_size + elems_size;
303 
304  KALDI_VLOG(2) << "Rebuilt repository in determinize-lattice: repository shrank from "
305  << repo_size << " to " << new_repo_size << " bytes (approximately)";
306 
307  if (new_total_size > static_cast<int32>(opts_.max_mem * 0.8)) {
308  // Rebuilding didn't help enough-- we need a margin to stop
309  // having to rebuild too often. We'll just return to the user at
310  // this point, with a partial lattice that's pruned tighter than
311  // the specified beam. Here we figure out what the effective
312  // beam was.
313  double effective_beam = beam_;
314  if (!queue_.empty()) { // Note: queue should probably not be empty; we're
315  // just being paranoid here.
316  Task *task = queue_.top();
317  double total_weight = backward_costs_[ifst_->Start()]; // best weight of FST.
318  effective_beam = task->priority_cost - total_weight;
319  }
320  KALDI_WARN << "Did not reach requested beam in determinize-lattice: "
321  << "size exceeds maximum " << opts_.max_mem
322  << " bytes; (repo,arcs,elems) = (" << repo_size << ","
323  << arcs_size << "," << elems_size
324  << "), after rebuilding, repo size was " << new_repo_size
325  << ", effective beam was " << effective_beam
326  << " vs. requested beam " << beam_;
327  return false;
328  }
329  }
330  return true;
331  }
DeterminizeLatticePrunedOptions opts_
kaldi::int32 int32
#define KALDI_WARN
Definition: kaldi-error.h:150
LatticeStringRepository< IntType > repository_
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_

◆ Compare()

int Compare ( const Weight a_w,
StringId  a_str,
const Weight b_w,
StringId  b_str 
) const
inlineprivate

Definition at line 609 of file determinize-lattice-pruned.cc.

References fst::Compare(), rnnlm::i, KALDI_ASSERT, and LatticeDeterminizerPruned< Weight, IntType >::repository_.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::EpsilonClosure(), LatticeDeterminizerPruned< Weight, IntType >::MakeSubsetUnique(), and LatticeDeterminizerPruned< Weight, IntType >::ProcessFinal().

610  {
611  int weight_comp = fst::Compare(a_w, b_w);
612  if (weight_comp != 0) return weight_comp;
613  // now comparing strings.
614  if (a_str == b_str) return 0;
615  vector<IntType> a_vec, b_vec;
616  repository_.ConvertToVector(a_str, &a_vec);
617  repository_.ConvertToVector(b_str, &b_vec);
618  // First compare their lengths.
619  int a_len = a_vec.size(), b_len = b_vec.size();
620  // use opposite order on the string lengths (c.f. Compare in
621  // lattice-weight.h)
622  if (a_len > b_len) return -1;
623  else if (a_len < b_len) return 1;
624  for(int i = 0; i < a_len; i++) {
625  if (a_vec[i] < b_vec[i]) return -1;
626  else if (a_vec[i] > b_vec[i]) return 1;
627  }
628  KALDI_ASSERT(0); // because we checked if a_str == b_str above, shouldn't reach here
629  return 0;
630  }
LatticeStringRepository< IntType > repository_
int Compare(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
Compare returns -1 if w1 < w2, +1 if w1 > w2, and 0 if w1 == w2.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ ComputeBackwardWeight()

void ComputeBackwardWeight ( )
inlineprivate

Definition at line 1016 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::backward_costs_, LatticeDeterminizerPruned< Weight, IntType >::beam_, fst::ConvertToCost(), LatticeDeterminizerPruned< Weight, IntType >::cutoff_, LatticeDeterminizerPruned< Weight, IntType >::ifst_, KALDI_ASSERT, and KALDI_WARN.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::InitializeDeterminization().

1016  {
1017  // Sets up the backward_costs_ array, and the cutoff_ variable.
1018  KALDI_ASSERT(beam_ > 0);
1019 
1020  // Only handle the toplogically sorted case.
1021  backward_costs_.resize(ifst_->NumStates());
1022  for (StateId s = ifst_->NumStates() - 1; s >= 0; s--) {
1023  double &cost = backward_costs_[s];
1024  cost = ConvertToCost(ifst_->Final(s));
1025  for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, s);
1026  !aiter.Done(); aiter.Next()) {
1027  const Arc &arc = aiter.Value();
1028  cost = std::min(cost,
1029  ConvertToCost(arc.weight) + backward_costs_[arc.nextstate]);
1030  }
1031  }
1032 
1033  if (ifst_->Start() == kNoStateId) return; // we'll be returning
1034  // an empty FST.
1035 
1036  double best_cost = backward_costs_[ifst_->Start()];
1037  if (best_cost == std::numeric_limits<double>::infinity())
1038  KALDI_WARN << "Total weight of input lattice is zero.";
1039  cutoff_ = best_cost + beam_;
1040  }
double ConvertToCost(const LatticeWeightTpl< Float > &w)
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ ConvertToMinimal()

void ConvertToMinimal ( vector< Element > *  subset)
inlineprivate

Definition at line 505 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::IsIsymbolOrFinal(), and KALDI_ASSERT.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::InitializeDeterminization(), and LatticeDeterminizerPruned< Weight, IntType >::InitialToStateId().

505  {
506  KALDI_ASSERT(!subset->empty());
507  typename vector<Element>::iterator cur_in = subset->begin(),
508  cur_out = subset->begin(), end = subset->end();
509  while (cur_in != end) {
510  if(IsIsymbolOrFinal(cur_in->state)) { // keep it...
511  *cur_out = *cur_in;
512  cur_out++;
513  }
514  cur_in++;
515  }
516  subset->resize(cur_out - subset->begin());
517  }
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Determinize()

bool Determinize ( double *  effective_beam)
inline

Definition at line 333 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::backward_costs_, LatticeDeterminizerPruned< Weight, IntType >::beam_, LatticeDeterminizerPruned< Weight, IntType >::CheckMemoryUsage(), LatticeDeterminizerPruned< Weight, IntType >::determinized_, LatticeDeterminizerPruned< Weight, IntType >::ifst_, LatticeDeterminizerPruned< Weight, IntType >::InitializeDeterminization(), KALDI_ASSERT, KALDI_VLOG, LatticeDeterminizerPruned< Weight, IntType >::Task::label, DeterminizeLatticePrunedOptions::max_arcs, DeterminizeLatticePrunedOptions::max_states, LatticeDeterminizerPruned< Weight, IntType >::num_arcs_, LatticeDeterminizerPruned< Weight, IntType >::opts_, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::ProcessTransition(), LatticeDeterminizerPruned< Weight, IntType >::queue_, LatticeDeterminizerPruned< Weight, IntType >::Task::state, and LatticeDeterminizerPruned< Weight, IntType >::Task::subset.

Referenced by fst::DeterminizeLatticePruned().

333  {
335  // This determinizes the input fst but leaves it in the "special format"
336  // in "output_arcs_". Must be called after Initialize(). To get the
337  // output, call one of the Output routines.
338 
339  InitializeDeterminization(); // some start-up tasks.
340  while (!queue_.empty()) {
341  Task *task = queue_.top();
342  // Note: the queue contains only tasks that are "within the beam".
343  // We also have to check whether we have reached one of the user-specified
344  // maximums, of estimated memory, arcs, or states. The condition for
345  // ending is:
346  // num-states is more than user specified, OR
347  // num-arcs is more than user specified, OR
348  // memory passed a user-specified threshold and cleanup failed
349  // to get it below that threshold.
350  size_t num_states = output_states_.size();
351  if ((opts_.max_states > 0 && num_states > opts_.max_states) ||
352  (opts_.max_arcs > 0 && num_arcs_ > opts_.max_arcs) ||
353  (num_states % 10 == 0 && !CheckMemoryUsage())) { // note: at some point
354  // it was num_states % 100, not num_states % 10, but I encountered an example
355  // where memory was exhausted before we reached state #100.
356  KALDI_VLOG(1) << "Lattice determinization terminated but not "
357  << " because of lattice-beam. (#states, #arcs) is ( "
358  << output_states_.size() << ", " << num_arcs_
359  << " ), versus limits ( " << opts_.max_states << ", "
360  << opts_.max_arcs << " ) (else, may be memory limit).";
361  break;
362  // we terminate the determinization here-- whatever we already expanded is
363  // what we'll return... because we expanded stuff in order of total
364  // (forward-backward) weight, the stuff we returned first is the most
365  // important.
366  }
367  queue_.pop();
368  ProcessTransition(task->state, task->label, &(task->subset));
369  delete task;
370  }
371  determinized_ = true;
372  if (effective_beam != NULL) {
373  if (queue_.empty()) *effective_beam = beam_;
374  else
375  *effective_beam = queue_.top()->priority_cost -
376  backward_costs_[ifst_->Start()];
377  }
378  return (queue_.empty()); // return success if queue was empty, i.e. we processed
379  // all tasks and did not break out of the loop early due to reaching a memory,
380  // arc or state limit.
381  }
DeterminizeLatticePrunedOptions opts_
void ProcessTransition(OutputStateId ostate_id, Label ilabel, vector< Element > *subset)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_

◆ EpsilonClosure()

void EpsilonClosure ( vector< Element > *  subset)
inlineprivate

Definition at line 637 of file determinize-lattice-pruned.cc.

References fst::Compare(), LatticeDeterminizerPruned< Weight, IntType >::Compare(), LatticeDeterminizerPruned< Weight, IntType >::ifst_, KALDI_ERR, DeterminizeLatticePrunedOptions::max_loop, LatticeDeterminizerPruned< Weight, IntType >::opts_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::Element::state, LatticeDeterminizerPruned< Weight, IntType >::Element::string, fst::Times(), and LatticeDeterminizerPruned< Weight, IntType >::Element::weight.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::InitializeDeterminization(), and LatticeDeterminizerPruned< Weight, IntType >::InitialToStateId().

637  {
638  // at input, subset must have only one example of each StateId. [will still
639  // be so at output]. This function follows input-epsilons, and augments the
640  // subset accordingly.
641 
642  std::priority_queue<Element, vector<Element>, greater<Element> > queue;
643  unordered_map<InputStateId, Element> cur_subset;
644  typedef typename unordered_map<InputStateId, Element>::iterator MapIter;
645  typedef typename vector<Element>::const_iterator VecIter;
646 
647  for (VecIter iter = subset->begin(); iter != subset->end(); ++iter) {
648  queue.push(*iter);
649  cur_subset[iter->state] = *iter;
650  }
651 
652  // find whether input fst is known to be sorted on input label.
653  bool sorted = ((ifst_->Properties(kILabelSorted, false) & kILabelSorted) != 0);
654  bool replaced_elems = false; // relates to an optimization, see below.
655  int counter = 0; // stops infinite loops here for non-lattice-determinizable input
656  // (e.g. input with negative-cost epsilon loops); useful in testing.
657  while (queue.size() != 0) {
658  Element elem = queue.top();
659  queue.pop();
660 
661  // The next if-statement is a kind of optimization. It's to prevent us
662  // unnecessarily repeating the processing of a state. "cur_subset" always
663  // contains only one Element with a particular state. The issue is that
664  // whenever we modify the Element corresponding to that state in "cur_subset",
665  // both the new (optimal) and old (less-optimal) Element will still be in
666  // "queue". The next if-statement stops us from wasting compute by
667  // processing the old Element.
668  if (replaced_elems && cur_subset[elem.state] != elem)
669  continue;
670  if (opts_.max_loop > 0 && counter++ > opts_.max_loop) {
671  KALDI_ERR << "Lattice determinization aborted since looped more than "
672  << opts_.max_loop << " times during epsilon closure.";
673  }
674  for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, elem.state); !aiter.Done(); aiter.Next()) {
675  const Arc &arc = aiter.Value();
676  if (sorted && arc.ilabel != 0) break; // Break from the loop: due to sorting there will be no
677  // more transitions with epsilons as input labels.
678  if (arc.ilabel == 0
679  && arc.weight != Weight::Zero()) { // Epsilon transition.
680  Element next_elem;
681  next_elem.state = arc.nextstate;
682  next_elem.weight = Times(elem.weight, arc.weight);
683  // next_elem.string is not set up yet... create it only
684  // when we know we need it (this is an optimization)
685 
686  MapIter iter = cur_subset.find(next_elem.state);
687  if (iter == cur_subset.end()) {
688  // was no such StateId: insert and add to queue.
689  next_elem.string = (arc.olabel == 0 ? elem.string :
690  repository_.Successor(elem.string, arc.olabel));
691  cur_subset[next_elem.state] = next_elem;
692  queue.push(next_elem);
693  } else {
694  // was not inserted because one already there. In normal
695  // determinization we'd add the weights. Here, we find which one
696  // has the better weight, and keep its corresponding string.
697  int comp = fst::Compare(next_elem.weight, iter->second.weight);
698  if (comp == 0) { // A tie on weights. This should be a rare case;
699  // we don't optimize for it.
700  next_elem.string = (arc.olabel == 0 ? elem.string :
701  repository_.Successor(elem.string,
702  arc.olabel));
703  comp = Compare(next_elem.weight, next_elem.string,
704  iter->second.weight, iter->second.string);
705  }
706  if(comp == 1) { // next_elem is better, so use its (weight, string)
707  next_elem.string = (arc.olabel == 0 ? elem.string :
708  repository_.Successor(elem.string, arc.olabel));
709  iter->second.string = next_elem.string;
710  iter->second.weight = next_elem.weight;
711  queue.push(next_elem);
712  replaced_elems = true;
713  }
714  // else it is the same or worse, so use original one.
715  }
716  }
717  }
718  }
719 
720  { // copy cur_subset to subset.
721  subset->clear();
722  subset->reserve(cur_subset.size());
723  MapIter iter = cur_subset.begin(), end = cur_subset.end();
724  for (; iter != end; ++iter) subset->push_back(iter->second);
725  // sort by state ID, because the subset hash function is order-dependent(see SubsetKey)
726  std::sort(subset->begin(), subset->end());
727  }
728  }
DeterminizeLatticePrunedOptions opts_
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
#define KALDI_ERR
Definition: kaldi-error.h:147
LatticeStringRepository< IntType > repository_
int Compare(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
Compare returns -1 if w1 < w2, +1 if w1 > w2, and 0 if w1 == w2.
int Compare(const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const

◆ FreeMostMemory()

void FreeMostMemory ( )
inline

Definition at line 210 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::all_elems_tmp_, rnnlm::i, LatticeDeterminizerPruned< Weight, IntType >::ifst_, LatticeDeterminizerPruned< Weight, IntType >::initial_hash_, LatticeDeterminizerPruned< Weight, IntType >::isymbol_or_final_, LatticeDeterminizerPruned< Weight, IntType >::minimal_hash_, LatticeDeterminizerPruned< Weight, IntType >::output_states_, and LatticeDeterminizerPruned< Weight, IntType >::queue_.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::Output(), and LatticeDeterminizerPruned< Weight, IntType >::~LatticeDeterminizerPruned().

210  {
211  if (ifst_) {
212  delete ifst_;
213  ifst_ = NULL;
214  }
215  { MinimalSubsetHash tmp; tmp.swap(minimal_hash_); }
216 
217  for (size_t i = 0; i < output_states_.size(); i++) {
218  vector<Element> empty_subset;
219  empty_subset.swap(output_states_[i]->minimal_subset);
220  }
221 
222  for (typename InitialSubsetHash::iterator iter = initial_hash_.begin();
223  iter != initial_hash_.end(); ++iter)
224  delete iter->first;
225  { InitialSubsetHash tmp; tmp.swap(initial_hash_); }
226  for (size_t i = 0; i < output_states_.size(); i++) {
227  vector<Element> tmp;
228  tmp.swap(output_states_[i]->minimal_subset);
229  }
230  { vector<char> tmp; tmp.swap(isymbol_or_final_); }
231  { // Free up the queue. I'm not sure how to make sure all
232  // the memory is really freed (no swap() function)... doesn't really
233  // matter much though.
234  while (!queue_.empty()) {
235  Task *t = queue_.top();
236  delete t;
237  queue_.pop();
238  }
239  }
240  { vector<pair<Label, Element> > tmp; tmp.swap(all_elems_tmp_); }
241  }
unordered_map< const vector< Element > *, Element, SubsetKey, SubsetEqual > InitialSubsetHash
vector< pair< Label, Element > > all_elems_tmp_
unordered_map< const vector< Element > *, OutputStateId, SubsetKey, SubsetEqual > MinimalSubsetHash
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_

◆ FreeOutputStates()

void FreeOutputStates ( )
inline

Definition at line 201 of file determinize-lattice-pruned.cc.

References rnnlm::i, and LatticeDeterminizerPruned< Weight, IntType >::output_states_.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::Output(), and LatticeDeterminizerPruned< Weight, IntType >::~LatticeDeterminizerPruned().

201  {
202  for (size_t i = 0; i < output_states_.size(); i++)
203  delete output_states_[i];
204  vector<OutputState*> temp;
205  temp.swap(output_states_);
206  }

◆ InitializeDeterminization()

void InitializeDeterminization ( )
inlineprivate

Definition at line 1042 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::ComputeBackwardWeight(), LatticeDeterminizerPruned< Weight, IntType >::ConvertToMinimal(), LatticeDeterminizerPruned< Weight, IntType >::EpsilonClosure(), LatticeDeterminizerPruned< Weight, IntType >::ifst_, LatticeDeterminizerPruned< Weight, IntType >::initial_hash_, KALDI_ASSERT, LatticeDeterminizerPruned< Weight, IntType >::KALDI_DISALLOW_COPY_AND_ASSIGN(), LatticeDeterminizerPruned< Weight, IntType >::minimal_hash_, LatticeDeterminizerPruned< Weight, IntType >::OutputState::minimal_subset, LatticeDeterminizerPruned< Weight, IntType >::num_elems_, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::ProcessFinal(), LatticeDeterminizerPruned< Weight, IntType >::ProcessTransitions(), and LatticeDeterminizerPruned< Weight, IntType >::repository_.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::Determinize().

1042  {
1043  // We insist that the input lattice be topologically sorted. This is not a
1044  // fundamental limitation of the algorithm (which in principle should be
1045  // applicable to even cyclic FSTs), but it helps us more efficiently
1046  // compute the backward_costs_ array. There may be some other reason we
1047  // require this, that escapes me at the moment.
1048  KALDI_ASSERT(ifst_->Properties(kTopSorted, true) != 0);
1050 #if !(__GNUC__ == 4 && __GNUC_MINOR__ == 0)
1051  if(ifst_->Properties(kExpanded, false) != 0) { // if we know the number of
1052  // states in ifst_, it might be a bit more efficient
1053  // to pre-size the hashes so we're not constantly rebuilding them.
1054  StateId num_states =
1055  down_cast<const ExpandedFst<Arc>*, const Fst<Arc> >(ifst_)->NumStates();
1056  minimal_hash_.rehash(num_states/2 + 3);
1057  initial_hash_.rehash(num_states/2 + 3);
1058  }
1059 #endif
1060  InputStateId start_id = ifst_->Start();
1061  if (start_id != kNoStateId) {
1062  /* Create determinized-state corresponding to the start state....
1063  Unlike all the other states, we don't "normalize" the representation
1064  of this determinized-state before we put it into minimal_hash_. This is actually
1065  what we want, as otherwise we'd have problems dealing with any extra weight
1066  and string and might have to create a "super-initial" state which would make
1067  the output nondeterministic. Normalization is only needed to make the
1068  determinized output more minimal anyway, it's not needed for correctness.
1069  Note, we don't put anything in the initial_hash_. The initial_hash_ is only
1070  a lookaside buffer anyway, so this isn't a problem-- it will get populated
1071  later if it needs to be.
1072  */
1073  vector<Element> subset(1);
1074  subset[0].state = start_id;
1075  subset[0].weight = Weight::One();
1076  subset[0].string = repository_.EmptyString(); // Id of empty sequence.
1077  EpsilonClosure(&subset); // follow through epsilon-input links
1078  ConvertToMinimal(&subset); // remove all but final states and
1079  // states with input-labels on arcs out of them.
1080  // Weight::One() is the "forward-weight" of this determinized state...
1081  // i.e. the minimal cost from the start of the determinized FST to this
1082  // state [One() because it's the start state].
1083  OutputState *initial_state = new OutputState(subset, 0);
1084  KALDI_ASSERT(output_states_.empty());
1085  output_states_.push_back(initial_state);
1086  num_elems_ += subset.size();
1087  OutputStateId initial_state_id = 0;
1088  minimal_hash_[&(initial_state->minimal_subset)] = initial_state_id;
1089  ProcessFinal(initial_state_id);
1090  ProcessTransitions(initial_state_id); // this will add tasks to
1091  // the queue, which we'll start processing in Determinize().
1092  }
1093  }
void ProcessTransitions(OutputStateId output_state_id)
void ProcessFinal(OutputStateId output_state_id)
LatticeStringRepository< IntType > repository_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void EpsilonClosure(vector< Element > *subset)
void ConvertToMinimal(vector< Element > *subset)

◆ InitialToStateId()

OutputStateId InitialToStateId ( const vector< Element > &  subset_in,
double  forward_cost,
Weight remaining_weight,
StringId common_prefix 
)
inlineprivate

Definition at line 556 of file determinize-lattice-pruned.cc.

References fst::ConvertToCost(), LatticeDeterminizerPruned< Weight, IntType >::ConvertToMinimal(), LatticeDeterminizerPruned< Weight, IntType >::EpsilonClosure(), LatticeDeterminizerPruned< Weight, IntType >::initial_hash_, KALDI_WARN, LatticeDeterminizerPruned< Weight, IntType >::MinimalToStateId(), LatticeDeterminizerPruned< Weight, IntType >::NormalizeSubset(), LatticeDeterminizerPruned< Weight, IntType >::num_elems_, LatticeDeterminizerPruned< Weight, IntType >::Element::state, LatticeDeterminizerPruned< Weight, IntType >::Element::string, and LatticeDeterminizerPruned< Weight, IntType >::Element::weight.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::ProcessTransition().

559  {
560  typename InitialSubsetHash::const_iterator iter
561  = initial_hash_.find(&subset_in);
562  if (iter != initial_hash_.end()) { // Found a matching subset.
563  const Element &elem = iter->second;
564  *remaining_weight = elem.weight;
565  *common_prefix = elem.string;
566  if (elem.weight == Weight::Zero())
567  KALDI_WARN << "Zero weight!";
568  return elem.state;
569  }
570  // else no matching subset-- have to work it out.
571  vector<Element> subset(subset_in);
572  // Follow through epsilons. Will add no duplicate states. note: after
573  // EpsilonClosure, it is the same as "canonical" subset, except not
574  // normalized (actually we never compute the normalized canonical subset,
575  // only the normalized minimal one).
576  EpsilonClosure(&subset); // follow epsilons.
577  ConvertToMinimal(&subset); // remove all but emitting and final states.
578 
579  Element elem; // will be used to store remaining weight and string, and
580  // OutputStateId, in initial_hash_;
581  NormalizeSubset(&subset, &elem.weight, &elem.string); // normalize subset; put
582  // common string and weight in "elem". The subset is now a minimal,
583  // normalized subset.
584 
585  forward_cost += ConvertToCost(elem.weight);
586  OutputStateId ans = MinimalToStateId(subset, forward_cost);
587  *remaining_weight = elem.weight;
588  *common_prefix = elem.string;
589  if (elem.weight == Weight::Zero())
590  KALDI_WARN << "Zero weight!";
591 
592  // Before returning "ans", add the initial subset to the hash,
593  // so that we can bypass the epsilon-closure etc., next time
594  // we process the same initial subset.
595  vector<Element> *initial_subset_ptr = new vector<Element>(subset_in);
596  elem.state = ans;
597  initial_hash_[initial_subset_ptr] = elem;
598  num_elems_ += initial_subset_ptr->size(); // keep track of memory usage.
599  return ans;
600  }
OutputStateId MinimalToStateId(const vector< Element > &subset, const double forward_cost)
double ConvertToCost(const LatticeWeightTpl< Float > &w)
#define KALDI_WARN
Definition: kaldi-error.h:150
void NormalizeSubset(vector< Element > *elems, Weight *tot_weight, StringId *common_str)
void EpsilonClosure(vector< Element > *subset)
void ConvertToMinimal(vector< Element > *subset)

◆ IsIsymbolOrFinal()

bool IsIsymbolOrFinal ( InputStateId  state)
inlineprivate

Definition at line 990 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::ifst_, LatticeDeterminizerPruned< Weight, IntType >::isymbol_or_final_, KALDI_ASSERT, LatticeDeterminizerPruned< Weight, IntType >::OSF_NO, LatticeDeterminizerPruned< Weight, IntType >::OSF_UNKNOWN, LatticeDeterminizerPruned< Weight, IntType >::OSF_YES, and LatticeDeterminizerPruned< Weight, IntType >::Element::state.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::ConvertToMinimal().

990  { // returns true if this state
991  // of the input FST either is final or has an osymbol on an arc out of it.
992  // Uses the vector isymbol_or_final_ as a cache for this info.
993  KALDI_ASSERT(state >= 0);
994  if (isymbol_or_final_.size() <= state)
995  isymbol_or_final_.resize(state+1, static_cast<char>(OSF_UNKNOWN));
996  if (isymbol_or_final_[state] == static_cast<char>(OSF_NO))
997  return false;
998  else if (isymbol_or_final_[state] == static_cast<char>(OSF_YES))
999  return true;
1000  // else work it out...
1001  isymbol_or_final_[state] = static_cast<char>(OSF_NO);
1002  if (ifst_->Final(state) != Weight::Zero())
1003  isymbol_or_final_[state] = static_cast<char>(OSF_YES);
1004  for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, state);
1005  !aiter.Done();
1006  aiter.Next()) {
1007  const Arc &arc = aiter.Value();
1008  if (arc.ilabel != 0 && arc.weight != Weight::Zero()) {
1009  isymbol_or_final_[state] = static_cast<char>(OSF_YES);
1010  return true;
1011  }
1012  }
1013  return IsIsymbolOrFinal(state); // will only recurse once.
1014  }
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ KALDI_DISALLOW_COPY_AND_ASSIGN()

KALDI_DISALLOW_COPY_AND_ASSIGN ( LatticeDeterminizerPruned< Weight, IntType >  )
private

◆ MakeSubsetUnique()

void MakeSubsetUnique ( vector< Element > *  subset)
inlineprivate

Definition at line 812 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::Compare(), and KALDI_ASSERT.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::ProcessTransitions().

812  {
813  typedef typename vector<Element>::iterator IterType;
814 
815  // This KALDI_ASSERT is designed to fail (usually) if the subset is not sorted on
816  // state.
817  KALDI_ASSERT(subset->size() < 2 || (*subset)[0].state <= (*subset)[1].state);
818 
819  IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end();
820  size_t num_out = 0;
821  // Merge elements with same state-id
822  while (cur_in != end) { // while we have more elements to process.
823  // At this point, cur_out points to location of next place we want to put an element,
824  // cur_in points to location of next element we want to process.
825  if (cur_in != cur_out) *cur_out = *cur_in;
826  cur_in++;
827  while (cur_in != end && cur_in->state == cur_out->state) {
828  if (Compare(cur_in->weight, cur_in->string,
829  cur_out->weight, cur_out->string) == 1) {
830  // if *cur_in > *cur_out in semiring, then take *cur_in.
831  cur_out->string = cur_in->string;
832  cur_out->weight = cur_in->weight;
833  }
834  cur_in++;
835  }
836  cur_out++;
837  num_out++;
838  }
839  subset->resize(num_out);
840  }
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
int Compare(const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const

◆ MinimalToStateId()

OutputStateId MinimalToStateId ( const vector< Element > &  subset,
const double  forward_cost 
)
inlineprivate

Definition at line 524 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::OutputState::forward_cost, KALDI_WARN, LatticeDeterminizerPruned< Weight, IntType >::minimal_hash_, LatticeDeterminizerPruned< Weight, IntType >::OutputState::minimal_subset, LatticeDeterminizerPruned< Weight, IntType >::num_elems_, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::ProcessFinal(), LatticeDeterminizerPruned< Weight, IntType >::ProcessTransitions(), and LatticeDeterminizerPruned< Weight, IntType >::Element::state.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::InitialToStateId().

525  {
526  typename MinimalSubsetHash::const_iterator iter
527  = minimal_hash_.find(&subset);
528  if (iter != minimal_hash_.end()) { // Found a matching subset.
529  OutputStateId state_id = iter->second;
530  const OutputState &state = *(output_states_[state_id]);
531  // Below is just a check that the algorithm is working...
532  if (forward_cost < state.forward_cost - 0.1) {
533  // for large weights, this check could fail due to roundoff.
534  KALDI_WARN << "New cost is less (check the difference is small) "
535  << forward_cost << ", "
536  << state.forward_cost;
537  }
538  return state_id;
539  }
540  OutputStateId state_id = static_cast<OutputStateId>(output_states_.size());
541  OutputState *new_state = new OutputState(subset, forward_cost);
542  minimal_hash_[&(new_state->minimal_subset)] = state_id;
543  output_states_.push_back(new_state);
544  num_elems_ += subset.size();
545  // Note: in the previous algorithm, we pushed the new state-id onto the queue
546  // at this point. Here, the queue happens elsewhere, and we directly process
547  // the state (which result in stuff getting added to the queue).
548  ProcessFinal(state_id); // will work out the final-prob.
549  ProcessTransitions(state_id); // will process transitions and add stuff to the queue.
550  return state_id;
551  }
void ProcessTransitions(OutputStateId output_state_id)
#define KALDI_WARN
Definition: kaldi-error.h:150
void ProcessFinal(OutputStateId output_state_id)

◆ NormalizeSubset()

void NormalizeSubset ( vector< Element > *  elems,
Weight tot_weight,
StringId common_str 
)
inlineprivate

Definition at line 779 of file determinize-lattice-pruned.cc.

References fst::Divide(), rnnlm::i, KALDI_ASSERT, KALDI_WARN, fst::Plus(), LatticeDeterminizerPruned< Weight, IntType >::repository_, and LatticeDeterminizerPruned< Weight, IntType >::Element::weight.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::InitialToStateId(), and LatticeDeterminizerPruned< Weight, IntType >::ProcessTransition().

781  {
782  if(elems->empty()) { // just set common_str, tot_weight
783  // to defaults and return...
784  KALDI_WARN << "empty subset";
785  *common_str = repository_.EmptyString();
786  *tot_weight = Weight::Zero();
787  return;
788  }
789  size_t size = elems->size();
790  vector<IntType> common_prefix;
791  repository_.ConvertToVector((*elems)[0].string, &common_prefix);
792  Weight weight = (*elems)[0].weight;
793  for(size_t i = 1; i < size; i++) {
794  weight = Plus(weight, (*elems)[i].weight);
795  repository_.ReduceToCommonPrefix((*elems)[i].string, &common_prefix);
796  }
797  KALDI_ASSERT(weight != Weight::Zero()); // we made sure to ignore arcs with zero
798  // weights on them, so we shouldn't have zero here.
799  size_t prefix_len = common_prefix.size();
800  for(size_t i = 0; i < size; i++) {
801  (*elems)[i].weight = Divide((*elems)[i].weight, weight, DIVIDE_LEFT);
802  (*elems)[i].string =
803  repository_.RemovePrefix((*elems)[i].string, prefix_len);
804  }
805  *common_str = repository_.ConvertFromVector(common_prefix);
806  *tot_weight = weight;
807  }
LatticeWeightTpl< FloatType > Divide(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, DivideType typ=DIVIDE_ANY)
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
#define KALDI_WARN
Definition: kaldi-error.h:150
fst::StdArc::Weight Weight
LatticeStringRepository< IntType > repository_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Output() [1/2]

void Output ( MutableFst< CompactArc > *  ofst,
bool  destroy = true 
)
inline

Definition at line 60 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::OutputState::arcs, LatticeDeterminizerPruned< Weight, IntType >::determinized_, LatticeDeterminizerPruned< Weight, IntType >::FreeMostMemory(), LatticeDeterminizerPruned< Weight, IntType >::FreeOutputStates(), LatticeDeterminizerPruned< Weight, IntType >::TempArc::ilabel, KALDI_ASSERT, LatticeDeterminizerPruned< Weight, IntType >::TempArc::nextstate, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::TempArc::string, and LatticeDeterminizerPruned< Weight, IntType >::TempArc::weight.

Referenced by fst::DeterminizeLatticePruned().

60  {
62  typedef typename Arc::StateId StateId;
63  StateId nStates = static_cast<StateId>(output_states_.size());
64  if (destroy)
66  ofst->DeleteStates();
67  ofst->SetStart(kNoStateId);
68  if (nStates == 0) {
69  return;
70  }
71  for (StateId s = 0;s < nStates;s++) {
72  OutputStateId news = ofst->AddState();
73  KALDI_ASSERT(news == s);
74  }
75  ofst->SetStart(0);
76  // now process transitions.
77  for (StateId this_state_id = 0; this_state_id < nStates; this_state_id++) {
78  OutputState &this_state = *(output_states_[this_state_id]);
79  vector<TempArc> &this_vec(this_state.arcs);
80  typename vector<TempArc>::const_iterator iter = this_vec.begin(), end = this_vec.end();
81 
82  for (;iter != end; ++iter) {
83  const TempArc &temp_arc(*iter);
84  CompactArc new_arc;
85  vector<Label> olabel_seq;
86  repository_.ConvertToVector(temp_arc.string, &olabel_seq);
87  CompactWeight weight(temp_arc.weight, olabel_seq);
88  if (temp_arc.nextstate == kNoStateId) { // is really final weight.
89  ofst->SetFinal(this_state_id, weight);
90  } else { // is really an arc.
91  new_arc.nextstate = temp_arc.nextstate;
92  new_arc.ilabel = temp_arc.ilabel;
93  new_arc.olabel = temp_arc.ilabel; // acceptor. input == output.
94  new_arc.weight = weight; // includes string and weight.
95  ofst->AddArc(this_state_id, new_arc);
96  }
97  }
98  // Free up memory. Do this inside the loop as ofst is also allocating memory,
99  // and we want to reduce the maximum amount ever allocated.
100  if (destroy) { vector<TempArc> temp; temp.swap(this_vec); }
101  }
102  if (destroy) {
104  repository_.Destroy();
105  }
106  }
fst::StdArc::StateId StateId
CompactLatticeWeightTpl< Weight, IntType > CompactWeight
LatticeStringRepository< IntType > repository_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Output() [2/2]

void Output ( MutableFst< Arc > *  ofst,
bool  destroy = true 
)
inline

Definition at line 111 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::OutputState::arcs, LatticeDeterminizerPruned< Weight, IntType >::FreeMostMemory(), LatticeDeterminizerPruned< Weight, IntType >::FreeOutputStates(), rnnlm::i, LatticeDeterminizerPruned< Weight, IntType >::TempArc::ilabel, KALDI_ASSERT, LatticeDeterminizerPruned< Weight, IntType >::TempArc::nextstate, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::TempArc::string, and LatticeDeterminizerPruned< Weight, IntType >::TempArc::weight.

111  {
112  // Outputs to standard fst.
113  OutputStateId nStates = static_cast<OutputStateId>(output_states_.size());
114  ofst->DeleteStates();
115  if (nStates == 0) {
116  ofst->SetStart(kNoStateId);
117  return;
118  }
119  if (destroy)
120  FreeMostMemory();
121  // Add basic states-- but we will add extra ones to account for strings on output.
122  for (OutputStateId s = 0; s< nStates;s++) {
123  OutputStateId news = ofst->AddState();
124  KALDI_ASSERT(news == s);
125  }
126  ofst->SetStart(0);
127  for (OutputStateId this_state_id = 0; this_state_id < nStates; this_state_id++) {
128  OutputState &this_state = *(output_states_[this_state_id]);
129  vector<TempArc> &this_vec(this_state.arcs);
130 
131  typename vector<TempArc>::const_iterator iter = this_vec.begin(), end = this_vec.end();
132  for (; iter != end; ++iter) {
133  const TempArc &temp_arc(*iter);
134  vector<Label> seq;
135  repository_.ConvertToVector(temp_arc.string, &seq);
136 
137  if (temp_arc.nextstate == kNoStateId) { // Really a final weight.
138  // Make a sequence of states going to a final state, with the strings
139  // as labels. Put the weight on the first arc.
140  OutputStateId cur_state = this_state_id;
141  for (size_t i = 0; i < seq.size(); i++) {
142  OutputStateId next_state = ofst->AddState();
143  Arc arc;
144  arc.nextstate = next_state;
145  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
146  arc.ilabel = 0; // epsilon.
147  arc.olabel = seq[i];
148  ofst->AddArc(cur_state, arc);
149  cur_state = next_state;
150  }
151  ofst->SetFinal(cur_state, (seq.size() == 0 ? temp_arc.weight : Weight::One()));
152  } else { // Really an arc.
153  OutputStateId cur_state = this_state_id;
154  // Have to be careful with this integer comparison (i+1 < seq.size()) because unsigned.
155  // i < seq.size()-1 could fail for zero-length sequences.
156  for (size_t i = 0; i+1 < seq.size();i++) {
157  // for all but the last element of seq, create new state.
158  OutputStateId next_state = ofst->AddState();
159  Arc arc;
160  arc.nextstate = next_state;
161  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
162  arc.ilabel = (i == 0 ? temp_arc.ilabel : 0); // put ilabel on first element of seq.
163  arc.olabel = seq[i];
164  ofst->AddArc(cur_state, arc);
165  cur_state = next_state;
166  }
167  // Add the final arc in the sequence.
168  Arc arc;
169  arc.nextstate = temp_arc.nextstate;
170  arc.weight = (seq.size() <= 1 ? temp_arc.weight : Weight::One());
171  arc.ilabel = (seq.size() <= 1 ? temp_arc.ilabel : 0);
172  arc.olabel = (seq.size() > 0 ? seq.back() : 0);
173  ofst->AddArc(cur_state, arc);
174  }
175  }
176  // Free up memory. Do this inside the loop as ofst is also allocating memory
177  if (destroy) { vector<TempArc> temp; temp.swap(this_vec); }
178  }
179  if (destroy) {
181  repository_.Destroy();
182  }
183  }
LatticeStringRepository< IntType > repository_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ ProcessFinal()

void ProcessFinal ( OutputStateId  output_state_id)
inlineprivate

Definition at line 736 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::OutputState::arcs, LatticeDeterminizerPruned< Weight, IntType >::Compare(), fst::ConvertToCost(), LatticeDeterminizerPruned< Weight, IntType >::cutoff_, LatticeDeterminizerPruned< Weight, IntType >::OutputState::forward_cost, LatticeDeterminizerPruned< Weight, IntType >::ifst_, LatticeDeterminizerPruned< Weight, IntType >::TempArc::ilabel, LatticeDeterminizerPruned< Weight, IntType >::OutputState::minimal_subset, LatticeDeterminizerPruned< Weight, IntType >::TempArc::nextstate, LatticeDeterminizerPruned< Weight, IntType >::num_arcs_, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::Element::state, LatticeDeterminizerPruned< Weight, IntType >::Element::string, LatticeDeterminizerPruned< Weight, IntType >::TempArc::string, fst::Times(), LatticeDeterminizerPruned< Weight, IntType >::Element::weight, and LatticeDeterminizerPruned< Weight, IntType >::TempArc::weight.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::InitializeDeterminization(), and LatticeDeterminizerPruned< Weight, IntType >::MinimalToStateId().

736  {
737  OutputState &state = *(output_states_[output_state_id]);
738  const vector<Element> &minimal_subset = state.minimal_subset;
739  // processes final-weights for this subset. state.minimal_subset_ may be
740  // empty if the graphs is not connected/trimmed, I think, do don't check
741  // that it's nonempty.
742  StringId final_string = repository_.EmptyString(); // set it to keep the
743  // compiler happy; if it doesn't get set in the loop, we won't use the value anyway.
744  Weight final_weight = Weight::Zero();
745  bool is_final = false;
746  typename vector<Element>::const_iterator iter = minimal_subset.begin(), end = minimal_subset.end();
747  for (; iter != end; ++iter) {
748  const Element &elem = *iter;
749  Weight this_final_weight = Times(elem.weight, ifst_->Final(elem.state));
750  StringId this_final_string = elem.string;
751  if (this_final_weight != Weight::Zero() &&
752  (!is_final || Compare(this_final_weight, this_final_string,
753  final_weight, final_string) == 1)) { // the new
754  // (weight, string) pair is more in semiring than our current
755  // one.
756  is_final = true;
757  final_weight = this_final_weight;
758  final_string = this_final_string;
759  }
760  }
761  if (is_final &&
762  ConvertToCost(final_weight) + state.forward_cost <= cutoff_) {
763  // store final weights in TempArc structure, just like a transition.
764  // Note: we only store the final-weight if it's inside the pruning beam, hence
765  // the stuff with Compare.
766  TempArc temp_arc;
767  temp_arc.ilabel = 0;
768  temp_arc.nextstate = kNoStateId; // special marker meaning "final weight".
769  temp_arc.string = final_string;
770  temp_arc.weight = final_weight;
771  state.arcs.push_back(temp_arc);
772  num_arcs_++;
773  }
774  }
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
double ConvertToCost(const LatticeWeightTpl< Float > &w)
fst::StdArc::Weight Weight
LatticeStringRepository< IntType > repository_
const StringRepositoryType::Entry * StringId
int Compare(const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const

◆ ProcessTransition()

void ProcessTransition ( OutputStateId  ostate_id,
Label  ilabel,
vector< Element > *  subset 
)
inlineprivate

Definition at line 849 of file determinize-lattice-pruned.cc.

References fst::ConvertToCost(), LatticeDeterminizerPruned< Weight, IntType >::TempArc::ilabel, LatticeDeterminizerPruned< Weight, IntType >::InitialToStateId(), LatticeDeterminizerPruned< Weight, IntType >::TempArc::nextstate, LatticeDeterminizerPruned< Weight, IntType >::NormalizeSubset(), LatticeDeterminizerPruned< Weight, IntType >::num_arcs_, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::TempArc::string, fst::Times(), and LatticeDeterminizerPruned< Weight, IntType >::TempArc::weight.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::Determinize().

849  {
850 
851  double forward_cost = output_states_[ostate_id]->forward_cost;
852  StringId common_str;
853  Weight tot_weight;
854  NormalizeSubset(subset, &tot_weight, &common_str);
855  forward_cost += ConvertToCost(tot_weight);
856 
857  OutputStateId nextstate;
858  {
859  Weight next_tot_weight;
860  StringId next_common_str;
861  nextstate = InitialToStateId(*subset,
862  forward_cost,
863  &next_tot_weight,
864  &next_common_str);
865  common_str = repository_.Concatenate(common_str, next_common_str);
866  tot_weight = Times(tot_weight, next_tot_weight);
867  }
868 
869  // Now add an arc to the next state (would have been created if necessary by
870  // InitialToStateId).
871  TempArc temp_arc;
872  temp_arc.ilabel = ilabel;
873  temp_arc.nextstate = nextstate;
874  temp_arc.string = common_str;
875  temp_arc.weight = tot_weight;
876  output_states_[ostate_id]->arcs.push_back(temp_arc); // record the arc.
877  num_arcs_++;
878  }
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
double ConvertToCost(const LatticeWeightTpl< Float > &w)
fst::StdArc::Weight Weight
OutputStateId InitialToStateId(const vector< Element > &subset_in, double forward_cost, Weight *remaining_weight, StringId *common_prefix)
LatticeStringRepository< IntType > repository_
void NormalizeSubset(vector< Element > *elems, Weight *tot_weight, StringId *common_str)
const StringRepositoryType::Entry * StringId

◆ ProcessTransitions()

void ProcessTransitions ( OutputStateId  output_state_id)
inlineprivate

Definition at line 905 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::all_elems_tmp_, LatticeDeterminizerPruned< Weight, IntType >::backward_costs_, fst::ConvertToCost(), LatticeDeterminizerPruned< Weight, IntType >::cutoff_, LatticeDeterminizerPruned< Weight, IntType >::ifst_, KALDI_WARN, LatticeDeterminizerPruned< Weight, IntType >::Task::label, LatticeDeterminizerPruned< Weight, IntType >::MakeSubsetUnique(), LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::Task::priority_cost, LatticeDeterminizerPruned< Weight, IntType >::queue_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::Element::state, LatticeDeterminizerPruned< Weight, IntType >::Task::state, LatticeDeterminizerPruned< Weight, IntType >::Element::string, LatticeDeterminizerPruned< Weight, IntType >::Task::subset, fst::Times(), and LatticeDeterminizerPruned< Weight, IntType >::Element::weight.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::InitializeDeterminization(), and LatticeDeterminizerPruned< Weight, IntType >::MinimalToStateId().

905  {
906  const vector<Element> &minimal_subset = output_states_[output_state_id]->minimal_subset;
907  // it's possible that minimal_subset could be empty if there are
908  // unreachable parts of the graph, so don't check that it's nonempty.
909  vector<pair<Label, Element> > &all_elems(all_elems_tmp_); // use class member
910  // to avoid memory allocation/deallocation.
911  {
912  // Push back into "all_elems", elements corresponding to all
913  // non-epsilon-input transitions out of all states in "minimal_subset".
914  typename vector<Element>::const_iterator iter = minimal_subset.begin(), end = minimal_subset.end();
915  for (;iter != end; ++iter) {
916  const Element &elem = *iter;
917  for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, elem.state); ! aiter.Done(); aiter.Next()) {
918  const Arc &arc = aiter.Value();
919  if (arc.ilabel != 0
920  && arc.weight != Weight::Zero()) { // Non-epsilon transition -- ignore epsilons here.
921  pair<Label, Element> this_pr;
922  this_pr.first = arc.ilabel;
923  Element &next_elem(this_pr.second);
924  next_elem.state = arc.nextstate;
925  next_elem.weight = Times(elem.weight, arc.weight);
926  if (arc.olabel == 0) // output epsilon
927  next_elem.string = elem.string;
928  else
929  next_elem.string = repository_.Successor(elem.string, arc.olabel);
930  all_elems.push_back(this_pr);
931  }
932  }
933  }
934  }
935  PairComparator pc;
936  std::sort(all_elems.begin(), all_elems.end(), pc);
937  // now sorted first on input label, then on state.
938  typedef typename vector<pair<Label, Element> >::const_iterator PairIter;
939  PairIter cur = all_elems.begin(), end = all_elems.end();
940  while (cur != end) {
941  // The old code (non-pruned) called ProcessTransition; here, instead,
942  // we'll put the calls into a priority queue.
943  Task *task = new Task;
944  // Process ranges that share the same input symbol.
945  Label ilabel = cur->first;
946  task->state = output_state_id;
947  task->priority_cost = std::numeric_limits<double>::infinity();
948  task->label = ilabel;
949  while (cur != end && cur->first == ilabel) {
950  task->subset.push_back(cur->second);
951  const Element &element = cur->second;
952  // Note: we'll later include the term "forward_cost" in the
953  // priority_cost.
954  task->priority_cost = std::min(task->priority_cost,
955  ConvertToCost(element.weight) +
956  backward_costs_[element.state]);
957  cur++;
958  }
959 
960  // After the command below, the "priority_cost" is a value comparable to
961  // the total-weight of the input FST, like a total-path weight... of
962  // course, it will typically be less (in the semiring) than that.
963  // note: we represent it just as a double.
964  task->priority_cost += output_states_[output_state_id]->forward_cost;
965 
966  if (task->priority_cost > cutoff_) {
967  // This task would never get done as it's past the pruning cutoff.
968  delete task;
969  } else {
970  MakeSubsetUnique(&(task->subset)); // remove duplicate Elements with the same state.
971  queue_.push(task); // Push the task onto the queue. The queue keeps it
972  // in prioritized order, so we always process the one with the "best"
973  // weight (highest in the semiring).
974 
975  { // this is a check.
976  double best_cost = backward_costs_[ifst_->Start()],
977  tolerance = 0.01 + 1.0e-04 * std::abs(best_cost);
978  if (task->priority_cost < best_cost - tolerance) {
979  KALDI_WARN << "Cost below best cost was encountered:"
980  << task->priority_cost << " < " << best_cost;
981  }
982  }
983  }
984  }
985  all_elems.clear(); // as it's a reference to a class variable; we want it to stay
986  // empty.
987  }
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
double ConvertToCost(const LatticeWeightTpl< Float > &w)
#define KALDI_WARN
Definition: kaldi-error.h:150
LatticeStringRepository< IntType > repository_
vector< pair< Label, Element > > all_elems_tmp_
void MakeSubsetUnique(vector< Element > *subset)
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_

◆ RebuildRepository()

void RebuildRepository ( )
inline

Definition at line 249 of file determinize-lattice-pruned.cc.

References LatticeDeterminizerPruned< Weight, IntType >::AddStrings(), rnnlm::i, LatticeDeterminizerPruned< Weight, IntType >::initial_hash_, rnnlm::j, KALDI_LOG, LatticeDeterminizerPruned< Weight, IntType >::output_states_, LatticeDeterminizerPruned< Weight, IntType >::queue_, LatticeDeterminizerPruned< Weight, IntType >::repository_, LatticeDeterminizerPruned< Weight, IntType >::Element::string, and LatticeDeterminizerPruned< Weight, IntType >::Task::subset.

Referenced by LatticeDeterminizerPruned< Weight, IntType >::CheckMemoryUsage().

249  { // rebuild the string repository,
250  // freeing stuff we don't need.. we call this when memory usage
251  // passes a supplied threshold. We need to accumulate all the
252  // strings we need the repository to "remember", then tell it
253  // to clean the repository.
254  std::vector<StringId> needed_strings;
255  for (size_t i = 0; i < output_states_.size(); i++) {
256  AddStrings(output_states_[i]->minimal_subset, &needed_strings);
257  for (size_t j = 0; j < output_states_[i]->arcs.size(); j++)
258  needed_strings.push_back(output_states_[i]->arcs[j].string);
259  }
260 
261  { // the queue doesn't allow us access to the underlying vector,
262  // so we have to resort to a temporary collection.
263  std::vector<Task*> tasks;
264  while (!queue_.empty()) {
265  Task *task = queue_.top();
266  queue_.pop();
267  tasks.push_back(task);
268  AddStrings(task->subset, &needed_strings);
269  }
270  for (size_t i = 0; i < tasks.size(); i++)
271  queue_.push(tasks[i]);
272  }
273 
274  // the following loop covers strings present in initial_hash_.
275  for (typename InitialSubsetHash::const_iterator
276  iter = initial_hash_.begin();
277  iter != initial_hash_.end(); ++iter) {
278  const vector<Element> &vec = *(iter->first);
279  Element elem = iter->second;
280  AddStrings(vec, &needed_strings);
281  needed_strings.push_back(elem.string);
282  }
283  std::sort(needed_strings.begin(), needed_strings.end());
284  needed_strings.erase(std::unique(needed_strings.begin(),
285  needed_strings.end()),
286  needed_strings.end()); // uniq the strings.
287  KALDI_LOG << "Rebuilding repository.";
288 
289  repository_.Rebuild(needed_strings);
290  }
void AddStrings(const vector< Element > &vec, vector< StringId > *needed_strings)
LatticeStringRepository< IntType > repository_
#define KALDI_LOG
Definition: kaldi-error.h:153
std::priority_queue< Task *, vector< Task * >, TaskCompare > queue_

Member Data Documentation

◆ all_elems_tmp_

◆ backward_costs_

◆ beam_

◆ cutoff_

◆ determinized_

◆ equal_

SubsetEqual equal_
private

Definition at line 1131 of file determinize-lattice-pruned.cc.

◆ hasher_

SubsetKey hasher_
private

Definition at line 1130 of file determinize-lattice-pruned.cc.

◆ ifst_

◆ initial_hash_

◆ isymbol_or_final_

◆ minimal_hash_

◆ num_arcs_

◆ num_elems_

◆ opts_

◆ output_states_

◆ queue_

◆ repository_


The documentation for this class was generated from the following file: