All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DeterminizerStar< F >::EpsilonClosure Class Reference
Collaboration diagram for DeterminizerStar< F >::EpsilonClosure:

Classes

struct  EpsilonClosureInfo
 

Public Member Functions

 EpsilonClosure (const Fst< Arc > *ifst, int max_states, StringRepository< Label, StringId > *repository, float delta)
 
void GetEpsilonClosure (const vector< Element > &input_subset, vector< Element > *output_subset)
 

Private Member Functions

void AddOneElement (const Element &elem, const Weight &unprocessed_weight)
 
void ExpandOneElement (const Element &elem, bool sorted, const Weight &unprocessed_weight, bool save_to_queue_2=false)
 

Private Attributes

deque< typename Arc::StateId > queue_
 
vector< Elementqueue_2_
 
vector< int > id_to_index_
 
vector< EpsilonClosureInfoecinfo_
 
const Fst< Arc > * ifst_
 
int max_states_
 
StringRepository< Label,
StringId > * 
repository_
 
float delta_
 

Detailed Description

template<class F>
class fst::DeterminizerStar< F >::EpsilonClosure

Definition at line 354 of file determinize-star-inl.h.

Constructor & Destructor Documentation

EpsilonClosure ( const Fst< Arc > *  ifst,
int  max_states,
StringRepository< Label, StringId > *  repository,
float  delta 
)
inline

Definition at line 356 of file determinize-star-inl.h.

357  :
358  ifst_(ifst), max_states_(max_states), repository_(repository),
359  delta_(delta) {
360 
361  }
StringRepository< Label, StringId > * repository_

Member Function Documentation

void AddOneElement ( const Element elem,
const Weight unprocessed_weight 
)
private

Definition at line 754 of file determinize-star-inl.h.

References fst::ApproxEqual(), DeterminizerStar< F >::delta_, DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::element, rnnlm::i, DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::in_queue, KALDI_ERR, fst::Plus(), DeterminizerStar< F >::repository_, DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, DeterminizerStar< F >::Element::weight, and DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::weight_to_process.

Referenced by DeterminizerStar< F >::EpsilonClosure::GetEpsilonClosure().

754  {
755  // first we try to find the element info in the ecinfo_ vector
756  int index = -1;
757  if (elem.state < id_to_index_.size()) {
758  index = id_to_index_[elem.state];
759  }
760  if (index != -1) {
761  if (index >= ecinfo_.size()) {
762  index = -1;
763  }
764  // since ecinfo_ might store outdated information, we need to check
765  else if (ecinfo_[index].element.state != elem.state) {
766  index = -1;
767  }
768  }
769 
770  if (index == -1) {
771  // was no such StateId: insert and add to queue.
772  ecinfo_.push_back(EpsilonClosureInfo(elem, unprocessed_weight, true));
773  size_t size = id_to_index_.size();
774  if (size < elem.state + 1) {
775  // double the size to reduce memory operations
776  id_to_index_.resize(2 * elem.state + 1, -1);
777  }
778  id_to_index_[elem.state] = ecinfo_.size() - 1;
779  queue_.push_back(elem.state);
780 
781  } else { // one is already there. Add weights.
782  EpsilonClosureInfo &info = ecinfo_[index];
783  if (info.element.string != elem.string) {
784  // Non-functional FST.
785  std::ostringstream ss;
786  ss << "FST was not functional -> not determinizable.";
787  { // Print some debugging information. Can be helpful to debug
788  // the inputs when FSTs are mysteriously non-functional.
789  vector<Label> tmp_seq;
790  repository_->SeqOfId(info.element.string, &tmp_seq);
791  ss << "\nFirst string:";
792  for (size_t i = 0; i < tmp_seq.size(); i++)
793  ss << ' ' << tmp_seq[i];
794  ss << "\nSecond string:";
795  repository_->SeqOfId(elem.string, &tmp_seq);
796  for (size_t i = 0; i < tmp_seq.size(); i++)
797  ss << ' ' << tmp_seq[i];
798  }
799  KALDI_ERR << ss.str();
800  }
801 
802  info.weight_to_process =
803  Plus(info.weight_to_process, unprocessed_weight);
804 
805  if (!info.in_queue) {
806  // this is because the code in "else" below: the
807  // iter->second.weight_to_process might not be Zero()
808  Weight weight = Plus(info.element.weight, info.weight_to_process);
809 
810  // What is done below is, we propagate the weight (by adding them
811  // to the queue only when the change is big enough;
812  // otherwise we just store the weight, until before returning
813  // we add the element.weight and weight_to_process together
814  if (! ApproxEqual(weight, info.element.weight, delta_)) {
815  // add extra part of weight to queue.
816  info.in_queue = true;
817  queue_.push_back(elem.state);
818  }
819  }
820  }
821 }
vector< EpsilonClosureInfo > ecinfo_
deque< typename Arc::StateId > queue_
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
bool ApproxEqual(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, float delta=kDelta)
#define KALDI_ERR
Definition: kaldi-error.h:127
StringRepository< Label, StringId > * repository_
void ExpandOneElement ( const Element elem,
bool  sorted,
const Weight unprocessed_weight,
bool  save_to_queue_2 = false 
)
private

Definition at line 824 of file determinize-star-inl.h.

References DeterminizerStar< F >::ifst_, DeterminizerStar< F >::repository_, DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, fst::Times(), and DeterminizerStar< F >::Element::weight.

Referenced by DeterminizerStar< F >::EpsilonClosure::GetEpsilonClosure().

828  {
829  StringId str = elem.string; // copy it here because there is an iterator-
830  // - invalidation problem (it really happens for some FSTs)
831 
832  // now we are going to propagate the "unprocessed_weight"
833  for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state);
834  !aiter.Done(); aiter.Next()) {
835  const Arc &arc = aiter.Value();
836  if (sorted && arc.ilabel > 0) {
837  break;
838  // Break from the loop: due to sorting there will be no
839  // more transitions with epsilons as input labels.
840  }
841  if (arc.ilabel != 0) {
842  continue; // we only process epsilons here
843  }
844  Element next_elem;
845  next_elem.state = arc.nextstate;
846  next_elem.weight = Weight::Zero();
847  Weight next_unprocessed_weight
848  = Times(unprocessed_weight, arc.weight);
849 
850  // now must append strings
851  if (arc.olabel == 0) {
852  next_elem.string = str;
853  } else {
854  vector<Label> seq;
855  repository_->SeqOfId(str, &seq);
856  if (arc.olabel != 0)
857  seq.push_back(arc.olabel);
858  next_elem.string = repository_->IdOfSeq(seq);
859  }
860  if (save_to_queue_2) {
861  next_elem.weight = next_unprocessed_weight;
862  queue_2_.push_back(next_elem);
863  } else {
864  AddOneElement(next_elem, next_unprocessed_weight);
865  }
866  }
867 }
void AddOneElement(const Element &elem, const Weight &unprocessed_weight)
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
StringRepository< Label, StringId > * repository_
void GetEpsilonClosure ( const vector< Element > &  input_subset,
vector< Element > *  output_subset 
)

Definition at line 660 of file determinize-star-inl.h.

References DeterminizerStar< F >::EpsilonClosure::AddOneElement(), DeterminizerStar< F >::EpsilonClosure::ecinfo_, DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::element, DeterminizerStar< F >::EpsilonClosure::ExpandOneElement(), rnnlm::i, DeterminizerStar< F >::EpsilonClosure::id_to_index_, DeterminizerStar< F >::EpsilonClosure::ifst_, DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::in_queue, KALDI_ERR, DeterminizerStar< F >::EpsilonClosure::max_states_, fst::Plus(), DeterminizerStar< F >::EpsilonClosure::queue_, DeterminizerStar< F >::EpsilonClosure::queue_2_, DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, DeterminizerStar< F >::Element::weight, and DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::weight_to_process.

Referenced by DeterminizerStar< F >::ProcessSubset().

661  {
662  ecinfo_.resize(0);
663  size_t size = input_subset.size();
664  // find whether input fst is known to be sorted in input label.
665  bool sorted =
666  ((ifst_->Properties(kILabelSorted, false) & kILabelSorted) != 0);
667 
668  // size is still the input_subset.size()
669  for (size_t i = 0; i < size; i++) {
670  ExpandOneElement(input_subset[i], sorted, input_subset[i].weight, true);
671  }
672 
673  size_t s = queue_2_.size();
674  if (s == 0) {
675  *output_subset = input_subset;
676  return;
677  } else {
678  // queue_2 not empty. Need to create the vector<info>
679  for (size_t i = 0; i < size; i++) {
680  // the weight has not been processed yet,
681  // so put all of them in the "weight_to_process"
682  ecinfo_.push_back(EpsilonClosureInfo(input_subset[i],
683  input_subset[i].weight,
684  false));
685  ecinfo_.back().element.weight = Weight::Zero(); // clear the weight
686 
687  if (id_to_index_.size() < input_subset[i].state + 1) {
688  id_to_index_.resize(2 * input_subset[i].state + 1, -1);
689  }
690  id_to_index_[input_subset[i].state] = ecinfo_.size() - 1;
691  }
692  }
693 
694  {
695  Element elem;
696  elem.weight = Weight::Zero();
697  for (size_t i = 0; i < s; i++) {
698  elem.state = queue_2_[i].state;
699  elem.string = queue_2_[i].string;
700  AddOneElement(elem, queue_2_[i].weight);
701  }
702  queue_2_.resize(0);
703  }
704 
705  int counter = 0; // relates to max-states option, used for test.
706  while (!queue_.empty()) {
707  InputStateId id = queue_.front();
708 
709  // no need to check validity of the index
710  // since anything in the queue we are sure they're in the "virtual set"
711  int index = id_to_index_[id];
712  EpsilonClosureInfo &info = ecinfo_[index];
713  Element &elem = info.element;
714  Weight unprocessed_weight = info.weight_to_process;
715 
716  elem.weight = Plus(elem.weight, unprocessed_weight);
717  info.weight_to_process = Weight::Zero();
718 
719  info.in_queue = false;
720  queue_.pop_front();
721 
722  if (max_states_ > 0 && counter++ > max_states_) {
723  KALDI_ERR << "Determinization aborted since looped more than "
724  << max_states_ << " times during epsilon closure";
725  }
726 
727  // generally we need to be careful about iterator-invalidation problem
728  // here we pass a reference (elem), which could be an issue.
729  // In the beginning of ExpandOneElement, we make a copy of elem.string
730  // to avoid that issue
731  ExpandOneElement(elem, sorted, unprocessed_weight);
732  }
733 
734  {
735  // this sorting is based on StateId
736  sort(ecinfo_.begin(), ecinfo_.end());
737 
738  output_subset->clear();
739 
740  size = ecinfo_.size();
741  output_subset->reserve(size);
742  for (size_t i = 0; i < size; i++) {
743  EpsilonClosureInfo& info = ecinfo_[i];
744  if (info.weight_to_process != Weight::Zero()) {
745  info.element.weight = Plus(info.element.weight, info.weight_to_process);
746  }
747  output_subset->push_back(info.element);
748  }
749  }
750 }
vector< EpsilonClosureInfo > ecinfo_
deque< typename Arc::StateId > queue_
void AddOneElement(const Element &elem, const Weight &unprocessed_weight)
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
void ExpandOneElement(const Element &elem, bool sorted, const Weight &unprocessed_weight, bool save_to_queue_2=false)
#define KALDI_ERR
Definition: kaldi-error.h:127

Member Data Documentation

float delta_
private

Definition at line 435 of file determinize-star-inl.h.

vector<EpsilonClosureInfo> ecinfo_
private
vector<int> id_to_index_
private
const Fst<Arc>* ifst_
private
int max_states_
private
deque<typename Arc::StateId> queue_
private
vector<Element> queue_2_
private
StringRepository<Label, StringId>* repository_
private

Definition at line 434 of file determinize-star-inl.h.


The documentation for this class was generated from the following file: