DeterminizerStar< F >::EpsilonClosure Class Reference
Collaboration diagram for DeterminizerStar< F >::EpsilonClosure:

Classes

struct  EpsilonClosureInfo
 

Public Member Functions

 EpsilonClosure (const Fst< Arc > *ifst, int max_states, StringRepository< Label, StringId > *repository, float delta)
 
void GetEpsilonClosure (const std::vector< Element > &input_subset, std::vector< Element > *output_subset)
 

Private Member Functions

void AddOneElement (const Element &elem, const Weight &unprocessed_weight)
 
void ExpandOneElement (const Element &elem, bool sorted, const Weight &unprocessed_weight, bool save_to_queue_2=false)
 

Private Attributes

std::deque< typename Arc::StateId > queue_
 
std::vector< Elementqueue_2_
 
std::vector< int > id_to_index_
 
std::vector< EpsilonClosureInfoecinfo_
 
const Fst< Arc > * ifst_
 
int max_states_
 
StringRepository< Label, StringId > * repository_
 
float delta_
 

Detailed Description

template<class F>
class fst::DeterminizerStar< F >::EpsilonClosure

Definition at line 346 of file determinize-star-inl.h.

Constructor & Destructor Documentation

◆ EpsilonClosure()

EpsilonClosure ( const Fst< Arc > *  ifst,
int  max_states,
StringRepository< Label, StringId > *  repository,
float  delta 
)
inline

Definition at line 348 of file determinize-star-inl.h.

349  :
350  ifst_(ifst), max_states_(max_states), repository_(repository),
351  delta_(delta) {
352 
353  }
StringRepository< Label, StringId > * repository_

Member Function Documentation

◆ AddOneElement()

void AddOneElement ( const Element elem,
const Weight unprocessed_weight 
)
private

Definition at line 746 of file determinize-star-inl.h.

References fst::ApproxEqual(), DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::element, rnnlm::i, DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::in_queue, KALDI_ERR, fst::Plus(), DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, DeterminizerStar< F >::Element::weight, and DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::weight_to_process.

Referenced by DeterminizerStar< F >::EpsilonClosure::GetEpsilonClosure().

746  {
747  // first we try to find the element info in the ecinfo_ vector
748  int index = -1;
749  if (elem.state < id_to_index_.size()) {
750  index = id_to_index_[elem.state];
751  }
752  if (index != -1) {
753  if (index >= ecinfo_.size()) {
754  index = -1;
755  }
756  // since ecinfo_ might store outdated information, we need to check
757  else if (ecinfo_[index].element.state != elem.state) {
758  index = -1;
759  }
760  }
761 
762  if (index == -1) {
763  // was no such StateId: insert and add to queue.
764  ecinfo_.push_back(EpsilonClosureInfo(elem, unprocessed_weight, true));
765  size_t size = id_to_index_.size();
766  if (size < elem.state + 1) {
767  // double the size to reduce memory operations
768  id_to_index_.resize(2 * elem.state + 1, -1);
769  }
770  id_to_index_[elem.state] = ecinfo_.size() - 1;
771  queue_.push_back(elem.state);
772 
773  } else { // one is already there. Add weights.
774  EpsilonClosureInfo &info = ecinfo_[index];
775  if (info.element.string != elem.string) {
776  // Non-functional FST.
777  std::ostringstream ss;
778  ss << "FST was not functional -> not determinizable.";
779  { // Print some debugging information. Can be helpful to debug
780  // the inputs when FSTs are mysteriously non-functional.
781  std::vector<Label> tmp_seq;
782  repository_->SeqOfId(info.element.string, &tmp_seq);
783  ss << "\nFirst string:";
784  for (size_t i = 0; i < tmp_seq.size(); i++)
785  ss << ' ' << tmp_seq[i];
786  ss << "\nSecond string:";
787  repository_->SeqOfId(elem.string, &tmp_seq);
788  for (size_t i = 0; i < tmp_seq.size(); i++)
789  ss << ' ' << tmp_seq[i];
790  }
791  KALDI_ERR << ss.str();
792  }
793 
794  info.weight_to_process =
795  Plus(info.weight_to_process, unprocessed_weight);
796 
797  if (!info.in_queue) {
798  // this is because the code in "else" below: the
799  // iter->second.weight_to_process might not be Zero()
800  Weight weight = Plus(info.element.weight, info.weight_to_process);
801 
802  // What is done below is, we propagate the weight (by adding them
803  // to the queue only when the change is big enough;
804  // otherwise we just store the weight, until before returning
805  // we add the element.weight and weight_to_process together
806  if (! ApproxEqual(weight, info.element.weight, delta_)) {
807  // add extra part of weight to queue.
808  info.in_queue = true;
809  queue_.push_back(elem.state);
810  }
811  }
812  }
813 }
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
std::deque< typename Arc::StateId > queue_
bool ApproxEqual(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, float delta=kDelta)
#define KALDI_ERR
Definition: kaldi-error.h:147
std::vector< EpsilonClosureInfo > ecinfo_
StringRepository< Label, StringId > * repository_

◆ ExpandOneElement()

void ExpandOneElement ( const Element elem,
bool  sorted,
const Weight unprocessed_weight,
bool  save_to_queue_2 = false 
)
private

Definition at line 816 of file determinize-star-inl.h.

References DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, fst::Times(), and DeterminizerStar< F >::Element::weight.

820  {
821  StringId str = elem.string; // copy it here because there is an iterator-
822  // - invalidation problem (it really happens for some FSTs)
823 
824  // now we are going to propagate the "unprocessed_weight"
825  for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state);
826  !aiter.Done(); aiter.Next()) {
827  const Arc &arc = aiter.Value();
828  if (sorted && arc.ilabel > 0) {
829  break;
830  // Break from the loop: due to sorting there will be no
831  // more transitions with epsilons as input labels.
832  }
833  if (arc.ilabel != 0) {
834  continue; // we only process epsilons here
835  }
836  Element next_elem;
837  next_elem.state = arc.nextstate;
838  next_elem.weight = Weight::Zero();
839  Weight next_unprocessed_weight
840  = Times(unprocessed_weight, arc.weight);
841 
842  // now must append strings
843  if (arc.olabel == 0) {
844  next_elem.string = str;
845  } else {
846  std::vector<Label> seq;
847  repository_->SeqOfId(str, &seq);
848  if (arc.olabel != 0)
849  seq.push_back(arc.olabel);
850  next_elem.string = repository_->IdOfSeq(seq);
851  }
852  if (save_to_queue_2) {
853  next_elem.weight = next_unprocessed_weight;
854  queue_2_.push_back(next_elem);
855  } else {
856  AddOneElement(next_elem, next_unprocessed_weight);
857  }
858  }
859 }
void AddOneElement(const Element &elem, const Weight &unprocessed_weight)
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
StringRepository< Label, StringId > * repository_

◆ GetEpsilonClosure()

void GetEpsilonClosure ( const std::vector< Element > &  input_subset,
std::vector< Element > *  output_subset 
)

Definition at line 652 of file determinize-star-inl.h.

References DeterminizerStar< F >::EpsilonClosure::AddOneElement(), DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::element, rnnlm::i, DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::in_queue, KALDI_ERR, fst::Plus(), DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, DeterminizerStar< F >::Element::weight, and DeterminizerStar< F >::EpsilonClosure::EpsilonClosureInfo::weight_to_process.

Referenced by fst::DeterminizeStar().

653  {
654  ecinfo_.resize(0);
655  size_t size = input_subset.size();
656  // find whether input fst is known to be sorted in input label.
657  bool sorted =
658  ((ifst_->Properties(kILabelSorted, false) & kILabelSorted) != 0);
659 
660  // size is still the input_subset.size()
661  for (size_t i = 0; i < size; i++) {
662  ExpandOneElement(input_subset[i], sorted, input_subset[i].weight, true);
663  }
664 
665  size_t s = queue_2_.size();
666  if (s == 0) {
667  *output_subset = input_subset;
668  return;
669  } else {
670  // queue_2 not empty. Need to create the vector<info>
671  for (size_t i = 0; i < size; i++) {
672  // the weight has not been processed yet,
673  // so put all of them in the "weight_to_process"
674  ecinfo_.push_back(EpsilonClosureInfo(input_subset[i],
675  input_subset[i].weight,
676  false));
677  ecinfo_.back().element.weight = Weight::Zero(); // clear the weight
678 
679  if (id_to_index_.size() < input_subset[i].state + 1) {
680  id_to_index_.resize(2 * input_subset[i].state + 1, -1);
681  }
682  id_to_index_[input_subset[i].state] = ecinfo_.size() - 1;
683  }
684  }
685 
686  {
687  Element elem;
688  elem.weight = Weight::Zero();
689  for (size_t i = 0; i < s; i++) {
690  elem.state = queue_2_[i].state;
691  elem.string = queue_2_[i].string;
692  AddOneElement(elem, queue_2_[i].weight);
693  }
694  queue_2_.resize(0);
695  }
696 
697  int counter = 0; // relates to max-states option, used for test.
698  while (!queue_.empty()) {
699  InputStateId id = queue_.front();
700 
701  // no need to check validity of the index
702  // since anything in the queue we are sure they're in the "virtual set"
703  int index = id_to_index_[id];
704  EpsilonClosureInfo &info = ecinfo_[index];
705  Element &elem = info.element;
706  Weight unprocessed_weight = info.weight_to_process;
707 
708  elem.weight = Plus(elem.weight, unprocessed_weight);
709  info.weight_to_process = Weight::Zero();
710 
711  info.in_queue = false;
712  queue_.pop_front();
713 
714  if (max_states_ > 0 && counter++ > max_states_) {
715  KALDI_ERR << "Determinization aborted since looped more than "
716  << max_states_ << " times during epsilon closure";
717  }
718 
719  // generally we need to be careful about iterator-invalidation problem
720  // here we pass a reference (elem), which could be an issue.
721  // In the beginning of ExpandOneElement, we make a copy of elem.string
722  // to avoid that issue
723  ExpandOneElement(elem, sorted, unprocessed_weight);
724  }
725 
726  {
727  // this sorting is based on StateId
728  sort(ecinfo_.begin(), ecinfo_.end());
729 
730  output_subset->clear();
731 
732  size = ecinfo_.size();
733  output_subset->reserve(size);
734  for (size_t i = 0; i < size; i++) {
735  EpsilonClosureInfo& info = ecinfo_[i];
736  if (info.weight_to_process != Weight::Zero()) {
737  info.element.weight = Plus(info.element.weight, info.weight_to_process);
738  }
739  output_subset->push_back(info.element);
740  }
741  }
742 }
void AddOneElement(const Element &elem, const Weight &unprocessed_weight)
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
std::deque< typename Arc::StateId > queue_
void ExpandOneElement(const Element &elem, bool sorted, const Weight &unprocessed_weight, bool save_to_queue_2=false)
#define KALDI_ERR
Definition: kaldi-error.h:147
std::vector< EpsilonClosureInfo > ecinfo_

Member Data Documentation

◆ delta_

float delta_
private

Definition at line 427 of file determinize-star-inl.h.

◆ ecinfo_

std::vector<EpsilonClosureInfo> ecinfo_
private

Definition at line 405 of file determinize-star-inl.h.

◆ id_to_index_

std::vector<int> id_to_index_
private

Definition at line 400 of file determinize-star-inl.h.

◆ ifst_

const Fst<Arc>* ifst_
private

Definition at line 424 of file determinize-star-inl.h.

◆ max_states_

int max_states_
private

Definition at line 425 of file determinize-star-inl.h.

◆ queue_

std::deque<typename Arc::StateId> queue_
private

Definition at line 390 of file determinize-star-inl.h.

◆ queue_2_

std::vector<Element> queue_2_
private

Definition at line 391 of file determinize-star-inl.h.

◆ repository_

StringRepository<Label, StringId>* repository_
private

Definition at line 426 of file determinize-star-inl.h.


The documentation for this class was generated from the following file: