GrammarFstPreparer Class Reference
Collaboration diagram for GrammarFstPreparer:

Classes

struct  ArcCategory
 

Public Types

using FST = VectorFst< StdArc >
 
using Arc = StdArc
 
using StateId = Arc::StateId
 
using Label = Arc::Label
 
using Weight = Arc::Weight
 

Public Member Functions

 GrammarFstPreparer (int32 nonterm_phones_offset, VectorFst< StdArc > *fst)
 
void Prepare ()
 

Private Member Functions

bool IsSpecialState (StateId s) const
 
void MaybeAddFinalProbToState (StateId s)
 
bool NeedEpsilons (StateId s) const
 
bool IsEntryState (StateId s) const
 
void FixArcsToFinalStates (StateId s)
 
void GetCategoryOfArc (const Arc &arc, ArcCategory *arc_category) const
 
void InsertEpsilonsForState (StateId s)
 
int32 GetPhoneSymbolFor (enum NonterminalValues n) const
 

Private Attributes

int32 nonterm_phones_offset_
 
VectorFst< StdArc > * fst_
 
StateId orig_num_states_
 
StateId simple_final_state_
 

Detailed Description

Definition at line 541 of file grammar-fst.cc.

Member Typedef Documentation

◆ Arc

using Arc = StdArc

Definition at line 544 of file grammar-fst.cc.

◆ FST

using FST = VectorFst<StdArc>

Definition at line 543 of file grammar-fst.cc.

◆ Label

using Label = Arc::Label

Definition at line 546 of file grammar-fst.cc.

◆ StateId

using StateId = Arc::StateId

Definition at line 545 of file grammar-fst.cc.

◆ Weight

using Weight = Arc::Weight

Definition at line 547 of file grammar-fst.cc.

Constructor & Destructor Documentation

◆ GrammarFstPreparer()

GrammarFstPreparer ( int32  nonterm_phones_offset,
VectorFst< StdArc > *  fst 
)
inline

Definition at line 549 of file grammar-fst.cc.

550  :
551  nonterm_phones_offset_(nonterm_phones_offset),
552  fst_(fst), orig_num_states_(fst->NumStates()),
553  simple_final_state_(kNoStateId) { }
VectorFst< StdArc > * fst_
Definition: grammar-fst.cc:681
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21

Member Function Documentation

◆ FixArcsToFinalStates()

void FixArcsToFinalStates ( StateId  s)
private

Definition at line 810 of file grammar-fst.cc.

References FasterDecoder::fst_, fst::GetEncodingMultiple(), KALDI_ASSERT, fst::kNontermBigNumber, fst::kNontermEnd, and fst::Times().

810  {
811  int32 encoding_multiple = GetEncodingMultiple(nonterm_phones_offset_),
812  big_number = kNontermBigNumber;
813  for (MutableArcIterator<FST> aiter(fst_, s ); !aiter.Done(); aiter.Next()) {
814  Arc arc = aiter.Value();
815  if (arc.ilabel < big_number)
816  continue;
817  int32 nonterminal = (arc.ilabel - big_number) / encoding_multiple;
818  if (nonterminal == GetPhoneSymbolFor(kNontermEnd)) {
819  KALDI_ASSERT(fst_->NumArcs(arc.nextstate) == 0 &&
820  fst_->Final(arc.nextstate) != Weight::Zero());
821  if (fst_->Final(arc.nextstate) == Weight::One())
822  continue; // There is no problem to fix.
823  if (simple_final_state_ == kNoStateId) {
824  simple_final_state_ = fst_->AddState();
825  fst_->SetFinal(simple_final_state_, Weight::One());
826  }
827  arc.weight = Times(arc.weight, fst_->Final(arc.nextstate));
828  arc.nextstate = simple_final_state_;
829  aiter.SetValue(arc);
830  }
831  }
832 }
VectorFst< StdArc > * fst_
Definition: grammar-fst.cc:681
int32 GetPhoneSymbolFor(enum NonterminalValues n) const
Definition: grammar-fst.cc:676
kaldi::int32 int32
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
int32 GetEncodingMultiple(int32 nonterm_phones_offset)

◆ GetCategoryOfArc()

void GetCategoryOfArc ( const Arc arc,
ArcCategory arc_category 
) const
private

Definition at line 855 of file grammar-fst.cc.

References fst::GetEncodingMultiple(), KALDI_ERR, fst::kNontermBigNumber, fst::kNontermEnd, fst::kNontermUserDefined, GrammarFstPreparer::ArcCategory::nextstate, GrammarFstPreparer::ArcCategory::nonterminal, and GrammarFstPreparer::ArcCategory::olabel.

856  {
857  int32 encoding_multiple = GetEncodingMultiple(nonterm_phones_offset_),
858  big_number = kNontermBigNumber;
859 
860  int32 ilabel = arc.ilabel;
861  if (ilabel < big_number) {
862  arc_category->nonterminal = 0;
863  arc_category->nextstate = kNoStateId;
864  arc_category->olabel = 0;
865  } else {
866  int32 nonterminal = (ilabel - big_number) / encoding_multiple;
867  arc_category->nonterminal = nonterminal;
868  if (nonterminal <= nonterm_phones_offset_) {
869  KALDI_ERR << "Problem decoding nonterminal symbol "
870  "(wrong --nonterm-phones-offset option?), ilabel="
871  << ilabel;
872  }
873  if (nonterminal >= GetPhoneSymbolFor(kNontermUserDefined)) {
874  // This is a user-defined symbol.
875  arc_category->nextstate = arc.nextstate;
876  arc_category->olabel = arc.olabel;
877  } else {
878  arc_category->nextstate = kNoStateId;
879  if (nonterminal == GetPhoneSymbolFor(kNontermEnd))
880  arc_category->olabel = arc.olabel;
881  else
882  arc_category->olabel = 0;
883  }
884  }
885 }
int32 GetPhoneSymbolFor(enum NonterminalValues n) const
Definition: grammar-fst.cc:676
kaldi::int32 int32
#define KALDI_ERR
Definition: kaldi-error.h:147
int32 GetEncodingMultiple(int32 nonterm_phones_offset)

◆ GetPhoneSymbolFor()

int32 GetPhoneSymbolFor ( enum NonterminalValues  n) const
inlineprivate

Definition at line 676 of file grammar-fst.cc.

References rnnlm::n.

676  {
677  return nonterm_phones_offset_ + static_cast<int32>(n);
678  }
kaldi::int32 int32
struct rnnlm::@11::@12 n

◆ InsertEpsilonsForState()

void InsertEpsilonsForState ( StateId  s)
private

Definition at line 888 of file grammar-fst.cc.

References FasterDecoder::fst_, rnnlm::i, KALDI_ASSERT, KALDI_ERR, fst::kNontermBegin, fst::kNontermReenter, kaldi::LogAdd(), GrammarFstPreparer::ArcCategory::nonterminal, and GrammarFstPreparer::ArcCategory::olabel.

888  {
889  // Maps from category of arc, to a pair:
890  // the StateId is the state corresponding to that category.
891  // the float is the cost on the arc leading to that state;
892  // we compute the value that corresponds to the sum of the probabilities
893  // of the leaving arcs, bearing in mind that p = exp(-cost).
894  // We don't insert the arc-category whose 'nonterminal' is 0 here (i.e. the
895  // category for normal arcs); those arcs stay at this state.
896  std::map<ArcCategory, std::pair<StateId, float> > category_to_state;
897 
898  // This loop sets up 'category_to_state'.
899  for (fst::ArcIterator<FST> aiter(*fst_, s); !aiter.Done(); aiter.Next()) {
900  const Arc &arc = aiter.Value();
901  ArcCategory category;
902  GetCategoryOfArc(arc, &category);
903  int32 nonterminal = category.nonterminal;
904  if (nonterminal == 0)
905  continue;
906  if (nonterminal == GetPhoneSymbolFor(kNontermBegin) ||
907  nonterminal == GetPhoneSymbolFor(kNontermReenter)) {
908  KALDI_ERR << "Something went wrong; did not expect to insert epsilons "
909  "for this type of state.";
910  }
911  auto iter = category_to_state.find(category);
912  if (iter == category_to_state.end()) {
913  StateId new_state = fst_->AddState();
914  float cost = arc.weight.Value();
915  category_to_state[category] = std::pair<StateId, float>(new_state, cost);
916  } else {
917  std::pair<StateId, float> &p = iter->second;
918  p.second = -kaldi::LogAdd(-p.second, -arc.weight.Value());
919  }
920  }
921 
922  KALDI_ASSERT(!category_to_state.empty()); // would be a code error.
923 
924  // 'arcs_from_this_state' is a place to put arcs that will put on this state
925  // after we delete all its existing arcs.
926  std::vector<Arc> arcs_from_this_state;
927  arcs_from_this_state.reserve(fst_->NumArcs(s) + category_to_state.size());
928 
929  // add arcs corresponding to transitions to the newly created states, to
930  // 'arcs_from_this_state'
931  for (std::map<ArcCategory, std::pair<StateId, float> >::const_iterator
932  iter = category_to_state.begin(); iter != category_to_state.end();
933  ++iter) {
934  const ArcCategory &category = iter->first;
935  StateId new_state = iter->second.first;
936  float cost = iter->second.second;
937  Arc arc;
938  arc.ilabel = 0;
939  arc.olabel = category.olabel;
940  arc.weight = Weight(cost);
941  arc.nextstate = new_state;
942  arcs_from_this_state.push_back(arc);
943  }
944 
945  // Now add to 'arcs_from_this_state', and to the newly created states,
946  // arcs corresponding to each of the arcs that were originally leaving
947  // this state.
948  for (fst::ArcIterator<FST> aiter(*fst_, s); !aiter.Done(); aiter.Next()) {
949  const Arc &arc = aiter.Value();
950  ArcCategory category;
951  GetCategoryOfArc(arc, &category);
952  int32 nonterminal = category.nonterminal;
953  if (nonterminal == 0) { // this arc remains unchanged; we'll put it back later.
954  arcs_from_this_state.push_back(arc);
955  continue;
956  }
957  auto iter = category_to_state.find(category);
958  KALDI_ASSERT(iter != category_to_state.end());
959  Arc new_arc;
960  new_arc.ilabel = arc.ilabel;
961  if (arc.olabel == category.olabel) {
962  new_arc.olabel = 0; // the olabel went on the epsilon-input arc.
963  } else {
964  KALDI_ASSERT(category.olabel == 0);
965  new_arc.olabel = arc.olabel;
966  }
967  StateId new_state = iter->second.first;
968  float epsilon_arc_cost = iter->second.second;
969  new_arc.weight = Weight(arc.weight.Value() - epsilon_arc_cost);
970  new_arc.nextstate = arc.nextstate;
971  fst_->AddArc(new_state, new_arc);
972  }
973 
974  fst_->DeleteArcs(s);
975  for (size_t i = 0; i < arcs_from_this_state.size(); i++) {
976  fst_->AddArc(s, arcs_from_this_state[i]);
977  }
978  // leave the final-prob on this state as it was before.
979 }
VectorFst< StdArc > * fst_
Definition: grammar-fst.cc:681
int32 GetPhoneSymbolFor(enum NonterminalValues n) const
Definition: grammar-fst.cc:676
Lattice::StateId StateId
kaldi::int32 int32
#define KALDI_ERR
Definition: kaldi-error.h:147
double LogAdd(double x, double y)
Definition: kaldi-math.h:184
void GetCategoryOfArc(const Arc &arc, ArcCategory *arc_category) const
Definition: grammar-fst.cc:855
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ IsEntryState()

bool IsEntryState ( StateId  s) const
private

Definition at line 705 of file grammar-fst.cc.

References FasterDecoder::fst_, fst::GetEncodingMultiple(), fst::kNontermBegin, and fst::kNontermBigNumber.

705  {
706  int32 big_number = kNontermBigNumber,
707  encoding_multiple = GetEncodingMultiple(nonterm_phones_offset_);
708 
709  for (ArcIterator<FST> aiter(*fst_, s ); !aiter.Done(); aiter.Next()) {
710  const Arc &arc = aiter.Value();
711  int32 nonterminal = (arc.ilabel - big_number) /
712  encoding_multiple;
713  // we check that at least one has label with nonterminal equal to #nonterm_begin...
714  // in fact they will all have this value if at least one does, and this was checked
715  // in NeedEpsilons().
716  if (nonterminal == GetPhoneSymbolFor(kNontermBegin))
717  return true;
718  }
719  return false;
720 }
VectorFst< StdArc > * fst_
Definition: grammar-fst.cc:681
int32 GetPhoneSymbolFor(enum NonterminalValues n) const
Definition: grammar-fst.cc:676
kaldi::int32 int32
int32 GetEncodingMultiple(int32 nonterm_phones_offset)

◆ IsSpecialState()

bool IsSpecialState ( StateId  s) const
private

Definition at line 690 of file grammar-fst.cc.

References FasterDecoder::fst_, KALDI_GRAMMAR_FST_SPECIAL_WEIGHT, KALDI_WARN, and fst::kNontermBigNumber.

690  {
691  if (fst_->Final(s).Value() == KALDI_GRAMMAR_FST_SPECIAL_WEIGHT) {
692  // TODO: find a way to detect if it was a coincidence, or not make it an
693  // error, because in principle a user-defined grammar could contain this
694  // special cost.
695  KALDI_WARN << "It looks like you are calling PrepareForGrammarFst twice.";
696  }
697  for (ArcIterator<FST> aiter(*fst_, s ); !aiter.Done(); aiter.Next()) {
698  const Arc &arc = aiter.Value();
699  if (arc.ilabel >= kNontermBigNumber) // 1 million
700  return true;
701  }
702  return false;
703 }
VectorFst< StdArc > * fst_
Definition: grammar-fst.cc:681
#define KALDI_GRAMMAR_FST_SPECIAL_WEIGHT
Definition: grammar-fst.h:67
#define KALDI_WARN
Definition: kaldi-error.h:150

◆ MaybeAddFinalProbToState()

void MaybeAddFinalProbToState ( StateId  s)
private

Definition at line 834 of file grammar-fst.cc.

References FasterDecoder::fst_, fst::GetEncodingMultiple(), KALDI_ASSERT, KALDI_ERR, KALDI_GRAMMAR_FST_SPECIAL_WEIGHT, fst::kNontermBegin, fst::kNontermBigNumber, fst::kNontermEnd, and fst::kNontermUserDefined.

834  {
835  if (fst_->Final(s) != Weight::Zero()) {
836  // Something went wrong and it will require some debugging. In Prepare(),
837  // if we detected that the special state had a nonzero final-prob, we
838  // would have inserted epsilons to remove it, so there may be a bug in
839  // this class's code.
840  KALDI_ERR << "State already final-prob.";
841  }
842  ArcIterator<FST> aiter(*fst_, s );
843  KALDI_ASSERT(!aiter.Done());
844  const Arc &arc = aiter.Value();
845  int32 encoding_multiple = GetEncodingMultiple(nonterm_phones_offset_),
846  big_number = kNontermBigNumber,
847  nonterminal = (arc.ilabel - big_number) / encoding_multiple;
849  if (nonterminal == GetPhoneSymbolFor(kNontermEnd) ||
850  nonterminal >= GetPhoneSymbolFor(kNontermUserDefined)) {
852  }
853 }
VectorFst< StdArc > * fst_
Definition: grammar-fst.cc:681
int32 GetPhoneSymbolFor(enum NonterminalValues n) const
Definition: grammar-fst.cc:676
kaldi::int32 int32
#define KALDI_GRAMMAR_FST_SPECIAL_WEIGHT
Definition: grammar-fst.h:67
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
int32 GetEncodingMultiple(int32 nonterm_phones_offset)

◆ NeedEpsilons()

bool NeedEpsilons ( StateId  s) const
private

Definition at line 723 of file grammar-fst.cc.

References FasterDecoder::fst_, fst::GetEncodingMultiple(), KALDI_ERR, fst::kNontermBegin, fst::kNontermBigNumber, fst::kNontermEnd, fst::kNontermReenter, fst::kNontermUserDefined, GrammarFstPreparer::ArcCategory::nextstate, GrammarFstPreparer::ArcCategory::nonterminal, and GrammarFstPreparer::ArcCategory::olabel.

723  {
724 
725  // See the documentation for GetCategoryOfArc() for explanation of what these are.
726  std::set<ArcCategory> categories;
727 
728  if (fst_->Final(s) != Weight::Zero()) {
729  // A state having a final-prob is considered the same as it having
730  // a non-nonterminal arc out of it.. this would be like a transition
731  // within the same FST.
732  ArcCategory category;
733  category.nonterminal = 0;
734  category.nextstate = kNoStateId;
735  category.olabel = 0;
736  categories.insert(category);
737  }
738 
739  int32 big_number = kNontermBigNumber,
740  encoding_multiple = GetEncodingMultiple(nonterm_phones_offset_);
741 
742  for (ArcIterator<FST> aiter(*fst_, s ); !aiter.Done(); aiter.Next()) {
743  const Arc &arc = aiter.Value();
744  ArcCategory category;
745  GetCategoryOfArc(arc, &category);
746  categories.insert(category);
747 
748  // the rest of this block is just checking.
749  int32 nonterminal = category.nonterminal;
750 
751  if (nonterminal >= GetPhoneSymbolFor(kNontermUserDefined)) {
752  // Check that the destination state of this arc has arcs with
753  // kNontermReenter on them. We'll separately check that such states
754  // don't have other types of arcs leaving them (search for
755  // kNontermReenter below), so it's sufficient to check the first arc.
756  ArcIterator<FST> next_aiter(*fst_, arc.nextstate);
757  if (next_aiter.Done())
758  KALDI_ERR << "Destination state of a user-defined nonterminal "
759  "has no arcs leaving it.";
760  const Arc &next_arc = next_aiter.Value();
761  int32 next_nonterminal = (next_arc.ilabel - big_number) /
762  encoding_multiple;
763  if (next_nonterminal != GetPhoneSymbolFor(kNontermReenter)) {
764  KALDI_ERR << "Expected arcs with user-defined nonterminals to be "
765  "followed by arcs with kNontermReenter.";
766  }
767  }
768  if (nonterminal == GetPhoneSymbolFor(kNontermBegin) &&
769  s != fst_->Start()) {
770  KALDI_ERR << "#nonterm_begin symbol is present but this is not the "
771  "first state. Did you do fstdeterminizestar while compiling?";
772  }
773  if (nonterminal == GetPhoneSymbolFor(kNontermEnd)) {
774  if (fst_->NumArcs(arc.nextstate) != 0 ||
775  fst_->Final(arc.nextstate) == Weight::Zero()) {
776  KALDI_ERR << "Arc with kNontermEnd is not the final arc.";
777  }
778  }
779  }
780  if (categories.size() > 1) {
781  // This state has arcs leading to multiple FST instances.
782  // Do some checking to see that there is nothing really unexpected in
783  // there.
784  for (std::set<ArcCategory>::const_iterator
785  iter = categories.begin();
786  iter != categories.end(); ++iter) {
787  int32 nonterminal = iter->nonterminal;
788  if (nonterminal == nonterm_phones_offset_ + kNontermBegin ||
789  nonterminal == nonterm_phones_offset_ + kNontermReenter)
790  // we don't expect any state which has symbols like (kNontermBegin:p1)
791  // on arcs coming out of it, to also have other types of symbol. The
792  // same goes for kNontermReenter.
793  KALDI_ERR << "We do not expect states with arcs of type "
794  "kNontermBegin/kNontermReenter coming out of them, to also have "
795  "other types of arc.";
796  }
797  }
798  // the first half of the || below relates to olabels on arcs with either
799  // user-defined nonterminals or #nonterm_end (which would become 'leaving_arc'
800  // in the CombineArcs() function of GrammarFst). That function does not allow
801  // nonzero olabels on 'leaving_arc', which would be a problem if the
802  // 'arriving' arc had nonzero olabels, so we solve this by introducing
803  // input-epsilon arcs and putting the olabels on them instead.
804  bool need_epsilons = (categories.size() == 1 &&
805  categories.begin()->olabel != 0) ||
806  categories.size() > 1;
807  return need_epsilons;
808 }
VectorFst< StdArc > * fst_
Definition: grammar-fst.cc:681
int32 GetPhoneSymbolFor(enum NonterminalValues n) const
Definition: grammar-fst.cc:676
kaldi::int32 int32
#define KALDI_ERR
Definition: kaldi-error.h:147
void GetCategoryOfArc(const Arc &arc, ArcCategory *arc_category) const
Definition: grammar-fst.cc:855
int32 GetEncodingMultiple(int32 nonterm_phones_offset)

◆ Prepare()

void Prepare ( )
inline

Definition at line 555 of file grammar-fst.cc.

References FasterDecoder::fst_, fst::InputDeterminizeSingleState(), KALDI_ERR, and KALDI_LOG.

Referenced by fst::PrepareForGrammarFst().

555  {
556  if (fst_->Start() == kNoStateId) {
557  KALDI_ERR << "FST has no states.";
558  }
559  for (StateId s = 0; s < fst_->NumStates(); s++) {
560  if (IsSpecialState(s)) {
561  if (NeedEpsilons(s)) {
563  // This state won't be treated as a 'special' state any more;
564  // all 'special' arcs (arcs with ilabels >= kNontermBigNumber)
565  // have been moved and now leave from newly created states that
566  // this state transitions to via epsilons arcs.
567  } else {
568  // OK, state s is a special state.
571  // The following ensures that the start-state of sub-FSTs only has
572  // a single arc per left-context phone (the graph-building recipe can
573  // end up creating more than one if there were disambiguation symbols,
574  // e.g. for langauge model backoff).
575  if (s == fst_->Start() && IsEntryState(s))
577  }
578  }
579  }
580  StateId num_new_states = fst_->NumStates() - orig_num_states_;
581  KALDI_LOG << "Added " << num_new_states << " new states while "
582  "preparing for grammar FST.";
583  }
VectorFst< StdArc > * fst_
Definition: grammar-fst.cc:681
bool IsEntryState(StateId s) const
Definition: grammar-fst.cc:705
Lattice::StateId StateId
static void InputDeterminizeSingleState(StdArc::StateId s, VectorFst< StdArc > *fst)
This utility function input-determinizes a specified state s of the FST &#39;fst&#39;.
Definition: grammar-fst.cc:472
bool NeedEpsilons(StateId s) const
Definition: grammar-fst.cc:723
#define KALDI_ERR
Definition: kaldi-error.h:147
void MaybeAddFinalProbToState(StateId s)
Definition: grammar-fst.cc:834
bool IsSpecialState(StateId s) const
Definition: grammar-fst.cc:690
void InsertEpsilonsForState(StateId s)
Definition: grammar-fst.cc:888
#define KALDI_LOG
Definition: kaldi-error.h:153
void FixArcsToFinalStates(StateId s)
Definition: grammar-fst.cc:810

Member Data Documentation

◆ fst_

VectorFst<StdArc>* fst_
private

Definition at line 681 of file grammar-fst.cc.

◆ nonterm_phones_offset_

int32 nonterm_phones_offset_
private

Definition at line 680 of file grammar-fst.cc.

◆ orig_num_states_

StateId orig_num_states_
private

Definition at line 682 of file grammar-fst.cc.

◆ simple_final_state_

StateId simple_final_state_
private

Definition at line 687 of file grammar-fst.cc.


The documentation for this class was generated from the following file: