All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DeterminizerStar< F > Class Template Reference

#include <determinize-star-inl.h>

Collaboration diagram for DeterminizerStar< F >:

Classes

struct  Element
 
class  EpsilonClosure
 
class  PairComparator
 
class  SubsetEqual
 
class  SubsetEqualStates
 
class  SubsetKey
 
struct  TempArc
 

Public Member Functions

void Output (MutableFst< GallicArc< Arc > > *ofst, bool destroy=true)
 
void Output (MutableFst< Arc > *ofst, bool destroy=true)
 
 DeterminizerStar (const Fst< Arc > &ifst, float delta=kDelta, int max_states=-1, bool allow_partial=false)
 
void Determinize (bool *debug_ptr)
 
bool IsPartial ()
 
void FreeMostMemory ()
 
 ~DeterminizerStar ()
 

Private Types

typedef F::Arc Arc
 
typedef Arc::Label Label
 
typedef Arc::Weight Weight
 
typedef Arc::StateId InputStateId
 
typedef Arc::StateId OutputStateId
 
typedef Arc::Label StringId
 
typedef StringRepository
< Label, StringId
StringRepositoryType
 
typedef unordered_map< const
vector< Element >
*, OutputStateId, SubsetKey,
SubsetEqual
SubsetHash
 

Private Member Functions

void ProcessFinal (const vector< Element > &closed_subset, OutputStateId state)
 
void ProcessTransition (OutputStateId state, Label ilabel, vector< Element > *subset)
 
void ProcessTransitions (const vector< Element > &closed_subset, OutputStateId state)
 
OutputStateId SubsetToStateId (const vector< Element > &subset)
 
void ProcessSubset (const pair< vector< Element > *, OutputStateId > &pair)
 
void Debug ()
 
 KALDI_DISALLOW_COPY_AND_ASSIGN (DeterminizerStar)
 

Private Attributes

deque< pair< vector< Element >
*, OutputStateId > > 
Q_
 
vector< vector< TempArc > > output_arcs_
 
const Fst< Arc > * ifst_
 
float delta_
 
int max_states_
 
bool determinized_
 
bool allow_partial_
 
bool is_partial_
 
SubsetKey hasher_
 
SubsetEqual equal_
 
SubsetHash hash_
 
StringRepository< Label, StringIdrepository_
 
EpsilonClosure epsilon_closure_
 

Detailed Description

template<class F>
class fst::DeterminizerStar< F >

Definition at line 159 of file determinize-star-inl.h.

Member Typedef Documentation

typedef F::Arc Arc
private

Definition at line 160 of file determinize-star-inl.h.

typedef Arc::StateId InputStateId
private

Definition at line 247 of file determinize-star-inl.h.

typedef Arc::Label Label
private

Definition at line 245 of file determinize-star-inl.h.

typedef Arc::StateId OutputStateId
private

Definition at line 248 of file determinize-star-inl.h.

typedef Arc::Label StringId
private

Definition at line 249 of file determinize-star-inl.h.

Definition at line 250 of file determinize-star-inl.h.

typedef unordered_map<const vector<Element>*, OutputStateId, SubsetKey, SubsetEqual> SubsetHash
private

Definition at line 344 of file determinize-star-inl.h.

typedef Arc::Weight Weight
private

Definition at line 246 of file determinize-star-inl.h.

Constructor & Destructor Documentation

DeterminizerStar ( const Fst< Arc > &  ifst,
float  delta = kDelta,
int  max_states = -1,
bool  allow_partial = false 
)
inline

Definition at line 176 of file determinize-star-inl.h.

177  :
178  ifst_(ifst.Copy()), delta_(delta), max_states_(max_states),
179  determinized_(false), allow_partial_(allow_partial),
180  is_partial_(false), equal_(delta),
181  hash_(ifst.Properties(kExpanded, false) ?
182  down_cast<const ExpandedFst<Arc>*,
183  const Fst<Arc> >(&ifst)->NumStates()/2 + 3 : 20,
184  hasher_, equal_),
185  epsilon_closure_(ifst_, max_states, &repository_, delta) { }
const Fst< Arc > * ifst_
StringRepository< Label, StringId > repository_
~DeterminizerStar ( )
inline

Definition at line 241 of file determinize-star-inl.h.

References DeterminizerStar< F >::FreeMostMemory().

241  {
242  FreeMostMemory();
243  }

Member Function Documentation

void Debug ( )
private

Definition at line 1068 of file determinize-star-inl.h.

References DeterminizerStar< F >::hash_, rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_ERR, KALDI_WARN, DeterminizerStar< F >::output_arcs_, DeterminizerStar< F >::repository_, and kaldi::swap().

Referenced by DeterminizerStar< F >::Determinize().

1068  {
1069  // this function called if you send a signal
1070  // SIGUSR1 to the process (and it's caught by the handler in
1071  // fstdeterminizestar). It prints out some traceback
1072  // info and exits.
1073 
1074  KALDI_WARN << "Debug function called (probably SIGUSR1 caught)";
1075  // free up memory from the hash as we need a little memory
1076  { SubsetHash hash_tmp; std::swap(hash_tmp, hash_); }
1077 
1078  if (output_arcs_.size() <= 2) {
1079  KALDI_ERR << "Nothing to trace back";
1080  }
1081  size_t max_state = output_arcs_.size() - 2; // don't take the last
1082  // one as we might be halfway into constructing it.
1083 
1084  vector<OutputStateId> predecessor(max_state+1, kNoStateId);
1085  for (size_t i = 0; i < max_state; i++) {
1086  for (size_t j = 0; j < output_arcs_[i].size(); j++) {
1087  OutputStateId nextstate = output_arcs_[i][j].nextstate;
1088  // Always find an earlier-numbered predecessor; this
1089  // is always possible because of the way the algorithm
1090  // works.
1091  if (nextstate <= max_state && nextstate > i)
1092  predecessor[nextstate] = i;
1093  }
1094  }
1095  vector<pair<Label, StringId> > traceback;
1096  // 'traceback' is a pair of (ilabel, olabel-seq).
1097  OutputStateId cur_state = max_state; // A recently constructed state.
1098 
1099  while (cur_state != 0 && cur_state != kNoStateId) {
1100  OutputStateId last_state = predecessor[cur_state];
1101  pair<Label, StringId> p;
1102  size_t i;
1103  for (i = 0; i < output_arcs_[last_state].size(); i++) {
1104  if (output_arcs_[last_state][i].nextstate == cur_state) {
1105  p.first = output_arcs_[last_state][i].ilabel;
1106  p.second = output_arcs_[last_state][i].ostring;
1107  traceback.push_back(p);
1108  break;
1109  }
1110  }
1111  KALDI_ASSERT(i != output_arcs_[last_state].size()); // Or fell off loop.
1112  cur_state = last_state;
1113  }
1114  if (cur_state == kNoStateId)
1115  KALDI_WARN << "Traceback did not reach start state "
1116  << "(possibly debug-code error)";
1117 
1118  std::stringstream ss;
1119  ss << "Traceback follows in format "
1120  << "ilabel (olabel olabel) ilabel (olabel) ... :";
1121  for (ssize_t i = traceback.size() - 1; i >= 0; i--) {
1122  ss << ' ' << traceback[i].first << " ( ";
1123  vector<Label> seq;
1124  repository_.SeqOfId(traceback[i].second, &seq);
1125  for (size_t j = 0; j < seq.size(); j++)
1126  ss << seq[j] << ' ';
1127  ss << ')';
1128  }
1129  KALDI_ERR << ss.str();
1130 }
unordered_map< const vector< Element > *, OutputStateId, SubsetKey, SubsetEqual > SubsetHash
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
#define KALDI_ERR
Definition: kaldi-error.h:127
#define KALDI_WARN
Definition: kaldi-error.h:130
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void Determinize ( bool *  debug_ptr)
inline

Definition at line 187 of file determinize-star-inl.h.

References DeterminizerStar< F >::allow_partial_, DeterminizerStar< F >::Debug(), DeterminizerStar< F >::determinized_, DeterminizerStar< F >::ifst_, DeterminizerStar< F >::is_partial_, KALDI_ERR, KALDI_WARN, DeterminizerStar< F >::max_states_, DeterminizerStar< F >::output_arcs_, DeterminizerStar< F >::ProcessSubset(), DeterminizerStar< F >::Q_, DeterminizerStar< F >::repository_, DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, DeterminizerStar< F >::SubsetToStateId(), and DeterminizerStar< F >::Element::weight.

Referenced by fst::DeterminizeStar().

187  {
188  assert(!determinized_);
189  // This determinizes the input fst but leaves it in the "special format"
190  // in "output_arcs_".
191  InputStateId start_id = ifst_->Start();
192  if (start_id == kNoStateId) { determinized_ = true; return; } // Nothing to do.
193  else { // Insert start state into hash and queue.
194  Element elem;
195  elem.state = start_id;
196  elem.weight = Weight::One();
197  elem.string = repository_.IdOfEmpty(); // Id of empty sequence.
198  vector<Element> vec;
199  vec.push_back(elem);
200  OutputStateId cur_id = SubsetToStateId(vec);
201  assert(cur_id == 0 && "Do not call Determinize twice.");
202  }
203  while (!Q_.empty()) {
204  pair<vector<Element>*, OutputStateId> cur_pair = Q_.front();
205  Q_.pop_front();
206  ProcessSubset(cur_pair);
207  if (debug_ptr && *debug_ptr) Debug(); // will exit.
208  if (max_states_ > 0 && output_arcs_.size() > max_states_) {
209  if (allow_partial_ == false) {
210  KALDI_ERR << "Determinization aborted since passed " << max_states_
211  << " states";
212  } else {
213  KALDI_WARN << "Determinization terminated since passed " << max_states_
214  << " states, partial results will be generated";
215  is_partial_ = true;
216  break;
217  }
218  }
219  }
220  determinized_ = true;
221  }
OutputStateId SubsetToStateId(const vector< Element > &subset)
const Fst< Arc > * ifst_
deque< pair< vector< Element > *, OutputStateId > > Q_
void ProcessSubset(const pair< vector< Element > *, OutputStateId > &pair)
#define KALDI_ERR
Definition: kaldi-error.h:127
#define KALDI_WARN
Definition: kaldi-error.h:130
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void FreeMostMemory ( )
inline

Definition at line 229 of file determinize-star-inl.h.

References DeterminizerStar< F >::hash_, and DeterminizerStar< F >::ifst_.

Referenced by DeterminizerStar< F >::Output(), and DeterminizerStar< F >::~DeterminizerStar().

229  {
230  if (ifst_) {
231  delete ifst_;
232  ifst_ = NULL;
233  }
234  for (typename SubsetHash::iterator iter = hash_.begin();
235  iter != hash_.end(); ++iter)
236  delete iter->first;
237  SubsetHash tmp;
238  tmp.swap(hash_);
239  }
unordered_map< const vector< Element > *, OutputStateId, SubsetKey, SubsetEqual > SubsetHash
const Fst< Arc > * ifst_
bool IsPartial ( )
inline

Definition at line 223 of file determinize-star-inl.h.

References DeterminizerStar< F >::is_partial_.

Referenced by fst::DeterminizeStar().

223  {
224  return is_partial_;
225  }
KALDI_DISALLOW_COPY_AND_ASSIGN ( DeterminizerStar< F >  )
private
void Output ( MutableFst< GallicArc< Arc > > *  ofst,
bool  destroy = true 
)

Definition at line 862 of file determinize-star-inl.h.

References DeterminizerStar< F >::determinized_, DeterminizerStar< F >::FreeMostMemory(), rnnlm::i, DeterminizerStar< F >::TempArc::ilabel, DeterminizerStar< F >::TempArc::nextstate, DeterminizerStar< F >::TempArc::ostring, DeterminizerStar< F >::output_arcs_, DeterminizerStar< F >::repository_, and DeterminizerStar< F >::TempArc::weight.

Referenced by fst::DeterminizeStar().

863  {
864  assert(determinized_);
865  if (destroy) determinized_ = false;
866  typedef GallicWeight<Label, Weight> ThisGallicWeight;
867  typedef typename Arc::StateId StateId;
868  if (destroy)
869  FreeMostMemory();
870  StateId nStates = static_cast<StateId>(output_arcs_.size());
871  ofst->DeleteStates();
872  ofst->SetStart(kNoStateId);
873  if (nStates == 0) {
874  return;
875  }
876  for (StateId s = 0;s < nStates;s++) {
877  OutputStateId news = ofst->AddState();
878  assert(news == s);
879  }
880  ofst->SetStart(0);
881  // now process transitions.
882  for (StateId this_state = 0; this_state < nStates; this_state++) {
883  vector<TempArc> &this_vec(output_arcs_[this_state]);
884  typename vector<TempArc>::const_iterator iter = this_vec.begin(),
885  end = this_vec.end();
886  for (; iter != end; ++iter) {
887  const TempArc &temp_arc(*iter);
888  GallicArc<Arc> new_arc;
889  vector<Label> seq;
890  repository_.SeqOfId(temp_arc.ostring, &seq);
891  StringWeight<Label, STRING_LEFT> string_weight;
892  for (size_t i = 0;i < seq.size();i++) string_weight.PushBack(seq[i]);
893  ThisGallicWeight gallic_weight(string_weight, temp_arc.weight);
894 
895  if (temp_arc.nextstate == kNoStateId) { // is really final weight.
896  ofst->SetFinal(this_state, gallic_weight);
897  } else { // is really an arc.
898  new_arc.nextstate = temp_arc.nextstate;
899  new_arc.ilabel = temp_arc.ilabel;
900  new_arc.olabel = temp_arc.ilabel; // acceptor. input == output.
901  new_arc.weight = gallic_weight; // includes string and weight.
902  ofst->AddArc(this_state, new_arc);
903  }
904  }
905  // Free up memory. Do this inside the loop as ofst is also allocating memory
906  if (destroy) { vector<TempArc> temp; temp.swap(this_vec); }
907  }
908  if (destroy) { vector<vector<TempArc> > temp; temp.swap(output_arcs_); }
909 }
fst::StdArc::StateId StateId
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void Output ( MutableFst< Arc > *  ofst,
bool  destroy = true 
)

Definition at line 912 of file determinize-star-inl.h.

References DeterminizerStar< F >::determinized_, DeterminizerStar< F >::FreeMostMemory(), rnnlm::i, DeterminizerStar< F >::TempArc::ilabel, DeterminizerStar< F >::TempArc::nextstate, DeterminizerStar< F >::TempArc::ostring, DeterminizerStar< F >::output_arcs_, DeterminizerStar< F >::repository_, and DeterminizerStar< F >::TempArc::weight.

912  {
913  assert(determinized_);
914  if (destroy) determinized_ = false;
915  // Outputs to standard fst.
916  OutputStateId num_states = static_cast<OutputStateId>(output_arcs_.size());
917  if (destroy)
918  FreeMostMemory();
919  ofst->DeleteStates();
920  if (num_states == 0) {
921  ofst->SetStart(kNoStateId);
922  return;
923  }
924  // Add basic states-- but will add extra ones to account for strings on output.
925  for (OutputStateId s = 0; s < num_states; s++) {
926  OutputStateId news = ofst->AddState();
927  assert(news == s);
928  }
929  ofst->SetStart(0);
930  for (OutputStateId this_state = 0; this_state < num_states; this_state++) {
931  vector<TempArc> &this_vec(output_arcs_[this_state]);
932 
933  typename vector<TempArc>::const_iterator iter = this_vec.begin(),
934  end = this_vec.end();
935  for (; iter != end; ++iter) {
936  const TempArc &temp_arc(*iter);
937  vector<Label> seq;
938  repository_.SeqOfId(temp_arc.ostring, &seq);
939  if (temp_arc.nextstate == kNoStateId) { // Really a final weight.
940  // Make a sequence of states going to a final state, with the strings as labels.
941  // Put the weight on the first arc.
942  OutputStateId cur_state = this_state;
943  for (size_t i = 0; i < seq.size();i++) {
944  OutputStateId next_state = ofst->AddState();
945  Arc arc;
946  arc.nextstate = next_state;
947  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
948  arc.ilabel = 0; // epsilon.
949  arc.olabel = seq[i];
950  ofst->AddArc(cur_state, arc);
951  cur_state = next_state;
952  }
953  ofst->SetFinal(cur_state, (seq.size() == 0 ? temp_arc.weight : Weight::One()));
954  } else { // Really an arc.
955  OutputStateId cur_state = this_state;
956  // Have to be careful with this integer comparison (i+1 < seq.size()) because unsigned.
957  // i < seq.size()-1 could fail for zero-length sequences.
958  for (size_t i = 0; i+1 < seq.size();i++) {
959  // for all but the last element of seq, create new state.
960  OutputStateId next_state = ofst->AddState();
961  Arc arc;
962  arc.nextstate = next_state;
963  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
964  arc.ilabel = (i == 0 ? temp_arc.ilabel : 0); // put ilabel on first element of seq.
965  arc.olabel = seq[i];
966  ofst->AddArc(cur_state, arc);
967  cur_state = next_state;
968  }
969  // Add the final arc in the sequence.
970  Arc arc;
971  arc.nextstate = temp_arc.nextstate;
972  arc.weight = (seq.size() <= 1 ? temp_arc.weight : Weight::One());
973  arc.ilabel = (seq.size() <= 1 ? temp_arc.ilabel : 0);
974  arc.olabel = (seq.size() > 0 ? seq.back() : 0);
975  ofst->AddArc(cur_state, arc);
976  }
977  }
978  // Free up memory. Do this inside the loop as ofst is also allocating memory
979  if (destroy) { vector<TempArc> temp; temp.swap(this_vec); }
980  }
981  if (destroy) {
982  vector<vector<TempArc> > temp;
983  temp.swap(output_arcs_);
984  repository_.Destroy();
985  }
986 }
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void ProcessFinal ( const vector< Element > &  closed_subset,
OutputStateId  state 
)
inlineprivate

Definition at line 435 of file determinize-star-inl.h.

References DeterminizerStar< F >::ifst_, DeterminizerStar< F >::TempArc::ilabel, KALDI_ERR, DeterminizerStar< F >::TempArc::nextstate, DeterminizerStar< F >::TempArc::ostring, DeterminizerStar< F >::output_arcs_, fst::Plus(), DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, fst::Times(), DeterminizerStar< F >::Element::weight, and DeterminizerStar< F >::TempArc::weight.

Referenced by DeterminizerStar< F >::ProcessSubset().

435  {
436  // processes final-weights for this subset.
437  bool is_final = false;
438  StringId final_string = 0; // = 0 to keep compiler happy.
439  Weight final_weight = Weight::One(); // This value will never be accessed, and
440  // we just set it to avoid spurious compiler warnings. We avoid setting it
441  // to Zero() because floating-point infinities can sometimes generate
442  // interrupts and slow things down.
443  typename vector<Element>::const_iterator iter = closed_subset.begin(),
444  end = closed_subset.end();
445  for (; iter != end; ++iter) {
446  const Element &elem = *iter;
447  Weight this_final_weight = ifst_->Final(elem.state);
448  if (this_final_weight != Weight::Zero()) {
449  if (!is_final) { // first final-weight
450  final_string = elem.string;
451  final_weight = Times(elem.weight, this_final_weight);
452  is_final = true;
453  } else { // already have one.
454  if (final_string != elem.string) {
455  KALDI_ERR << "FST was not functional -> not determinizable";
456  }
457  final_weight = Plus(final_weight, Times(elem.weight, this_final_weight));
458  }
459  }
460  }
461  if (is_final) {
462  // store final weights in TempArc structure, just like a transition.
463  TempArc temp_arc;
464  temp_arc.ilabel = 0;
465  temp_arc.nextstate = kNoStateId; // special marker meaning "final weight".
466  temp_arc.ostring = final_string;
467  temp_arc.weight = final_weight;
468  output_arcs_[state].push_back(temp_arc);
469  }
470  }
const Fst< Arc > * ifst_
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
#define KALDI_ERR
Definition: kaldi-error.h:127
vector< vector< TempArc > > output_arcs_
void ProcessSubset ( const pair< vector< Element > *, OutputStateId > &  pair)
inlineprivate

Definition at line 588 of file determinize-star-inl.h.

References DeterminizerStar< F >::epsilon_closure_, DeterminizerStar< F >::EpsilonClosure::GetEpsilonClosure(), DeterminizerStar< F >::ProcessFinal(), and DeterminizerStar< F >::ProcessTransitions().

Referenced by DeterminizerStar< F >::Determinize().

588  {
589  const vector<Element> *subset = pair.first;
590  OutputStateId state = pair.second;
591 
592  vector<Element> closed_subset; // subset after epsilon closure.
593  epsilon_closure_.GetEpsilonClosure(*subset, &closed_subset);
594 
595  // Now follow non-epsilon arcs [and also process final states]
596  ProcessFinal(closed_subset, state);
597 
598  // Now handle transitions out of these states.
599  ProcessTransitions(closed_subset, state);
600  }
void ProcessFinal(const vector< Element > &closed_subset, OutputStateId state)
void GetEpsilonClosure(const vector< Element > &input_subset, vector< Element > *output_subset)
void ProcessTransitions(const vector< Element > &closed_subset, OutputStateId state)
void ProcessTransition ( OutputStateId  state,
Label  ilabel,
vector< Element > *  subset 
)
private

Definition at line 989 of file determinize-star-inl.h.

References fst::Divide(), rnnlm::i, DeterminizerStar< F >::TempArc::ilabel, KALDI_ERR, DeterminizerStar< F >::TempArc::nextstate, DeterminizerStar< F >::TempArc::ostring, DeterminizerStar< F >::output_arcs_, fst::Plus(), DeterminizerStar< F >::repository_, DeterminizerStar< F >::SubsetToStateId(), and DeterminizerStar< F >::TempArc::weight.

Referenced by DeterminizerStar< F >::ProcessTransitions().

989  {
990  // At input, "subset" may contain duplicates for a given dest state (but in sorted
991  // order). This function removes duplicates from "subset", normalizes it, and adds
992  // a transition to the dest. state (possibly affecting Q_ and hash_, if state did not
993  // exist).
994 
995  typedef typename vector<Element>::iterator IterType;
996  { // This block makes the subset have one unique Element per state, adding the weights.
997  IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end();
998  size_t num_out = 0;
999  // Merge elements with same state-id
1000  while (cur_in != end) { // while we have more elements to process.
1001  // At this point, cur_out points to location of next place we want to put an element,
1002  // cur_in points to location of next element we want to process.
1003  if (cur_in != cur_out) *cur_out = *cur_in;
1004  cur_in++;
1005  while (cur_in != end && cur_in->state == cur_out->state) { // merge elements.
1006  if (cur_in->string != cur_out->string) {
1007  KALDI_ERR << "FST was not functional -> not determinizable";
1008  }
1009  cur_out->weight = Plus(cur_out->weight, cur_in->weight);
1010  cur_in++;
1011  }
1012  cur_out++;
1013  num_out++;
1014  }
1015  subset->resize(num_out);
1016  }
1017 
1018  StringId common_str;
1019  Weight tot_weight;
1020  { // This block computes common_str and tot_weight (essentially: the common divisor)
1021  // and removes them from the elements.
1022  vector<Label> seq;
1023 
1024  IterType begin = subset->begin(), iter, end = subset->end();
1025  { // This block computes "seq", which is the common prefix, and "common_str",
1026  // which is the StringId version of "seq".
1027  vector<Label> tmp_seq;
1028  for (iter = begin; iter != end; ++iter) {
1029  if (iter == begin) {
1030  repository_.SeqOfId(iter->string, &seq);
1031  } else {
1032  repository_.SeqOfId(iter->string, &tmp_seq);
1033  if (tmp_seq.size() < seq.size()) seq.resize(tmp_seq.size()); // size of shortest one.
1034  for (size_t i = 0; i < seq.size(); i++) // seq.size() is the shorter one at this point.
1035  if (tmp_seq[i] != seq[i]) seq.resize(i);
1036  }
1037  if (seq.size() == 0) break; // will not get any prefix.
1038  }
1039  common_str = repository_.IdOfSeq(seq);
1040  }
1041 
1042  { // This block computes "tot_weight".
1043  iter = begin;
1044  tot_weight = iter->weight;
1045  for (++iter; iter != end; ++iter)
1046  tot_weight = Plus(tot_weight, iter->weight);
1047  }
1048 
1049  // Now divide out common stuff from elements.
1050  size_t prefix_len = seq.size();
1051  for (iter = begin; iter != end; ++iter) {
1052  iter->weight = Divide(iter->weight, tot_weight);
1053  iter->string = repository_.RemovePrefix(iter->string, prefix_len);
1054  }
1055  }
1056 
1057  // Now add an arc to the state that the subset represents.
1058  // We may create a new state id for this (in SubsetToStateId).
1059  TempArc temp_arc;
1060  temp_arc.ilabel = ilabel;
1061  temp_arc.nextstate = SubsetToStateId(*subset); // may or may not really add the subset.
1062  temp_arc.ostring = common_str;
1063  temp_arc.weight = tot_weight;
1064  output_arcs_[state].push_back(temp_arc); // record the arc.
1065 }
OutputStateId SubsetToStateId(const vector< Element > &subset)
LatticeWeightTpl< FloatType > Divide(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, DivideType typ=DIVIDE_ANY)
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
#define KALDI_ERR
Definition: kaldi-error.h:127
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void ProcessTransitions ( const vector< Element > &  closed_subset,
OutputStateId  state 
)
inlineprivate

Definition at line 501 of file determinize-star-inl.h.

References DeterminizerStar< F >::ifst_, DeterminizerStar< F >::ProcessTransition(), DeterminizerStar< F >::repository_, DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, fst::Times(), and DeterminizerStar< F >::Element::weight.

Referenced by DeterminizerStar< F >::ProcessSubset().

501  {
502  vector<pair<Label, Element> > all_elems;
503  { // Push back into "all_elems", elements corresponding to all non-epsilon-input transitions
504  // out of all states in "closed_subset".
505  typename vector<Element>::const_iterator iter = closed_subset.begin(),
506  end = closed_subset.end();
507  for (; iter != end; ++iter) {
508  const Element &elem = *iter;
509  for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state);
510  !aiter.Done(); aiter.Next()) {
511  const Arc &arc = aiter.Value();
512  if (arc.ilabel != 0) { // Non-epsilon transition -- ignore epsilons here.
513  pair<Label, Element> this_pr;
514  this_pr.first = arc.ilabel;
515  Element &next_elem(this_pr.second);
516  next_elem.state = arc.nextstate;
517  next_elem.weight = Times(elem.weight, arc.weight);
518  if (arc.olabel == 0) // output epsilon-- this is simple case so
519  // handle separately for efficiency
520  next_elem.string = elem.string;
521  else {
522  vector<Label> seq;
523  repository_.SeqOfId(elem.string, &seq);
524  seq.push_back(arc.olabel);
525  next_elem.string = repository_.IdOfSeq(seq);
526  }
527  all_elems.push_back(this_pr);
528  }
529  }
530  }
531  }
532  PairComparator pc;
533  std::sort(all_elems.begin(), all_elems.end(), pc);
534  // now sorted first on input label, then on state.
535  typedef typename vector<pair<Label, Element> >::const_iterator PairIter;
536  PairIter cur = all_elems.begin(), end = all_elems.end();
537  vector<Element> this_subset;
538  while (cur != end) {
539  // Process ranges that share the same input symbol.
540  Label ilabel = cur->first;
541  this_subset.clear();
542  while (cur != end && cur->first == ilabel) {
543  this_subset.push_back(cur->second);
544  cur++;
545  }
546  // We now have a subset for this ilabel.
547  ProcessTransition(state, ilabel, &this_subset);
548  }
549  }
void ProcessTransition(OutputStateId state, Label ilabel, vector< Element > *subset)
const Fst< Arc > * ifst_
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
StringRepository< Label, StringId > repository_
OutputStateId SubsetToStateId ( const vector< Element > &  subset)
inlineprivate

Definition at line 555 of file determinize-star-inl.h.

References DeterminizerStar< F >::allow_partial_, DeterminizerStar< F >::hash_, DeterminizerStar< F >::output_arcs_, and DeterminizerStar< F >::Q_.

Referenced by DeterminizerStar< F >::Determinize(), and DeterminizerStar< F >::ProcessTransition().

555  { // may add the subset to the queue.
556  typedef typename SubsetHash::iterator IterType;
557  IterType iter = hash_.find(&subset);
558  if (iter == hash_.end()) { // was not there.
559  vector<Element> *new_subset = new vector<Element>(subset);
560  OutputStateId new_state_id = (OutputStateId) output_arcs_.size();
561  bool ans = hash_.insert(std::pair<const vector<Element>*,
562  OutputStateId>(new_subset,
563  new_state_id)).second;
564  assert(ans);
565  output_arcs_.push_back(vector<TempArc>());
566  if (allow_partial_ == false) {
567  // If --allow-partial is not requested, we do the old way.
568  Q_.push_front(pair<vector<Element>*, OutputStateId>(new_subset, new_state_id));
569  } else {
570  // If --allow-partial is requested, we do breadth first search. This
571  // ensures that when we return partial results, we return the states
572  // that are reachable by the fewest steps from the start state.
573  Q_.push_back(pair<vector<Element>*, OutputStateId>(new_subset, new_state_id));
574  }
575  return new_state_id;
576  } else {
577  return iter->second; // the OutputStateId.
578  }
579  }
deque< pair< vector< Element > *, OutputStateId > > Q_
vector< vector< TempArc > > output_arcs_

Member Data Documentation

bool allow_partial_
private
float delta_
private
bool determinized_
private
EpsilonClosure epsilon_closure_
private

Definition at line 620 of file determinize-star-inl.h.

Referenced by DeterminizerStar< F >::ProcessSubset().

SubsetEqual equal_
private

Definition at line 616 of file determinize-star-inl.h.

SubsetKey hasher_
private

Definition at line 615 of file determinize-star-inl.h.

bool is_partial_
private
int max_states_
private

Definition at line 611 of file determinize-star-inl.h.

Referenced by DeterminizerStar< F >::Determinize().

deque<pair<vector<Element>*, OutputStateId> > Q_
private

The documentation for this class was generated from the following file: