All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DeterminizerStar< F > Class Template Reference

#include <determinize-star-inl.h>

Collaboration diagram for DeterminizerStar< F >:

Classes

struct  Element
 
class  EpsilonClosure
 
class  PairComparator
 
class  SubsetEqual
 
class  SubsetEqualStates
 
class  SubsetKey
 
struct  TempArc
 

Public Member Functions

void Output (MutableFst< GallicArc< Arc > > *ofst, bool destroy=true)
 
void Output (MutableFst< Arc > *ofst, bool destroy=true)
 
 DeterminizerStar (const Fst< Arc > &ifst, float delta=kDelta, int max_states=-1, bool allow_partial=false)
 
void Determinize (bool *debug_ptr)
 
bool IsPartial ()
 
void FreeMostMemory ()
 
 ~DeterminizerStar ()
 

Private Types

typedef F::Arc Arc
 
typedef Arc::Label Label
 
typedef Arc::Weight Weight
 
typedef Arc::StateId InputStateId
 
typedef Arc::StateId OutputStateId
 
typedef Arc::Label StringId
 
typedef StringRepository
< Label, StringId
StringRepositoryType
 
typedef unordered_map< const
vector< Element >
*, OutputStateId, SubsetKey,
SubsetEqual
SubsetHash
 

Private Member Functions

void ProcessFinal (const vector< Element > &closed_subset, OutputStateId state)
 
void ProcessTransition (OutputStateId state, Label ilabel, vector< Element > *subset)
 
void ProcessTransitions (const vector< Element > &closed_subset, OutputStateId state)
 
OutputStateId SubsetToStateId (const vector< Element > &subset)
 
void ProcessSubset (const pair< vector< Element > *, OutputStateId > &pair)
 
void Debug ()
 
 DISALLOW_COPY_AND_ASSIGN (DeterminizerStar)
 

Private Attributes

deque< pair< vector< Element >
*, OutputStateId > > 
Q_
 
vector< vector< TempArc > > output_arcs_
 
const Fst< Arc > * ifst_
 
float delta_
 
int max_states_
 
bool determinized_
 
bool allow_partial_
 
bool is_partial_
 
SubsetKey hasher_
 
SubsetEqual equal_
 
SubsetHash hash_
 
StringRepository< Label, StringIdrepository_
 
EpsilonClosure epsilon_closure_
 

Detailed Description

template<class F>
class fst::DeterminizerStar< F >

Definition at line 167 of file determinize-star-inl.h.

Member Typedef Documentation

typedef F::Arc Arc
private

Definition at line 168 of file determinize-star-inl.h.

typedef Arc::StateId InputStateId
private

Definition at line 255 of file determinize-star-inl.h.

typedef Arc::Label Label
private

Definition at line 253 of file determinize-star-inl.h.

typedef Arc::StateId OutputStateId
private

Definition at line 256 of file determinize-star-inl.h.

typedef Arc::Label StringId
private

Definition at line 257 of file determinize-star-inl.h.

Definition at line 258 of file determinize-star-inl.h.

typedef unordered_map<const vector<Element>*, OutputStateId, SubsetKey, SubsetEqual> SubsetHash
private

Definition at line 352 of file determinize-star-inl.h.

typedef Arc::Weight Weight
private

Definition at line 254 of file determinize-star-inl.h.

Constructor & Destructor Documentation

DeterminizerStar ( const Fst< Arc > &  ifst,
float  delta = kDelta,
int  max_states = -1,
bool  allow_partial = false 
)
inline

Definition at line 184 of file determinize-star-inl.h.

185  :
186  ifst_(ifst.Copy()), delta_(delta), max_states_(max_states),
187  determinized_(false), allow_partial_(allow_partial),
188  is_partial_(false), equal_(delta),
189  hash_(ifst.Properties(kExpanded, false) ?
190  down_cast<const ExpandedFst<Arc>*,
191  const Fst<Arc> >(&ifst)->NumStates()/2 + 3 : 20,
192  hasher_, equal_),
193  epsilon_closure_(ifst_, max_states, &repository_, delta) { }
const Fst< Arc > * ifst_
StringRepository< Label, StringId > repository_
~DeterminizerStar ( )
inline

Definition at line 249 of file determinize-star-inl.h.

References DeterminizerStar< F >::FreeMostMemory().

249  {
250  FreeMostMemory();
251  }

Member Function Documentation

void Debug ( )
private

Definition at line 1076 of file determinize-star-inl.h.

References DeterminizerStar< F >::hash_, rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_ERR, KALDI_WARN, DeterminizerStar< F >::output_arcs_, DeterminizerStar< F >::repository_, and kaldi::swap().

Referenced by DeterminizerStar< F >::Determinize().

1076  {
1077  // this function called if you send a signal
1078  // SIGUSR1 to the process (and it's caught by the handler in
1079  // fstdeterminizestar). It prints out some traceback
1080  // info and exits.
1081 
1082  KALDI_WARN << "Debug function called (probably SIGUSR1 caught)";
1083  // free up memory from the hash as we need a little memory
1084  { SubsetHash hash_tmp; std::swap(hash_tmp, hash_); }
1085 
1086  if (output_arcs_.size() <= 2) {
1087  KALDI_ERR << "Nothing to trace back";
1088  }
1089  size_t max_state = output_arcs_.size() - 2; // don't take the last
1090  // one as we might be halfway into constructing it.
1091 
1092  vector<OutputStateId> predecessor(max_state+1, kNoStateId);
1093  for (size_t i = 0; i < max_state; i++) {
1094  for (size_t j = 0; j < output_arcs_[i].size(); j++) {
1095  OutputStateId nextstate = output_arcs_[i][j].nextstate;
1096  // Always find an earlier-numbered predecessor; this
1097  // is always possible because of the way the algorithm
1098  // works.
1099  if (nextstate <= max_state && nextstate > i)
1100  predecessor[nextstate] = i;
1101  }
1102  }
1103  vector<pair<Label, StringId> > traceback;
1104  // 'traceback' is a pair of (ilabel, olabel-seq).
1105  OutputStateId cur_state = max_state; // A recently constructed state.
1106 
1107  while (cur_state != 0 && cur_state != kNoStateId) {
1108  OutputStateId last_state = predecessor[cur_state];
1109  pair<Label, StringId> p;
1110  size_t i;
1111  for (i = 0; i < output_arcs_[last_state].size(); i++) {
1112  if (output_arcs_[last_state][i].nextstate == cur_state) {
1113  p.first = output_arcs_[last_state][i].ilabel;
1114  p.second = output_arcs_[last_state][i].ostring;
1115  traceback.push_back(p);
1116  break;
1117  }
1118  }
1119  KALDI_ASSERT(i != output_arcs_[last_state].size()); // Or fell off loop.
1120  cur_state = last_state;
1121  }
1122  if (cur_state == kNoStateId)
1123  KALDI_WARN << "Traceback did not reach start state "
1124  << "(possibly debug-code error)";
1125 
1126  std::stringstream ss;
1127  ss << "Traceback follows in format "
1128  << "ilabel (olabel olabel) ilabel (olabel) ... :";
1129  for (ssize_t i = traceback.size() - 1; i >= 0; i--) {
1130  ss << ' ' << traceback[i].first << " ( ";
1131  vector<Label> seq;
1132  repository_.SeqOfId(traceback[i].second, &seq);
1133  for (size_t j = 0; j < seq.size(); j++)
1134  ss << seq[j] << ' ';
1135  ss << ')';
1136  }
1137  KALDI_ERR << ss.str();
1138 }
unordered_map< const vector< Element > *, OutputStateId, SubsetKey, SubsetEqual > SubsetHash
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
#define KALDI_ERR
Definition: kaldi-error.h:127
#define KALDI_WARN
Definition: kaldi-error.h:130
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void Determinize ( bool *  debug_ptr)
inline

Definition at line 195 of file determinize-star-inl.h.

References DeterminizerStar< F >::allow_partial_, DeterminizerStar< F >::Debug(), DeterminizerStar< F >::determinized_, DeterminizerStar< F >::ifst_, DeterminizerStar< F >::is_partial_, KALDI_ERR, KALDI_WARN, DeterminizerStar< F >::max_states_, DeterminizerStar< F >::output_arcs_, DeterminizerStar< F >::ProcessSubset(), DeterminizerStar< F >::Q_, DeterminizerStar< F >::repository_, DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, DeterminizerStar< F >::SubsetToStateId(), and DeterminizerStar< F >::Element::weight.

Referenced by fst::DeterminizeStar().

195  {
196  assert(!determinized_);
197  // This determinizes the input fst but leaves it in the "special format"
198  // in "output_arcs_".
199  InputStateId start_id = ifst_->Start();
200  if (start_id == kNoStateId) { determinized_ = true; return; } // Nothing to do.
201  else { // Insert start state into hash and queue.
202  Element elem;
203  elem.state = start_id;
204  elem.weight = Weight::One();
205  elem.string = repository_.IdOfEmpty(); // Id of empty sequence.
206  vector<Element> vec;
207  vec.push_back(elem);
208  OutputStateId cur_id = SubsetToStateId(vec);
209  assert(cur_id == 0 && "Do not call Determinize twice.");
210  }
211  while (!Q_.empty()) {
212  pair<vector<Element>*, OutputStateId> cur_pair = Q_.front();
213  Q_.pop_front();
214  ProcessSubset(cur_pair);
215  if (debug_ptr && *debug_ptr) Debug(); // will exit.
216  if (max_states_ > 0 && output_arcs_.size() > max_states_) {
217  if (allow_partial_ == false) {
218  KALDI_ERR << "Determinization aborted since passed " << max_states_
219  << " states";
220  } else {
221  KALDI_WARN << "Determinization terminated since passed " << max_states_
222  << " states, partial results will be generated";
223  is_partial_ = true;
224  break;
225  }
226  }
227  }
228  determinized_ = true;
229  }
OutputStateId SubsetToStateId(const vector< Element > &subset)
const Fst< Arc > * ifst_
deque< pair< vector< Element > *, OutputStateId > > Q_
void ProcessSubset(const pair< vector< Element > *, OutputStateId > &pair)
#define KALDI_ERR
Definition: kaldi-error.h:127
#define KALDI_WARN
Definition: kaldi-error.h:130
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
DISALLOW_COPY_AND_ASSIGN ( DeterminizerStar< F >  )
private
void FreeMostMemory ( )
inline

Definition at line 237 of file determinize-star-inl.h.

References DeterminizerStar< F >::hash_, and DeterminizerStar< F >::ifst_.

Referenced by DeterminizerStar< F >::Output(), and DeterminizerStar< F >::~DeterminizerStar().

237  {
238  if (ifst_) {
239  delete ifst_;
240  ifst_ = NULL;
241  }
242  for (typename SubsetHash::iterator iter = hash_.begin();
243  iter != hash_.end(); ++iter)
244  delete iter->first;
245  SubsetHash tmp;
246  tmp.swap(hash_);
247  }
unordered_map< const vector< Element > *, OutputStateId, SubsetKey, SubsetEqual > SubsetHash
const Fst< Arc > * ifst_
bool IsPartial ( )
inline

Definition at line 231 of file determinize-star-inl.h.

References DeterminizerStar< F >::is_partial_.

Referenced by fst::DeterminizeStar().

231  {
232  return is_partial_;
233  }
void Output ( MutableFst< GallicArc< Arc > > *  ofst,
bool  destroy = true 
)

Definition at line 870 of file determinize-star-inl.h.

References DeterminizerStar< F >::determinized_, DeterminizerStar< F >::FreeMostMemory(), rnnlm::i, DeterminizerStar< F >::TempArc::ilabel, DeterminizerStar< F >::TempArc::nextstate, DeterminizerStar< F >::TempArc::ostring, DeterminizerStar< F >::output_arcs_, DeterminizerStar< F >::repository_, and DeterminizerStar< F >::TempArc::weight.

Referenced by fst::DeterminizeStar().

871  {
872  assert(determinized_);
873  if (destroy) determinized_ = false;
874  typedef GallicWeight<Label, Weight> ThisGallicWeight;
875  typedef typename Arc::StateId StateId;
876  if (destroy)
877  FreeMostMemory();
878  StateId nStates = static_cast<StateId>(output_arcs_.size());
879  ofst->DeleteStates();
880  ofst->SetStart(kNoStateId);
881  if (nStates == 0) {
882  return;
883  }
884  for (StateId s = 0;s < nStates;s++) {
885  OutputStateId news = ofst->AddState();
886  assert(news == s);
887  }
888  ofst->SetStart(0);
889  // now process transitions.
890  for (StateId this_state = 0; this_state < nStates; this_state++) {
891  vector<TempArc> &this_vec(output_arcs_[this_state]);
892  typename vector<TempArc>::const_iterator iter = this_vec.begin(),
893  end = this_vec.end();
894  for (; iter != end; ++iter) {
895  const TempArc &temp_arc(*iter);
896  GallicArc<Arc> new_arc;
897  vector<Label> seq;
898  repository_.SeqOfId(temp_arc.ostring, &seq);
899  StringWeight<Label, STRING_LEFT> string_weight;
900  for (size_t i = 0;i < seq.size();i++) string_weight.PushBack(seq[i]);
901  ThisGallicWeight gallic_weight(string_weight, temp_arc.weight);
902 
903  if (temp_arc.nextstate == kNoStateId) { // is really final weight.
904  ofst->SetFinal(this_state, gallic_weight);
905  } else { // is really an arc.
906  new_arc.nextstate = temp_arc.nextstate;
907  new_arc.ilabel = temp_arc.ilabel;
908  new_arc.olabel = temp_arc.ilabel; // acceptor. input == output.
909  new_arc.weight = gallic_weight; // includes string and weight.
910  ofst->AddArc(this_state, new_arc);
911  }
912  }
913  // Free up memory. Do this inside the loop as ofst is also allocating memory
914  if (destroy) { vector<TempArc> temp; temp.swap(this_vec); }
915  }
916  if (destroy) { vector<vector<TempArc> > temp; temp.swap(output_arcs_); }
917 }
fst::StdArc::StateId StateId
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void Output ( MutableFst< Arc > *  ofst,
bool  destroy = true 
)

Definition at line 920 of file determinize-star-inl.h.

References DeterminizerStar< F >::determinized_, DeterminizerStar< F >::FreeMostMemory(), rnnlm::i, DeterminizerStar< F >::TempArc::ilabel, DeterminizerStar< F >::TempArc::nextstate, DeterminizerStar< F >::TempArc::ostring, DeterminizerStar< F >::output_arcs_, DeterminizerStar< F >::repository_, and DeterminizerStar< F >::TempArc::weight.

920  {
921  assert(determinized_);
922  if (destroy) determinized_ = false;
923  // Outputs to standard fst.
924  OutputStateId num_states = static_cast<OutputStateId>(output_arcs_.size());
925  if (destroy)
926  FreeMostMemory();
927  ofst->DeleteStates();
928  if (num_states == 0) {
929  ofst->SetStart(kNoStateId);
930  return;
931  }
932  // Add basic states-- but will add extra ones to account for strings on output.
933  for (OutputStateId s = 0; s < num_states; s++) {
934  OutputStateId news = ofst->AddState();
935  assert(news == s);
936  }
937  ofst->SetStart(0);
938  for (OutputStateId this_state = 0; this_state < num_states; this_state++) {
939  vector<TempArc> &this_vec(output_arcs_[this_state]);
940 
941  typename vector<TempArc>::const_iterator iter = this_vec.begin(),
942  end = this_vec.end();
943  for (; iter != end; ++iter) {
944  const TempArc &temp_arc(*iter);
945  vector<Label> seq;
946  repository_.SeqOfId(temp_arc.ostring, &seq);
947  if (temp_arc.nextstate == kNoStateId) { // Really a final weight.
948  // Make a sequence of states going to a final state, with the strings as labels.
949  // Put the weight on the first arc.
950  OutputStateId cur_state = this_state;
951  for (size_t i = 0; i < seq.size();i++) {
952  OutputStateId next_state = ofst->AddState();
953  Arc arc;
954  arc.nextstate = next_state;
955  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
956  arc.ilabel = 0; // epsilon.
957  arc.olabel = seq[i];
958  ofst->AddArc(cur_state, arc);
959  cur_state = next_state;
960  }
961  ofst->SetFinal(cur_state, (seq.size() == 0 ? temp_arc.weight : Weight::One()));
962  } else { // Really an arc.
963  OutputStateId cur_state = this_state;
964  // Have to be careful with this integer comparison (i+1 < seq.size()) because unsigned.
965  // i < seq.size()-1 could fail for zero-length sequences.
966  for (size_t i = 0; i+1 < seq.size();i++) {
967  // for all but the last element of seq, create new state.
968  OutputStateId next_state = ofst->AddState();
969  Arc arc;
970  arc.nextstate = next_state;
971  arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
972  arc.ilabel = (i == 0 ? temp_arc.ilabel : 0); // put ilabel on first element of seq.
973  arc.olabel = seq[i];
974  ofst->AddArc(cur_state, arc);
975  cur_state = next_state;
976  }
977  // Add the final arc in the sequence.
978  Arc arc;
979  arc.nextstate = temp_arc.nextstate;
980  arc.weight = (seq.size() <= 1 ? temp_arc.weight : Weight::One());
981  arc.ilabel = (seq.size() <= 1 ? temp_arc.ilabel : 0);
982  arc.olabel = (seq.size() > 0 ? seq.back() : 0);
983  ofst->AddArc(cur_state, arc);
984  }
985  }
986  // Free up memory. Do this inside the loop as ofst is also allocating memory
987  if (destroy) { vector<TempArc> temp; temp.swap(this_vec); }
988  }
989  if (destroy) {
990  vector<vector<TempArc> > temp;
991  temp.swap(output_arcs_);
992  repository_.Destroy();
993  }
994 }
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void ProcessFinal ( const vector< Element > &  closed_subset,
OutputStateId  state 
)
inlineprivate

Definition at line 443 of file determinize-star-inl.h.

References DeterminizerStar< F >::ifst_, DeterminizerStar< F >::TempArc::ilabel, KALDI_ERR, DeterminizerStar< F >::TempArc::nextstate, DeterminizerStar< F >::TempArc::ostring, DeterminizerStar< F >::output_arcs_, fst::Plus(), DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, fst::Times(), DeterminizerStar< F >::Element::weight, and DeterminizerStar< F >::TempArc::weight.

Referenced by DeterminizerStar< F >::ProcessSubset().

443  {
444  // processes final-weights for this subset.
445  bool is_final = false;
446  StringId final_string = 0; // = 0 to keep compiler happy.
447  Weight final_weight = Weight::One(); // This value will never be accessed, and
448  // we just set it to avoid spurious compiler warnings. We avoid setting it
449  // to Zero() because floating-point infinities can sometimes generate
450  // interrupts and slow things down.
451  typename vector<Element>::const_iterator iter = closed_subset.begin(),
452  end = closed_subset.end();
453  for (; iter != end; ++iter) {
454  const Element &elem = *iter;
455  Weight this_final_weight = ifst_->Final(elem.state);
456  if (this_final_weight != Weight::Zero()) {
457  if (!is_final) { // first final-weight
458  final_string = elem.string;
459  final_weight = Times(elem.weight, this_final_weight);
460  is_final = true;
461  } else { // already have one.
462  if (final_string != elem.string) {
463  KALDI_ERR << "FST was not functional -> not determinizable";
464  }
465  final_weight = Plus(final_weight, Times(elem.weight, this_final_weight));
466  }
467  }
468  }
469  if (is_final) {
470  // store final weights in TempArc structure, just like a transition.
471  TempArc temp_arc;
472  temp_arc.ilabel = 0;
473  temp_arc.nextstate = kNoStateId; // special marker meaning "final weight".
474  temp_arc.ostring = final_string;
475  temp_arc.weight = final_weight;
476  output_arcs_[state].push_back(temp_arc);
477  }
478  }
const Fst< Arc > * ifst_
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
#define KALDI_ERR
Definition: kaldi-error.h:127
vector< vector< TempArc > > output_arcs_
void ProcessSubset ( const pair< vector< Element > *, OutputStateId > &  pair)
inlineprivate

Definition at line 596 of file determinize-star-inl.h.

References DeterminizerStar< F >::epsilon_closure_, DeterminizerStar< F >::EpsilonClosure::GetEpsilonClosure(), DeterminizerStar< F >::ProcessFinal(), and DeterminizerStar< F >::ProcessTransitions().

Referenced by DeterminizerStar< F >::Determinize().

596  {
597  const vector<Element> *subset = pair.first;
598  OutputStateId state = pair.second;
599 
600  vector<Element> closed_subset; // subset after epsilon closure.
601  epsilon_closure_.GetEpsilonClosure(*subset, &closed_subset);
602 
603  // Now follow non-epsilon arcs [and also process final states]
604  ProcessFinal(closed_subset, state);
605 
606  // Now handle transitions out of these states.
607  ProcessTransitions(closed_subset, state);
608  }
void ProcessFinal(const vector< Element > &closed_subset, OutputStateId state)
void GetEpsilonClosure(const vector< Element > &input_subset, vector< Element > *output_subset)
void ProcessTransitions(const vector< Element > &closed_subset, OutputStateId state)
void ProcessTransition ( OutputStateId  state,
Label  ilabel,
vector< Element > *  subset 
)
private

Definition at line 997 of file determinize-star-inl.h.

References fst::Divide(), rnnlm::i, DeterminizerStar< F >::TempArc::ilabel, KALDI_ERR, DeterminizerStar< F >::TempArc::nextstate, DeterminizerStar< F >::TempArc::ostring, DeterminizerStar< F >::output_arcs_, fst::Plus(), DeterminizerStar< F >::repository_, DeterminizerStar< F >::SubsetToStateId(), and DeterminizerStar< F >::TempArc::weight.

Referenced by DeterminizerStar< F >::ProcessTransitions().

997  {
998  // At input, "subset" may contain duplicates for a given dest state (but in sorted
999  // order). This function removes duplicates from "subset", normalizes it, and adds
1000  // a transition to the dest. state (possibly affecting Q_ and hash_, if state did not
1001  // exist).
1002 
1003  typedef typename vector<Element>::iterator IterType;
1004  { // This block makes the subset have one unique Element per state, adding the weights.
1005  IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end();
1006  size_t num_out = 0;
1007  // Merge elements with same state-id
1008  while (cur_in != end) { // while we have more elements to process.
1009  // At this point, cur_out points to location of next place we want to put an element,
1010  // cur_in points to location of next element we want to process.
1011  if (cur_in != cur_out) *cur_out = *cur_in;
1012  cur_in++;
1013  while (cur_in != end && cur_in->state == cur_out->state) { // merge elements.
1014  if (cur_in->string != cur_out->string) {
1015  KALDI_ERR << "FST was not functional -> not determinizable";
1016  }
1017  cur_out->weight = Plus(cur_out->weight, cur_in->weight);
1018  cur_in++;
1019  }
1020  cur_out++;
1021  num_out++;
1022  }
1023  subset->resize(num_out);
1024  }
1025 
1026  StringId common_str;
1027  Weight tot_weight;
1028  { // This block computes common_str and tot_weight (essentially: the common divisor)
1029  // and removes them from the elements.
1030  vector<Label> seq;
1031 
1032  IterType begin = subset->begin(), iter, end = subset->end();
1033  { // This block computes "seq", which is the common prefix, and "common_str",
1034  // which is the StringId version of "seq".
1035  vector<Label> tmp_seq;
1036  for (iter = begin; iter != end; ++iter) {
1037  if (iter == begin) {
1038  repository_.SeqOfId(iter->string, &seq);
1039  } else {
1040  repository_.SeqOfId(iter->string, &tmp_seq);
1041  if (tmp_seq.size() < seq.size()) seq.resize(tmp_seq.size()); // size of shortest one.
1042  for (size_t i = 0; i < seq.size(); i++) // seq.size() is the shorter one at this point.
1043  if (tmp_seq[i] != seq[i]) seq.resize(i);
1044  }
1045  if (seq.size() == 0) break; // will not get any prefix.
1046  }
1047  common_str = repository_.IdOfSeq(seq);
1048  }
1049 
1050  { // This block computes "tot_weight".
1051  iter = begin;
1052  tot_weight = iter->weight;
1053  for (++iter; iter != end; ++iter)
1054  tot_weight = Plus(tot_weight, iter->weight);
1055  }
1056 
1057  // Now divide out common stuff from elements.
1058  size_t prefix_len = seq.size();
1059  for (iter = begin; iter != end; ++iter) {
1060  iter->weight = Divide(iter->weight, tot_weight);
1061  iter->string = repository_.RemovePrefix(iter->string, prefix_len);
1062  }
1063  }
1064 
1065  // Now add an arc to the state that the subset represents.
1066  // We may create a new state id for this (in SubsetToStateId).
1067  TempArc temp_arc;
1068  temp_arc.ilabel = ilabel;
1069  temp_arc.nextstate = SubsetToStateId(*subset); // may or may not really add the subset.
1070  temp_arc.ostring = common_str;
1071  temp_arc.weight = tot_weight;
1072  output_arcs_[state].push_back(temp_arc); // record the arc.
1073 }
OutputStateId SubsetToStateId(const vector< Element > &subset)
LatticeWeightTpl< FloatType > Divide(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, DivideType typ=DIVIDE_ANY)
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
#define KALDI_ERR
Definition: kaldi-error.h:127
vector< vector< TempArc > > output_arcs_
StringRepository< Label, StringId > repository_
void ProcessTransitions ( const vector< Element > &  closed_subset,
OutputStateId  state 
)
inlineprivate

Definition at line 509 of file determinize-star-inl.h.

References DeterminizerStar< F >::ifst_, DeterminizerStar< F >::ProcessTransition(), DeterminizerStar< F >::repository_, DeterminizerStar< F >::Element::state, DeterminizerStar< F >::Element::string, fst::Times(), and DeterminizerStar< F >::Element::weight.

Referenced by DeterminizerStar< F >::ProcessSubset().

509  {
510  vector<pair<Label, Element> > all_elems;
511  { // Push back into "all_elems", elements corresponding to all non-epsilon-input transitions
512  // out of all states in "closed_subset".
513  typename vector<Element>::const_iterator iter = closed_subset.begin(),
514  end = closed_subset.end();
515  for (; iter != end; ++iter) {
516  const Element &elem = *iter;
517  for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state);
518  !aiter.Done(); aiter.Next()) {
519  const Arc &arc = aiter.Value();
520  if (arc.ilabel != 0) { // Non-epsilon transition -- ignore epsilons here.
521  pair<Label, Element> this_pr;
522  this_pr.first = arc.ilabel;
523  Element &next_elem(this_pr.second);
524  next_elem.state = arc.nextstate;
525  next_elem.weight = Times(elem.weight, arc.weight);
526  if (arc.olabel == 0) // output epsilon-- this is simple case so
527  // handle separately for efficiency
528  next_elem.string = elem.string;
529  else {
530  vector<Label> seq;
531  repository_.SeqOfId(elem.string, &seq);
532  seq.push_back(arc.olabel);
533  next_elem.string = repository_.IdOfSeq(seq);
534  }
535  all_elems.push_back(this_pr);
536  }
537  }
538  }
539  }
540  PairComparator pc;
541  std::sort(all_elems.begin(), all_elems.end(), pc);
542  // now sorted first on input label, then on state.
543  typedef typename vector<pair<Label, Element> >::const_iterator PairIter;
544  PairIter cur = all_elems.begin(), end = all_elems.end();
545  vector<Element> this_subset;
546  while (cur != end) {
547  // Process ranges that share the same input symbol.
548  Label ilabel = cur->first;
549  this_subset.clear();
550  while (cur != end && cur->first == ilabel) {
551  this_subset.push_back(cur->second);
552  cur++;
553  }
554  // We now have a subset for this ilabel.
555  ProcessTransition(state, ilabel, &this_subset);
556  }
557  }
void ProcessTransition(OutputStateId state, Label ilabel, vector< Element > *subset)
const Fst< Arc > * ifst_
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
StringRepository< Label, StringId > repository_
OutputStateId SubsetToStateId ( const vector< Element > &  subset)
inlineprivate

Definition at line 563 of file determinize-star-inl.h.

References DeterminizerStar< F >::allow_partial_, DeterminizerStar< F >::hash_, DeterminizerStar< F >::output_arcs_, and DeterminizerStar< F >::Q_.

Referenced by DeterminizerStar< F >::Determinize(), and DeterminizerStar< F >::ProcessTransition().

563  { // may add the subset to the queue.
564  typedef typename SubsetHash::iterator IterType;
565  IterType iter = hash_.find(&subset);
566  if (iter == hash_.end()) { // was not there.
567  vector<Element> *new_subset = new vector<Element>(subset);
568  OutputStateId new_state_id = (OutputStateId) output_arcs_.size();
569  bool ans = hash_.insert(std::pair<const vector<Element>*,
570  OutputStateId>(new_subset,
571  new_state_id)).second;
572  assert(ans);
573  output_arcs_.push_back(vector<TempArc>());
574  if (allow_partial_ == false) {
575  // If --allow-partial is not requested, we do the old way.
576  Q_.push_front(pair<vector<Element>*, OutputStateId>(new_subset, new_state_id));
577  } else {
578  // If --allow-partial is requested, we do breadth first search. This
579  // ensures that when we return partial results, we return the states
580  // that are reachable by the fewest steps from the start state.
581  Q_.push_back(pair<vector<Element>*, OutputStateId>(new_subset, new_state_id));
582  }
583  return new_state_id;
584  } else {
585  return iter->second; // the OutputStateId.
586  }
587  }
deque< pair< vector< Element > *, OutputStateId > > Q_
vector< vector< TempArc > > output_arcs_

Member Data Documentation

bool allow_partial_
private
float delta_
private
bool determinized_
private
EpsilonClosure epsilon_closure_
private

Definition at line 628 of file determinize-star-inl.h.

Referenced by DeterminizerStar< F >::ProcessSubset().

SubsetEqual equal_
private

Definition at line 624 of file determinize-star-inl.h.

SubsetKey hasher_
private

Definition at line 623 of file determinize-star-inl.h.

bool is_partial_
private
int max_states_
private

Definition at line 619 of file determinize-star-inl.h.

Referenced by DeterminizerStar< F >::Determinize().

deque<pair<vector<Element>*, OutputStateId> > Q_
private

The documentation for this class was generated from the following file: