21 #ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ 22 #define KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ 43 return (parent == other.
parent && i == other.
i);
60 std::pair<typename SetType::iterator, bool> pr =
set_.insert(
new_entry_);
74 if (a == NULL)
return b;
75 else if (b == NULL)
return a;
76 std::vector<IntType> v;
79 for(
size_t i = 0;
i < v.size();
i++)
84 std::vector<IntType> a_vec, b_vec;
87 const Entry *ans = NULL;
88 for(
size_t i = 0;
i < a_vec.size() &&
i < b_vec.size() &&
89 a_vec[
i] == b_vec[
i];
i++)
97 std::vector<IntType> *b) {
98 size_t a_size =
Size(a), b_size = b->size();
99 while (a_size> b_size) {
105 typename std::vector<IntType>::iterator b_begin = b->begin();
106 while (a_size != 0) {
107 if (a->
i != *(b_begin + a_size - 1))
112 if (b_size != b->size())
119 std::vector<IntType> a_vec;
121 assert(a_vec.size() >=
n);
122 const Entry *ans = NULL;
123 for(
size_t i = n;
i < a_vec.size();
i++)
133 if(a == NULL)
return true;
134 if (a == b)
return true;
135 if (b == NULL)
return false;
142 while (entry != NULL) {
150 size_t length =
Size(entry);
153 typename std::vector<IntType>::reverse_iterator iter = out->rbegin();
154 while (entry != NULL) {
163 const Entry *e = NULL;
164 for(
size_t i = 0;
i < vec.size();
i++)
172 for (
typename SetType::iterator iter =
set_.begin();
188 void Rebuild(
const std::vector<const Entry*> &to_keep) {
190 for (
typename std::vector<const Entry*>::const_iterator
191 iter = to_keep.begin();
192 iter != to_keep.end(); ++iter)
195 for (
typename SetType::iterator iter =
set_.begin();
196 iter !=
set_.end(); ++iter) {
197 if (tmp_set.count(*iter) == 0)
212 size_t prime = 49109;
213 return static_cast<size_t>(entry->
i)
214 + prime * reinterpret_cast<size_t>(entry->
parent);
223 typedef std::unordered_set<const Entry*, EntryKey, EntryEqual>
SetType;
227 if (to_add == NULL)
return;
228 typename SetType::iterator iter = tmp_set->find(to_add);
229 if (iter == tmp_set->end()) {
230 tmp_set->insert(to_add);
266 typedef ArcTpl<Weight>
Arc;
272 void Output(MutableFst<CompactArc> *ofst,
bool destroy =
true) {
273 assert(determinized_);
275 StateId nStates =
static_cast<StateId
>(output_arcs_.size());
278 ofst->DeleteStates();
279 ofst->SetStart(kNoStateId);
283 for (StateId s = 0;s < nStates;s++) {
289 for (StateId this_state = 0; this_state < nStates; this_state++) {
290 std::vector<TempArc> &this_vec(output_arcs_[this_state]);
291 typename std::vector<TempArc>::const_iterator iter = this_vec.begin(), end = this_vec.end();
293 for (;iter != end; ++iter) {
294 const TempArc &temp_arc(*iter);
296 std::vector<Label> seq;
297 repository_.ConvertToVector(temp_arc.
string, &seq);
298 CompactWeight weight(temp_arc.
weight, seq);
300 ofst->SetFinal(this_state, weight);
303 new_arc.ilabel = temp_arc.
ilabel;
304 new_arc.olabel = temp_arc.
ilabel;
305 new_arc.weight = weight;
306 ofst->AddArc(this_state, new_arc);
310 if (destroy) { std::vector<TempArc> temp;
std::swap(temp, this_vec); }
312 if (destroy) { std::vector<std::vector<TempArc> > temp;
std::swap(temp, output_arcs_); }
318 void Output(MutableFst<Arc> *ofst,
bool destroy =
true) {
321 ofst->DeleteStates();
323 ofst->SetStart(kNoStateId);
334 for (
OutputStateId this_state = 0; this_state < nStates; this_state++) {
335 std::vector<TempArc> &this_vec(output_arcs_[this_state]);
337 typename std::vector<TempArc>::const_iterator iter = this_vec.begin(), end = this_vec.end();
338 for (; iter != end; ++iter) {
339 const TempArc &temp_arc(*iter);
340 std::vector<Label> seq;
341 repository_.ConvertToVector(temp_arc.
string, &seq);
347 for (
size_t i = 0;
i < seq.size();
i++) {
350 arc.nextstate = next_state;
351 arc.weight = (
i == 0 ? temp_arc.
weight : Weight::One());
354 ofst->AddArc(cur_state, arc);
355 cur_state = next_state;
357 ofst->SetFinal(cur_state, (seq.size() == 0 ? temp_arc.
weight : Weight::One()));
362 for (
size_t i = 0;
i+1 < seq.size();
i++) {
366 arc.nextstate = next_state;
367 arc.weight = (
i == 0 ? temp_arc.
weight : Weight::One());
368 arc.ilabel = (
i == 0 ? temp_arc.
ilabel : 0);
370 ofst->AddArc(cur_state, arc);
371 cur_state = next_state;
376 arc.weight = (seq.size() <= 1 ? temp_arc.
weight : Weight::One());
377 arc.ilabel = (seq.size() <= 1 ? temp_arc.
ilabel : 0);
378 arc.olabel = (seq.size() > 0 ? seq.back() : 0);
379 ofst->AddArc(cur_state, arc);
384 std::vector<TempArc> temp; temp.swap(this_vec);
388 std::vector<std::vector<TempArc> > temp;
389 temp.swap(output_arcs_);
390 repository_.Destroy();
402 num_arcs_(0), num_elems_(0), ifst_(ifst.
Copy()), opts_(opts),
403 equal_(opts_.delta), determinized_(false),
404 minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {
416 for (
typename MinimalSubsetHash::iterator iter = minimal_hash_.begin();
417 iter != minimal_hash_.end(); ++iter)
420 for (
typename InitialSubsetHash::iterator iter = initial_hash_.begin();
421 iter != initial_hash_.end(); ++iter)
424 { std::vector<std::vector<Element>* > output_states_tmp;
425 output_states_tmp.swap(output_states_); }
426 { std::vector<char> tmp; tmp.swap(isymbol_or_final_); }
427 { std::vector<OutputStateId> tmp; tmp.swap(queue_); }
428 { std::vector<std::pair<Label, Element> > tmp; tmp.swap(all_elems_tmp_); }
439 std::vector<StringId> needed_strings;
440 for (
size_t i = 0;
i < output_arcs_.size();
i++)
441 for (
size_t j = 0;
j < output_arcs_[
i].size();
j++)
442 needed_strings.push_back(output_arcs_[
i][
j].string);
446 for (
size_t i = 0;
i < output_states_.size();
i++)
447 for (
size_t j = 0; j < output_states_[
i]->size(); j++)
448 needed_strings.push_back((*(output_states_[
i]))[j].string);
451 for (
typename InitialSubsetHash::const_iterator
452 iter = initial_hash_.begin();
453 iter != initial_hash_.end(); ++iter) {
454 const std::vector<Element> &vec = *(iter->first);
456 for (
size_t i = 0; i < vec.size(); i++)
457 needed_strings.push_back(vec[i].string);
458 needed_strings.push_back(elem.
string);
461 std::sort(needed_strings.begin(), needed_strings.end());
462 needed_strings.erase(std::unique(needed_strings.begin(),
463 needed_strings.end()),
464 needed_strings.end());
465 repository_.Rebuild(needed_strings);
469 int32 repo_size = repository_.MemSize(),
470 arcs_size = num_arcs_ *
sizeof(
TempArc),
471 elems_size = num_elems_ *
sizeof(
Element),
472 total_size = repo_size + arcs_size + elems_size;
473 if (opts_.max_mem > 0 && total_size > opts_.max_mem) {
477 int32 new_repo_size = repository_.MemSize(),
478 new_total_size = new_repo_size + arcs_size + elems_size;
480 KALDI_VLOG(2) <<
"Rebuilt repository in determinize-lattice: repository shrank from " 481 << repo_size <<
" to " << new_repo_size <<
" bytes (approximately)";
483 if (new_total_size > static_cast<int32>(opts_.max_mem * 0.8)) {
486 KALDI_WARN <<
"Failure in determinize-lattice: size exceeds maximum " 487 << opts_.max_mem <<
" bytes; (repo,arcs,elems) = (" 488 << repo_size <<
"," << arcs_size <<
"," << elems_size
489 <<
"), after rebuilding, repo size was " << new_repo_size;
499 assert(!determinized_);
504 InitializeDeterminization();
505 while (!queue_.empty()) {
508 ProcessState(out_state);
509 if (debug_ptr && *debug_ptr) Debug();
510 if (!CheckMemoryUsage())
return false;
512 return (determinized_ =
true);
513 }
catch (
const std::bad_alloc &) {
514 int32 repo_size = repository_.MemSize(),
515 arcs_size = num_arcs_ *
sizeof(
TempArc),
516 elems_size = num_elems_ *
sizeof(
Element),
517 total_size = repo_size + arcs_size + elems_size;
518 KALDI_WARN <<
"Memory allocation error doing lattice determinization; using " 519 << total_size <<
" bytes (max = " << opts_.max_mem
520 <<
" (repo,arcs,elems) = (" 521 << repo_size <<
"," << arcs_size <<
"," << elems_size <<
")";
522 return (determinized_ =
false);
523 }
catch (
const std::runtime_error &) {
524 KALDI_WARN <<
"Caught exception doing lattice determinization";
525 return (determinized_ =
false);
538 typedef const typename StringRepositoryType::Entry*
StringId;
547 return (state != other.
state ||
string != other.
string ||
552 return state < other.
state;
580 size_t operator ()(
const std::vector<Element> * subset)
const {
581 size_t hash = 0, factor = 1;
582 for (
typename std::vector<Element>::const_iterator iter= subset->begin(); iter != subset->end(); ++iter) {
584 hash += iter->state +
reinterpret_cast<size_t>(iter->string);
595 bool operator ()(
const std::vector<Element> * s1,
const std::vector<Element> * s2)
const {
596 size_t sz = s1->size();
598 if (sz != s2->size())
return false;
599 typename std::vector<Element>::const_iterator iter1 = s1->begin(),
600 iter1_end = s1->end(), iter2=s2->begin();
601 for (; iter1 < iter1_end; ++iter1, ++iter2) {
602 if (iter1->state != iter2->state ||
603 iter1->string != iter2->string ||
604 !
ApproxEqual(iter1->weight, iter2->weight, delta_))
return false;
617 bool operator ()(
const std::vector<Element> * s1,
const std::vector<Element> * s2)
const {
618 size_t sz = s1->size();
620 if (sz != s2->size())
return false;
621 typename std::vector<Element>::const_iterator iter1 = s1->begin(),
622 iter1_end = s1->end(), iter2=s2->begin();
623 for (; iter1 < iter1_end; ++iter1, ++iter2) {
624 if (iter1->state != iter2->state)
return false;
632 typedef std::unordered_map<const std::vector<Element>*,
OutputStateId,
640 typedef std::unordered_map<const std::vector<Element>*,
Element,
648 assert(!subset->empty());
649 typename std::vector<Element>::iterator cur_in = subset->begin(),
650 cur_out = subset->begin(), end = subset->end();
651 while (cur_in != end) {
652 if(IsIsymbolOrFinal(cur_in->state)) {
658 subset->resize(cur_out - subset->begin());
665 typename MinimalSubsetHash::const_iterator iter
666 = minimal_hash_.find(&subset);
667 if (iter != minimal_hash_.end())
669 OutputStateId ans = static_cast<OutputStateId>(output_arcs_.size());
670 std::vector<Element> *subset_ptr =
new std::vector<Element>(subset);
671 output_states_.push_back(subset_ptr);
672 num_elems_ += subset_ptr->size();
673 output_arcs_.push_back(std::vector<TempArc>());
674 minimal_hash_[subset_ptr] = ans;
675 queue_.push_back(ans);
685 typename InitialSubsetHash::const_iterator iter
686 = initial_hash_.find(&subset_in);
687 if (iter != initial_hash_.end()) {
688 const Element &elem = iter->second;
689 *remaining_weight = elem.
weight;
690 *common_prefix = elem.
string;
691 if (elem.
weight == Weight::Zero())
696 std::vector<Element> subset(subset_in);
701 EpsilonClosure(&subset);
702 ConvertToMinimal(&subset);
711 *remaining_weight = elem.
weight;
712 *common_prefix = elem.
string;
713 if (elem.
weight == Weight::Zero())
719 std::vector<Element> *initial_subset_ptr =
new std::vector<Element>(subset_in);
721 initial_hash_[initial_subset_ptr] = elem;
722 num_elems_ += initial_subset_ptr->size();
736 if (weight_comp != 0)
return weight_comp;
738 if (a_str == b_str)
return 0;
739 std::vector<IntType> a_vec, b_vec;
740 repository_.ConvertToVector(a_str, &a_vec);
741 repository_.ConvertToVector(b_str, &b_vec);
743 int a_len = a_vec.size(), b_len = b_vec.size();
746 if (a_len > b_len)
return -1;
747 else if (a_len < b_len)
return 1;
748 for(
int i = 0;
i < a_len;
i++) {
749 if (a_vec[
i] < b_vec[
i])
return -1;
750 else if (a_vec[i] > b_vec[i])
return 1;
767 std::deque<Element> queue;
768 std::unordered_map<InputStateId, Element> cur_subset;
769 typedef typename std::unordered_map<InputStateId, Element>::iterator MapIter;
770 typedef typename std::vector<Element>::const_iterator VecIter;
772 for (VecIter iter = subset->begin(); iter != subset->end(); ++iter) {
773 queue.push_back(*iter);
774 cur_subset[iter->state] = *iter;
778 bool sorted = ((ifst_->Properties(kILabelSorted,
false) & kILabelSorted) != 0);
779 bool replaced_elems =
false;
782 while (queue.size() != 0) {
793 if (replaced_elems && cur_subset[elem.
state] != elem)
795 if (opts_.max_loop > 0 && counter++ > opts_.max_loop) {
796 KALDI_ERR <<
"Lattice determinization aborted since looped more than " 797 << opts_.max_loop <<
" times during epsilon closure";
799 for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.
state); !aiter.Done(); aiter.Next()) {
800 const Arc &arc = aiter.Value();
801 if (sorted && arc.ilabel != 0)
break;
804 && arc.weight != Weight::Zero()) {
806 next_elem.
state = arc.nextstate;
812 next_elem.
string = repository_.Successor(elem.
string, arc.olabel);
814 MapIter iter = cur_subset.find(next_elem.
state);
815 if (iter == cur_subset.end()) {
817 cur_subset[next_elem.
state] = next_elem;
818 queue.push_back(next_elem);
824 iter->second.weight, iter->second.string);
826 iter->second.string = next_elem.
string;
827 iter->second.weight = next_elem.
weight;
828 queue.push_back(next_elem);
829 replaced_elems =
true;
839 subset->reserve(cur_subset.size());
840 MapIter iter = cur_subset.begin(), end = cur_subset.end();
841 for (; iter != end; ++iter) subset->push_back(iter->second);
843 std::sort(subset->begin(), subset->end());
853 const std::vector<Element> &minimal_subset = *(output_states_[output_state]);
858 bool is_final =
false;
860 Weight final_weight = Weight::Zero();
861 typename std::vector<Element>::const_iterator iter = minimal_subset.begin(), end = minimal_subset.end();
862 for (; iter != end; ++iter) {
866 if (this_final_weight != Weight::Zero() &&
867 (!is_final ||
Compare(this_final_weight, this_final_string,
868 final_weight, final_string) == 1)) {
872 final_weight = this_final_weight;
873 final_string = this_final_string;
881 temp_arc.
string = final_string;
882 temp_arc.
weight = final_weight;
883 output_arcs_[output_state].push_back(temp_arc);
897 *common_str = repository_.EmptyString();
898 *tot_weight = Weight::Zero();
901 size_t size = elems->size();
902 std::vector<IntType> common_prefix;
903 repository_.ConvertToVector((*elems)[0].
string, &common_prefix);
904 Weight weight = (*elems)[0].weight;
905 for (
size_t i = 1;
i < size;
i++) {
906 weight =
Plus(weight, (*elems)[
i].weight);
907 repository_.ReduceToCommonPrefix((*elems)[
i].
string, &common_prefix);
909 assert(weight != Weight::Zero());
911 size_t prefix_len = common_prefix.size();
912 for (
size_t i = 0;
i < size;
i++) {
913 (*elems)[
i].weight =
Divide((*elems)[
i].weight, weight, DIVIDE_LEFT);
915 repository_.RemovePrefix((*elems)[
i].
string, prefix_len);
917 *common_str = repository_.ConvertFromVector(common_prefix);
918 *tot_weight = weight;
925 typedef typename std::vector<Element>::iterator IterType;
929 assert(subset->size() < 2 || (*subset)[0].state <= (*subset)[1].state);
931 IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end();
934 while (cur_in != end) {
937 if (cur_in != cur_out) *cur_out = *cur_in;
939 while (cur_in != end && cur_in->state == cur_out->state) {
940 if (
Compare(cur_in->weight, cur_in->string,
941 cur_out->weight, cur_out->string) == 1) {
943 cur_out->string = cur_in->string;
944 cur_out->weight = cur_in->weight;
951 subset->resize(num_out);
963 MakeSubsetUnique(subset);
967 NormalizeSubset(subset, &tot_weight, &common_str);
973 nextstate = InitialToStateId(*subset,
976 common_str = repository_.Concatenate(common_str, next_common_str);
977 tot_weight =
Times(tot_weight, next_tot_weight);
985 temp_arc.
string = common_str;
986 temp_arc.
weight = tot_weight;
987 output_arcs_[state].push_back(temp_arc);
998 inline bool operator () (
const std::pair<Label, Element> &p1,
const std::pair<Label, Element> &p2) {
999 if (p1.first < p2.first)
return true;
1000 else if (p1.first > p2.first)
return false;
1002 return p1.second.state < p2.second.state;
1019 const std::vector<Element> &minimal_subset = *(output_states_[output_state]);
1022 std::vector<std::pair<Label, Element> > &all_elems(all_elems_tmp_);
1027 typename std::vector<Element>::const_iterator iter = minimal_subset.begin(), end = minimal_subset.end();
1028 for (;iter != end; ++iter) {
1030 for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.
state); ! aiter.Done(); aiter.Next()) {
1031 const Arc &arc = aiter.Value();
1033 && arc.weight != Weight::Zero()) {
1034 std::pair<Label, Element> this_pr;
1035 this_pr.first = arc.ilabel;
1036 Element &next_elem(this_pr.second);
1037 next_elem.
state = arc.nextstate;
1039 if (arc.olabel == 0)
1042 next_elem.
string = repository_.Successor(elem.
string, arc.olabel);
1043 all_elems.push_back(this_pr);
1049 std::sort(all_elems.begin(), all_elems.end(), pc);
1051 typedef typename std::vector<std::pair<Label, Element> >::const_iterator PairIter;
1052 PairIter cur = all_elems.begin(), end = all_elems.end();
1053 std::vector<Element> this_subset;
1054 while (cur != end) {
1056 Label ilabel = cur->first;
1057 this_subset.clear();
1058 while (cur != end && cur->first == ilabel) {
1059 this_subset.push_back(cur->second);
1063 assert(!this_subset.empty());
1064 ProcessTransition(output_state, ilabel, &this_subset);
1075 ProcessFinal(output_state);
1076 ProcessTransitions(output_state);
1085 KALDI_WARN <<
"Debug function called (probably SIGUSR1 caught)";
1089 if (output_arcs_.size() <= 2) {
1092 size_t max_state = output_arcs_.size() - 2;
1095 std::vector<OutputStateId> predecessor(max_state+1, kNoStateId);
1096 for (
size_t i = 0;
i < max_state;
i++) {
1097 for (
size_t j = 0;
j < output_arcs_[
i].size();
j++) {
1102 if (nextstate <= max_state && nextstate >
i)
1103 predecessor[nextstate] =
i;
1106 std::vector<std::pair<Label, StringId> > traceback;
1110 while (cur_state != 0 && cur_state != kNoStateId) {
1112 std::pair<Label, StringId> p;
1114 for (i = 0; i < output_arcs_[last_state].size(); i++) {
1115 if (output_arcs_[last_state][i].nextstate == cur_state) {
1116 p.first = output_arcs_[last_state][
i].ilabel;
1117 p.second = output_arcs_[last_state][
i].string;
1118 traceback.push_back(p);
1123 cur_state = last_state;
1125 if (cur_state == kNoStateId)
1126 KALDI_WARN <<
"Traceback did not reach start state " 1127 <<
"(possibly debug-code error)";
1129 std::stringstream ss;
1130 ss <<
"Traceback follows in format " 1131 <<
"ilabel (olabel olabel) ilabel (olabel) ... :";
1132 for (ssize_t
i = traceback.size() - 1;
i >= 0;
i--) {
1133 ss <<
' ' << traceback[
i].first <<
" ( ";
1134 std::vector<Label> seq;
1135 repository_.ConvertToVector(traceback[
i].second, &seq);
1136 for (
size_t j = 0;
j < seq.size();
j++)
1137 ss << seq[
j] <<
' ';
1147 if (isymbol_or_final_.size() <= state)
1148 isymbol_or_final_.resize(state+1, static_cast<char>(OSF_UNKNOWN));
1149 if (isymbol_or_final_[state] == static_cast<char>(OSF_NO))
1151 else if (isymbol_or_final_[state] == static_cast<char>(OSF_YES))
1154 isymbol_or_final_[state] =
static_cast<char>(OSF_NO);
1155 if (ifst_->Final(state) != Weight::Zero())
1156 isymbol_or_final_[state] = static_cast<char>(OSF_YES);
1157 for (ArcIterator<Fst<Arc> > aiter(*ifst_, state);
1160 const Arc &arc = aiter.Value();
1161 if (arc.ilabel != 0 && arc.weight != Weight::Zero()) {
1162 isymbol_or_final_[state] =
static_cast<char>(OSF_YES);
1166 return IsIsymbolOrFinal(state);
1170 if(ifst_->Properties(kExpanded,
false) != 0) {
1173 #if !(__GNUC__ == 4 && __GNUC_MINOR__ == 0) 1175 down_cast<
const ExpandedFst<Arc>*,
const Fst<Arc> >(ifst_)->NumStates();
1176 minimal_hash_.rehash(num_states/2 + 3);
1177 initial_hash_.rehash(num_states/2 + 3);
1181 if (start_id != kNoStateId) {
1194 elem.
state = start_id;
1195 elem.
weight = Weight::One();
1196 elem.
string = repository_.EmptyString();
1197 std::vector<Element> subset;
1198 subset.push_back(elem);
1199 EpsilonClosure(&subset);
1200 ConvertToMinimal(&subset);
1202 std::vector<Element> *subset_ptr =
new std::vector<Element>(subset);
1203 assert(output_arcs_.empty() && output_states_.empty());
1205 output_states_.push_back(subset_ptr);
1206 output_arcs_.push_back(std::vector<TempArc>());
1208 minimal_hash_[subset_ptr] = initial_state;
1209 queue_.push_back(initial_state);
1263 template<
class Weight,
class IntType>
1265 MutableFst<ArcTpl<Weight> > *ofst,
1268 ofst->SetInputSymbols(ifst.InputSymbols());
1269 ofst->SetOutputSymbols(ifst.OutputSymbols());
1280 template<
class Weight,
class IntType>
1285 ofst->SetInputSymbols(ifst.InputSymbols());
1286 ofst->SetOutputSymbols(ifst.OutputSymbols());
1296 #endif // KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_ fst::StdArc::StateId StateId
Arc::StateId InputStateId
OutputStateId MinimalToStateId(const std::vector< Element > &subset)
const Entry * ConvertFromVector(const std::vector< IntType > &vec)
LatticeWeightTpl< FloatType > Divide(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, DivideType typ=DIVIDE_ANY)
const StringRepositoryType::Entry * StringId
void Rebuild(const std::vector< const Entry *> &to_keep)
const Entry * CommonPrefix(const Entry *a, const Entry *b)
bool operator!=(const LatticeWeightTpl< FloatType > &wa, const LatticeWeightTpl< FloatType > &wb)
void Output(MutableFst< Arc > *ofst, bool destroy=true)
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
void InitializeDeterminization()
bool Determinize(bool *debug_ptr)
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
void Output(MutableFst< CompactArc > *ofst, bool destroy=true)
void ConvertToVector(const Entry *entry, std::vector< IntType > *out) const
LatticeStringRepository< IntType > repository_
std::unordered_set< const Entry *, EntryKey, EntryEqual > SetType
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
bool ApproxEqual(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2, float delta=kDelta)
OutputStateId InitialToStateId(const std::vector< Element > &subset_in, Weight *remaining_weight, StringId *common_prefix)
bool operator()(const Entry *e1, const Entry *e2) const
const Entry * Successor(const Entry *parent, IntType i)
std::vector< char > isymbol_or_final_
size_t operator()(const Entry *entry) const
void ProcessTransition(OutputStateId state, Label ilabel, std::vector< Element > *subset)
LatticeStringRepository()
void RebuildHelper(const Entry *to_add, SetType *tmp_set)
const Entry * RemovePrefix(const Entry *a, size_t n)
void NormalizeSubset(std::vector< Element > *elems, Weight *tot_weight, StringId *common_str)
std::vector< std::vector< TempArc > > output_arcs_
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
void ConvertToMinimal(std::vector< Element > *subset)
const Entry * EmptyString()
int Compare(const Weight &a_w, StringId a_str, const Weight &b_w, StringId b_str) const
LatticeDeterminizer(const Fst< Arc > &ifst, DeterminizeLatticeOptions opts)
bool IsIsymbolOrFinal(InputStateId state)
void EpsilonClosure(std::vector< Element > *subset)
MinimalSubsetHash minimal_hash_
ArcTpl< CompactWeight > CompactArc
void MakeSubsetUnique(std::vector< Element > *subset)
bool operator==(const Entry &other) const
std::unordered_map< const std::vector< Element > *, Element, SubsetKey, SubsetEqual > InitialSubsetHash
DeterminizeLatticeOptions opts_
fst::StdArc::Weight Weight
std::vector< std::pair< Label, Element > > all_elems_tmp_
bool operator<(const Int32Pair &a, const Int32Pair &b)
~LatticeStringRepository()
Arc::StateId OutputStateId
std::vector< OutputStateId > queue_
CompactLatticeWeightTpl< Weight, IntType > CompactWeight
KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository)
void ProcessTransitions(OutputStateId output_state)
int Compare(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
Compare returns -1 if w1 < w2, +1 if w1 > w2, and 0 if w1 == w2.
LatticeStringRepository< IntType > StringRepositoryType
#define KALDI_ASSERT(cond)
bool DeterminizeLattice(const Fst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< Weight > > *ofst, DeterminizeLatticeOptions opts, bool *debug_ptr)
This function implements the normal version of DeterminizeLattice, in which the output strings are re...
std::vector< std::vector< Element > *> output_states_
void ProcessState(OutputStateId output_state)
bool IsPrefixOf(const Entry *a, const Entry *b) const
const Entry * Concatenate(const Entry *a, const Entry *b)
void ReduceToCommonPrefix(const Entry *a, std::vector< IntType > *b)
void ProcessFinal(OutputStateId output_state)
std::unordered_map< const std::vector< Element > *, OutputStateId, SubsetKey, SubsetEqual > MinimalSubsetHash
size_t Size(const Entry *entry) const
void Copy(const CuMatrixBase< Real > &src, const CuArray< int32 > ©_from_indices, CuMatrixBase< Real > *tgt)
Copies elements from src into tgt as given by copy_from_indices.
InitialSubsetHash initial_hash_