24 #include "fst/fstlib.h" 33 std::vector<int32> phone_window,
40 if (static_cast<int32>(phone_window.size()) != ctx_dep.
ContextWidth())
41 KALDI_ERR <<
"Context size mismatch, ilabel-info [from context FST is " 42 << phone_window.size() <<
", context-dependency object " 46 int32 phone = phone_window[P];
48 KALDI_ERR <<
"phone == 0. Some mismatch happened, or there is " 57 for (
int32 pdf_class = 0;
58 pdf_class < static_cast<int32>(pdfs.size());
60 if (! ctx_dep.
Compute(phone_window, pdf_class, &(pdfs[pdf_class])) ) {
61 std::ostringstream ctx_ss;
62 for (
size_t i = 0;
i < phone_window.size();
i++)
63 ctx_ss << phone_window[
i] <<
' ';
64 KALDI_ERR <<
"GetHmmAsFsa: context-dependency object could not produce " 65 <<
"an answer: pdf-class = " << pdf_class <<
" ctx-window = " 66 << ctx_ss.str() <<
". This probably points " 67 "to either a coding error in some graph-building process, " 68 "a mismatch of topology with context-dependency object, the " 69 "wrong FST being passed on a command-line, or something of " 70 " that general nature.";
73 std::pair<int32, std::vector<int32> > cache_index(phone, pdfs);
75 HmmCacheType::iterator iter = cache->find(cache_index);
76 if (iter != cache->end())
80 VectorFst<StdArc> *ans =
new VectorFst<StdArc>;
87 std::vector<StateId> state_ids;
88 for (
size_t i = 0;
i < entry.size();
i++)
89 state_ids.push_back(ans->AddState());
91 ans->SetStart(state_ids[0]);
92 StateId
final = state_ids.back();
93 ans->SetFinal(
final, Weight::One());
95 for (
int32 hmm_state = 0;
96 hmm_state < static_cast<int32>(entry.size());
98 int32 forward_pdf_class = entry[hmm_state].forward_pdf_class, forward_pdf;
99 int32 self_loop_pdf_class = entry[hmm_state].self_loop_pdf_class, self_loop_pdf;
100 if (forward_pdf_class ==
kNoPdf) {
104 KALDI_ASSERT(forward_pdf_class < static_cast<int32>(pdfs.size()));
105 KALDI_ASSERT(self_loop_pdf_class < static_cast<int32>(pdfs.size()));
106 forward_pdf = pdfs[forward_pdf_class];
107 self_loop_pdf = pdfs[self_loop_pdf_class];
111 trans_idx < static_cast<int32>(entry[hmm_state].transitions.size());
115 int32 dest_state = entry[hmm_state].transitions[trans_idx].first;
116 bool is_self_loop = (dest_state == hmm_state);
120 if (forward_pdf_class ==
kNoPdf) {
124 log_prob =
Log(entry[hmm_state].transitions[trans_idx].second);
136 ans->AddArc(state_ids[hmm_state],
137 Arc(label, label,
Weight(-log_prob), state_ids[dest_state]));
148 (*cache)[cache_index] = ans;
154 fst::VectorFst<fst::StdArc>*
161 if (static_cast<int32>(phone_window.size()) != ctx_dep.
ContextWidth())
162 KALDI_ERR <<
"Context size mismatch, ilabel-info [from context FST is " 163 <<(phone_window.size())<<
", context-dependency object " 167 int32 phone = phone_window[P];
173 VectorFst<StdArc> *ans =
new VectorFst<StdArc>;
182 std::vector<StateId> state_ids;
183 for (
size_t i = 0;
i < entry.size();
i++)
184 state_ids.push_back(ans->AddState());
186 ans->SetStart(state_ids[0]);
187 StateId
final = state_ids.back();
188 ans->SetFinal(
final, Weight::One());
190 for (
int32 hmm_state = 0;
191 hmm_state < static_cast<int32>(entry.size());
193 int32 forward_pdf_class = entry[hmm_state].forward_pdf_class, forward_pdf;
194 int32 self_loop_pdf_class = entry[hmm_state].self_loop_pdf_class, self_loop_pdf;
195 if (forward_pdf_class ==
kNoPdf) {
199 bool ans = ctx_dep.
Compute(phone_window, forward_pdf_class, &forward_pdf);
200 KALDI_ASSERT(ans &&
"Context-dependency computation failed.");
201 ans = ctx_dep.
Compute(phone_window, self_loop_pdf_class, &self_loop_pdf);
202 KALDI_ASSERT(ans &&
"Context-dependency computation failed.");
206 trans_idx < static_cast<int32>(entry[hmm_state].transitions.size());
210 int32 dest_state = entry[hmm_state].transitions[trans_idx].first;
211 if (forward_pdf_class ==
kNoPdf) {
216 log_prob =
Log(entry[hmm_state].transitions[trans_idx].second);
227 ans->AddArc(state_ids[hmm_state],
228 Arc(label, label,
Weight(-log_prob), state_ids[dest_state]));
242 fst::VectorFst<Arc> *ans =
new fst::VectorFst<Arc>;
246 ans->SetFinal(1, Weight::One());
247 ans->AddArc(0,
Arc(label, label, Weight::One(), 1));
254 fst::VectorFst<fst::StdArc> *
GetHTransducer(
const std::vector<std::vector<int32> > &ilabel_info,
258 std::vector<int32> *disambig_syms_left) {
259 KALDI_ASSERT(ilabel_info.size() >= 1 && ilabel_info[0].size() == 0);
269 std::vector<const ExpandedFst<Arc>* > fsts(ilabel_info.size(), NULL);
270 std::vector<int32> phones = trans_model.
GetPhones();
273 disambig_syms_left->clear();
276 int32 next_disambig_sym = first_disambig_sym;
278 if (ilabel_info.size() > 0)
281 for (
int32 j = 1; j < static_cast<int32>(ilabel_info.size());
j++) {
283 if (ilabel_info[
j][0] < 0 ||
284 (ilabel_info[
j][0] == 0 && ilabel_info[
j].size() == 1)) {
286 if (ilabel_info[
j].size() == 1) {
288 int32 disambig_sym_left = next_disambig_sym++;
289 disambig_syms_left->push_back(disambig_sym_left);
291 }
else if (ilabel_info[
j].size() == 2) {
293 KALDI_ERR <<
"ilabel-info seems to be for grammar-FST. You need to " 294 "supply the --nonterm-phones-offset option.";
297 nonterminal = -ilabel_info[
j][0],
298 left_context_phone = ilabel_info[
j][1];
299 if (nonterminal <= nonterm_phones_offset ||
300 left_context_phone <= 0 ||
301 left_context_phone > nonterm_phones_offset) {
302 KALDI_ERR <<
"Could not interpret this ilabel-info with " 303 "--nonterm-phones-offset=" << nonterm_phones_offset
304 <<
": nonterminal,left-context-phone=" 305 << nonterminal <<
',' << left_context_phone;
309 int32 encoded_symbol = big_number + nonterminal * encoding_multiple +
313 KALDI_ERR <<
"Could not decode this ilabel_info entry.";
316 std::vector<int32> phone_window = ilabel_info[
j];
338 std::vector<int32> *old2new_map) {
348 std::map<std::pair<int32, std::vector<int32> >,
int32 >
353 int32 num_syms_old = ilabel_info_old.size();
357 std::vector<int32> old2old_map(num_syms_old);
359 for (
int32 i = 1;
i < num_syms_old;
i++) {
360 const std::vector<int32> &vec = ilabel_info_old[
i];
361 if (vec.size() == 1 && vec[0] <= 0) {
366 int32 central_phone = vec[P];
368 std::vector<int32> state_seq(num_pdf_classes);
369 for (
int32 pdf_class = 0; pdf_class < num_pdf_classes; pdf_class++) {
370 if (!ctx_dep.
Compute(vec, pdf_class, &(state_seq[pdf_class]))) {
371 std::ostringstream ss;
373 KALDI_ERR <<
"tree did not succeed in converting phone window "<<ss.str();
376 std::pair<int32, std::vector<int32> > pr(central_phone, state_seq);
377 std::map<std::pair<int32, std::vector<int32> >,
int32 >::iterator iter
378 = pair_to_physical.find(pr);
379 if (iter == pair_to_physical.end()) {
380 pair_to_physical[pr] =
i;
383 old2old_map[
i] = iter->second;
388 std::vector<bool> seen(num_syms_old,
false);
389 for (
int32 i = 0;
i < num_syms_old;
i++)
390 seen[old2old_map[
i]] =
true;
396 old2new_map->resize(num_syms_old);
397 for (
int32 i = 0;
i < num_syms_old;
i++)
399 (*old2new_map)[
i] = cur_id++;
401 for (
int32 i = 0; i < num_syms_old; i++)
402 (*old2new_map)[
i] = (*old2new_map)[old2old_map[
i]];
409 VectorFst<StdArc> *ans =
new VectorFst<StdArc>;
414 ans->SetFinal(0, Weight::One());
417 ans->AddArc(0,
Arc(pdf+1, tid, Weight::One(), 0));
441 const std::vector<int32> &disambig_syms,
442 bool check_no_self_loops):
443 trans_model_(trans_model),
444 disambig_syms_(disambig_syms),
445 check_no_self_loops_(check_no_self_loops) { }
448 if (label == static_cast<int32>(fst::kNoLabel))
return -1;
449 else if (label >= 1 && label <= trans_model_.NumTransitionIds()) {
450 if (check_no_self_loops_ && trans_model_.IsSelfLoop(label))
451 KALDI_ERR <<
"AddSelfLoops: graph already has self-loops.";
452 return trans_model_.TransitionIdToTransitionState(label);
455 if (label != 0 && label < big_number)
457 disambig_syms_.end(),
473 const std::vector<int32> &disambig_syms,
475 bool check_no_self_loops,
476 fst::VectorFst<fst::StdArc> *
fst) {
490 int32 kNoTransState = f(kNoLabel);
494 std::vector<int32> state_in(fst->NumStates(), kNoTransState);
500 for (StateIterator<VectorFst<Arc> > siter(*fst);
503 StateId s = siter.Value();
504 for (MutableArcIterator<VectorFst<Arc> > aiter(fst, s);
507 Arc arc = aiter.Value();
508 int32 trans_state = f(arc.ilabel);
509 if (state_in[arc.nextstate] == kNoTransState)
510 state_in[arc.nextstate] = trans_state;
518 KALDI_ASSERT(state_in[fst->Start()] == kNoStateId || state_in[fst->Start()] == 0);
527 for (StateId s = 0; s < static_cast<StateId>(state_in.size()); s++) {
528 if (state_in[s] > 0) {
530 int32 trans_state =
static_cast<int32>(state_in[s]);
533 fst->SetFinal(s,
Times(fst->Final(s),
Weight(-log_prob*self_loop_scale)));
534 for (MutableArcIterator<MutableFst<Arc> > aiter(fst, s);
537 Arc arc = aiter.Value();
538 arc.weight =
Times(arc.weight, Weight(-log_prob*self_loop_scale));
545 fst->AddArc(s,
Arc(trans_id, 0,
Weight(-log_prob*self_loop_scale), s));
557 const std::vector<int32> &disambig_syms,
559 bool check_no_self_loops,
560 fst::VectorFst<fst::StdArc> *
fst) {
572 StateId num_states = fst->NumStates();
573 for (StateId s = 0; s < num_states; s++) {
574 int32 my_trans_state = f(kNoLabel);
576 for (MutableArcIterator<VectorFst<Arc> > aiter(fst, s);
579 Arc arc = aiter.Value();
580 if (my_trans_state == -1) my_trans_state = f(arc.ilabel);
582 if (my_trans_state > 0) {
584 arc.weight =
Times(arc.weight, Weight(-log_prob*self_loop_scale));
588 if (fst->Final(s) != Weight::Zero()) {
589 KALDI_ASSERT(my_trans_state == kNoLabel || my_trans_state == 0);
591 if (my_trans_state != kNoLabel && my_trans_state != 0) {
596 fst->AddArc(s,
Arc(trans_id, 0,
Weight(-log_prob*self_loop_scale), s));
603 const std::vector<int32> &disambig_syms,
606 bool check_no_self_loops,
607 fst::VectorFst<fst::StdArc> *
fst) {
611 check_no_self_loops, fst);
614 check_no_self_loops, fst);
626 const std::vector<int32> &alignment) {
627 for (
size_t i = 0;
i + 1 < alignment.size();
i++) {
630 if (tstate1 != tstate2) {
631 bool is_loop_1 = trans_model.
IsSelfLoop(alignment[i]),
632 is_loop_2 = trans_model.
IsSelfLoop(alignment[i+1]);
634 if (is_loop_1)
return true;
635 if (is_loop_2)
return false;
640 if (alignment.empty())
return false;
642 bool is_loop_front = trans_model.
IsSelfLoop(alignment.front()),
643 is_loop_back = trans_model.
IsSelfLoop(alignment.back());
644 if (is_loop_front)
return false;
645 if (is_loop_back)
return true;
660 const std::vector<int32> &alignment,
662 std::vector<std::vector<int32> > *split_output) {
663 if (alignment.empty())
return true;
664 std::vector<size_t> end_points;
669 for (
size_t i = 0;
i < alignment.size();
i++) {
670 int32 trans_id = alignment[
i];
671 if (trans_model.
IsFinal(trans_id)) {
672 if (!reordered) end_points.push_back(
i+1);
674 while (
i+1 < alignment.size() &&
680 end_points.push_back(
i+1);
682 }
else if (
i+1 == alignment.size()) {
686 end_points.push_back(
i+1);
690 if (this_state == next_state)
continue;
693 if (this_phone != next_phone) {
697 end_points.push_back(i+1);
702 size_t cur_point = 0;
703 for (
size_t i = 0;
i < end_points.size();
i++) {
704 split_output->push_back(std::vector<int32>());
712 if (forward_pdf_class !=
kNoPdf)
715 for (
size_t j = cur_point;
j < end_points[
i];
j++)
716 split_output->back().push_back(alignment[
j]);
717 cur_point = end_points[
i];
724 const std::vector<int32> &alignment,
725 std::vector<std::vector<int32> > *split_alignment) {
727 split_alignment->clear();
729 bool is_reordered =
IsReordered(trans_model, alignment);
731 is_reordered, split_alignment);
746 const std::vector<int32> &old_phone_alignment,
747 const std::vector<int32> &new_phone_window,
748 bool old_is_reordered,
749 bool new_is_reordered,
750 std::vector<int32> *new_phone_alignment) {
751 int32 alignment_size = old_phone_alignment.size();
752 static bool warned_topology =
false;
755 old_phone_alignment[0]),
756 new_central_phone = new_phone_window[P];
758 &new_topo = new_trans_model.
GetTopo();
761 new_topo.TopologyForPhone(new_central_phone));
762 if (topology_mismatch) {
763 if (!warned_topology) {
764 warned_topology =
true;
765 KALDI_WARN <<
"Topology mismatch detected; automatically converting. " 766 <<
"Won't warn again.";
769 bool length_mismatch =
770 (new_phone_alignment->size() != old_phone_alignment.size());
771 if (length_mismatch || topology_mismatch) {
775 new_phone_window, new_phone_alignment);
776 if (new_is_reordered)
783 int32 new_num_pdf_classes = new_topo.NumPdfClasses(new_central_phone);
784 std::vector<int32> pdf_ids(new_num_pdf_classes);
785 for (
int32 pdf_class = 0; pdf_class < new_num_pdf_classes; pdf_class++) {
786 if (!new_ctx_dep.
Compute(new_phone_window, pdf_class,
787 &(pdf_ids[pdf_class]))) {
788 std::ostringstream ss;
790 KALDI_ERR <<
"tree did not succeed in converting phone window " 797 for (
int32 j = 0;
j < alignment_size;
j++) {
798 int32 old_tid = old_phone_alignment[
j],
800 int32 forward_pdf_class =
802 self_loop_pdf_class =
806 int32 new_forward_pdf = pdf_ids[forward_pdf_class];
807 int32 new_self_loop_pdf = pdf_ids[self_loop_pdf_class];
808 int32 new_trans_state =
810 new_forward_pdf, new_self_loop_pdf);
813 (*new_phone_alignment)[
j] = new_tid;
816 if (new_is_reordered != old_is_reordered)
850 const std::vector<int32> &mapped_phones,
851 const std::vector<int32> &old_lengths,
852 int32 conversion_shift,
853 int32 subsample_factor,
854 std::vector<int32> *new_lengths) {
855 int32 phone_sequence_length = old_lengths.size();
856 std::vector<int32> min_lengths(phone_sequence_length);
857 new_lengths->resize(phone_sequence_length);
858 for (
int32 i = 0;
i < phone_sequence_length;
i++)
859 min_lengths[
i] = topology.
MinLength(mapped_phones[
i]);
860 int32 cur_time_elapsed = 0;
861 for (
int32 i = 0; i < phone_sequence_length; i++) {
865 int32 subsampled_time =
866 (cur_time_elapsed + conversion_shift) / subsample_factor;
867 cur_time_elapsed += old_lengths[
i];
868 int32 next_subsampled_time =
869 (cur_time_elapsed + conversion_shift) / subsample_factor;
870 (*new_lengths)[
i] = next_subsampled_time - subsampled_time;
875 for (
int32 i = 0; i < phone_sequence_length; i++) {
876 if ((*new_lengths)[
i] < min_lengths[
i]) {
881 int32 min_distance = std::numeric_limits<int32>::max(),
882 best_other_phone_index = -1,
885 for (
int32 j = i - 1;
j >= 0;
j--) {
886 if ((*new_lengths)[
j] > min_lengths[
j]) {
887 min_distance = cur_distance;
888 best_other_phone_index =
j;
891 cur_distance += (*new_lengths)[
j];
896 for (
int32 j = i + 1;
j < phone_sequence_length;
j++) {
897 if ((*new_lengths)[
j] > min_lengths[
j]) {
898 if (cur_distance < min_distance) {
899 min_distance = cur_distance;
900 best_other_phone_index =
j;
904 cur_distance += (*new_lengths)[
j];
907 if (best_other_phone_index == -1)
912 (*new_lengths)[best_other_phone_index]--;
929 const std::vector<int32> &old_alignment,
930 int32 conversion_shift,
931 int32 subsample_factor,
932 bool new_is_reordered,
933 const std::vector<int32> *phone_map,
934 std::vector<int32> *new_alignment) {
935 KALDI_ASSERT(0 <= conversion_shift && conversion_shift < subsample_factor);
936 bool old_is_reordered =
IsReordered(old_trans_model, old_alignment);
938 new_alignment->clear();
939 new_alignment->reserve(old_alignment.size());
940 std::vector<std::vector<int32> > old_split;
941 if (!
SplitToPhones(old_trans_model, old_alignment, &old_split))
943 int32 phone_sequence_length = old_split.size();
944 std::vector<int32> mapped_phones(phone_sequence_length);
945 for (
size_t i = 0;
i < phone_sequence_length;
i++) {
948 if (phone_map != NULL) {
949 int32 sz = phone_map->size();
950 if (mapped_phones[i] < 0 || mapped_phones[i] >= sz ||
951 (*phone_map)[mapped_phones[i]] == -1)
952 KALDI_ERR <<
"ConvertAlignment: could not map phone " << mapped_phones[
i];
953 mapped_phones[
i] = (*phone_map)[mapped_phones[
i]];
959 std::vector<std::vector<int32> > new_split(phone_sequence_length);
960 if (subsample_factor == 1 &&
963 for (
size_t i = 0;
i < phone_sequence_length;
i++)
964 new_split[
i].resize(old_split[
i].size());
967 std::vector<int32> old_lengths(phone_sequence_length), new_lengths;
968 for (
int32 i = 0;
i < phone_sequence_length;
i++)
969 old_lengths[
i] = old_split[
i].size();
971 mapped_phones, old_lengths, conversion_shift,
972 subsample_factor, &new_lengths)) {
973 KALDI_WARN <<
"Failed to produce suitable phone lengths";
976 for (
int32 i = 0;
i < phone_sequence_length;
i++)
977 new_split[
i].resize(new_lengths[
i]);
986 for (
int32 win_start = -N;
987 win_start < static_cast<int32>(phone_sequence_length + N);
989 int32 central_pos = win_start + P;
990 if (static_cast<size_t>(central_pos) < phone_sequence_length) {
992 std::vector<int32> new_phone_window(N, 0);
993 for (
int32 offset = 0; offset < N; offset++)
994 if (static_cast<size_t>(win_start+offset) < phone_sequence_length)
995 new_phone_window[offset] = mapped_phones[win_start+offset];
996 const std::vector<int32> &old_alignment_for_phone = old_split[central_pos];
997 std::vector<int32> &new_alignment_for_phone = new_split[central_pos];
1000 old_alignment_for_phone, new_phone_window,
1001 old_is_reordered, new_is_reordered,
1002 &new_alignment_for_phone);
1003 new_alignment->insert(new_alignment->end(),
1004 new_alignment_for_phone.begin(),
1005 new_alignment_for_phone.end());
1009 (old_alignment.size() + conversion_shift)/subsample_factor);
1016 const std::vector<int32> &old_alignment,
1017 int32 subsample_factor,
1019 bool new_is_reordered,
1020 const std::vector<int32> *phone_map,
1021 std::vector<int32> *new_alignment) {
1022 if (!repeat_frames || subsample_factor == 1) {
1027 subsample_factor - 1,
1035 std::vector<std::vector<int32> > shifted_alignments(subsample_factor);
1036 for (
int32 conversion_shift = subsample_factor - 1;
1037 conversion_shift >= 0; conversion_shift--) {
1046 &shifted_alignments[conversion_shift]))
1050 new_alignment->clear();
1051 new_alignment->reserve(old_alignment.size());
1052 int32 max_shifted_ali_length = (old_alignment.size() / subsample_factor)
1053 + (old_alignment.size() % subsample_factor);
1054 for (
int32 i = 0;
i < max_shifted_ali_length;
i++)
1055 for (
int32 conversion_shift = subsample_factor - 1;
1056 conversion_shift >= 0; conversion_shift--)
1057 if (
i < static_cast<int32>(shifted_alignments[conversion_shift].size()))
1058 new_alignment->push_back(shifted_alignments[conversion_shift][
i]);
1060 KALDI_ASSERT(new_alignment->size() == old_alignment.size());
1069 if (transition_scale == self_loop_scale) {
1089 const std::vector<int32> &disambig_syms,
1092 fst::VectorFst<fst::StdArc> *
fst) {
1093 using namespace fst;
1096 for (StateIterator<VectorFst<StdArc> > siter(*fst);
1099 for (MutableArcIterator<VectorFst<StdArc> > aiter(fst, siter.Value());
1102 StdArc arc = aiter.Value();
1104 if (l >= 1 && l <= num_tids) {
1109 arc.weight =
Times(arc.weight, TropicalWeight(-scaled_log_prob));
1110 }
else if (l != 0) {
1111 if (!std::binary_search(disambig_syms.begin(), disambig_syms.end(),
1113 KALDI_ERR <<
"AddTransitionProbs: invalid symbol " << arc.ilabel
1114 <<
" on graph input side.";
1116 aiter.SetValue(arc);
1125 using namespace fst;
1127 for (fst::StateIterator<Lattice> siter(*lat);
1130 for (MutableArcIterator<Lattice> aiter(lat, siter.Value());
1135 if (l >= 1 && l <= num_tids) {
1141 arc.weight.SetValue1(arc.weight.Value1() - scaled_log_prob);
1142 }
else if (l != 0) {
1143 KALDI_ERR <<
"AddTransitionProbs: invalid symbol " << arc.ilabel
1144 <<
" on lattice input side.";
1146 aiter.SetValue(arc);
1162 const std::vector<int32> &
words,
1163 int32 word_start_sym,
1165 std::vector<std::vector<int32> > *prons) {
1166 size_t i = 0,
j = 0;
1168 while (i < phnx.size()) {
1169 if (phnx[i] == 0)
return false;
1170 if (phnx[i] == word_start_sym) {
1171 std::vector<int32> pron;
1172 if (
j >= words.size())
return false;
1173 if (words[
j] == 0)
return false;
1174 pron.push_back(words[
j++]);
1176 while (i < phnx.size()) {
1177 if (phnx[i] == 0)
return false;
1178 if (phnx[i] == word_start_sym)
return false;
1179 if (phnx[i] == word_end_sym) { i++;
break; }
1180 pron.push_back(phnx[i]);
1184 if (!(i > 0 && phnx[i-1] == word_end_sym))
1186 prons->push_back(pron);
1187 }
else if (phnx[i] == word_end_sym) {
1191 std::vector<int32> pron;
1193 while (i < phnx.size()) {
1194 if (phnx[i] == 0)
return false;
1195 if (phnx[i] == word_start_sym)
break;
1196 if (phnx[i] == word_end_sym)
return false;
1197 pron.push_back(phnx[i]);
1200 prons->push_back(pron);
1203 return (
j == words.size());
1209 const std::vector<int32> &phone_window,
1210 std::vector<int32> *alignment) {
1212 int32 length = alignment->size();
1215 trans_model, prob_scale);
1216 fst::RmEpsilon(fst);
1218 fst::VectorFst<Arc> length_constraint_fst;
1220 std::vector<int32> symbols;
1221 bool include_epsilon =
false;
1224 int32 cur_state = length_constraint_fst.AddState();
1225 length_constraint_fst.SetStart(cur_state);
1226 for (
int32 i = 0;
i < length;
i++) {
1227 int32 next_state = length_constraint_fst.AddState();
1228 for (
size_t j = 0;
j < symbols.size();
j++) {
1229 length_constraint_fst.AddArc(cur_state,
1230 Arc(symbols[
j], symbols[j],
1231 fst::TropicalWeight::One(),
1234 cur_state = next_state;
1236 length_constraint_fst.SetFinal(cur_state, fst::TropicalWeight::One());
1238 fst::VectorFst<Arc> composed_fst;
1239 fst::Compose(*fst, length_constraint_fst, &composed_fst);
1240 fst::VectorFst<Arc> single_path_fst;
1242 fst::UniformArcSelector<Arc> selector;
1243 fst::RandGenOptions<fst::UniformArcSelector<Arc> > randgen_opts(selector);
1244 fst::RandGen(composed_fst, &single_path_fst, randgen_opts);
1246 if (single_path_fst.NumStates() == 0) {
1247 KALDI_ERR <<
"Error generating random alignment (wrong length?): " 1248 <<
"requested length is " << length <<
" versus min-length " 1252 std::vector<int32> symbol_sequence;
1253 bool ans = fst::GetLinearSymbolSequence<Arc, int32>(
1254 single_path_fst, &symbol_sequence, NULL, NULL);
1256 symbol_sequence.swap(*alignment);
1261 std::vector<int32> *alignment) {
1262 int32 start_pos = 0, size = alignment->size();
1263 while (start_pos != size) {
1264 int32 start_tid = (*alignment)[start_pos];
1266 bool start_is_self_loop = trans_model.
IsSelfLoop(start_tid) ? 0 : 1;
1267 int32 end_pos = start_pos + 1;
1273 while (end_pos != size &&
1276 bool this_is_self_loop = trans_model.
IsSelfLoop((*alignment)[end_pos]);
1277 if (!this_is_self_loop) {
1278 if (start_is_self_loop) {
1287 std::swap((*alignment)[start_pos], (*alignment)[end_pos - 1]);
1288 start_pos = end_pos;
1293 std::vector<std::set<int32> > *pdf2phones) {
1294 pdf2phones->clear();
1295 pdf2phones->resize(trans_model.
NumPdfs());
1300 (*pdf2phones)[pdf_id].insert(phone);
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool check_no_self_loops_
fst::ArcTpl< LatticeWeight > LatticeArc
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
int32 PairToTransitionId(int32 trans_state, int32 trans_index) const
A class for storing topology information for phones.
const std::vector< int32 > & GetPhones() const
Returns a sorted, unique list of phones.
void GetRandomAlignmentForPhone(const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const std::vector< int32 > &phone_window, std::vector< int32 > *alignment)
virtual bool Compute(const std::vector< int32 > &phoneseq, int32 pdf_class, int32 *pdf_id) const =0
The "new" Compute interface.
void RemoveEpsLocal(MutableFst< Arc > *fst)
RemoveEpsLocal removes some (but not necessarily all) epsilons in an FST, using an algorithm that is g...
bool ConvertPhnxToProns(const std::vector< int32 > &phnx, const std::vector< int32 > &words, int32 word_start_sym, int32 word_end_sym, std::vector< std::vector< int32 > > *prons)
static fst::VectorFst< fst::StdArc > * MakeTrivialAcceptor(int32 label)
This utility function, used in GetHTransducer(), creates an FSA (finite state acceptor, i.e.
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
int32 TransitionStateToSelfLoopPdfClass(int32 trans_state) const
int32 TransitionStateToForwardPdfClass(int32 trans_state) const
void AddSelfLoops(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool reorder, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
For context, see AddSelfLoops().
void GetIlabelMapping(const std::vector< std::vector< int32 > > &ilabel_info_old, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, std::vector< int32 > *old2new_map)
GetIlabelMapping produces a mapping that's similar to HTK's logical-to-physical model mapping (i...
static bool IsReordered(const TransitionModel &trans_model, const std::vector< int32 > &alignment)
int32 TransitionStateToHmmState(int32 trans_state) const
unordered_map< std::pair< int32, std::vector< int32 > >, fst::VectorFst< fst::StdArc > *, HmmCacheHash > HmmCacheType
HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used as cache in GetHmmAsFsa...
int32 SelfLoopOf(int32 trans_state) const
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
int32 TupleToTransitionState(int32 phone, int32 hmm_state, int32 pdf, int32 self_loop_pdf) const
int32 nonterm_phones_offset
std::vector< HmmState > TopologyEntry
TopologyEntry is a typedef that represents the topology of a single (prototype) state.
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq's a vector (i.e. sorts it and removes duplicates).
int32 TransitionIdToPdf(int32 trans_id) const
static const int32 kNoPdf
A constant used in the HmmTopology class as the pdf-class kNoPdf, which is used when a HMM-state is n...
void GetInputSymbols(const Fst< Arc > &fst, bool include_eps, std::vector< I > *symbols)
GetInputSymbols gets the list of symbols on the input of fst (including epsilon, if include_eps == tr...
int32 NumPdfClasses(int32 phone) const
Returns the number of pdf-classes for this phone; throws exception if phone not covered by this topol...
bool SplitToPhones(const TransitionModel &trans_model, const std::vector< int32 > &alignment, std::vector< std::vector< int32 > > *split_alignment)
SplitToPhones splits up the TransitionIds in "alignment" into their individual phones (one vector per...
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
static bool ConvertAlignmentInternal(const TransitionModel &old_trans_model, const TransitionModel &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector< int32 > &old_alignment, int32 conversion_shift, int32 subsample_factor, bool new_is_reordered, const std::vector< int32 > *phone_map, std::vector< int32 > *new_alignment)
This function is the same as 'ConvertAlignment', but instead of the 'repeat_frames' option it supports...
const TransitionModel & trans_model_
Configuration class for the GetHTransducer() function; see The HTransducerConfig configuration class ...
static bool ComputeNewPhoneLengths(const HmmTopology &topology, const std::vector< int32 > &mapped_phones, const std::vector< int32 > &old_lengths, int32 conversion_shift, int32 subsample_factor, std::vector< int32 > *new_lengths)
This function, called from ConvertAlignmentInternal(), works out suitable new lengths of phones in th...
void AddTransitionProbs(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat transition_scale, BaseFloat self_loop_scale, fst::VectorFst< fst::StdArc > *fst)
Adds transition-probs, with the supplied scales (see Scaling of transition and acoustic probabilities...
static void AddSelfLoopsNoReorder(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
void GetPdfToPhonesMap(const TransitionModel &trans_model, std::vector< std::set< int32 > > *pdf2phones)
virtual int CentralPosition() const =0
Central position P of the phone context, in 0-based numbering, e.g.
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
static void AddSelfLoopsReorder(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat self_loop_scale, bool check_no_self_loops, fst::VectorFst< fst::StdArc > *fst)
int32 TransitionIdToHmmState(int32 trans_id) const
bool IsSelfLoop(int32 trans_id) const
const TopologyEntry & TopologyForPhone(int32 phone) const
Returns the topology entry (i.e.
const HmmTopology & GetTopo() const
return reference to HMM-topology object.
BaseFloat GetTransitionLogProb(int32 trans_id) const
fst::VectorFst< LatticeArc > Lattice
int32 TransitionIdToTransitionState(int32 trans_id) const
VectorFst< Arc > * MakeLoopFst(const std::vector< const ExpandedFst< Arc > *> &fsts)
MakeLoopFst creates an FST that has a state that is both initial and final (weight == Weight::One())...
int32 TransitionStateToPhone(int32 trans_state) const
void ApplyProbabilityScale(float scale, MutableFst< Arc > *fst)
ApplyProbabilityScale is applicable to FSTs in the log or tropical semiring.
TidToTstateMapper(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, bool check_no_self_loops)
void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst< Arc > *fst, const F &f)
As MakePrecedingInputSymbolsSame, but takes a functor object that maps labels to classes.
static void ConvertAlignmentForPhone(const TransitionModel &old_trans_model, const TransitionModel &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector< int32 > &old_phone_alignment, const std::vector< int32 > &new_phone_window, bool old_is_reordered, bool new_is_reordered, std::vector< int32 > *new_phone_alignment)
This function is used internally inside ConvertAlignment; it converts the alignment for a single phon...
fst::VectorFst< fst::StdArc > * GetHTransducer(const std::vector< std::vector< int32 > > &ilabel_info, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const HTransducerConfig &config, std::vector< int32 > *disambig_syms_left)
Returns the H transducer; result owned by caller.
fst::StdArc::Weight Weight
static BaseFloat GetScaledTransitionLogProb(const TransitionModel &trans_model, int32 trans_id, BaseFloat transition_scale, BaseFloat self_loop_scale)
context-dep-itf.h provides a link between the tree-building code in ../tree/, and the FST code in ...
virtual int ContextWidth() const =0
ContextWidth() returns the value N (e.g.
BaseFloat transition_scale
Transition log-prob scale, see Scaling of transition and acoustic probabilities.
fst::VectorFst< fst::StdArc > * GetHmmAsFsa(std::vector< int32 > phone_window, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, const HTransducerConfig &config, HmmCacheType *cache)
Called by GetHTransducer() and probably will not need to be called directly; it creates and returns t...
BaseFloat GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const
Returns the log-probability of a particular non-self-loop transition after subtracting the probabilit...
#define KALDI_ASSERT(cond)
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
void ChangeReorderingOfAlignment(const TransitionModel &trans_model, std::vector< int32 > *alignment)
int32 MinLength(int32 phone) const
static bool SplitToPhonesInternal(const TransitionModel &trans_model, const std::vector< int32 > &alignment, bool reordered, std::vector< std::vector< int32 > > *split_output)
void MakeFollowingInputSymbolsSameClass(bool end_is_epsilon, MutableFst< Arc > *fst, const F &f)
As MakeFollowingInputSymbolsSame, but takes a functor object that maps labels to classes.
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
int32 GetEncodingMultiple(int32 nonterm_phones_offset)
bool IsFinal(int32 trans_id) const
int32 TransitionIdToPhone(int32 trans_id) const
bool ConvertAlignment(const TransitionModel &old_trans_model, const TransitionModel &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector< int32 > &old_alignment, int32 subsample_factor, bool repeat_frames, bool new_is_reordered, const std::vector< int32 > *phone_map, std::vector< int32 > *new_alignment)
ConvertAlignment converts an alignment that was created using one model, to another model...
fst::VectorFst< fst::StdArc > * GetHmmAsFsaSimple(std::vector< int32 > phone_window, const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, BaseFloat prob_scale)
Included mainly as a form of documentation, not used in any other code currently. ...
const std::vector< int32 > & disambig_syms_
BaseFloat GetNonSelfLoopLogProb(int32 trans_state) const
Returns the log-prob of the non-self-loop probability mass for this transition state.
int32 TransitionIdToTransitionIndex(int32 trans_id) const
fst::VectorFst< fst::StdArc > * GetPdfToTransitionIdTransducer(const TransitionModel &trans_model)
Returns a transducer from pdfs plus one (input) to transition-ids (output).