41 const std::vector<int32> &
string = arc.weight.String();
43 string.begin(),
string.end());
138 input_state(input_state), comp_state(comp_state) {}
144 size_t operator() (
const Tuple &state)
const {
150 bool operator () (
const Tuple &state1,
const Tuple &state2)
const {
157 typedef unordered_map<Tuple, StateId, TupleHash, TupleEqual>
MapType;
160 MapType::iterator iter =
map_.find(tuple);
161 if (iter ==
map_.end()) {
162 StateId output_state =
lat_out_->AddState();
163 map_[tuple] = output_state;
165 queue_.push_back(std::make_pair(tuple, output_state));
179 std::vector<int32> empty_vec;
197 lat_out_->AddArc(output_state, lat_arc);
205 StateId output_state =
queue_.back().second;
219 lat_out_->AddArc(output_state, lat_arc);
232 for (fst::ArcIterator<CompactLattice> aiter(
lat_, tuple.input_state);
233 !aiter.Done(); aiter.Next()) {
235 Tuple next_tuple(tuple);
261 uint64 props =
lat_.Properties(fst::kIDeterministic|fst::kIEpsilons, test);
262 if (props != fst::kIDeterministic) {
263 KALDI_WARN <<
"[Lattice has input epsilons and/or is not input-deterministic " 264 <<
"(in Mohri sense)]-- i.e. lattice is not deterministic. " 265 <<
"Word-alignment may be slow and-or blow up in memory.";
296 std::vector<int32> syms_to_remove;
301 if (!syms_to_remove.empty()) {
303 Project(
lat_out_, fst::PROJECT_INPUT);
309 if (
lat_.Start() == fst::kNoStateId) {
310 KALDI_WARN <<
"Trying to word-align empty lattice.";
314 Tuple initial_tuple(
lat_.Start(), initial_comp_state);
320 KALDI_WARN <<
"Number of states in lattice exceeded max-states of " 322 <<
lat_.NumStates() <<
" states. Returning what we have.";
341 std::vector<std::pair<Tuple, StateId> >
queue_;
362 for (
i = 0;
i < len;
i++) {
365 if (this_phone != phone && ! *error) {
367 KALDI_WARN <<
"Phone changed before final transition-id found " 368 "[broken lattice or mismatched model or wrong --reorder option?]";
373 if (
i == len)
return false;
377 if (i == len)
return false;
381 KALDI_WARN <<
"Phone changed unexpectedly in lattice " 382 "[broken lattice or mismatched model?]";
407 for (
i = 0;
i < len;
i++) {
410 if (this_phone != phone && ! *error) {
411 KALDI_WARN <<
"Phone changed before final transition-id found " 412 "[broken lattice or mismatched model or wrong --reorder option?]";
418 if (
i == len)
return false;
422 if (i == len)
return false;
426 KALDI_WARN <<
"Phone changed unexpectedly in lattice " 427 "[broken lattice or mismatched model?]";
466 if (i == len)
return false;
470 if (i == len)
return false;
473 KALDI_WARN <<
"Phone changed unexpectedly in lattice " 474 "[broken lattice or mismatched model?]";
481 for (; i < len; i++) {
487 KALDI_WARN <<
"Unexpected phone " << this_phone
488 <<
" found inside a word.";
492 if (i == len)
return false;
499 for (; i < len; i++) {
501 if (this_phone != final_phone && ! *error) {
503 KALDI_WARN <<
"Phone changed before final transition-id found " 504 "[broken lattice or mismatched model or wrong --reorder option?]";
508 if (i == len)
return false;
514 if (i == len)
return false;
518 KALDI_WARN <<
"Phone changed while following final self-loop " 519 "[broken lattice or mismatched model or wrong --reorder option?]";
545 const std::vector<int32> &transition_ids) {
546 if (transition_ids.empty())
return false;
550 && first_phone == last_phone)
555 return (tmodel.
IsFinal(transition_ids.back()));
557 int32 i = transition_ids.size() - 1;
558 while (i > 0 && tmodel.
IsSelfLoop(transition_ids[i])) i--;
559 return tmodel.
IsFinal(transition_ids[i]);
578 KALDI_WARN <<
"Invalid word at end of lattice [partial lattice, forced out?]";
591 KALDI_WARN <<
"Discarding word-ids at the end of a sentence, " 592 "that don't have alignments.";
612 KALDI_ERR <<
"Broken silence arc at end of utterance (the phone " 613 "changed); code error";
622 KALDI_WARN <<
"Broken silence arc at end of utterance (does not " 623 "reach end of silence)";
628 cw, fst::kNoStateId);
635 KALDI_WARN <<
"Partial word detected at end of utterance";
639 cw, fst::kNoStateId);
644 KALDI_ERR <<
"Code error, word-aligning lattice";
651 KALDI_ASSERT(!int_list.empty() && phone_type != kNoPhone);
652 std::vector<int32> phone_list;
656 || phone_list.empty())
657 KALDI_ERR <<
"Invalid argument to --*-phones option: " << int_list;
658 for (
size_t i= 0;
i < phone_list.size();
i++) {
659 if (phone_to_type.size() <= phone_list[
i])
660 phone_to_type.resize(phone_list[
i]+1, kNoPhone);
661 if (phone_to_type[phone_list[
i]] != kNoPhone)
662 KALDI_ERR <<
"Phone " << phone_list[
i] <<
"was given two incompatible " 664 phone_to_type[phone_list[
i]] = phone_type;
675 kWordBeginAndEndPhone : kNonWordPhone));
688 std::string word_boundary_file) {
693 Input ki(word_boundary_file, &binary_in);
694 KALDI_ASSERT(!binary_in &&
"Not expecting binary word-boundary file.");
700 while (std::getline(stream, line)) {
701 std::vector<std::string> split_line;
704 if (split_line.size() != 2 ||
706 KALDI_ERR <<
"Invalid line in word-boundary file: " << line;
708 if (phone_to_type.size() <=
static_cast<size_t>(p))
709 phone_to_type.resize(p+1, kNoPhone);
710 std::string t = split_line[1];
711 if (t ==
"nonword") phone_to_type[p] = kNonWordPhone;
712 else if (t ==
"begin") phone_to_type[p] = kWordBeginPhone;
713 else if (t ==
"singleton") phone_to_type[p] = kWordBeginAndEndPhone;
714 else if (t ==
"end") phone_to_type[p] = kWordEndPhone;
715 else if (t ==
"internal") phone_to_type[p] = kWordInternalPhone;
717 KALDI_ERR <<
"Invalid line in word-boundary file: " << line;
719 if (phone_to_type.empty())
745 for (StateId s = 0; s < aligned_lat_.NumStates(); s++) {
746 for (fst::ArcIterator<CompactLattice> iter(aligned_lat_, s);
749 TestArc(iter.Value());
752 TestFinal(aligned_lat_.Final(s));
759 if (! (TestArcSilence(arc) || TestArcNormalWord(arc) || TestArcOnePhoneWord(arc)
760 || TestArcEmpty(arc)))
761 KALDI_ERR <<
"Invalid arc in aligned CompactLattice: " 762 << arc.ilabel <<
" " << arc.olabel <<
" " << arc.nextstate
763 <<
" " << arc.weight;
766 if (arc.ilabel != 0)
return false;
767 const std::vector<int32> &tids = arc.weight.String();
774 const std::vector<int32> &tids = arc.weight.String();
775 if (tids.empty())
return false;
779 for (
size_t i = 0;
i < tids.size();
i++)
784 for (
size_t i = 0; i < tids.size(); i++) {
789 for (
size_t j = i+1;
j < tids.size();
j++) {
801 if (arc.ilabel == 0)
return false;
802 const std::vector<int32> &tids = arc.weight.String();
803 if (tids.empty())
return false;
807 for (
size_t i = 0;
i < tids.size();
i++)
812 for (
size_t i = 0; i < tids.size(); i++) {
817 for (
size_t j = i+1;
j < tids.size();
j++) {
829 if (arc.ilabel == 0)
return false;
830 const std::vector<int32> &tids = arc.weight.String();
831 if (tids.empty())
return false;
838 for (i = 0; i < tids.size(); i++) {
847 while (i < tids.size() &&
852 if (i == tids.size())
return false;
856 for (
size_t j = i;
j < tids.size();
j++)
860 for (
size_t j = i; j < tids.size(); j++) {
867 for (
size_t k = j + 1; k < tids.size(); k++)
882 const std::vector<int32> &tids = arc.weight.String();
883 if (tids.empty())
return false;
889 KALDI_ERR <<
"Expect to have no strings on final-weights of lattices.";
894 std::vector<int32> to_remove;
897 Project(&aligned_lat, fst::PROJECT_INPUT);
900 if (!RandEquivalent(
lat_, aligned_lat, 5, 1.0e+10,
Rand(),
902 KALDI_ERR <<
"Equivalence test failed (testing word-alignment of lattices.) " 903 <<
"Make sure your model and lattices match!";
std::vector< int32 > transition_ids_
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
void Init(std::istream &stream)
bool TestArcNormalWord(const CompactLatticeArc &arc)
A hashing function-object for vectors.
bool OutputNormalWordArc(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
This function tries to see if it can output a normal word arc — one with at least two phones in it...
LatticeWeight FinalWeight()
FinalWeight() will return "weight" if both transition_ids and word_labels are empty, otherwise it will return Weight::Zero().
bool TestArcOnePhoneWord(const CompactLatticeArc &arc)
const WordBoundaryInfo & info_in_
void RemoveEpsilonsFromLattice()
static const LatticeWeightTpl One()
const WordBoundaryInfo & info_
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g. 1:2:3) into a vector of integers.
CompactLattice * lat_out_
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
void TestArc(const CompactLatticeArc &arc)
fst::CompactLatticeWeightTpl< LatticeWeight, int32 > CompactLatticeWeight
void TestFinal(const CompactLatticeWeight &w)
bool TestArcEmpty(const CompactLatticeArc &arc)
std::string winternal_phones
StateId GetStateForTuple(const Tuple &tuple, bool add_to_queue)
bool OutputSilenceArc(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
bool WordAlignLattice(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, int32 max_states, CompactLattice *lat_out)
Align lattice so that each arc has the transition-ids on it that correspond to the word that is on th...
const CompactLattice & aligned_lat_
static bool TestWordAlignedLattice(const WordAlignLatticeLexiconInfo &lexicon_info, const TransitionModel &tmodel, CompactLattice clat, CompactLattice aligned_clat, bool allow_duplicate_paths)
WordAlignedLatticeTester(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, const CompactLattice &aligned_lat)
void SetOptions(const std::string int_list, PhoneType phone_type)
std::vector< std::pair< Tuple, StateId > > queue_
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
std::string wbegin_phones
unordered_map< Tuple, StateId, TupleHash, TupleEqual > MapType
static const CompactLatticeWeightTpl< WeightType, IntType > One()
PhoneType TypeOfPhone(int32 p) const
void OutputArcForce(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
This function may be called when you reach the end of the lattice and this structure hasn't voluntari...
bool OutputOnePhoneWordArc(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Arc::StateId CreateSuperFinal(MutableFst< Arc > *fst)
Tuple(StateId input_state, ComputationState comp_state)
bool IsSelfLoop(int32 trans_id) const
static const LatticeWeightTpl Zero()
std::string wbegin_and_end_phones
bool TestArcSilence(const CompactLatticeArc &arc)
int32 TransitionIdToTransitionState(int32 trans_id) const
Arc::Label HighestNumberedOutputSymbol(const Fst< Arc > &fst)
Returns the highest numbered output symbol id of the FST (or zero for an empty FST).
void Advance(const CompactLatticeArc &arc, LatticeWeight *weight)
The state of the computation in which...
static bool IsPlausibleWord(const WordAlignLatticeLexiconInfo &lexicon_info, const TransitionModel &tmodel, int32 word_id, const std::vector< int32 > &transition_ids)
std::vector< int32 > word_labels_
int Rand(struct RandomState *state)
fst::VectorFst< CompactLatticeArc > CompactLattice
CompactLatticeArc::StateId StateId
const CompactLattice & lat_
CompactLatticeArc::Label Label
#define KALDI_ASSERT(cond)
const TransitionModel & tmodel_
bool operator==(const ComputationState &other) const
static const CompactLatticeWeightTpl< WeightType, IntType > Zero()
bool OutputArc(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
If it can output a whole word, it will do so, will put it in arc_out, and return true; else it will r...
bool TestArcPartialWord(const CompactLatticeArc &arc)
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
void ProcessQueueElement()
ComputationState comp_state
ComputationState(const ComputationState &other)
WordBoundaryInfo(const WordBoundaryInfoOpts &opts)
std::string silence_phones
const std::vector< IntType > & String() const
bool IsFinal(int32 trans_id) const
int32 TransitionIdToPhone(int32 trans_id) const
const TransitionModel & tmodel_
LatticeWordAligner(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, int32 max_states, CompactLattice *lat_out)
void ProcessFinal(Tuple tuple, StateId output_state)
void RemoveSomeInputSymbols(const std::vector< I > &to_remove, MutableFst< Arc > *fst)
RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from the input side of the FST...