word-align-lattice-lexicon.cc
Go to the documentation of this file.
1 // lat/word-align-lattice-lexicon.cc
2 
3 // Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
23 #include "lat/lattice-functions.h"
24 #include "hmm/transition-model.h"
25 #include "hmm/hmm-utils.h"
26 #include "util/stl-utils.h"
27 
28 namespace kaldi {
29 
30 const int kTemporaryEpsilon = -2;
31 const int kNumStatesOffset = 1000; // relates to how we apply the
32 // max-states to the lattices; relates to the --max-expand option which
33 // stops this blowing up for pathological cases or in case of a mismatch.
34 
36  public:
42 
43  /*
44  The Freshness enum is applied to phone and word-sequences in the computation
45  state; it is related to the epsilon sequencing problem. If a phone or word
46  is new (added by the latest transition), it is fresh. We are only concerned
47  with the freshness of the left-most word (i.e. word index 0) in words_, and
48  the freshness of that can take only two values, kNotFresh or kFresh. As
49  regards the phones_ variable, the difference between kFresh and kAllFresh
50  is: if we just appended a phone it's kFresh, but if we just shifted off a
51  phone or phones by outputting a nonempty word it's kAllFresh, meaning that
52  all sub-sequences of the phone sequence are new. Note: if a phone or
53  word-sequence is empty the freshness of that sequence does not matter or is
54  not defined; we'll let it default to kNotFresh.
55  */
56  typedef enum {
60  } Freshness;
61 
66  public:
67 
73  void Advance(const CompactLatticeArc &arc,
74  const TransitionModel &tmodel,
75  LatticeWeight *leftover_weight);
76 
82  bool ViableIfAdvanced(const ViabilityMap &viability_map) const;
83 
84  int32 NumPhones() const { return phones_.size(); }
85  int32 NumWords() const { return words_.size(); }
86  int32 PendingWord() const { KALDI_ASSERT(!words_.empty()); return words_[0]; }
87  Freshness WordFreshness() const { return word_fresh_; }
88  Freshness PhoneFreshness() const { return phone_fresh_; }
89 
94  void TakeForcedTransition(int32 partial_word_label,
95  ComputationState *next_state,
96  CompactLatticeArc *arc_out) const;
97 
101  bool TakeTransition(const LexiconMap &lexicon_map,
102  int32 word_id,
103  int32 num_phones,
104  ComputationState *next_state,
105  CompactLatticeArc *arc_out) const;
106 
107  bool IsEmpty() const { return (transition_ids_.empty() && words_.empty()); }
108 
113  return (IsEmpty() ? weight_ : LatticeWeight::Zero());
114  }
115 
116  size_t Hash() const {
118  const int32 p1 = 11117, p2 = 90647, p3 = 3967, p4 = 3557; // primes.
119  size_t ans = 0;
120  for (int32 i = 0; i < static_cast<int32>(transition_ids_.size()); i++) {
121  ans *= p1;
122  ans += vh(transition_ids_[i]);
123  }
124  ans += p2 * vh(words_)
125  + static_cast<int32>(word_fresh_) * p3
126  + static_cast<int32>(phone_fresh_) * p4;
127  // phones_ is determined by transition-id sequence so we don't
128  // need to include it in the hash.
129  return ans;
130  }
131 
132  bool operator == (const ComputationState &other) const {
133  // phones_ is determined by transition-id sequence so don't
134  // need to compare it.
135  return (transition_ids_ == other.transition_ids_ &&
136  words_ == other.words_ &&
137  weight_ == other.weight_ &&
138  phone_fresh_ == other.phone_fresh_ &&
139  word_fresh_ == other.word_fresh_);
140  }
141 
143  weight_(LatticeWeight::One()) { } // initial state.
144 
146  phones_(other.phones_), words_(other.words_),
149  private:
150  std::vector<int32> phones_; // sequence of pending phones
151  std::vector<int32> words_; // sequence of pending words.
152 
153  // The following variables tell us whether the phones_ and/or words_
154  // variables were modified by the last operation on the computation state.
155  // This is used to make sure we don't have multiple ways of handling the
156  // same sequence, by taking transitions at multiple points (see code for
157  // more details). It's related to the epsilon sequencing problem.
158  // See the declaration above of the enum "Freshness".
159  Freshness phone_fresh_;
160  Freshness word_fresh_;
161 
162  std::vector<std::vector<int32> > transition_ids_; // sequence of transition-ids for each phone..
163 
164  LatticeWeight weight_; // contains two floats.
165  };
166 
167 
168  static void AppendVectors(
169  std::vector<std::vector<int32> >::const_iterator input_begin,
170  std::vector<std::vector<int32> >::const_iterator input_end,
171  std::vector<int32> *output);
172 
173  struct Tuple {
174  Tuple(StateId input_state, ComputationState comp_state):
175  input_state(input_state), comp_state(comp_state) {}
176  Tuple() {}
177  StateId input_state;
179  };
180 
181  struct TupleHash {
182  size_t operator() (const Tuple &state) const {
183  return state.input_state + 102763 * state.comp_state.Hash();
184  // 102763 is just an arbitrary prime number
185  }
186  };
187  struct TupleEqual {
188  bool operator () (const Tuple &state1, const Tuple &state2) const {
189  // treat this like operator ==
190  return (state1.input_state == state2.input_state
191  && state1.comp_state == state2.comp_state);
192  }
193  };
194 
195  typedef unordered_map<Tuple, StateId, TupleHash, TupleEqual> MapType;
196 
197  // This function may alter queue_.
198  StateId GetStateForTuple(const Tuple &tuple) {
199  MapType::iterator iter = map_.find(tuple);
200  if (iter == map_.end()) { // not in map.
201  StateId output_state = lat_out_->AddState();
202  map_[tuple] = output_state;
203  queue_.push_back(std::make_pair(tuple, output_state));
204  return output_state;
205  } else {
206  return iter->second;
207  }
208  }
209 
210  // This function may alter queue_, via GetStateForTuple.
211  void ProcessTransition(StateId prev_output_state, // state-id of from-state in output lattice
212  const Tuple &next_tuple,
213  CompactLatticeArc *arc) { // arc to add (must first modify it by adding "nextstate")
214  arc->nextstate = GetStateForTuple(next_tuple); // adds it to queue_ if new.
215  lat_out_->AddArc(prev_output_state, *arc);
216  }
217 
218  // Process any epsilon transitions out of this state. This refers to
219  // filler-words, such as silence, which have epsilon as the symbol in the
220  // original lattice, or no symbol at all (typically the original lattice
221  // will be determinized with epsilon-removal so there is no separate arc,
222  // just one or more extra phones that don't match up with any word.
223  void ProcessEpsilonTransitions(const Tuple &tuple, StateId output_state);
224 
225  // Process any non-epsilon transitions out of this state in the output lattice.
226  void ProcessWordTransitions(const Tuple &tuple, StateId output_state);
227 
228  // Take any transitions that correspond to advancing along arcs arc in the
229  // original FST.
230  void PossiblyAdvanceArc(const Tuple &tuple, StateId output_state);
231 
234  bool ProcessFinal();
235 
239  bool HasNonEpsArcsOut(StateId output_state);
240 
247  void ProcessFinalForceOut();
248 
249  // Process all final-probs -- a wrapper function that handles the forced-out case.
251  if (final_queue_.empty()) {
252  KALDI_WARN << "No final-probs to process.";
253  error_ = true;
254  return;
255  }
256  if (ProcessFinal()) return;
257  error_ = true;
258  KALDI_WARN << "Word-aligning lattice: lattice was forced out, will have partial words at end.";
259 
261 
262  if (ProcessFinal()) return;
263  KALDI_WARN << "Word-aligning lattice: had no final-states even after forcing out "
264  << "(result will be empty). This probably indicates wrong input.";
265  return;
266  }
267 
269  KALDI_ASSERT(!queue_.empty());
270  Tuple tuple = queue_.back().first;
271  StateId output_state = queue_.back().second;
272  queue_.pop_back();
273 
274  ProcessEpsilonTransitions(tuple, output_state);
275  ProcessWordTransitions(tuple, output_state);
276  PossiblyAdvanceArc(tuple, output_state);
277 
278  // Note: we'll do a bit more filtering in ProcessFinal(), meaning
279  // that we won't necessarily give a final-prob to all of the things
280  // that go onto final_queue_.
281  if (lat_in_.Final(tuple.input_state) != CompactLatticeWeight::Zero())
282  final_queue_.push_back(std::make_pair(tuple, output_state));
283  }
284 
286  const TransitionModel &tmodel,
287  const WordAlignLatticeLexiconInfo &lexicon_info,
288  int32 max_states,
289  int32 partial_word_label,
290  CompactLattice *lat_out):
291  lat_in_(lat), tmodel_(tmodel), lexicon_info_(lexicon_info),
292  max_states_(max_states),
293  lat_out_(lat_out),
294  partial_word_label_(partial_word_label == 0 ?
295  kTemporaryEpsilon : partial_word_label),
296  error_(false) {
297  // lat_in_ is after PhoneAlignLattice, it is not deterministic and contains epsilons
298 
299  fst::CreateSuperFinal(&lat_in_); // Creates a super-final state, so the
300  // only final-probs are One(). Note: the member lat_in_ is not a reference.
301 
302  }
303 
304  // Removes epsilons; also removes unreachable states...
305  // not sure if these would exist if original was connected.
306  // This also replaces the temporary symbols for the silence
307  // and partial-words, with epsilons, if we wanted epsilons.
309  Connect(lat_out_);
310  RmEpsilon(lat_out_, true); // true = connect.
311  std::vector<int32> syms_to_remove;
312  syms_to_remove.push_back(kTemporaryEpsilon);
313  RemoveSomeInputSymbols(syms_to_remove, lat_out_);
314  Project(lat_out_, fst::PROJECT_INPUT);
315  }
316 
317  bool AlignLattice() {
318  lat_out_->DeleteStates();
319  if (lat_in_.Start() == fst::kNoStateId) {
320  KALDI_WARN << "Trying to word-align empty lattice.";
321  return false;
322  }
323  ComputationState initial_comp_state;
324  Tuple initial_tuple(lat_in_.Start(), initial_comp_state);
325  StateId start_state = GetStateForTuple(initial_tuple);
326  lat_out_->SetStart(start_state);
327 
328  while (!queue_.empty()) {
329  if (max_states_ > 0 && lat_out_->NumStates() > max_states_) {
330  KALDI_WARN << "Number of states in lattice exceeded max-states of "
331  << max_states_ << ", original lattice had "
332  << lat_in_.NumStates() << " states. Returning empty lattice.";
333  lat_out_->DeleteStates();
334  return false;
335  }
337  }
339 
341 
342  return !error_;
343  }
344 
350 
351  std::vector<std::pair<Tuple, StateId> > queue_;
352 
353  std::vector<std::pair<Tuple, StateId> > final_queue_; // as queue_, but
354  // just contains states that may have final-probs to process. We process these
355  // all at once, at the end.
356 
357  MapType map_; // map from tuples to StateId.
359  bool error_;
360 };
361 
362 // static
364  std::vector<std::vector<int32> >::const_iterator input_begin,
365  std::vector<std::vector<int32> >::const_iterator input_end,
366  std::vector<int32> *output) {
367  size_t size = 0;
368  for (std::vector<std::vector<int32> >::const_iterator iter = input_begin;
369  iter != input_end;
370  ++iter)
371  size += iter->size();
372  output->clear();
373  output->reserve(size);
374  for (std::vector<std::vector<int32> >::const_iterator iter = input_begin;
375  iter != input_end;
376  ++iter)
377  output->insert(output->end(), iter->begin(), iter->end());
378 }
379 
381  const Tuple &tuple, StateId output_state) {
382  const ComputationState &comp_state = tuple.comp_state;
383  StateId input_state = tuple.input_state;
384  StateId zero_word = 0;
385  NumPhonesMap::const_iterator iter =
386  lexicon_info_.num_phones_map_.find(zero_word);
387  if (iter == lexicon_info_.num_phones_map_.end()) {
388  return; // No epsilons to match; this can only happen if the lexicon
389  // we were provided had no lines with 0 as the first entry, i.e. no
390  // optional silences or the like.
391  }
392  // Now decide what range of phone-lengths we must process. This is all
393  // about only getting a single opportunity to process any given sequence of
394  // phones.
395  int32 min_num_phones, max_num_phones;
396 
397  if (comp_state.PhoneFreshness() == kAllFresh) {
398  // All sub-sequences of the phone sequence are fresh because we just
399  // shifted some phones off, so we do this for all lengths. We can limit
400  // ourselves to the range of possible lengths for the epsilon symbol,
401  // in the lexicon.
402  min_num_phones = iter->second.first;
403  max_num_phones = std::min(iter->second.second, comp_state.NumPhones());
404  } else if (comp_state.PhoneFreshness() == kFresh) {
405  // only last phone is "fresh", so only consider the sequence of all
406  // phones including the last one.
407  int32 num_phones = comp_state.NumPhones();
408  if (num_phones >= iter->second.first &&
409  num_phones <= iter->second.second) {
410  min_num_phones = num_phones;
411  max_num_phones = num_phones;
412  } else {
413  return;
414  }
415  } else { // kNotFresh
416  return;
417  }
418 
419  if (min_num_phones == 0)
420  KALDI_ERR << "Lexicon error: epsilon transition that produces no output:";
421 
422  for (int32 num_phones = min_num_phones;
423  num_phones <= max_num_phones;
424  num_phones++) {
425  Tuple next_tuple;
426  next_tuple.input_state = input_state; // We're not taking a transition in the
427  // input FST so this stays the same.
428  CompactLatticeArc arc;
430  zero_word,
431  num_phones,
432  &next_tuple.comp_state,
433  &arc)) {
434  ProcessTransition(output_state, next_tuple, &arc);
435  }
436  }
437 }
438 
440  const Tuple &tuple, StateId output_state) {
441  const ComputationState &comp_state = tuple.comp_state;
442  StateId input_state = tuple.input_state;
443  if (comp_state.NumWords() > 0) {
444  int32 min_num_phones, max_num_phones;
445  int32 word_id = comp_state.PendingWord();
446 
447  if (comp_state.WordFreshness() == kFresh ||
448  comp_state.PhoneFreshness() == kAllFresh) {
449  // Just saw word, or shifted phones,
450  // so 1st opportunity to process phone-sequences of all possible sizes,
451  // with this word.
452  NumPhonesMap::const_iterator iter =
453  lexicon_info_.num_phones_map_.find(word_id);
454  if (iter == lexicon_info_.num_phones_map_.end()) {
455  KALDI_ERR << "Word " << word_id << " is not present in the lexicon.";
456  }
457  min_num_phones = iter->second.first;
458  max_num_phones = std::min(iter->second.second,
459  comp_state.NumPhones());
460  } else if (comp_state.PhoneFreshness() == kFresh) {
461  // just the latest phone is new -> just try to process the
462  // phone-sequence of all the phones we have.
463  min_num_phones = comp_state.NumPhones();
464  max_num_phones = min_num_phones;
465  } else {
466  return; // Nothing to do, since neither the word nor the phones are fresh.
467  }
468 
469  for (int32 num_phones = min_num_phones;
470  num_phones <= max_num_phones;
471  num_phones++) {
472  Tuple next_tuple;
473  next_tuple.input_state = input_state; // We're not taking a transition in the
474  // input FST so this stays the same.
475  CompactLatticeArc arc;
477  word_id,
478  num_phones,
479  &next_tuple.comp_state,
480  &arc)) {
481  ProcessTransition(output_state, next_tuple, &arc);
482  }
483  }
484  }
485 }
486 
487 
489  const Tuple &tuple, StateId output_state) {
491  for(fst::ArcIterator<CompactLattice> aiter(lat_in_, tuple.input_state);
492  !aiter.Done(); aiter.Next()) {
493  const CompactLatticeArc &arc_in = aiter.Value();
494  Tuple next_tuple(arc_in.nextstate, tuple.comp_state);
495  LatticeWeight arc_weight;
496  next_tuple.comp_state.Advance(arc_in, tmodel_, &arc_weight);
497  // Note: GetStateForTuple will add the tuple to the queue,
498  // if necessary.
499 
500  StateId next_output_state = GetStateForTuple(next_tuple);
501  CompactLatticeArc arc_out(0, 0,
502  CompactLatticeWeight(arc_weight,
503  std::vector<int32>()),
504  next_output_state);
505  lat_out_->AddArc(output_state,
506  arc_out);
507  }
508  }
509 }
510 
512  bool saw_final = false;
513  // Find final-states...
514  for (size_t i = 0; i < final_queue_.size(); i++) {
515  const Tuple &tuple = final_queue_[i].first;
516  StateId output_state = final_queue_[i].second;
518  LatticeWeight final_weight = tuple.comp_state.FinalWeight();
519  if (final_weight != LatticeWeight::Zero()) {
520  // note: final_weight is only nonzero if there are no
521  // pending transition-ids, so there is no string component.
522  std::vector<int32> empty_vec;
523  lat_out_->SetFinal(output_state,
524  CompactLatticeWeight(final_weight, empty_vec));
525  saw_final = true;
526  }
527  }
528  return saw_final;
529 }
530 
532  for (fst::ArcIterator<CompactLattice> aiter(*lat_out_, output_state);
533  !aiter.Done(); aiter.Next()) {
534  const CompactLatticeArc &arc = aiter.Value();
535  if (arc.ilabel != 0 || arc.olabel != 0 || !arc.weight.String().empty())
536  return true;
537  }
538  return false;
539 }
540 
542  KALDI_ASSERT(queue_.empty());
543  std::vector<std::pair<Tuple, StateId> > new_final_queue_;
544  new_final_queue_.reserve(final_queue_.size());
545  for (size_t i = 0; i < final_queue_.size();i++) { // note: all the states will
546  // be final in the orig. lattice
547  const Tuple &tuple = final_queue_[i].first;
548  StateId output_state = final_queue_[i].second;
549 
550  if (!HasNonEpsArcsOut(output_state)) { // This if-statement
551  // avoids forcing things out too early, when they had words
552  // that could naturally have been put out. [without it,
553  // we'd have multiple alternate paths at the end.]
554 
555  CompactLatticeArc arc;
556  Tuple next_tuple;
557  next_tuple.input_state = tuple.input_state;
559  &next_tuple.comp_state,
560  &arc);
561  // Note: the following call may add to queue_, but we'll clear it,
562  // we don't want to process these states.
563  StateId new_state = GetStateForTuple(next_tuple);
564  arc.nextstate = new_state;
565  lat_out_->AddArc(output_state, arc);
566  new_final_queue_.push_back(std::make_pair(next_tuple, new_state));
567  }
568  }
569  queue_.clear();
570  std::swap(final_queue_, new_final_queue_);
571 }
572 
574  const CompactLatticeArc &arc, const TransitionModel &tmodel, LatticeWeight *weight) {
575  const std::vector<int32> &tids = arc.weight.String();
576  int32 phone;
577  if (tids.empty()) phone = 0;
578  else {
579  phone = tmodel.TransitionIdToPhone(tids.front());
580  KALDI_ASSERT(phone == tmodel.TransitionIdToPhone(tids.back()) &&
581  "Error: lattice is not phone-aligned.");
582  }
583  if (arc.ilabel != 0) { // note: arc.ilabel==arc.olabel (acceptor)
584  words_.push_back(arc.ilabel);
585  // Note: the word freshness only applies to the word in position 0,
586  // so only if the word-sequence is now of size 1, is it fresh.
587  if (words_.size() == 1) word_fresh_ = kFresh;
588  else word_fresh_ = kNotFresh;
589  } else { // No word added -> word not fresh.
591  }
592  if (phone != 0) {
593  phones_.push_back(phone);
594  transition_ids_.push_back(tids);
596  } else {
598  }
599  *weight = Times(weight_, arc.weight.Weight()); // will go on arc in output lattice
601 }
602 
603 
605  const ViabilityMap &viability_map) const {
606  /* This will ideally to return true if and only if we can ever take
607  any kind of transition out of this state after "advancing" it by adding
608  words and/or phones. It's OK to return true in some cases where the
609  condition is false, though, if it's a pain to check, because the result
610  will just be doing extra work for nothing (those states won't be
611  co-accessible in the output).
612  */
613  if (phones_.empty()) return true;
614  if (words_.empty()) return true;
615  else {
616  // neither phones_ or words_ is empty. Return true if a longer sequence
617  // than this phone sequence can have either zero (<eps>/epsilon) or the
618  // first element of words_, as an entry in the lexicon with that phone
619  // sequence.
620  ViabilityMap::const_iterator iter = viability_map.find(phones_);
621  if (iter == viability_map.end()) return false;
622  else {
623  const std::vector<int32> &this_set = iter->second; // sorted vector.
624  // Return true if either 0 or words_[0] is in the set. If 0 is
625  // in the set, it will be the 1st element of the vector, because it's
626  // the lowest element.
627  return (this_set.front() == 0 ||
628  std::binary_search(this_set.begin(), this_set.end(), words_[0]));
629  }
630  }
631 }
632 
633 
635  int32 partial_word_label,
636  ComputationState *next_state,
637  CompactLatticeArc *arc_out) const {
638  KALDI_ASSERT(!IsEmpty());
639 
640  next_state->phones_.clear();
641  next_state->words_.clear();
642  next_state->transition_ids_.clear();
643  // neither of the following variables should matter, actually,
644  // they will never be inspected. So just set them to kFresh for consistency,
645  // so they end up at the same place in the tuple-map_.
646  next_state->word_fresh_ = kFresh;
647  next_state->phone_fresh_ = kFresh;
648  next_state->weight_ = LatticeWeight::One();
649 
650  int32 word_id;
651  if (words_.size() >= 1) {
652  word_id = words_[0];
653  if (words_.size() > 1)
654  KALDI_WARN << "Word-aligning lattice: discarding extra word at end of lattice"
655  << "(forced-out).";
656  } else {
657  word_id = partial_word_label;
658  }
659  KALDI_ASSERT(word_id != 0); // any zeros would have been replaced with
660  // 'temporary epsilon' = 2.
661  std::vector<int32> appended_transition_ids;
663  transition_ids_.end(),
664  &appended_transition_ids);
665  arc_out->ilabel = word_id;
666  arc_out->olabel = word_id;
667  arc_out->weight = CompactLatticeWeight(weight_,
668  appended_transition_ids);
669  // arc_out->nextstate will be set by the calling code.
670 }
671 
672 
674  const LexiconMap &lexicon_map, int32 word_id, int32 num_phones,
675  ComputationState *next_state, CompactLatticeArc *arc_out) const {
676  KALDI_ASSERT(word_id == 0 || (!words_.empty() && word_id == words_[0]));
677  KALDI_ASSERT(num_phones <= static_cast<int32>(phones_.size()));
678 
679  std::vector<int32> lexicon_key;
680  lexicon_key.reserve(1 + num_phones);
681  lexicon_key.push_back(word_id); // put 1st word in lexicon_key.
682  lexicon_key.insert(lexicon_key.end(),
683  phones_.begin(), phones_.begin() + num_phones);
684  LexiconMap::const_iterator iter = lexicon_map.find(lexicon_key);
685  if (iter == lexicon_map.end()) { // no such entry
686  return false;
687  } else { // Entry exists. We'll create an arc.
688  next_state->phones_.assign(phones_.begin() + num_phones, phones_.end());
689  next_state->words_.assign(words_.begin() + (word_id == 0 ? 0 : 1),
690  words_.end());
691  next_state->transition_ids_.assign(transition_ids_.begin() + num_phones,
692  transition_ids_.end());
693  next_state->word_fresh_ =
694  (word_id != 0 && !next_state->words_.empty()) ? kFresh : kNotFresh;
695  next_state->phone_fresh_ =
696  (next_state->phones_.empty() || num_phones == 0) ? kNotFresh : kAllFresh;
697 
698  // this next thing is a bit hard to explain. If we just consumed a word with
699  // no phones, we treat the phones as fresh. The idea is that if we need to
700  // both consume a word with no phones and a phone with no words (e.g.
701  // an empty word and then silence), we need to have the phones marked
702  // as fresh in order for this to be possible.
703  if (num_phones == 0 && word_id != 0 && !next_state->phones_.empty())
704  next_state->phone_fresh_ = kAllFresh;
705 
706  next_state->weight_ = LatticeWeight::One();
707 
708  if (GetVerboseLevel() >= 5) {
709  std::ostringstream ostr;
710  for (size_t i = 0; i < num_phones; i++)
711  ostr << phones_[i] << " ";
712  KALDI_VLOG(5) << "Taking arc with word = " << word_id
713  << " and phones = " << ostr.str()
714  << ", output-word = " << iter->second
715  << ", dest-state has num-words = " << next_state->words_.size()
716  << " and num-phones = " << next_state->phones_.size();
717  }
718 
719  // Set arc_out:
720  Label word_id = iter->second; // word_id will typically be
721  // the same as words_[0], i.e. the
722  // word we consumed.
723 
724  KALDI_ASSERT(word_id != 0); // we replaced zeros with 'temporary epsilon' = -2.
725 
726  std::vector<int32> appended_transition_ids;
728  transition_ids_.begin() + num_phones,
729  &appended_transition_ids);
730  arc_out->ilabel = word_id;
731  arc_out->olabel = word_id;
732  arc_out->weight = CompactLatticeWeight(weight_,
733  appended_transition_ids);
734  // arc_out->nextstate will be set in the calling code.
735  return true;
736  }
737 }
738 
739 
740 
741 // Returns true if this vector of transition-ids could be a valid
742 // word. Note: for testing, we assume that the lexicon always
743 // has the same input-word and output-word. The other case is complex
744 // to test.
745 static bool IsPlausibleWord(const WordAlignLatticeLexiconInfo &lexicon_info,
746  const TransitionModel &tmodel,
747  int32 word_id,
748  const std::vector<int32> &transition_ids) {
749 
750  std::vector<std::vector<int32> > split_alignment; // Split into phones.
751  if (!SplitToPhones(tmodel, transition_ids, &split_alignment)) {
752  KALDI_WARN << "Could not split word into phones correctly (forced-out?)";
753  }
754  std::vector<int32> phones(split_alignment.size());
755  for (size_t i = 0; i < split_alignment.size(); i++) {
756  KALDI_ASSERT(!split_alignment[i].empty());
757  phones[i] = tmodel.TransitionIdToPhone(split_alignment[i][0]);
758  }
759  std::vector<int32> lexicon_entry;
760  lexicon_entry.push_back(word_id);
761  lexicon_entry.insert(lexicon_entry.end(), phones.begin(), phones.end());
762 
763  if (!lexicon_info.IsValidEntry(lexicon_entry)) {
764  std::ostringstream ostr;
765  for (size_t i = 0; i < lexicon_entry.size(); i++)
766  ostr << lexicon_entry[i] << ' ';
767  KALDI_WARN << "Invalid arc in aligned lattice (code error?) lexicon-entry is " << ostr.str();
768  return false;
769  } else {
770  return true;
771  }
772 }
773 
775  const std::vector<int32> &lexicon_entry) {
776  int32 word = lexicon_entry[0]; // note: word may be zero.
777  int32 num_phones = static_cast<int32>(lexicon_entry.size()) - 2;
778  std::vector<int32> phones;
779  if (num_phones > 0)
780  phones.reserve(num_phones - 1);
781  // for each nonempty sequence of phones that is a strict prefix of the phones
782  // in the lexicon entry (i.e. lexicon_entry [2 ... ]), add the word to the set
783  // in viability_map_[phones].
784  for (int32 n = 0; n < num_phones - 1; n++) {
785  phones.push_back(lexicon_entry[n + 2]); // first phone is at position 2.
786  // n+1 is the length of the sequence of phones
787  viability_map_[phones].push_back(word);
788  }
789 }
790 
792  for (ViabilityMap::iterator iter = viability_map_.begin();
793  iter != viability_map_.end();
794  ++iter) {
795  std::vector<int32> &words = iter->second;
796  SortAndUniq(&words);
797  KALDI_ASSERT(words[0] >= 0 && "Error: negative labels in lexicon.");
798  }
799 }
800 
805  const std::vector<int32> &lexicon_entry) {
806  KALDI_ASSERT(lexicon_entry.size() >= 2);
807  std::vector<int32> key;
808  key.reserve(lexicon_entry.size() - 1);
809  // add the original word:
810  key.push_back(lexicon_entry[0]);
811  // add the phones:
812  key.insert(key.end(), lexicon_entry.begin() + 2, lexicon_entry.end());
813  int32 new_word = lexicon_entry[1]; // This will typically be the same as
814  // the original word at lexicon_entry[0] but is allowed to differ.
815  if (new_word == 0) new_word = kTemporaryEpsilon; // replace 0's with -2;
816  // we'll revert the change at the end.
817  if (lexicon_map_.count(key) != 0) {
818  if (lexicon_map_[key] == new_word)
819  KALDI_WARN << "Duplicate entry in lexicon map for word " << lexicon_entry[0];
820  else
821  KALDI_ERR << "Duplicate entry in lexicon map for word " << lexicon_entry[0]
822  << " with inconsistent to-word.";
823  }
824  lexicon_map_[key] = new_word;
825 
826  if (lexicon_entry[0] != lexicon_entry[1]) {
827  // Add reverse lexicon entry, this time with no 0 -> -2 mapping.
828  key[0] = lexicon_entry[1];
829  // Note: we ignore the situation where there are conflicting
830  // entries in reverse_lexicon_map_, as we never actually inspect
831  // the contents so it won't matter.
832  reverse_lexicon_map_[key] = lexicon_entry[0];
833  }
834 }
835 
837  const std::vector<int32> &lexicon_entry) {
838  int32 num_phones = static_cast<int32>(lexicon_entry.size()) - 2;
839  int32 word = lexicon_entry[0];
840  if (num_phones_map_.count(word) == 0)
841  num_phones_map_[word] = std::make_pair(num_phones, num_phones);
842  else {
843  std::pair<int32, int32> &pr = num_phones_map_[word];
844  pr.first = std::min(pr.first, num_phones); // update min-num-phones
845  pr.second = std::max(pr.second, num_phones); // update max-num-phones
846  if (pr.first == 0 && word == 0)
847  KALDI_ERR << "Zero word with empty pronunciation is not allowed.";
848  }
849 }
850 
853 bool WordAlignLatticeLexiconInfo::IsValidEntry(const std::vector<int32> &entry) const {
854  KALDI_ASSERT(!entry.empty());
855  LexiconMap::const_iterator iter = lexicon_map_.find(entry);
856  if (iter != lexicon_map_.end()) {
857  int32 tgt_word = (iter->second == kTemporaryEpsilon ? 0 : iter->second);
858  if (tgt_word == entry[0]) return true; // symmetric entry.
859  // this means that that there would be an output-word with this
860  // value, and this sequence of phones.
861  }
862  // For entries that were not symmetric:
863  return (reverse_lexicon_map_.count(entry) != 0);
864 }
865 
867  unordered_map<int32, int32>::const_iterator iter =
868  equivalence_map_.find(word);
869  if (iter == equivalence_map_.end()) return word; // not in map.
870  else return iter->second;
871 }
872 
874  const std::vector<std::vector<int32> > &lexicon) {
875  std::vector<std::pair<int32, int32> > equiv_pairs; // pairs of
876  // (lower,higher) words that are equivalent.
877  for (size_t i = 0; i < lexicon.size(); i++) {
878  KALDI_ASSERT(lexicon[i].size() >= 2);
879  int32 w1 = lexicon[i][0], w2 = lexicon[i][1];
880  if (w1 == w2) continue; // They are the same; this provides no information
881  // about equivalence, since any word is equivalent
882  // to itself.
883  if (w1 > w2) std::swap(w1, w2); // make sure w1 < w2.
884  equiv_pairs.push_back(std::make_pair(w1, w2));
885  }
886  SortAndUniq(&equiv_pairs);
887  equivalence_map_.clear();
888  for (size_t i = 0; i < equiv_pairs.size(); i++) {
889  int32 w1 = equiv_pairs[i].first, w2 = equiv_pairs[i].second,
890  w1dash = EquivalenceClassOf(w1);
891  equivalence_map_[w2] = w1dash;
892  }
893 }
894 
895 
897  const std::vector<std::vector<int32> > &lexicon) {
898  for (size_t i = 0; i < lexicon.size(); i++) {
899  const std::vector<int32> &lexicon_entry = lexicon[i];
900  KALDI_ASSERT(lexicon_entry.size() >= 2);
901  UpdateViabilityMap(lexicon_entry);
902  UpdateLexiconMap(lexicon_entry);
903  UpdateNumPhonesMap(lexicon_entry);
904  }
905  FinalizeViabilityMap();
906  UpdateEquivalenceMap(lexicon);
907 }
908 
912 static void MapSymbols(const WordAlignLatticeLexiconInfo &lexicon_info,
913  CompactLattice *lat) {
915  for (StateId s = 0; s < lat->NumStates(); s++) {
916  for (fst::MutableArcIterator<CompactLattice> aiter(lat, s);
917  !aiter.Done(); aiter.Next()) {
918  CompactLatticeArc arc (aiter.Value());
919  KALDI_ASSERT(arc.ilabel == arc.olabel);
920  arc.ilabel = lexicon_info.EquivalenceClassOf(arc.ilabel);
921  arc.olabel = arc.ilabel;
922  aiter.SetValue(arc);
923  }
924  }
925 }
926 
927 static bool TestWordAlignedLattice(const WordAlignLatticeLexiconInfo &lexicon_info,
928  const TransitionModel &tmodel,
929  CompactLattice clat,
930  CompactLattice aligned_clat,
931  bool allow_duplicate_paths) {
932  int32 max_err = 5, num_err = 0;
933  { // We test whether the forward-backward likelihoods differ; this is intended
934  // to detect when we have duplicate paths in the aligned lattice, for some path
935  // in the input lattice (e.g. due to epsilon-sequencing problems).
936  Posterior post;
937  Lattice lat, aligned_lat;
938  ConvertLattice(clat, &lat);
939  ConvertLattice(aligned_clat, &aligned_lat);
940  TopSort(&lat);
941  TopSort(&aligned_lat);
942  BaseFloat like_before = LatticeForwardBackward(lat, &post),
943  like_after = LatticeForwardBackward(aligned_lat, &post);
944  if (fabs(like_before - like_after) >
945  1.0e-04 * (fabs(like_before) + fabs(like_after))) {
946  KALDI_WARN << "Forward-backward likelihoods differ in word-aligned lattice "
947  << "testing, " << like_before << " != " << like_after;
948  if (!allow_duplicate_paths)
949  num_err++;
950  }
951  }
952 
953  // Do a check on the arcs of the aligned lattice, that each arc corresponds
954  // to an entry in the lexicon.
955  for (CompactLattice::StateId s = 0; s < aligned_clat.NumStates(); s++) {
956  for (fst::ArcIterator<CompactLattice> aiter(aligned_clat, s);
957  !aiter.Done(); aiter.Next()) {
958  const CompactLatticeArc &arc (aiter.Value());
959  KALDI_ASSERT(arc.ilabel == arc.olabel);
960  int32 word_id = arc.ilabel;
961  const std::vector<int32> &tids = arc.weight.String();
962  if (word_id == 0 && tids.empty()) continue; // We allow epsilon arcs.
963 
964  if (num_err < max_err)
965  if (!IsPlausibleWord(lexicon_info, tmodel, word_id, tids))
966  num_err++;
967  // Note: IsPlausibleWord will warn if there is an error.
968  }
969  if (!aligned_clat.Final(s).String().empty()) {
970  KALDI_WARN << "Aligned lattice has nonempty string on its final-prob.";
971  return false;
972  }
973  }
974 
975  // Next we'll do an equivalence test.
976  // First map symbols into equivalence classes, so that we don't wrongly fail
977  // due to the capability of the framework to map words to other words.
978  // (e.g. mapping <eps> on silence arcs to SIL).
979 
980  MapSymbols(lexicon_info, &clat);
981  MapSymbols(lexicon_info, &aligned_clat);
982 
984  int32 num_paths = 5, seed = Rand(), max_path_length = -1;
985  BaseFloat delta = 0.2; // some lattices have large costs -> use large delta.
986 
987  FLAGS_v = GetVerboseLevel(); // set the OpenFst verbose level to the Kaldi
988  // verbose level.
989  if (!RandEquivalent(clat, aligned_clat, num_paths, delta, seed, max_path_length)) {
990  KALDI_WARN << "Equivalence test failed during lattice alignment.";
991  return false;
992  }
993  FLAGS_v = 0;
994 
995  return (num_err == 0);
996 }
997 
998 
999 
1000 // This is the wrapper function for users to call.
1002  const TransitionModel &tmodel,
1003  const WordAlignLatticeLexiconInfo &lexicon_info,
1004  const WordAlignLatticeLexiconOpts &opts,
1005  CompactLattice *lat_out) {
1006  PhoneAlignLatticeOptions phone_align_opts;
1007  phone_align_opts.reorder = opts.reorder;
1008  phone_align_opts.replace_output_symbols = false;
1009  phone_align_opts.remove_epsilon = false;
1010 
1011  // Input Lattice should be deterministic and w/o epsilons.
1012  bool test = true;
1013  uint64 props = lat.Properties(fst::kIDeterministic|fst::kIEpsilons, test);
1014  if (props != fst::kIDeterministic) {
1015  KALDI_WARN << "[Lattice has input epsilons and/or is not input-deterministic "
1016  << "(in Mohri sense)]-- i.e. lattice is not deterministic. "
1017  << "Word-alignment may be slow and-or blow up in memory.";
1018  }
1019 
1020  CompactLattice phone_aligned_lat;
1021  bool ans = PhoneAlignLattice(lat, tmodel, phone_align_opts,
1022  &phone_aligned_lat);
1023  // 'phone_aligned_lat' is no longer deterministic and contains epsilons.
1024 
1025  int32 max_states;
1026  if (opts.max_expand <= 0) {
1027  max_states = -1;
1028  } else {
1029  // The 1000 is a fixed offset to give it more wiggle room for very
1030  // small inputs.
1031  max_states = kNumStatesOffset + opts.max_expand * phone_aligned_lat.NumStates();
1032  }
1033 
1034  // If ans == false, we hope this is due to a forced-out lattice, and we try to
1035  // continue.
1036  LatticeLexiconWordAligner aligner(phone_aligned_lat, tmodel, lexicon_info,
1037  max_states, opts.partial_word_label, lat_out);
1038  // We'll let the calling code warn if this is false; it will know the utterance-id.
1039  ans = aligner.AlignLattice() && ans;
1040  if (ans && opts.test) { // We only test if it succeeded.
1041  if (!TestWordAlignedLattice(lexicon_info, tmodel, lat, *lat_out,
1042  opts.allow_duplicate_paths)) {
1043  KALDI_WARN << "Lattice failed test (activated because --test=true). "
1044  << "Probable code error, please contact Kaldi maintainers.";
1045  ans = false;
1046  }
1047  }
1048  return ans;
1049 }
1050 
1051 bool ReadLexiconForWordAlign (std::istream &is,
1052  std::vector<std::vector<int32> > *lexicon) {
1053  lexicon->clear();
1054  std::string line;
1055  while (std::getline(is, line)) {
1056  std::vector<int32> this_entry;
1057  if (!SplitStringToIntegers(line, " \t\r", false, &this_entry) ||
1058  this_entry.size() < 2) {
1059  KALDI_WARN << "Lexicon line '" << line << "' is invalid";
1060  return false;
1061  }
1062  lexicon->push_back(this_entry);
1063  }
1064  return (!lexicon->empty());
1065 }
1066 
1067 } // namespace kaldi
1068 
int32 words[kMaxOrder]
fst::StdArc::StateId StateId
StateId GetStateForTuple(const Tuple &tuple)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void ProcessTransition(StateId prev_output_state, const Tuple &next_tuple, CompactLatticeArc *arc)
LatticeLexiconWordAligner(const CompactLattice &lat, const TransitionModel &tmodel, const WordAlignLatticeLexiconInfo &lexicon_info, int32 max_states, int32 partial_word_label, CompactLattice *lat_out)
std::vector< std::pair< Tuple, StateId > > final_queue_
static void AppendVectors(std::vector< std::vector< int32 > >::const_iterator input_begin, std::vector< std::vector< int32 > >::const_iterator input_end, std::vector< int32 > *output)
Tuple(StateId input_state, ComputationState comp_state)
unordered_map< Tuple, StateId, TupleHash, TupleEqual > MapType
A hashing function-object for vectors.
Definition: stl-utils.h:216
static void MapSymbols(const WordAlignLatticeLexiconInfo &lexicon_info, CompactLattice *lat)
Testing code; map word symbols in the lattice "lat" using the equivalence-classes obtained from the l...
static const LatticeWeightTpl One()
void TakeForcedTransition(int32 partial_word_label, ComputationState *next_state, CompactLatticeArc *arc_out) const
This may be called at the end of a lattice, if it was forced out.
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
int32 GetVerboseLevel()
Get verbosity level, usually set via command line &#39;–verbose=&#39; switch.
Definition: kaldi-error.h:60
bool ViableIfAdvanced(const ViabilityMap &viability_map) const
Returns true if, assuming we were to add one or more phones by calling Advance one or more times on t...
void UpdateViabilityMap(const std::vector< int32 > &lexicon_entry)
bool ReadLexiconForWordAlign(std::istream &is, std::vector< std::vector< int32 > > *lexicon)
Read the lexicon in the special format required for word alignment.
unordered_map< std::vector< int32 >, int32, VectorHasher< int32 > > LexiconMap
This is a map from a vector (orig-word-symbol phone1 phone2 ...
fst::CompactLatticeWeightTpl< LatticeWeight, int32 > CompactLatticeWeight
Definition: kaldi-lattice.h:35
bool TakeTransition(const LexiconMap &lexicon_map, int32 word_id, int32 num_phones, ComputationState *next_state, CompactLatticeArc *arc_out) const
Take a transition, if possible; consume "num_phones" phones and (if word_id != 0) the word "word_id" ...
bool IsValidEntry(const std::vector< int32 > &entry) const
Returns true if this lexicon-entry can appear, intepreted as (output-word phone1 phone2 ...
void PossiblyAdvanceArc(const Tuple &tuple, StateId output_state)
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
bool WordAlignLatticeLexicon(const CompactLattice &lat, const TransitionModel &tmodel, const WordAlignLatticeLexiconInfo &lexicon_info, const WordAlignLatticeLexiconOpts &opts, CompactLattice *lat_out)
Align lattice so that each arc has the transition-ids on it that correspond to the word that is on th...
LatticeWeight FinalWeight() const
FinalWeight() will return "weight" if both transition_ids and word_labels are empty, otherwise it will return Weight::Zero().
bool operator==(const ComputationState &other) const
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq&#39;s (removes duplicates) from a vector.
Definition: stl-utils.h:39
static bool TestWordAlignedLattice(const WordAlignLatticeLexiconInfo &lexicon_info, const TransitionModel &tmodel, CompactLattice clat, CompactLattice aligned_clat, bool allow_duplicate_paths)
WordAlignLatticeLexiconInfo::ViabilityMap ViabilityMap
bool SplitToPhones(const TransitionModel &trans_model, const std::vector< int32 > &alignment, std::vector< std::vector< int32 > > *split_alignment)
SplitToPhones splits up the TransitionIds in "alignment" into their individual phones (one vector per...
Definition: hmm-utils.cc:723
void ProcessWordTransitions(const Tuple &tuple, StateId output_state)
unordered_map< int32, std::pair< int32, int32 > > NumPhonesMap
This is a map from the word-id (as present in the original lattice) to the minimum and maximum #phone...
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
WordAlignLatticeLexiconInfo::NumPhonesMap NumPhonesMap
BaseFloat LatticeForwardBackward(const Lattice &lat, Posterior *post, double *acoustic_like_sum)
This function does the forward-backward over lattices and computes the posterior probabilities of the...
std::vector< std::pair< Tuple, StateId > > queue_
static const CompactLatticeWeightTpl< WeightType, IntType > One()
struct rnnlm::@11::@12 n
Arc::StateId CreateSuperFinal(MutableFst< Arc > *fst)
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
static const LatticeWeightTpl Zero()
void ProcessFinalForceOut()
Creates arcs from all the tuples that were final in the original lattice but have no arcs out of them...
fst::VectorFst< LatticeArc > Lattice
Definition: kaldi-lattice.h:44
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
unordered_map< std::vector< int32 >, std::vector< int32 >, VectorHasher< int32 > > ViabilityMap
The type ViabilityMap maps from sequences of phones (excluding the empty sequence), to the sets of all word-labels [on the input lattice] that could correspond to phone sequences that start with s [but are longer than s].
static bool IsPlausibleWord(const WordAlignLatticeLexiconInfo &lexicon_info, const TransitionModel &tmodel, int32 word_id, const std::vector< int32 > &transition_ids)
fst::StdArc::Label Label
void Advance(const CompactLatticeArc &arc, const TransitionModel &tmodel, LatticeWeight *leftover_weight)
The state of the computation in which, along a single path in the lattice, we work out the word bound...
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
WordAlignLatticeLexiconInfo::LexiconMap LexiconMap
void UpdateEquivalenceMap(const std::vector< std::vector< int32 > > &lexicon)
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
This class extracts some information from the lexicon and stores it in a suitable form for the word-a...
static const CompactLatticeWeightTpl< WeightType, IntType > Zero()
void UpdateNumPhonesMap(const std::vector< int32 > &lexicon_entry)
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
Definition: kaldi-lattice.h:42
WordAlignLatticeLexiconInfo(const std::vector< std::vector< int32 > > &lexicon)
bool PhoneAlignLattice(const CompactLattice &lat, const TransitionModel &tmodel, const PhoneAlignLatticeOptions &opts, CompactLattice *lat_out)
Outputs a lattice in which the arcs correspond exactly to sequences of phones, so the boundaries betw...
void UpdateLexiconMap(const std::vector< int32 > &lexicon_entry)
Update the map from a vector (orig-word-symbol phone1 phone2 ...
bool ProcessFinal()
Process all final-probs (normal case, no forcing-out).
int32 TransitionIdToPhone(int32 trans_id) const
void ProcessEpsilonTransitions(const Tuple &tuple, StateId output_state)
const int kTemporaryEpsilon
int32 EquivalenceClassOf(int32 word) const
Purely for the testing code, we map words into equivalence classes derived from the mappings in the f...
const WordAlignLatticeLexiconInfo & lexicon_info_
const int kNumStatesOffset
bool HasNonEpsArcsOut(StateId output_state)
This function returns true if the state "output_state" in the output lattice has arcs out that have e...
void RemoveSomeInputSymbols(const std::vector< I > &to_remove, MutableFst< Arc > *fst)
RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from the input side of the FST...