word-align-lattice.cc
Go to the documentation of this file.
1 // lat/word-align-lattice.cc
2 
3 // Copyright 2011-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "lat/word-align-lattice.h"
22 #include "hmm/transition-model.h"
23 #include "util/stl-utils.h"
24 
25 namespace kaldi {
26 
28  public:
31 
33  public:
36 
40  void Advance(const CompactLatticeArc &arc, LatticeWeight *weight) {
41  const std::vector<int32> &string = arc.weight.String();
42  transition_ids_.insert(transition_ids_.end(),
43  string.begin(), string.end());
44  if (arc.ilabel != 0) // note: arc.ilabel==arc.olabel (acceptor)
45  word_labels_.push_back(arc.ilabel);
46  *weight = Times(weight_, arc.weight.Weight());
48  }
49 
59  bool OutputArc(const WordBoundaryInfo &info,
60  const TransitionModel &tmodel,
61  CompactLatticeArc *arc_out,
62  bool *error) {
63  // order of this ||-expression doesn't matter for
64  // function behavior, only for efficiency, since the
65  // cases are disjoint.
66  return OutputNormalWordArc(info, tmodel, arc_out, error) ||
67  OutputSilenceArc(info, tmodel, arc_out, error) ||
68  OutputOnePhoneWordArc(info, tmodel, arc_out, error);
69  }
70 
71  bool OutputSilenceArc(const WordBoundaryInfo &info,
72  const TransitionModel &tmodel,
73  CompactLatticeArc *arc_out,
74  bool *error);
75  bool OutputOnePhoneWordArc(const WordBoundaryInfo &info,
76  const TransitionModel &tmodel,
77  CompactLatticeArc *arc_out,
78  bool *error);
79  bool OutputNormalWordArc(const WordBoundaryInfo &info,
80  const TransitionModel &tmodel,
81  CompactLatticeArc *arc_out,
82  bool *error);
83 
84  bool IsEmpty() { return (transition_ids_.empty() && word_labels_.empty()); }
85 
90 
103  void OutputArcForce(const WordBoundaryInfo &info,
104  const TransitionModel &tmodel,
105  CompactLatticeArc *arc_out,
106  bool *error);
107 
108  size_t Hash() const {
110  return vh(transition_ids_) + 90647 * vh(word_labels_);
111  // 90647 is an arbitrary largish prime number.
112  // We don't bother including the weight in the hash--
113  // we don't really expect duplicates with the same vectors
114  // but different weights, and anyway, this is only an
115  // efficiency issue.
116  }
117 
118  // Just need an arbitrary complete order.
119  bool operator == (const ComputationState &other) const {
120  return (transition_ids_ == other.transition_ids_
121  && word_labels_ == other.word_labels_
122  && weight_ == other.weight_);
123  }
124 
125  ComputationState(): weight_(LatticeWeight::One()) { } // initial state.
128  weight_(other.weight_) { }
129  private:
130  std::vector<int32> transition_ids_;
131  std::vector<int32> word_labels_;
132  LatticeWeight weight_; // contains two floats.
133  };
134 
135 
136  struct Tuple {
137  Tuple(StateId input_state, ComputationState comp_state):
138  input_state(input_state), comp_state(comp_state) {}
139  StateId input_state;
141  };
142 
143  struct TupleHash {
144  size_t operator() (const Tuple &state) const {
145  return state.input_state + 102763 * state.comp_state.Hash();
146  // 102763 is just an arbitrary prime number
147  }
148  };
149  struct TupleEqual {
150  bool operator () (const Tuple &state1, const Tuple &state2) const {
151  // treat this like operator ==
152  return (state1.input_state == state2.input_state
153  && state1.comp_state == state2.comp_state);
154  }
155  };
156 
157  typedef unordered_map<Tuple, StateId, TupleHash, TupleEqual> MapType;
158 
159  StateId GetStateForTuple(const Tuple &tuple, bool add_to_queue) {
160  MapType::iterator iter = map_.find(tuple);
161  if (iter == map_.end()) { // not in map.
162  StateId output_state = lat_out_->AddState();
163  map_[tuple] = output_state;
164  if (add_to_queue)
165  queue_.push_back(std::make_pair(tuple, output_state));
166  return output_state;
167  } else {
168  return iter->second;
169  }
170  }
171 
172  void ProcessFinal(Tuple tuple, StateId output_state) {
173  // ProcessFinal is only called if the input_state has
174  // final-prob of One(). [else it should be zero. This
175  // is because we called CreateSuperFinal().]
176 
177  if (tuple.comp_state.IsEmpty()) { // computation state doesn't have
178  // anything pending.
179  std::vector<int32> empty_vec;
180  CompactLatticeWeight cw(tuple.comp_state.FinalWeight(), empty_vec);
181  lat_out_->SetFinal(output_state, Plus(lat_out_->Final(output_state), cw));
182  } else {
183  // computation state has something pending, i.e. input or
184  // output symbols that need to be flushed out. Note: OutputArc() would
185  // have returned false or we wouldn't have been called, so we have to
186  // force it out.
187  CompactLatticeArc lat_arc;
188  tuple.comp_state.OutputArcForce(info_, tmodel_, &lat_arc, &error_);
189  // True in the next line means add it to the queue.
190  lat_arc.nextstate = GetStateForTuple(tuple, true);
191  // The final-prob stuff will get called again from ProcessQueueElement().
192  // Note: because we did CreateSuperFinal(), this final-state on the input
193  // lattice will have no output arcs (and unit final-prob), so there will be
194  // no complications with processing the arcs from this state (there won't
195  // be any).
196  KALDI_ASSERT(output_state != lat_arc.nextstate);
197  lat_out_->AddArc(output_state, lat_arc);
198  }
199  }
200 
201 
203  KALDI_ASSERT(!queue_.empty());
204  Tuple tuple = queue_.back().first;
205  StateId output_state = queue_.back().second;
206  queue_.pop_back();
207 
208  // First thing is-- we see whether the computation-state has something
209  // pending that it wants to output. In this case we don't do
210  // anything further. This is a chosen behavior similar to the
211  // epsilon-sequencing rules encoded by the filters in
212  // composition.
213  CompactLatticeArc lat_arc;
214  if (tuple.comp_state.OutputArc(info_, tmodel_, &lat_arc, &error_)) {
215  // note: this function changes the tuple (when it returns true).
216  lat_arc.nextstate = GetStateForTuple(tuple, true); // true == add to queue,
217  // if not already present.
218  KALDI_ASSERT(output_state != lat_arc.nextstate);
219  lat_out_->AddArc(output_state, lat_arc);
220  } else {
221  // when there's nothing to output, we'll process arcs from the input-state.
222  // note: it would in a sense be valid to do both (i.e. process the stuff
223  // above, and also these), but this is a bit like the epsilon-sequencing
224  // stuff in composition: we avoid duplicate arcs by doing it this way.
225 
226  if (lat_.Final(tuple.input_state) != CompactLatticeWeight::Zero()) {
227  KALDI_ASSERT(lat_.Final(tuple.input_state) == CompactLatticeWeight::One());
228  // ... since we did CreateSuperFinal.
229  ProcessFinal(tuple, output_state);
230  }
231  // Now process the arcs. Note: final-state shouldn't have any arcs.
232  for (fst::ArcIterator<CompactLattice> aiter(lat_, tuple.input_state);
233  !aiter.Done(); aiter.Next()) {
234  const CompactLatticeArc &arc = aiter.Value();
235  Tuple next_tuple(tuple);
236  LatticeWeight weight;
237  next_tuple.comp_state.Advance(arc, &weight);
238  next_tuple.input_state = arc.nextstate;
239  StateId next_output_state = GetStateForTuple(next_tuple, true); // true == add to queue,
240  // if not already present.
241  // We add an epsilon arc here (as the input and output happens
242  // separately)... the epsilons will get removed later.
243  KALDI_ASSERT(next_output_state != output_state);
244  lat_out_->AddArc(output_state,
245  CompactLatticeArc(0, 0,
246  CompactLatticeWeight(weight, std::vector<int32>()),
247  next_output_state));
248  }
249  }
250  }
251 
253  const TransitionModel &tmodel,
254  const WordBoundaryInfo &info,
255  int32 max_states,
256  CompactLattice *lat_out):
257  lat_(lat), tmodel_(tmodel), info_in_(info), info_(info),
258  max_states_(max_states), lat_out_(lat_out),
259  error_(false) {
260  bool test = true;
261  uint64 props = lat_.Properties(fst::kIDeterministic|fst::kIEpsilons, test);
262  if (props != fst::kIDeterministic) {
263  KALDI_WARN << "[Lattice has input epsilons and/or is not input-deterministic "
264  << "(in Mohri sense)]-- i.e. lattice is not deterministic. "
265  << "Word-alignment may be slow and-or blow up in memory.";
266  }
267  fst::CreateSuperFinal(&lat_); // Creates a super-final state, so the
268  // only final-probs are One().
269 
270  // Inside this class, we don't want to use zero for the silence
271  // or partial-word labels, as this will interfere with the RmEpsilon
272  // stage, where we don't want the arcs corresponding to silence or
273  // partial words to be removed-- only the arcs with nothing at all
274  // on them.
275  if (info_.partial_word_label == 0 || info_.silence_label == 0) {
276  int32 unused_label = 1 + HighestNumberedOutputSymbol(lat);
277  if (info_.partial_word_label >= unused_label)
278  unused_label = info_.partial_word_label + 1;
279  if (info_.silence_label >= unused_label)
280  unused_label = info_.silence_label + 1;
281  KALDI_ASSERT(unused_label > 0);
282  if (info_.partial_word_label == 0)
283  info_.partial_word_label = unused_label++;
284  if (info_.silence_label == 0)
285  info_.silence_label = unused_label;
286  }
287  }
288 
289  // Removes epsilons; also removes unreachable states...
290  // not sure if these would exist if original was connected.
291  // This also replaces the temporary symbols for the silence
292  // and partial-words, with epsilons, if we wanted epsilons.
294  // Remove epsilon arcs from output lattice.
295  RmEpsilon(lat_out_, true); // true = connect.
296  std::vector<int32> syms_to_remove;
297  if (info_in_.partial_word_label == 0)
298  syms_to_remove.push_back(info_.partial_word_label);
299  if (info_in_.silence_label == 0)
300  syms_to_remove.push_back(info_.silence_label);
301  if (!syms_to_remove.empty()) {
302  RemoveSomeInputSymbols(syms_to_remove, lat_out_);
303  Project(lat_out_, fst::PROJECT_INPUT);
304  }
305  }
306 
307  bool AlignLattice() {
308  lat_out_->DeleteStates();
309  if (lat_.Start() == fst::kNoStateId) {
310  KALDI_WARN << "Trying to word-align empty lattice.";
311  return false;
312  }
313  ComputationState initial_comp_state;
314  Tuple initial_tuple(lat_.Start(), initial_comp_state);
315  StateId start_state = GetStateForTuple(initial_tuple, true); // True = add this to queue.
316  lat_out_->SetStart(start_state);
317 
318  while (!queue_.empty()) {
319  if (max_states_ > 0 && lat_out_->NumStates() > max_states_) {
320  KALDI_WARN << "Number of states in lattice exceeded max-states of "
321  << max_states_ << ", original lattice had "
322  << lat_.NumStates() << " states. Returning what we have.";
324  return false;
325  }
327  }
328 
330 
331  return !error_;
332  }
333 
340 
341  std::vector<std::pair<Tuple, StateId> > queue_;
342 
343 
344 
345  MapType map_; // map from tuples to StateId.
346  bool error_;
347 
348 };
349 
351  const WordBoundaryInfo &info, const TransitionModel &tmodel,
352  CompactLatticeArc *arc_out, bool *error) {
353  if (transition_ids_.empty()) return false;
354  int32 phone = tmodel.TransitionIdToPhone(transition_ids_[0]);
355  if (info.TypeOfPhone(phone) != WordBoundaryInfo::kNonWordPhone) return false;
356 
357  // we assume the start of transition_ids_ is the start of the phone [silence];
358  // this is a precondition.
359  size_t len = transition_ids_.size(), i;
360  // Keep going till we reach a "final" transition-id; note, if
361  // reorder==true, we have to go a bit further after this.
362  for (i = 0; i < len; i++) {
363  int32 tid = transition_ids_[i];
364  int32 this_phone = tmodel.TransitionIdToPhone(tid);
365  if (this_phone != phone && ! *error) { // error condition: should have reached final transition-id first.
366  *error = true;
367  KALDI_WARN << "Phone changed before final transition-id found "
368  "[broken lattice or mismatched model or wrong --reorder option?]";
369  }
370  if (tmodel.IsFinal(tid))
371  break;
372  }
373  if (i == len) return false; // fell off loop.
374  i++; // go past the one for which IsFinal returned true.
375  if (info.reorder) // we have to consume the following self-loop transition-ids.
376  while (i < len && tmodel.IsSelfLoop(transition_ids_[i])) i++;
377  if (i == len) return false; // we don't know if it ends here... so can't output arc.
378 
379  if (tmodel.TransitionIdToPhone(transition_ids_[i-1]) != phone
380  && ! *error) { // another check.
381  KALDI_WARN << "Phone changed unexpectedly in lattice "
382  "[broken lattice or mismatched model?]";
383  }
384  // interpret i as the number of transition-ids to consume.
385  std::vector<int32> tids_out(transition_ids_.begin(), transition_ids_.begin()+i);
386 
387  // consumed transition ids from our internal state.
388  *arc_out = CompactLatticeArc(info.silence_label, info.silence_label,
389  CompactLatticeWeight(weight_, tids_out), fst::kNoStateId);
390  transition_ids_.erase(transition_ids_.begin(), transition_ids_.begin()+i); // delete these
391  weight_ = LatticeWeight::One(); // we just output the weight.
392  return true;
393 }
394 
395 
397  const WordBoundaryInfo &info, const TransitionModel &tmodel,
398  CompactLatticeArc *arc_out, bool *error) {
399  if (transition_ids_.empty()) return false;
400  if (word_labels_.empty()) return false;
401  int32 phone = tmodel.TransitionIdToPhone(transition_ids_[0]);
403  return false;
404  // we assume the start of transition_ids_ is the start of the phone.
405  // this is a precondition.
406  size_t len = transition_ids_.size(), i;
407  for (i = 0; i < len; i++) {
408  int32 tid = transition_ids_[i];
409  int32 this_phone = tmodel.TransitionIdToPhone(tid);
410  if (this_phone != phone && ! *error) { // error condition: should have reached final transition-id first.
411  KALDI_WARN << "Phone changed before final transition-id found "
412  "[broken lattice or mismatched model or wrong --reorder option?]";
413  // just continue, ignoring this-- we'll probably output something...
414  }
415  if (tmodel.IsFinal(tid))
416  break;
417  }
418  if (i == len) return false; // fell off loop.
419  i++; // go past the one for which IsFinal returned true.
420  if (info.reorder) // we have to consume the following self-loop transition-ids.
421  while (i < len && tmodel.IsSelfLoop(transition_ids_[i])) i++;
422  if (i == len) return false; // we don't know if it ends here... so can't output arc.
423 
424  if (tmodel.TransitionIdToPhone(transition_ids_[i-1]) != phone
425  && ! *error) { // another check.
426  KALDI_WARN << "Phone changed unexpectedly in lattice "
427  "[broken lattice or mismatched model?]";
428  *error = true;
429  }
430 
431  // interpret i as the number of transition-ids to consume.
432  std::vector<int32> tids_out(transition_ids_.begin(),
433  transition_ids_.begin() + i);
434 
435  // consumed transition ids from our internal state.
436  int32 word = word_labels_[0];
437  *arc_out = CompactLatticeArc(word, word,
438  CompactLatticeWeight(weight_, tids_out), fst::kNoStateId);
439  transition_ids_.erase(transition_ids_.begin(),
440  transition_ids_.begin() + i); // delete these
441  // Remove the word that we just output.
442  word_labels_.erase(word_labels_.begin(), word_labels_.begin() + 1);
443  weight_ = LatticeWeight::One(); // we just output the weight.
444  return true;
445 }
446 
447 
451  const WordBoundaryInfo &info, const TransitionModel &tmodel,
452  CompactLatticeArc *arc_out, bool *error) {
453  if (transition_ids_.empty()) return false;
454  if (word_labels_.empty()) return false;
455  int32 begin_phone = tmodel.TransitionIdToPhone(transition_ids_[0]);
456  if (info.TypeOfPhone(begin_phone) != WordBoundaryInfo::kWordBeginPhone)
457  return false;
458  // we assume the start of transition_ids_ is the start of the phone.
459  // this is a precondition.
460  size_t len = transition_ids_.size(), i;
461 
462  // Eat up the transition-ids of this word-begin phone until we get to the
463  // "final" transition-id. [there may be self-loops following this though,
464  // if reorder==true]
465  for (i = 0; i < len && !tmodel.IsFinal(transition_ids_[i]); i++);
466  if (i == len) return false;
467  i++; // Skip over this final-transition.
468  if (info.reorder) // Skip over any reordered self-loops for this final-transition
469  for (; i < len && tmodel.IsSelfLoop(transition_ids_[i]); i++);
470  if (i == len) return false;
471  if (tmodel.TransitionIdToPhone(transition_ids_[i-1]) != begin_phone
472  && ! *error) { // another check.
473  KALDI_WARN << "Phone changed unexpectedly in lattice "
474  "[broken lattice or mismatched model?]";
475  *error = true;
476  }
477  // Now keep going till we hit a word-ending phone.
478  // Note: we don't expect anything except word-internal phones
479  // here, but we'll just print a warning if we get something
480  // else.
481  for (; i < len; i++) {
482  int32 this_phone = tmodel.TransitionIdToPhone(transition_ids_[i]);
483  if (info.TypeOfPhone(this_phone) == WordBoundaryInfo::kWordEndPhone)
484  break;
485  if (info.TypeOfPhone(this_phone) != WordBoundaryInfo::kWordInternalPhone
486  && !*error) {
487  KALDI_WARN << "Unexpected phone " << this_phone
488  << " found inside a word.";
489  *error = true;
490  }
491  }
492  if (i == len) return false;
493 
494  // OK, we hit a word-ending phone. Continue till we get to
495  // a "final-transition".
496 
497  // this variable just used for checks.
498  int32 final_phone = tmodel.TransitionIdToPhone(transition_ids_[i]);
499  for (; i < len; i++) {
500  int32 this_phone = tmodel.TransitionIdToPhone(transition_ids_[i]);
501  if (this_phone != final_phone && ! *error) {
502  *error = true;
503  KALDI_WARN << "Phone changed before final transition-id found "
504  "[broken lattice or mismatched model or wrong --reorder option?]";
505  }
506  if (tmodel.IsFinal(transition_ids_[i])) break;
507  }
508  if (i == len) return false;
509  i++;
510  // We got to the final-transition of the final phone;
511  // if reorder==true, continue eating up the self-loop.
512  if (info.reorder == true)
513  while (i < len && tmodel.IsSelfLoop(transition_ids_[i])) i++;
514  if (i == len) return false;
515  if (tmodel.TransitionIdToPhone(transition_ids_[i-1]) != final_phone
516  && ! *error) {
517  *error = true;
518  KALDI_WARN << "Phone changed while following final self-loop "
519  "[broken lattice or mismatched model or wrong --reorder option?]";
520  }
521 
522  // OK, we're ready to output the word.
523  // Interpret i as the number of transition-ids to consume.
524  std::vector<int32> tids_out(transition_ids_.begin(),
525  transition_ids_.begin() + i);
526 
527  // consumed transition ids from our internal state.
528  int32 word = word_labels_[0];
529  *arc_out = CompactLatticeArc(word, word,
530  CompactLatticeWeight(weight_, tids_out),
531  fst::kNoStateId);
532  transition_ids_.erase(transition_ids_.begin(),
533  transition_ids_.begin() + i); // delete these
534  // Remove the word that we just output.
535  word_labels_.erase(word_labels_.begin(),
536  word_labels_.begin() + 1);
537  weight_ = LatticeWeight::One(); // we just output the weight.
538  return true;
539 }
540 
541 // Returns true if this vector of transition-ids could be a valid
542 // word. Note: the checks are not 100% exhaustive.
543 static bool IsPlausibleWord(const WordBoundaryInfo &info,
544  const TransitionModel &tmodel,
545  const std::vector<int32> &transition_ids) {
546  if (transition_ids.empty()) return false;
547  int32 first_phone = tmodel.TransitionIdToPhone(transition_ids.front()),
548  last_phone = tmodel.TransitionIdToPhone(transition_ids.back());
549  if ( (info.TypeOfPhone(first_phone) == WordBoundaryInfo::kWordBeginAndEndPhone
550  && first_phone == last_phone)
551  ||
552  (info.TypeOfPhone(first_phone) == WordBoundaryInfo::kWordBeginPhone &&
553  info.TypeOfPhone(last_phone) == WordBoundaryInfo::kWordEndPhone) ) {
554  if (! info.reorder) {
555  return (tmodel.IsFinal(transition_ids.back()));
556  } else {
557  int32 i = transition_ids.size() - 1;
558  while (i > 0 && tmodel.IsSelfLoop(transition_ids[i])) i--;
559  return tmodel.IsFinal(transition_ids[i]);
560  }
561  } else return false;
562 }
563 
564 
566  const WordBoundaryInfo &info, const TransitionModel &tmodel,
567  CompactLatticeArc *arc_out, bool *error) {
568 
569  KALDI_ASSERT(!IsEmpty());
570  if (!word_labels_.empty()
571  && !transition_ids_.empty()) { // We have at least one word to
572  // output, and some transition-ids. We assume that the normal OutputArc was called
573  // and failed, so this means we didn't see the end of that
574  // word.
575  int32 word = word_labels_[0];
576  if (! *error && !IsPlausibleWord(info, tmodel, transition_ids_)) {
577  *error = true;
578  KALDI_WARN << "Invalid word at end of lattice [partial lattice, forced out?]";
579  }
581  *arc_out = CompactLatticeArc(word, word, cw, fst::kNoStateId);
583  transition_ids_.clear();
584  word_labels_.erase(word_labels_.begin(), word_labels_.begin()+1);
585  } else if (!word_labels_.empty() && transition_ids_.empty()) {
586  // We won't create arcs with these word labels on, as most likely
587  // this will cause errors down the road. This is an error
588  // condition anyway, in some sense.
589  if (! *error) {
590  *error = true;
591  KALDI_WARN << "Discarding word-ids at the end of a sentence, "
592  "that don't have alignments.";
593  }
595  // This creates an epsilon arc with a weight on it, but
596  // no transition-ids since the vector is empty.
597  // The word labels are discarded.
598  *arc_out = CompactLatticeArc(0, 0, cw, fst::kNoStateId);
600  word_labels_.clear();
601  } else if (!transition_ids_.empty() && word_labels_.empty()) {
602  // Transition-ids but no word label-- either silence or partial word.
603  int32 first_phone = tmodel.TransitionIdToPhone(transition_ids_[0]);
604  if (info.TypeOfPhone(first_phone) == WordBoundaryInfo::kNonWordPhone) {
605  // first phone is silence...
606  if (first_phone != tmodel.TransitionIdToPhone(transition_ids_.back())
607  && ! *error) {
608  *error = true;
609  // Phone changed-- this is a code error, because the regular OutputArc
610  // should have output an arc (a silence arc) if that phone finished.
611  // So we make it fatal.
612  KALDI_ERR << "Broken silence arc at end of utterance (the phone "
613  "changed); code error";
614  }
615  if (!*error) { // Check that it ends at the end state of silence; error otherwise.
616  int32 i = transition_ids_.size() - 1;
617  if (info.reorder)
618  while (tmodel.IsSelfLoop(transition_ids_[i]) && i > 0)
619  i--;
620  if (!tmodel.IsFinal(transition_ids_[i])) {
621  *error = true;
622  KALDI_WARN << "Broken silence arc at end of utterance (does not "
623  "reach end of silence)";
624  }
625  }
627  *arc_out = CompactLatticeArc(info.silence_label, info.silence_label,
628  cw, fst::kNoStateId);
629  } else {
630  // Not silence phone -- treat as partial word (with no word label).
631  // This is in itself an error condition, i.e. the lattice was maybe
632  // forced out.
633  if (! *error) {
634  *error = true;
635  KALDI_WARN << "Partial word detected at end of utterance";
636  }
639  cw, fst::kNoStateId);
640  }
641  transition_ids_.clear();
643  } else {
644  KALDI_ERR << "Code error, word-aligning lattice"; // this shouldn't
645  // be able to happen; we don't call this function of they're both empty.
646  }
647 }
648 
649 // This code will eventually be removed.
650 void WordBoundaryInfo::SetOptions(const std::string int_list, PhoneType phone_type) {
651  KALDI_ASSERT(!int_list.empty() && phone_type != kNoPhone);
652  std::vector<int32> phone_list;
653  if (!kaldi::SplitStringToIntegers(int_list, ":",
654  false,
655  &phone_list)
656  || phone_list.empty())
657  KALDI_ERR << "Invalid argument to --*-phones option: " << int_list;
658  for (size_t i= 0; i < phone_list.size(); i++) {
659  if (phone_to_type.size() <= phone_list[i])
660  phone_to_type.resize(phone_list[i]+1, kNoPhone);
661  if (phone_to_type[phone_list[i]] != kNoPhone)
662  KALDI_ERR << "Phone " << phone_list[i] << "was given two incompatible "
663  "assignments.";
664  phone_to_type[phone_list[i]] = phone_type;
665  }
666 }
667 
668 // This initializer will be deleted eventually.
670  SetOptions(opts.wbegin_phones, kWordBeginPhone);
671  SetOptions(opts.wend_phones, kWordEndPhone);
672  SetOptions(opts.wbegin_and_end_phones, kWordBeginAndEndPhone);
673  SetOptions(opts.winternal_phones, kWordInternalPhone);
674  SetOptions(opts.silence_phones, (opts.silence_has_olabels ?
675  kWordBeginAndEndPhone : kNonWordPhone));
676  reorder = opts.reorder;
677  silence_label = opts.silence_label;
678  partial_word_label = opts.partial_word_label;
679 }
680 
682  reorder = opts.reorder;
683  silence_label = opts.silence_label;
684  partial_word_label = opts.partial_word_label;
685 }
686 
688  std::string word_boundary_file) {
689  reorder = opts.reorder;
690  silence_label = opts.silence_label;
691  partial_word_label = opts.partial_word_label;
692  bool binary_in;
693  Input ki(word_boundary_file, &binary_in);
694  KALDI_ASSERT(!binary_in && "Not expecting binary word-boundary file.");
695  Init(ki.Stream());
696 }
697 
698 void WordBoundaryInfo::Init(std::istream &stream) {
699  std::string line;
700  while (std::getline(stream, line)) {
701  std::vector<std::string> split_line;
702  SplitStringToVector(line, " \t\r", true, &split_line);// split the line by space or tab
703  int32 p = 0;
704  if (split_line.size() != 2 ||
705  !ConvertStringToInteger(split_line[0], &p))
706  KALDI_ERR << "Invalid line in word-boundary file: " << line;
707  KALDI_ASSERT(p > 0);
708  if (phone_to_type.size() <= static_cast<size_t>(p))
709  phone_to_type.resize(p+1, kNoPhone);
710  std::string t = split_line[1];
711  if (t == "nonword") phone_to_type[p] = kNonWordPhone;
712  else if (t == "begin") phone_to_type[p] = kWordBeginPhone;
713  else if (t == "singleton") phone_to_type[p] = kWordBeginAndEndPhone;
714  else if (t == "end") phone_to_type[p] = kWordEndPhone;
715  else if (t == "internal") phone_to_type[p] = kWordInternalPhone;
716  else
717  KALDI_ERR << "Invalid line in word-boundary file: " << line;
718  }
719  if (phone_to_type.empty())
720  KALDI_ERR << "Empty word-boundary file";
721 }
722 
724  const TransitionModel &tmodel,
725  const WordBoundaryInfo &info,
726  int32 max_states,
727  CompactLattice *lat_out) {
728  LatticeWordAligner aligner(lat, tmodel, info, max_states, lat_out);
729  return aligner.AlignLattice();
730 }
731 
732 
733 
735  public:
737  const TransitionModel &tmodel,
738  const WordBoundaryInfo &info,
739  const CompactLattice &aligned_lat):
740  lat_(lat), tmodel_(tmodel), info_(info), aligned_lat_(aligned_lat) { }
741 
742  void Test() {
743  // First test that each aligned arc is valid.
745  for (StateId s = 0; s < aligned_lat_.NumStates(); s++) {
746  for (fst::ArcIterator<CompactLattice> iter(aligned_lat_, s);
747  !iter.Done();
748  iter.Next()) {
749  TestArc(iter.Value());
750  }
751  if (aligned_lat_.Final(s) != CompactLatticeWeight::Zero()) {
752  TestFinal(aligned_lat_.Final(s));
753  }
754  }
755  TestEquivalent();
756  }
757  private:
758  void TestArc(const CompactLatticeArc &arc) {
759  if (! (TestArcSilence(arc) || TestArcNormalWord(arc) || TestArcOnePhoneWord(arc)
760  || TestArcEmpty(arc)))
761  KALDI_ERR << "Invalid arc in aligned CompactLattice: "
762  << arc.ilabel << " " << arc.olabel << " " << arc.nextstate
763  << " " << arc.weight;
764  }
765  bool TestArcEmpty(const CompactLatticeArc &arc) {
766  if (arc.ilabel != 0) return false; // Check there is no label. Note, ilabel==olabel.
767  const std::vector<int32> &tids = arc.weight.String();
768  return tids.empty();
769  }
771  // This only applies when silence doesn't have word labels.
772  if (arc.ilabel != info_.silence_label) return false; // Check the label is
773  // the silence label. Note, ilabel==olabel.
774  const std::vector<int32> &tids = arc.weight.String();
775  if (tids.empty()) return false;
776  int32 first_phone = tmodel_.TransitionIdToPhone(tids.front());
778  return false;
779  for (size_t i = 0; i < tids.size(); i++)
780  if (tmodel_.TransitionIdToPhone(tids[i]) != first_phone) return false;
781 
782  if (!info_.reorder) return tmodel_.IsFinal(tids.back());
783  else {
784  for (size_t i = 0; i < tids.size(); i++) {
785  if (tmodel_.IsFinal(tids[i])) { // got the "final" transition, which is
786  // reordered to actually not be final. Make sure that all the
787  // rest of the transition ids are the self-loop of that same
788  // transition-state.
789  for (size_t j = i+1; j < tids.size(); j++) {
791  == tmodel_.TransitionIdToTransitionState(tids[i]))) return false;
792  }
793  return true;
794  }
795  }
796  return false; // fell off loop. No final-state present.
797  }
798  }
799 
801  if (arc.ilabel == 0) return false; // Check there's a label. Note, ilabel==olabel.
802  const std::vector<int32> &tids = arc.weight.String();
803  if (tids.empty()) return false;
804  int32 first_phone = tmodel_.TransitionIdToPhone(tids.front());
805  if (info_.TypeOfPhone(first_phone) !=
807  for (size_t i = 0; i < tids.size(); i++)
808  if (tmodel_.TransitionIdToPhone(tids[i]) != first_phone) return false;
809 
810  if (!info_.reorder) return tmodel_.IsFinal(tids.back());
811  else {
812  for (size_t i = 0; i < tids.size(); i++) {
813  if (tmodel_.IsFinal(tids[i])) { // got the "final" transition, which is
814  // reordered to actually not be final. Make sure that all the
815  // rest of the transition ids are the self-loop of that same
816  // transition-state.
817  for (size_t j = i+1; j < tids.size(); j++) {
819  != tmodel_.TransitionIdToTransitionState(tids[i])) return false;
820  }
821  return true;
822  }
823  }
824  return false; // fell off loop. No final-state present.
825  }
826  }
827 
829  if (arc.ilabel == 0) return false; // Check there's a label. Note, ilabel==olabel.
830  const std::vector<int32> &tids = arc.weight.String();
831  if (tids.empty()) return false;
832  int32 first_phone = tmodel_.TransitionIdToPhone(tids.front());
834  return false;
835  size_t i;
836  { // first phone.
837  int num_final = 0;
838  for (i = 0; i < tids.size(); i++) {
839  if (tmodel_.TransitionIdToPhone(tids[i]) != first_phone) break;
840  if (tmodel_.IsFinal(tids[i])) num_final++;
841  }
842  if (num_final != 1)
843  return false; // Something went wrong-- perhaps we
844  // got two beginning phones in a row.
845  }
846  { // middle phones. Skip over them.
847  while (i < tids.size() &&
850  i++;
851  }
852  if (i == tids.size()) return false;
853  int32 final_phone = tmodel_.TransitionIdToPhone(tids[i]);
855  return false; // not word-ending.
856  for (size_t j = i; j < tids.size(); j++) // make sure only this final phone till end.
857  if (tmodel_.TransitionIdToPhone(tids[j]) != final_phone)
858  return false; // Other phones after final phone.
859 
860  for (size_t j = i; j < tids.size(); j++) {
861  if (tmodel_.IsFinal(tids[j])) { // Found "final transition".. Note:
862  // may be "reordered" with its self loops.
863  if (!info_.reorder) return (j+1 == tids.size());
864  else {
865  // Make sure the only thing that follows this is self-loops
866  // of the final transition-state.
867  for (size_t k = j + 1; k < tids.size(); k++)
870  || !tmodel_.IsSelfLoop(tids[k]))
871  return false;
872  return true;
873  }
874  }
875  }
876  return false; // Found no final state.
877  }
878 
880  if (arc.ilabel != info_.partial_word_label) return false; // label should
881  // be the partial-word label.
882  const std::vector<int32> &tids = arc.weight.String();
883  if (tids.empty()) return false;
884  return true; // We're pretty liberal when it comes to partial words here.
885  }
886 
888  if (!w.String().empty())
889  KALDI_ERR << "Expect to have no strings on final-weights of lattices.";
890  }
891  void TestEquivalent() {
892  CompactLattice aligned_lat(aligned_lat_);
893  if (info_.silence_label != 0) { // remove silence labels.
894  std::vector<int32> to_remove;
895  to_remove.push_back(info_.silence_label);
896  RemoveSomeInputSymbols(to_remove, &aligned_lat);
897  Project(&aligned_lat, fst::PROJECT_INPUT);
898  }
899 
900  if (!RandEquivalent(lat_, aligned_lat, 5/*paths*/, 1.0e+10/*delta*/, Rand()/*seed*/,
901  200/*path length (max?)*/))
902  KALDI_ERR << "Equivalence test failed (testing word-alignment of lattices.) "
903  << "Make sure your model and lattices match!";
904  }
905 
910 };
911 
912 
913 
914 
919  const TransitionModel &tmodel,
920  const WordBoundaryInfo &info,
921  const CompactLattice &aligned_lat) {
922  WordAlignedLatticeTester t(lat, tmodel, info, aligned_lat);
923  t.Test();
924 }
925 
926 
927 
928 
929 
930 
931 } // namespace kaldi
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
void Init(std::istream &stream)
bool TestArcNormalWord(const CompactLatticeArc &arc)
A hashing function-object for vectors.
Definition: stl-utils.h:216
bool OutputNormalWordArc(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
This function tries to see if it can output a normal word arc– one with at least two phones in it...
LatticeWeight FinalWeight()
FinalWeight() will return "weight" if both transition_ids and word_labels are empty, otherwise it will return Weight::Zero().
bool TestArcOnePhoneWord(const CompactLatticeArc &arc)
const WordBoundaryInfo & info_in_
static const LatticeWeightTpl One()
const WordBoundaryInfo & info_
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
void TestArc(const CompactLatticeArc &arc)
fst::CompactLatticeWeightTpl< LatticeWeight, int32 > CompactLatticeWeight
Definition: kaldi-lattice.h:35
void TestFinal(const CompactLatticeWeight &w)
bool TestArcEmpty(const CompactLatticeArc &arc)
kaldi::int32 int32
StateId GetStateForTuple(const Tuple &tuple, bool add_to_queue)
bool OutputSilenceArc(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
bool WordAlignLattice(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, int32 max_states, CompactLattice *lat_out)
Align lattice so that each arc has the transition-ids on it that correspond to the word that is on th...
const CompactLattice & aligned_lat_
static bool TestWordAlignedLattice(const WordAlignLatticeLexiconInfo &lexicon_info, const TransitionModel &tmodel, CompactLattice clat, CompactLattice aligned_clat, bool allow_duplicate_paths)
WordAlignedLatticeTester(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, const CompactLattice &aligned_lat)
void SetOptions(const std::string int_list, PhoneType phone_type)
std::vector< std::pair< Tuple, StateId > > queue_
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
std::istream & Stream()
Definition: kaldi-io.cc:826
unordered_map< Tuple, StateId, TupleHash, TupleEqual > MapType
static const CompactLatticeWeightTpl< WeightType, IntType > One()
PhoneType TypeOfPhone(int32 p) const
void OutputArcForce(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
This function may be called when you reach the end of the lattice and this structure hasn&#39;t voluntari...
bool OutputOnePhoneWordArc(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
Arc::StateId CreateSuperFinal(MutableFst< Arc > *fst)
Tuple(StateId input_state, ComputationState comp_state)
bool IsSelfLoop(int32 trans_id) const
static const LatticeWeightTpl Zero()
bool TestArcSilence(const CompactLatticeArc &arc)
#define KALDI_ERR
Definition: kaldi-error.h:147
int32 TransitionIdToTransitionState(int32 trans_id) const
Arc::Label HighestNumberedOutputSymbol(const Fst< Arc > &fst)
Returns the highest numbered output symbol id of the FST (or zero for an empty FST.
void Advance(const CompactLatticeArc &arc, LatticeWeight *weight)
The state of the computation in which,.
#define KALDI_WARN
Definition: kaldi-error.h:150
static bool IsPlausibleWord(const WordAlignLatticeLexiconInfo &lexicon_info, const TransitionModel &tmodel, int32 word_id, const std::vector< int32 > &transition_ids)
fst::StdArc::Label Label
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
CompactLatticeArc::StateId StateId
CompactLatticeArc::Label Label
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
const TransitionModel & tmodel_
bool operator==(const ComputationState &other) const
static const CompactLatticeWeightTpl< WeightType, IntType > Zero()
bool OutputArc(const WordBoundaryInfo &info, const TransitionModel &tmodel, CompactLatticeArc *arc_out, bool *error)
If it can output a whole word, it will do so, will put it in arc_out, and return true; else it will r...
bool TestArcPartialWord(const CompactLatticeArc &arc)
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
Definition: kaldi-lattice.h:42
ComputationState(const ComputationState &other)
WordBoundaryInfo(const WordBoundaryInfoOpts &opts)
const std::vector< IntType > & String() const
bool IsFinal(int32 trans_id) const
int32 TransitionIdToPhone(int32 trans_id) const
LatticeWordAligner(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, int32 max_states, CompactLattice *lat_out)
void ProcessFinal(Tuple tuple, StateId output_state)
void RemoveSomeInputSymbols(const std::vector< I > &to_remove, MutableFst< Arc > *fst)
RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from the input side of the FST...