phone-align-lattice.cc
Go to the documentation of this file.
1 // lat/phone-align-lattice.cc
2 
3 // Copyright 2012-2013 Microsoft Corporation
4 // Johns Hopkins University (Author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
23 #include "hmm/transition-model.h"
24 #include "util/stl-utils.h"
25 
26 namespace kaldi {
27 
29  public:
32 
34  public:
38 
42  void Advance(const CompactLatticeArc &arc, const PhoneAlignLatticeOptions &opts,
43  LatticeWeight *weight) {
44  const std::vector<int32> &string = arc.weight.String();
45  transition_ids_.insert(transition_ids_.end(),
46  string.begin(), string.end());
47  if (arc.ilabel != 0 && !opts.replace_output_symbols) // note: arc.ilabel==arc.olabel (acceptor)
48  word_labels_.push_back(arc.ilabel);
49  *weight = Times(weight_, arc.weight.Weight());
51  }
52 
61  bool OutputPhoneArc(const TransitionModel &tmodel,
62  const PhoneAlignLatticeOptions &opts,
63  CompactLatticeArc *arc_out,
64  bool *error);
65 
70  bool OutputWordArc(const TransitionModel &tmodel,
71  const PhoneAlignLatticeOptions &opts,
72  CompactLatticeArc *arc_out,
73  bool *error);
74 
75  bool IsEmpty() { return (transition_ids_.empty() && word_labels_.empty()); }
76 
81 
94  void OutputArcForce(const TransitionModel &tmodel,
95  const PhoneAlignLatticeOptions &opts,
96  CompactLatticeArc *arc_out,
97  bool *error);
98 
99  size_t Hash() const {
101  return vh(transition_ids_) + 90647 * vh(word_labels_);
102  // 90647 is an arbitrary largish prime number.
103  // We don't bother including the weight in the hash--
104  // we don't really expect duplicates with the same vectors
105  // but different weights, and anyway, this is only an
106  // efficiency issue.
107  }
108 
109  // Just need an arbitrary complete order.
110  bool operator == (const ComputationState &other) const {
111  return (transition_ids_ == other.transition_ids_
112  && word_labels_ == other.word_labels_
113  && weight_ == other.weight_);
114  }
115 
116  ComputationState(): weight_(LatticeWeight::One()) { } // initial state.
119  weight_(other.weight_) { }
120  private:
121  std::vector<int32> transition_ids_;
122  std::vector<int32> word_labels_;
123  LatticeWeight weight_; // contains two floats.
124  };
125 
126 
127  struct Tuple {
128  Tuple(StateId input_state, ComputationState comp_state):
129  input_state(input_state), comp_state(comp_state) {}
130  StateId input_state;
132  };
133 
134  struct TupleHash {
135  size_t operator() (const Tuple &state) const {
136  return state.input_state + 102763 * state.comp_state.Hash();
137  // 102763 is just an arbitrary prime number
138  }
139  };
140  struct TupleEqual {
141  bool operator () (const Tuple &state1, const Tuple &state2) const {
142  // treat this like operator ==
143  return (state1.input_state == state2.input_state
144  && state1.comp_state == state2.comp_state);
145  }
146  };
147 
148  typedef unordered_map<Tuple, StateId, TupleHash, TupleEqual> MapType;
149 
150  StateId GetStateForTuple(const Tuple &tuple, bool add_to_queue) {
151  MapType::iterator iter = map_.find(tuple);
152  if (iter == map_.end()) { // not in map.
153  StateId output_state = lat_out_->AddState();
154  map_[tuple] = output_state;
155  if (add_to_queue)
156  queue_.push_back(std::make_pair(tuple, output_state));
157  return output_state;
158  } else {
159  return iter->second;
160  }
161  }
162 
163  void ProcessFinal(Tuple tuple, StateId output_state) {
164  // ProcessFinal is only called if the input_state has
165  // final-prob of One(). [else it should be zero. This
166  // is because we called CreateSuperFinal().]
167 
168  if (tuple.comp_state.IsEmpty()) { // computation state doesn't have
169  // anything pending.
170  std::vector<int32> empty_vec;
171  CompactLatticeWeight cw(tuple.comp_state.FinalWeight(), empty_vec);
172  lat_out_->SetFinal(output_state, Plus(lat_out_->Final(output_state), cw));
173  } else {
174  // computation state has something pending, i.e. input or
175  // output symbols that need to be flushed out. Note: OutputArc() would
176  // have returned false or we wouldn't have been called, so we have to
177  // force it out.
178  CompactLatticeArc lat_arc;
179  // Note: the next call will change the computation-state of the tuple,
180  // so it becomes a different tuple.
181  tuple.comp_state.OutputArcForce(tmodel_, opts_, &lat_arc, &error_);
182  lat_arc.nextstate = GetStateForTuple(tuple, true); // true == add to queue.
183  // The final-prob stuff will get called again from ProcessQueueElement().
184  // Note: because we did CreateSuperFinal(), this final-state on the input
185  // lattice will have no output arcs (and unit final-prob), so there will be
186  // no complications with processing the arcs from this state (there won't
187  // be any).
188  KALDI_ASSERT(output_state != lat_arc.nextstate);
189  lat_out_->AddArc(output_state, lat_arc);
190  }
191  }
192 
193 
195  KALDI_ASSERT(!queue_.empty());
196  Tuple tuple = queue_.back().first;
197  StateId output_state = queue_.back().second;
198  queue_.pop_back();
199 
200  // First thing is-- we see whether the computation-state has something
201  // pending that it wants to output. In this case we don't do
202  // anything further. This is a chosen behavior similar to the
203  // epsilon-sequencing rules encoded by the filters in
204  // composition.
205  CompactLatticeArc lat_arc;
206  if (tuple.comp_state.OutputPhoneArc(tmodel_, opts_, &lat_arc, &error_) ||
207  tuple.comp_state.OutputWordArc(tmodel_, opts_, &lat_arc, &error_)) {
208  // note: the functions OutputPhoneArc() and OutputWordArc() change the
209  // tuple (when they return true).
210  lat_arc.nextstate = GetStateForTuple(tuple, true); // true == add to
211  // queue, if not
212  // already present.
213  KALDI_ASSERT(output_state != lat_arc.nextstate);
214  lat_out_->AddArc(output_state, lat_arc);
215  } else {
216  // when there's nothing to output, we'll process arcs from the input-state.
217  // note: it would in a sense be valid to do both (i.e. process the stuff
218  // above, and also these), but this is a bit like the epsilon-sequencing
219  // stuff in composition: we avoid duplicate arcs by doing it this way.
220 
221  if (lat_.Final(tuple.input_state) != CompactLatticeWeight::Zero()) {
222  KALDI_ASSERT(lat_.Final(tuple.input_state) == CompactLatticeWeight::One());
223  // ... since we did CreateSuperFinal.
224  ProcessFinal(tuple, output_state);
225  }
226  // Now process the arcs. Note: final-states shouldn't have any arcs.
227  for(fst::ArcIterator<CompactLattice> aiter(lat_, tuple.input_state);
228  !aiter.Done(); aiter.Next()) {
229  const CompactLatticeArc &arc = aiter.Value();
230  Tuple next_tuple(tuple);
231  LatticeWeight weight;
232  next_tuple.comp_state.Advance(arc, opts_, &weight);
233  next_tuple.input_state = arc.nextstate;
234  StateId next_output_state = GetStateForTuple(next_tuple, true); // true == add to queue,
235  // if not already present.
236  // We add an epsilon arc here (as the input and output happens
237  // separately)... the epsilons will get removed later.
238  KALDI_ASSERT(next_output_state != output_state);
239  lat_out_->AddArc(output_state,
240  CompactLatticeArc(0, 0,
241  CompactLatticeWeight(weight, std::vector<int32>()),
242  next_output_state));
243  }
244  }
245  }
246 
248  const TransitionModel &tmodel,
249  const PhoneAlignLatticeOptions &opts,
250  CompactLattice *lat_out):
251  lat_(lat), tmodel_(tmodel), opts_(opts), lat_out_(lat_out),
252  error_(false) {
253  fst::CreateSuperFinal(&lat_); // Creates a super-final state, so the
254  // only final-probs are One().
255  }
256 
257  // Removes epsilons; also removes unreachable states...
258  // not sure if these would exist if original was connected.
259  // This also replaces the temporary symbols for the silence
260  // and partial-words, with epsilons, if we wanted epsilons.
262  RmEpsilon(lat_out_, true); // true = connect.
263  }
264 
265  bool AlignLattice() {
266  lat_out_->DeleteStates();
267  if (lat_.Start() == fst::kNoStateId) {
268  KALDI_WARN << "Trying to word-align empty lattice.";
269  return false;
270  }
271  ComputationState initial_comp_state;
272  Tuple initial_tuple(lat_.Start(), initial_comp_state);
273  StateId start_state = GetStateForTuple(initial_tuple, true); // True = add this to queue.
274  lat_out_->SetStart(start_state);
275 
276  while (!queue_.empty())
278 
279  if (opts_.remove_epsilon)
281 
282  return !error_;
283  }
284 
289 
290  std::vector<std::pair<Tuple, StateId> > queue_;
291  MapType map_; // map from tuples to StateId.
292  bool error_;
293 };
294 
296  const TransitionModel &tmodel,
297  const PhoneAlignLatticeOptions &opts,
298  CompactLatticeArc *arc_out,
299  bool *error) {
300  if (transition_ids_.empty()) return false;
301  int32 phone = tmodel.TransitionIdToPhone(transition_ids_[0]);
302  // we assume the start of transition_ids_ is the start of the phone;
303  // this is a precondition.
304  size_t len = transition_ids_.size(), i;
305  // Keep going till we reach a "final" transition-id; note, if
306  // reorder==true, we have to go a bit further after this.
307  for (i = 0; i < len; i++) {
308  int32 tid = transition_ids_[i];
309  int32 this_phone = tmodel.TransitionIdToPhone(tid);
310  if (this_phone != phone && ! *error) { // error condition: should have
311  // reached final transition-id first.
312  *error = true;
313  KALDI_WARN << phone << " -> " << this_phone;
314  KALDI_WARN << "Phone changed before final transition-id found "
315  "[broken lattice or mismatched model or wrong --reorder option?]";
316  }
317  if (tmodel.IsFinal(tid))
318  break;
319  }
320  if (i == len) return false; // fell off loop.
321  i++; // go past the one for which IsFinal returned true.
322  if (opts.reorder) // we have to consume the following self-loop transition-ids.
323  while (i < len && tmodel.IsSelfLoop(transition_ids_[i])) i++;
324  if (i == len) return false; // we don't know if it ends here... so can't output arc.
325 
326  // interpret i as the number of transition-ids to consume.
327  std::vector<int32> tids_out(transition_ids_.begin(),
328  transition_ids_.begin()+i);
329 
330  Label output_label = 0;
331  if (!word_labels_.empty()) {
332  output_label = word_labels_[0];
333  word_labels_.erase(word_labels_.begin(), word_labels_.begin()+1);
334  }
335  if (opts.replace_output_symbols)
336  output_label = phone;
337  *arc_out = CompactLatticeArc(output_label, output_label,
338  CompactLatticeWeight(weight_, tids_out),
339  fst::kNoStateId);
340  transition_ids_.erase(transition_ids_.begin(), transition_ids_.begin()+i);
341  weight_ = LatticeWeight::One(); // we just output the weight.
342  return true;
343 }
344 
346  const TransitionModel &tmodel,
347  const PhoneAlignLatticeOptions &opts,
348  CompactLatticeArc *arc_out,
349  bool *error) {
350  // output a word but no phones.
351  if (word_labels_.size() < 2) return false;
352 
353  int32 output_label = word_labels_[0];
354  word_labels_.erase(word_labels_.begin(), word_labels_.begin()+1);
355 
356  *arc_out = CompactLatticeArc(output_label, output_label,
357  CompactLatticeWeight(weight_, std::vector<int32>()),
358  fst::kNoStateId);
359  weight_ = LatticeWeight::One(); // we just output the weight, so set it to one.
360  return true;
361 }
362 
363 
365  const TransitionModel &tmodel,
366  const PhoneAlignLatticeOptions &opts,
367  CompactLatticeArc *arc_out,
368  bool *error) {
369  KALDI_ASSERT(!IsEmpty());
370 
371  int32 phone = -1; // This value -1 will never be used,
372  // although it might not be obvious from superficially checking
373  // the code. IsEmpty() would be true if we had transition_ids_.empty()
374  // and opts.replace_output_symbols, so we would already die by assertion;
375  // in fact, this function would never be called.
376 
377  if (!transition_ids_.empty()) { // Do some checking here.
378  int32 tid = transition_ids_[0];
379  phone = tmodel.TransitionIdToPhone(tid);
380  int32 num_final = 0;
381  for (int32 i = 0; i < transition_ids_.size(); i++) { // A check.
382  int32 this_tid = transition_ids_[i];
383  int32 this_phone = tmodel.TransitionIdToPhone(this_tid);
384  bool is_final = tmodel.IsFinal(this_tid); // should be exactly one.
385  if (is_final) num_final++;
386  if (this_phone != phone && ! *error) {
387  KALDI_WARN << "Mismatch in phone: error in lattice or mismatched transition model?";
388  *error = true;
389  }
390  }
391  if (num_final != 1 && ! *error) {
392  KALDI_WARN << "Problem phone-aligning lattice: saw " << num_final
393  << " final-states in last phone in lattice (forced out?) "
394  << "Producing partial lattice.";
395  *error = true;
396  }
397  }
398 
399  Label output_label = 0;
400  if (!word_labels_.empty()) {
401  output_label = word_labels_[0];
402  word_labels_.erase(word_labels_.begin(), word_labels_.begin()+1);
403  }
404  if (opts.replace_output_symbols)
405  output_label = phone;
406  *arc_out = CompactLatticeArc(output_label, output_label,
408  fst::kNoStateId);
409  transition_ids_.clear();
410  weight_ = LatticeWeight::One(); // we just output the weight.
411 }
412 
414  const TransitionModel &tmodel,
415  const PhoneAlignLatticeOptions &opts,
416  CompactLattice *lat_out) {
417  LatticePhoneAligner aligner(lat, tmodel, opts, lat_out);
418  return aligner.AlignLattice();
419 }
420 
421 
422 } // namespace kaldi
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool operator==(const ComputationState &other) const
A hashing function-object for vectors.
Definition: stl-utils.h:216
static const LatticeWeightTpl One()
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
fst::CompactLatticeWeightTpl< LatticeWeight, int32 > CompactLatticeWeight
Definition: kaldi-lattice.h:35
bool OutputPhoneArc(const TransitionModel &tmodel, const PhoneAlignLatticeOptions &opts, CompactLatticeArc *arc_out, bool *error)
If it can output a whole phone, it will do so, will put it in arc_out, and return true; else it will ...
StateId GetStateForTuple(const Tuple &tuple, bool add_to_queue)
kaldi::int32 int32
CompactLatticeArc::StateId StateId
ComputationState(const ComputationState &other)
const PhoneAlignLatticeOptions & opts_
bool OutputWordArc(const TransitionModel &tmodel, const PhoneAlignLatticeOptions &opts, CompactLatticeArc *arc_out, bool *error)
This will succeed (and output the arc) if we have >1 word in words_; the arc won&#39;t have any transitio...
LatticeWeight FinalWeight()
FinalWeight() will return "weight" if both transition_ids and word_labels are empty, otherwise it will return Weight::Zero().
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
unordered_map< Tuple, StateId, TupleHash, TupleEqual > MapType
static const CompactLatticeWeightTpl< WeightType, IntType > One()
void OutputArcForce(const TransitionModel &tmodel, const PhoneAlignLatticeOptions &opts, CompactLatticeArc *arc_out, bool *error)
This function may be called when you reach the end of the lattice and this structure hasn&#39;t voluntari...
void ProcessFinal(Tuple tuple, StateId output_state)
Arc::StateId CreateSuperFinal(MutableFst< Arc > *fst)
bool IsSelfLoop(int32 trans_id) const
static const LatticeWeightTpl Zero()
const TransitionModel & tmodel_
LatticePhoneAligner(const CompactLattice &lat, const TransitionModel &tmodel, const PhoneAlignLatticeOptions &opts, CompactLattice *lat_out)
#define KALDI_WARN
Definition: kaldi-error.h:150
CompactLatticeArc::Label Label
fst::StdArc::Label Label
fst::VectorFst< CompactLatticeArc > CompactLattice
Definition: kaldi-lattice.h:46
void Advance(const CompactLatticeArc &arc, const PhoneAlignLatticeOptions &opts, LatticeWeight *weight)
The state of the computation in which,.
std::vector< std::pair< Tuple, StateId > > queue_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
static const CompactLatticeWeightTpl< WeightType, IntType > Zero()
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
Definition: kaldi-lattice.h:42
bool PhoneAlignLattice(const CompactLattice &lat, const TransitionModel &tmodel, const PhoneAlignLatticeOptions &opts, CompactLattice *lat_out)
Outputs a lattice in which the arcs correspond exactly to sequences of phones, so the boundaries betw...
bool IsFinal(int32 trans_id) const
int32 TransitionIdToPhone(int32 trans_id) const
Tuple(StateId input_state, ComputationState comp_state)