LatticeWordAligner Class Reference
Collaboration diagram for LatticeWordAligner:

Classes

class  ComputationState
 
struct  Tuple
 
struct  TupleEqual
 
struct  TupleHash
 

Public Types

typedef CompactLatticeArc::StateId StateId
 
typedef CompactLatticeArc::Label Label
 
typedef unordered_map< Tuple, StateId, TupleHash, TupleEqual > MapType
 

Public Member Functions

StateId GetStateForTuple (const Tuple &tuple, bool add_to_queue)
 
void ProcessFinal (Tuple tuple, StateId output_state)
 
void ProcessQueueElement ()
 
 LatticeWordAligner (const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, int32 max_states, CompactLattice *lat_out)
 
void RemoveEpsilonsFromLattice ()
 
bool AlignLattice ()
 

Public Attributes

CompactLattice lat_
 
const TransitionModel & tmodel_
 
const WordBoundaryInfo & info_in_
 
WordBoundaryInfo info_
 
int32 max_states_
 
CompactLattice * lat_out_
 
std::vector< std::pair< Tuple, StateId > > queue_
 
MapType map_
 
bool error_
 

Detailed Description

Definition at line 27 of file word-align-lattice.cc.

Member Typedef Documentation

◆ Label

typedef CompactLatticeArc::Label Label

Definition at line 30 of file word-align-lattice.cc.

◆ MapType

typedef unordered_map<Tuple, StateId, TupleHash, TupleEqual> MapType

Definition at line 157 of file word-align-lattice.cc.

◆ StateId

typedef CompactLatticeArc::StateId StateId

Definition at line 29 of file word-align-lattice.cc.

Constructor & Destructor Documentation

◆ LatticeWordAligner()

LatticeWordAligner ( const CompactLattice & lat,
const TransitionModel & tmodel,
const WordBoundaryInfo & info,
int32  max_states,
CompactLattice * lat_out
)
inline

Definition at line 252 of file word-align-lattice.cc.

References fst::CreateSuperFinal(), fst::HighestNumberedOutputSymbol(), LatticeWordAligner::info_, KALDI_ASSERT, KALDI_WARN, LatticeWordAligner::lat_, WordBoundaryInfo::partial_word_label, and WordBoundaryInfo::silence_label.

256  :
257  lat_(lat), tmodel_(tmodel), info_in_(info), info_(info),
258  max_states_(max_states), lat_out_(lat_out),
259  error_(false) {
260  bool test = true;
261  uint64 props = lat_.Properties(fst::kIDeterministic|fst::kIEpsilons, test);
262  if (props != fst::kIDeterministic) {
263  KALDI_WARN << "[Lattice has input epsilons and/or is not input-deterministic "
264  << "(in Mohri sense)]-- i.e. lattice is not deterministic. "
265  << "Word-alignment may be slow and-or blow up in memory.";
266  }
267  fst::CreateSuperFinal(&lat_); // Creates a super-final state, so the
268  // only final-probs are One().
269 
270  // Inside this class, we don't want to use zero for the silence
271  // or partial-word labels, as this will interfere with the RmEpsilon
272  // stage, where we don't want the arcs corresponding to silence or
273  // partial words to be removed-- only the arcs with nothing at all
274  // on them.
275  if (info_.partial_word_label == 0 || info_.silence_label == 0) {
276  int32 unused_label = 1 + HighestNumberedOutputSymbol(lat);
277  if (info_.partial_word_label >= unused_label)
278  unused_label = info_.partial_word_label + 1;
279  if (info_.silence_label >= unused_label)
280  unused_label = info_.silence_label + 1;
281  KALDI_ASSERT(unused_label > 0);
282  if (info_.partial_word_label == 0)
283  info_.partial_word_label = unused_label++;
284  if (info_.silence_label == 0)
285  info_.silence_label = unused_label;
286  }
287  }
const WordBoundaryInfo & info_in_
kaldi::int32 int32
Arc::StateId CreateSuperFinal(MutableFst< Arc > *fst)
Arc::Label HighestNumberedOutputSymbol(const Fst< Arc > &fst)
Returns the highest numbered output symbol id of the FST (or zero for an empty FST).
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
const TransitionModel & tmodel_

Member Function Documentation

◆ AlignLattice()

bool AlignLattice ( )
inline

Definition at line 307 of file word-align-lattice.cc.

References LatticeWordAligner::error_, LatticeWordAligner::GetStateForTuple(), KALDI_WARN, LatticeWordAligner::lat_, LatticeWordAligner::lat_out_, LatticeWordAligner::max_states_, LatticeWordAligner::ProcessQueueElement(), LatticeWordAligner::queue_, and LatticeWordAligner::RemoveEpsilonsFromLattice().

Referenced by kaldi::WordAlignLattice().

307  {
308  lat_out_->DeleteStates();
309  if (lat_.Start() == fst::kNoStateId) {
310  KALDI_WARN << "Trying to word-align empty lattice.";
311  return false;
312  }
313  ComputationState initial_comp_state;
314  Tuple initial_tuple(lat_.Start(), initial_comp_state);
315  StateId start_state = GetStateForTuple(initial_tuple, true); // True = add this to queue.
316  lat_out_->SetStart(start_state);
317 
318  while (!queue_.empty()) {
319  if (max_states_ > 0 && lat_out_->NumStates() > max_states_) {
320  KALDI_WARN << "Number of states in lattice exceeded max-states of "
321  << max_states_ << ", original lattice had "
322  << lat_.NumStates() << " states. Returning what we have.";
323  RemoveEpsilonsFromLattice();
324  return false;
325  }
326  ProcessQueueElement();
327  }
328 
329  RemoveEpsilonsFromLattice();
330 
331  return !error_;
332  }
StateId GetStateForTuple(const Tuple &tuple, bool add_to_queue)
std::vector< std::pair< Tuple, StateId > > queue_
#define KALDI_WARN
Definition: kaldi-error.h:150
CompactLatticeArc::StateId StateId

◆ GetStateForTuple()

StateId GetStateForTuple ( const Tuple & tuple,
bool  add_to_queue 
)
inline

Definition at line 159 of file word-align-lattice.cc.

References LatticeWordAligner::lat_out_, LatticeWordAligner::map_, and LatticeWordAligner::queue_.

Referenced by LatticeWordAligner::AlignLattice(), LatticeWordAligner::ProcessFinal(), and LatticeWordAligner::ProcessQueueElement().

159  {
160  MapType::iterator iter = map_.find(tuple);
161  if (iter == map_.end()) { // not in map.
162  StateId output_state = lat_out_->AddState();
163  map_[tuple] = output_state;
164  if (add_to_queue)
165  queue_.push_back(std::make_pair(tuple, output_state));
166  return output_state;
167  } else {
168  return iter->second;
169  }
170  }
std::vector< std::pair< Tuple, StateId > > queue_
CompactLatticeArc::StateId StateId

◆ ProcessFinal()

void ProcessFinal ( Tuple  tuple,
StateId  output_state 
)
inline

Definition at line 172 of file word-align-lattice.cc.

References LatticeWordAligner::Tuple::comp_state, LatticeWordAligner::error_, LatticeWordAligner::ComputationState::FinalWeight(), LatticeWordAligner::GetStateForTuple(), LatticeWordAligner::info_, LatticeWordAligner::ComputationState::IsEmpty(), KALDI_ASSERT, LatticeWordAligner::lat_out_, LatticeWordAligner::ComputationState::OutputArcForce(), fst::Plus(), and LatticeWordAligner::tmodel_.

Referenced by LatticeWordAligner::ProcessQueueElement().

172  {
173  // ProcessFinal is only called if the input_state has
174  // final-prob of One(). [else it should be zero. This
175  // is because we called CreateSuperFinal().]
176 
177  if (tuple.comp_state.IsEmpty()) { // computation state doesn't have
178  // anything pending.
179  std::vector<int32> empty_vec;
180  CompactLatticeWeight cw(tuple.comp_state.FinalWeight(), empty_vec);
181  lat_out_->SetFinal(output_state, Plus(lat_out_->Final(output_state), cw));
182  } else {
183  // computation state has something pending, i.e. input or
184  // output symbols that need to be flushed out. Note: OutputArc() would
185  // have returned false or we wouldn't have been called, so we have to
186  // force it out.
187  CompactLatticeArc lat_arc;
188  tuple.comp_state.OutputArcForce(info_, tmodel_, &lat_arc, &error_);
189  // True in the next line means add it to the queue.
190  lat_arc.nextstate = GetStateForTuple(tuple, true);
191  // The final-prob stuff will get called again from ProcessQueueElement().
192  // Note: because we did CreateSuperFinal(), this final-state on the input
193  // lattice will have no output arcs (and unit final-prob), so there will be
194  // no complications with processing the arcs from this state (there won't
195  // be any).
196  KALDI_ASSERT(output_state != lat_arc.nextstate);
197  lat_out_->AddArc(output_state, lat_arc);
198  }
199  }
LatticeWeightTpl< FloatType > Plus(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
fst::CompactLatticeWeightTpl< LatticeWeight, int32 > CompactLatticeWeight
Definition: kaldi-lattice.h:35
StateId GetStateForTuple(const Tuple &tuple, bool add_to_queue)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
const TransitionModel & tmodel_
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
Definition: kaldi-lattice.h:42

◆ ProcessQueueElement()

void ProcessQueueElement ( )
inline

Definition at line 202 of file word-align-lattice.cc.

References LatticeWordAligner::ComputationState::Advance(), LatticeWordAligner::Tuple::comp_state, LatticeWordAligner::error_, LatticeWordAligner::GetStateForTuple(), LatticeWordAligner::info_, LatticeWordAligner::Tuple::input_state, KALDI_ASSERT, LatticeWordAligner::lat_, LatticeWordAligner::lat_out_, CompactLatticeWeightTpl< WeightType, IntType >::One(), LatticeWordAligner::ProcessFinal(), LatticeWordAligner::queue_, LatticeWordAligner::tmodel_, and CompactLatticeWeightTpl< WeightType, IntType >::Zero().

Referenced by LatticeWordAligner::AlignLattice().

202  {
203  KALDI_ASSERT(!queue_.empty());
204  Tuple tuple = queue_.back().first;
205  StateId output_state = queue_.back().second;
206  queue_.pop_back();
207 
208  // First thing is-- we see whether the computation-state has something
209  // pending that it wants to output. In this case we don't do
210  // anything further. This is a chosen behavior similar to the
211  // epsilon-sequencing rules encoded by the filters in
212  // composition.
213  CompactLatticeArc lat_arc;
214  if (tuple.comp_state.OutputArc(info_, tmodel_, &lat_arc, &error_)) {
215  // note: this function changes the tuple (when it returns true).
216  lat_arc.nextstate = GetStateForTuple(tuple, true); // true == add to queue,
217  // if not already present.
218  KALDI_ASSERT(output_state != lat_arc.nextstate);
219  lat_out_->AddArc(output_state, lat_arc);
220  } else {
221  // when there's nothing to output, we'll process arcs from the input-state.
222  // note: it would in a sense be valid to do both (i.e. process the stuff
223  // above, and also these), but this is a bit like the epsilon-sequencing
224  // stuff in composition: we avoid duplicate arcs by doing it this way.
225 
226  if (lat_.Final(tuple.input_state) != CompactLatticeWeight::Zero()) {
227  KALDI_ASSERT(lat_.Final(tuple.input_state) == CompactLatticeWeight::One());
228  // ... since we did CreateSuperFinal.
229  ProcessFinal(tuple, output_state);
230  }
231  // Now process the arcs. Note: final-state shouldn't have any arcs.
232  for (fst::ArcIterator<CompactLattice> aiter(lat_, tuple.input_state);
233  !aiter.Done(); aiter.Next()) {
234  const CompactLatticeArc &arc = aiter.Value();
235  Tuple next_tuple(tuple);
236  LatticeWeight weight;
237  next_tuple.comp_state.Advance(arc, &weight);
238  next_tuple.input_state = arc.nextstate;
239  StateId next_output_state = GetStateForTuple(next_tuple, true); // true == add to queue,
240  // if not already present.
241  // We add an epsilon arc here (as the input and output happens
242  // separately)... the epsilons will get removed later.
243  KALDI_ASSERT(next_output_state != output_state);
244  lat_out_->AddArc(output_state,
245  CompactLatticeArc(0, 0,
246  CompactLatticeWeight(weight, std::vector<int32>()),
247  next_output_state));
248  }
249  }
250  }
fst::CompactLatticeWeightTpl< LatticeWeight, int32 > CompactLatticeWeight
Definition: kaldi-lattice.h:35
StateId GetStateForTuple(const Tuple &tuple, bool add_to_queue)
fst::LatticeWeightTpl< BaseFloat > LatticeWeight
Definition: kaldi-lattice.h:32
std::vector< std::pair< Tuple, StateId > > queue_
static const CompactLatticeWeightTpl< WeightType, IntType > One()
CompactLatticeArc::StateId StateId
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
const TransitionModel & tmodel_
static const CompactLatticeWeightTpl< WeightType, IntType > Zero()
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
Definition: kaldi-lattice.h:42
void ProcessFinal(Tuple tuple, StateId output_state)

◆ RemoveEpsilonsFromLattice()

void RemoveEpsilonsFromLattice ( )
inline

Definition at line 293 of file word-align-lattice.cc.

References LatticeWordAligner::info_, LatticeWordAligner::info_in_, LatticeWordAligner::lat_out_, WordBoundaryInfo::partial_word_label, fst::RemoveSomeInputSymbols(), and WordBoundaryInfo::silence_label.

Referenced by LatticeWordAligner::AlignLattice().

293  {
294  // Remove epsilon arcs from output lattice.
295  RmEpsilon(lat_out_, true); // true = connect.
296  std::vector<int32> syms_to_remove;
297  if (info_in_.partial_word_label == 0)
298  syms_to_remove.push_back(info_.partial_word_label);
299  if (info_in_.silence_label == 0)
300  syms_to_remove.push_back(info_.silence_label);
301  if (!syms_to_remove.empty()) {
302  RemoveSomeInputSymbols(syms_to_remove, lat_out_);
303  Project(lat_out_, fst::PROJECT_INPUT);
304  }
305  }
const WordBoundaryInfo & info_in_
void RemoveSomeInputSymbols(const std::vector< I > &to_remove, MutableFst< Arc > *fst)
RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from the input side of the FST...

Member Data Documentation

◆ error_

◆ info_

◆ info_in_

const WordBoundaryInfo& info_in_

◆ lat_

◆ lat_out_

◆ map_

MapType map_

Definition at line 345 of file word-align-lattice.cc.

Referenced by LatticeWordAligner::GetStateForTuple().

◆ max_states_

int32 max_states_

Definition at line 338 of file word-align-lattice.cc.

Referenced by LatticeWordAligner::AlignLattice().

◆ queue_

◆ tmodel_


The documentation for this class was generated from the following file: