20 #ifndef KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_ 21 #define KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_ 22 #include <fst/fstlib.h> 23 #include <fst/fst-decl.h> 50 std::vector<std::vector<int32> > *lexicon);
62 bool IsValidEntry(
const std::vector<int32> &entry)
const;
85 typedef unordered_map<std::vector<int32>,
91 typedef unordered_map<std::vector<int32>,
int32,
97 typedef unordered_map<int32, std::pair<int32, int32> >
NumPhonesMap;
128 test(false), allow_duplicate_paths(false),
132 opts->
Register(
"partial-word-label", &partial_word_label,
"Numeric id of " 133 "word symbol that is to be used for arcs in the word-aligned " 134 "lattice corresponding to partial words at the end of " 135 "\"forced-out\" utterances (zero is OK)");
136 opts->
Register(
"reorder", &reorder,
"True if the lattices were generated " 137 "from graphs that had the --reorder option true, relating to " 138 "reordering self-loops (typically true)");
139 opts->
Register(
"test", &test,
"If true, testing code will be activated " 140 "(the purpose of this is to validate the algorithm).");
141 opts->
Register(
"allow-duplicate-paths", &allow_duplicate_paths,
"Only " 142 "has an effect if --test=true. If true, does not die " 143 "(only prints warnings) if duplicate paths are found. " 144 "This should only happen with very pathological lexicons, " 145 "e.g. as encountered in testing code.");
146 opts->
Register(
"max-expand", &max_expand,
"If >0.0, the maximum ratio " 147 "by which we allow the lattice-alignment code to increase the #states" 148 "in a lattice (vs. the phone-aligned lattice) before we fail and " 149 "refuse to align the lattice. This is helpful in order to " 150 "prevent 'pathological' lattices from causing the program to " 151 "exhaust memory. Actual max-states is 1000 + max-expand * " 181 const std::vector<std::vector<int32> > &lexicon,
183 bool allow_duplicate_paths);
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
ViabilityMap viability_map_
A hashing function-object for vectors.
void UpdateViabilityMap(const std::vector< int32 > &lexicon_entry)
bool ReadLexiconForWordAlign(std::istream &is, std::vector< std::vector< int32 > > *lexicon)
Read the lexicon in the special format required for word alignment.
unordered_map< std::vector< int32 >, int32, VectorHasher< int32 > > LexiconMap
This is a map from a vector (orig-word-symbol phone1 phone2 ...
bool IsValidEntry(const std::vector< int32 > &entry) const
Returns true if this lexicon-entry can appear, interpreted as (output-word phone1 phone2 ...
bool WordAlignLatticeLexicon(const CompactLattice &lat, const TransitionModel &tmodel, const WordAlignLatticeLexiconInfo &lexicon_info, const WordAlignLatticeLexiconOpts &opts, CompactLattice *lat_out)
Align lattice so that each arc has the transition-ids on it that correspond to the word that is on th...
unordered_map< int32, int32 > EquivalenceMap
This is used only in testing code; it defines a mapping from a word to the primary member of that wor...
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
WordAlignLatticeLexiconOpts()
unordered_map< int32, std::pair< int32, int32 > > NumPhonesMap
This is a map from the word-id (as present in the original lattice) to the minimum and maximum #phone...
NumPhonesMap num_phones_map_
void Register(OptionsItf *opts)
void TestWordAlignedLatticeLexicon(const CompactLattice &lat, const TransitionModel &tmodel, const std::vector< std::vector< int32 > > &lexicon, const CompactLattice &aligned_lat, bool allow_duplicate_paths)
This function is designed to crash if something went wrong with the word-alignment of the lattice...
unordered_map< std::vector< int32 >, std::vector< int32 >, VectorHasher< int32 > > ViabilityMap
The type ViabilityMap maps each sequence of phones s (excluding the empty sequence) to the set of all word-labels [on the input lattice] that could correspond to phone sequences that start with s [but are longer than s].
LexiconMap reverse_lexicon_map_
void UpdateEquivalenceMap(const std::vector< std::vector< int32 > > &lexicon)
fst::VectorFst< CompactLatticeArc > CompactLattice
void FinalizeViabilityMap()
This class extracts some information from the lexicon and stores it in a suitable form for the word-a...
void UpdateNumPhonesMap(const std::vector< int32 > &lexicon_entry)
bool allow_duplicate_paths
WordAlignLatticeLexiconInfo(const std::vector< std::vector< int32 > > &lexicon)
void UpdateLexiconMap(const std::vector< int32 > &lexicon_entry)
Update the map from a vector (orig-word-symbol phone1 phone2 ...
int32 EquivalenceClassOf(int32 word) const
Purely for the testing code, we map words into equivalence classes derived from the mappings in the f...
EquivalenceMap equivalence_map_