31 int main(
int argc,
char *argv[]) {
33 using namespace kaldi;
36 typedef kaldi::uint64 uint64;
39 "Create an inverted index of the given lattices. The output index is \n" 40 "in the T*T*T semiring. For details for the semiring, please refer to\n" 41 "Dogan Can and Murat Saraclar's paper named " 42 "\"Lattice Indexing for Spoken Term Detection\"\n" 44 "Usage: lattice-to-kws-index [options] " 45 " <utter-symtab-rspecifier> <lattice-rspecifier> <index-wspecifier>\n" 47 " lattice-to-kws-index ark:utter.symtab ark:1.lats ark:global.idx\n";
51 int32 frame_subsampling_factor = 1;
52 int32 max_silence_frames = 50;
54 bool allow_partial =
true;
56 po.
Register(
"frame-subsampling-factor", &frame_subsampling_factor,
57 "Frame subsampling factor. (Default value 1)");
58 po.
Register(
"max-silence-frames", &max_silence_frames,
59 "If --frame-subsampling-factor is used, --max-silence-frames " 60 "is relative to the the input, not the output frame rate " 61 "(we divide by frame-subsampling-factor and round to " 62 "the closest integer, to get the number of symbols in the " 64 po.
Register(
"strict", &strict,
"Setting --strict=false will cause " 65 "successful termination even if we processed no lattices.");
66 po.
Register(
"max-states-scale", &max_states_scale,
"Number of states in the" 67 " original lattice times this scale is the number of states " 68 "allowed when optimizing the index. Negative number means no " 69 "limit on the number of states.");
70 po.
Register(
"allow-partial", &allow_partial,
"Allow partial output if fails" 71 " to determinize, otherwise skip determinization if it fails.");
80 max_silence_frames = 0.5 +
81 max_silence_frames /
static_cast<float>(frame_subsampling_factor);
82 std::string usymtab_rspecifier = po.
GetOptArg(1),
83 lats_rspecifier = po.
GetArg(2),
84 index_wspecifier = po.
GetArg(3);
94 index_writer(index_wspecifier);
99 int32 max_states = -1;
101 for (; !clat_reader.
Done(); clat_reader.
Next()) {
102 std::string key = clat_reader.
Key();
105 KALDI_LOG <<
"Processing lattice " << key;
107 if (max_states_scale > 0) {
108 max_states =
static_cast<int32
>(
109 max_states_scale *
static_cast<BaseFloat>(clat.NumStates()));
113 if (!usymtab_reader.
HasKey(key)) {
114 KALDI_WARN <<
"Cannot find utterance id for " << key;
120 uint64 props = clat.Properties(fst::kFstProperties,
false);
121 if (!(props & fst::kTopSorted)) {
122 if (fst::TopSort(&clat) ==
false) {
123 KALDI_WARN <<
"Cycles detected in lattice " << key;
130 std::vector<int32> state_times;
140 bool success =
false;
143 KALDI_WARN <<
"State id's and alignments do not match for lattice " 170 KALDI_VLOG(1) <<
"Generating factor transducer...";
172 int32 utterance_id = usymtab_reader.
Value(key);
178 KALDI_WARN <<
"Cannot generate factor transducer for lattice " << key;
202 KALDI_VLOG(1) <<
"Doing factor disambiguation...";
209 KALDI_VLOG(1) <<
"Optimizing factor transducer...";
215 index_writer.
Write(key, index_transducer);
220 KALDI_LOG <<
"Done " << n_done <<
" lattices, failed for " << n_fail;
222 return (n_done != 0 ? 0 : 1);
225 }
catch(
const std::exception &e) {
226 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
fst::VectorFst< KwsProductArc > KwsProductFst
void EnsureEpsilonProperty(VectorFst< Arc > *fst)
This function modifies the fst (while maintaining equivalence) in such a way that, after the modification, all states of the FST which have epsilon-arcs entering them, have no non-epsilon arcs entering them, and all states which have epsilon-arcs leaving them, have no non-epsilon arcs leaving them.
bool ClusterLattice(CompactLattice *clat, const std::vector< int32 > &state_times)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
fst::VectorFst< KwsLexicographicArc > KwsLexicographicFst
void DoFactorMerging(KwsProductFst *factor_transducer, KwsLexicographicFst *index_transducer)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
bool CreateFactorTransducer(const CompactLattice &clat, const std::vector< int32 > &state_times, int32 utterance_id, KwsProductFst *factor_transducer)
int32 CompactLatticeStateTimes(const CompactLattice &lat, vector< int32 > *times)
Same as LatticeStateTimes, but for lattices in the CompactLattice format.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
fst::VectorFst< CompactLatticeArc > CompactLattice
void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer)
void RemoveLongSilences(int32 max_silence_frames, const std::vector< int32 > &state_times, KwsProductFst *factor_transducer)
int NumArgs() const
Number of positional parameters (cf. argc-1).
int main(int argc, char *argv[])
void DoFactorDisambiguation(KwsLexicographicFst *index_transducer)
void OptimizeFactorTransducer(KwsLexicographicFst *index_transducer, int32 max_states, bool allow_partial)
std::string GetOptArg(int param) const