28 Label subsequential_symbol,
29 const vector<int32>& phones,
30 const vector<int32>& disambig_syms,
32 int32 central_position):
33 context_width_(context_width),
34 central_position_(central_position),
36 disambig_syms_(disambig_syms),
37 subsequential_symbol_(subsequential_symbol) {
44 KALDI_WARN <<
"Context FST created but there are no phone symbols: probably " 45 "input FST was empty.";
48 for (
size_t i = 0;
i < phones.size();
i++) {
55 vector<int32> empty_vec;
75 vector<int32> pseudo_eps_vec;
76 pseudo_eps_vec.push_back(0);
86 std::vector<int32> *phone_seq) {
87 if (!phone_seq->empty()) {
88 phone_seq->erase(phone_seq->begin());
89 phone_seq->push_back(label);
94 const std::vector<int32> &seq,
Label label,
95 std::vector<int32> *full_phone_sequence) {
97 full_phone_sequence->reserve(context_width);
98 full_phone_sequence->insert(full_phone_sequence->end(),
99 seq.begin(), seq.end());
100 full_phone_sequence->push_back(label);
103 (*full_phone_sequence)[
i] = 0;
112 const vector<int32> &phone_context =
state_seqs_[s];
124 has_final_prob =
true;
126 return has_final_prob ? Weight::One() : Weight::Zero();
145 vector<int32> next_seq(seq);
149 vector<int32> full_seq;
167 vector<int32> full_seq;
170 vector<int32> next_seq(seq);
177 KALDI_ERR <<
"ContextFst: CreateArc, invalid ilabel supplied [confusion " 178 <<
"about phone list or disambig symbols?]: " << ilabel;
186 vector<int32> label_info;
187 label_info.push_back(-ilabel);
190 arc->ilabel = ilabel;
191 arc->olabel = olabel;
192 arc->weight = Weight::One();
198 const vector<int32> &phone_seq,
202 arc->ilabel = ilabel;
203 arc->weight = Weight::One();
204 arc->nextstate = dest;
220 VectorToStateMap::const_iterator iter =
state_map_.find(seq);
225 return this_state_id;
234 VectorToLabelMap::const_iterator iter =
ilabel_map_.find(label_vec);
248 VectorFst<StdArc> *ifst,
249 VectorFst<StdArc> *ofst,
250 vector<vector<int32> > *ilabels_out,
257 vector<int32> disambig_syms(disambig_syms_in);
258 std::sort(disambig_syms.begin(), disambig_syms.end());
260 vector<int32> all_syms;
262 std::sort(all_syms.begin(), all_syms.end());
263 vector<int32> phones;
264 for (
size_t i = 0;
i < all_syms.size();
i++)
265 if (!std::binary_search(disambig_syms.begin(),
266 disambig_syms.end(), all_syms[
i]))
267 phones.push_back(all_syms[
i]);
271 int32 subseq_sym = 1;
272 if (!all_syms.empty())
273 subseq_sym = std::max(subseq_sym, all_syms.back() + 1);
274 if (!disambig_syms.empty())
275 subseq_sym = std::max(subseq_sym, disambig_syms.back() + 1);
279 if (central_position != context_width-1) {
282 fst::Project(ifst, fst::PROJECT_INPUT);
287 context_width, central_position);
298 MutableFst<StdArc> *
fst) {
303 vector<StateId> final_states;
304 for (StateIterator<MutableFst<Arc> > siter(*fst); !siter.Done(); siter.Next()) {
305 StateId s = siter.Value();
306 if (fst->Final(s) != Weight::Zero()) final_states.push_back(s);
309 StateId superfinal = fst->AddState();
310 Arc arc(subseq_symbol, 0, Weight::One(), superfinal);
311 fst->AddArc(superfinal, arc);
312 fst->SetFinal(superfinal, Weight::One());
314 for (
size_t i = 0;
i < final_states.size();
i++) {
315 StateId s = final_states[
i];
316 fst->AddArc(s,
Arc(subseq_symbol, 0, fst->Final(s), superfinal));
321 arc.nextstate = final_states[
i];
326 const vector<vector<int32> > &info) {
327 int32 size = info.size();
336 vector<vector<int32> > *info) {
337 int32 size = info->size();
346 const SymbolTable &phones_symtab,
347 std::string separator,
348 std::string initial_disambig) {
350 SymbolTable *ans =
new SymbolTable(
"ilabel-info-symtab");
351 int64 s = ans->AddSymbol(phones_symtab.Find(static_cast<int64>(0)));
353 for (
size_t i = 1;
i < info.size();
i++) {
354 if (info[
i].size() == 0) {
357 if (info[
i].size() == 1 &&
359 if (info[
i][0] == 0) {
360 s = ans->AddSymbol(initial_disambig);
362 KALDI_ERR <<
"Disambig symbol " << initial_disambig
363 <<
" already in vocab";
366 std::string disambig_sym = phones_symtab.Find(-info[
i][0]);
367 if (disambig_sym ==
"") {
368 KALDI_ERR <<
"Disambig symbol " << -info[
i][0]
369 <<
" not in phone symbol-table";
371 s = ans->AddSymbol(disambig_sym);
373 KALDI_ERR <<
"Disambig symbol " << disambig_sym
374 <<
" already in vocab";
380 for (
size_t j = 0;
j < info[
i].size();
j++) {
381 std::string phonesym = phones_symtab.Find(info[
i][
j]);
382 if (phonesym ==
"") {
384 <<
" not in phone symbol-table";
386 if (j != 0) newsym += separator;
389 int64 s = ans->AddSymbol(newsym);
390 if (s != static_cast<int64>(
i)) {
391 KALDI_ERR <<
"Some problem with duplicate symbols";
fst::StdArc::StateId StateId
void WriteILabelInfo(std::ostream &os, bool binary, const vector< vector< int32 > > &info)
Utility function for writing ilabel-info vectors to disk.
VectorToStateMap state_map_
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
void CreateDisambigArc(StateId s, Label ilabel, Arc *arc)
Create disambiguation-symbol self-loop arc; where 'ilabel' must correspond to a disambiguation symbol...
void CreatePhoneOrEpsArc(StateId src, StateId dst, Label ilabel, const std::vector< int32 > &phone_seq, Arc *arc)
Creates an arc, this function is to be called only when 'ilabel' corresponds to a phone...
Label subsequential_symbol_
Label FindLabel(const std::vector< int32 > &label_info)
Finds the label index corresponding to this context-window of phones (likely of width context_width_)...
std::vector< std::vector< int32 > > ilabel_info_
virtual Weight Final(StateId s)
InverseContextFst(Label subsequential_symbol, const std::vector< int32 > &phones, const std::vector< int32 > &disambig_syms, int32 context_width, int32 central_position)
Constructor.
bool IsDisambigSymbol(Label lab)
void GetInputSymbols(const Fst< Arc > &fst, bool include_eps, std::vector< I > *symbols)
GetInputSymbols gets the list of symbols on the input of fst (including epsilon, if include_eps == tr...
kaldi::ConstIntegerSet< Label > phone_syms_
void ComposeDeterministicOnDemandInverse(const Fst< Arc > &right, DeterministicOnDemandFst< Arc > *left, MutableFst< Arc > *fst_composed)
This function does '*fst_composed = Compose(Inverse(*fst2), fst1)' Note that the arguments are revers...
void ReadILabelInfo(std::istream &is, bool binary, vector< vector< int32 > > *info)
Utility function for reading ilabel-info vectors from disk.
VectorToLabelMap ilabel_map_
virtual bool GetArc(StateId s, Label ilabel, Arc *arc)
Note: ilabel must not be epsilon.
void GetFullPhoneSequence(const std::vector< int32 > &seq, Label label, std::vector< int32 > *full_phone_sequence)
This utility function does something equivalent to the following 3 steps: *full_phone_sequence = seq;...
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
void ShiftSequenceLeft(Label label, std::vector< int32 > *phone_seq)
If phone_seq is nonempty then this function shifts it left by one and appends 'label' to it...
void SwapIlabelInfo(std::vector< std::vector< int32 > > *vec)
StateId FindState(const std::vector< int32 > &seq)
Returns the state-id corresponding to this vector of phones; creates the state if necessary...
#define KALDI_PARANOID_ASSERT(cond)
void AddSubsequentialLoop(StdArc::Label subseq_symbol, MutableFst< StdArc > *fst)
Modifies an FST so that it transduces the same paths, but the input side of the paths can all have the...
fst::StdArc::Weight Weight
kaldi::ConstIntegerSet< Label > disambig_syms_
void ComposeContext(const vector< int32 > &disambig_syms_in, int32 context_width, int32 central_position, VectorFst< StdArc > *ifst, VectorFst< StdArc > *ofst, vector< vector< int32 > > *ilabels_out, bool project_ifst)
Used in the command-line tool fstcomposecontext.
#define KALDI_ASSERT(cond)
SymbolTable * CreateILabelInfoSymbolTable(const vector< vector< int32 > > &info, const SymbolTable &phones_symtab, std::string separator, std::string initial_disambig)
The following function is mainly of use for printing and debugging.
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
bool IsPhoneSymbol(Label lab)
std::vector< std::vector< int32 > > state_seqs_