38 vector<float> split_cost(symbols.size()+1, 0.0);
40 std::set<int32> indices;
41 size_t num_indices = 1 + (
kaldi::Rand() % split_cost.size());
42 while (indices.size() < num_indices) indices.insert(
kaldi::Rand() % split_cost.size());
43 for (std::set<int32>::iterator iter = indices.begin(); iter != indices.end(); ++iter) {
44 split_cost[*iter] = cost / num_indices;
48 VectorFst<Arc> *
fst =
new VectorFst<Arc>();
49 StateId cur_state = fst->AddState();
50 fst->SetStart(cur_state);
51 for (
size_t i = 0;
i < symbols.size();
i++) {
52 StateId next_state = fst->AddState();
54 arc.ilabel = symbols[
i];
55 arc.olabel = symbols[
i];
56 arc.nextstate = next_state;
57 arc.weight = (
Weight) split_cost[
i];
58 fst->AddArc(cur_state, arc);
59 cur_state = next_state;
62 fst->SetFinal(cur_state, (Weight)split_cost[symbols.size()]);
72 const vector<typename Arc::Label> &phone_ids,
73 const vector<typename Arc::Label> &disambig_ids,
74 const vector<typename Arc::Label> &phone_seq,
75 const vector<vector<typename Arc::Label> > &ilabel_info,
84 vector<int32> input_syms;
85 vector<int32> output_syms;
88 &output_syms, &tot_cost);
91 vector<int32> phone_seq_check;
92 for (
size_t i = 0;
i < output_syms.size();
i++)
93 if (std::binary_search(phone_ids.begin(), phone_ids.end(), output_syms[
i]))
94 phone_seq_check.push_back(output_syms[
i]);
96 assert(phone_seq_check == phone_seq);
98 vector<vector<int32> > input_syms_long;
99 for (
size_t i = 0; i < input_syms.size(); i++) {
100 Label isym = input_syms[
i];
101 if (ilabel_info[isym].size() == 0)
continue;
102 if ( (ilabel_info[isym].size() == 1 &&
103 ilabel_info[isym][0] <= 0) )
continue;
104 input_syms_long.push_back(ilabel_info[isym]);
107 for (
size_t i = 0; i < input_syms_long.size(); i++) {
108 vector<int32> phone_context_window(N);
109 int pos = ((int)i) - P;
110 for (
int j = 0;
j < N;
j++, pos++) {
111 if (static_cast<size_t>(pos) < phone_seq.size()) phone_context_window[
j] = phone_seq[pos];
112 else phone_context_window[
j] = 0;
115 assert(input_syms_long[i] == phone_context_window);
117 return tot_cost.Value();
125 vector<typename Arc::Label> &disambig_syms,
129 vector<typename Arc::Label> *phoneseq_out) {
137 float disambig_prob = 0.33;
138 phoneseq_out->clear();
140 for (
size_t i = 0;
i < len;
i++) {
142 Label phone_id = phone_syms[
kaldi::Rand() % phone_syms.size()];
143 phoneseq_out->push_back(phone_id);
144 syms.push_back(phone_id);
146 for (
size_t i = 0;
static_cast<int32>(
i) < num_subseq_syms;
i++) {
148 syms.push_back(subsequential_symbol);
153 return GenAcceptorFromSequence<Arc>(syms, seq_prob);
166 std::set<int32> phones_set;
167 while (phones_set.size() < num_phones) phones_set.insert(1 +
kaldi::Rand() % (num_phones + 5));
168 vector<int32> phones;
173 if (verbose) std::cout <<
"N = "<< N <<
", P = "<<P<<
'\n';
175 Label subsequential_symbol = 1000;
176 vector<int32> disambig_syms;
177 for (
size_t i =0;
i < 5;
i++) disambig_syms.push_back(500 +
i);
178 vector<int32> phone_syms;
179 for (
size_t i = 0;
i < phones.size();
i++) phone_syms.push_back(phones[
i]);
183 phones, disambig_syms,
190 for (
size_t p = 0; p < 10; p++) {
191 vector<int32> phone_seq;
192 int num_subseq = N - P - 1;
194 VectorFst<Arc> *f = GenRandPhoneSeq<Arc>(phone_syms, disambig_syms, subsequential_symbol, num_subseq, tot_cost, &phone_seq);
196 std::cout <<
"Sequence FST is:\n";
198 FstPrinter<Arc> fstprinter(*f, NULL, NULL, NULL,
false,
true,
"\t");
199 fstprinter.Print(&std::cout,
"standard output");
203 VectorFst<Arc> fst_composed;
215 vector<vector<int32> > ilabel_info;
218 binary_in, &ilabel_info);
224 std::cout <<
"Composed FST is:\n";
226 FstPrinter<Arc> fstprinter(fst_composed, NULL, NULL, NULL,
false,
true,
"\t");
227 fstprinter.Print(&std::cout,
"standard output");
232 float tot_cost_check = CheckPhones<Arc>(fst_composed,
251 for (
int i = 0;
i < 16;
i++) {
252 bool verbose = (
i < 4);
253 bool use_matcher = ( (
i/4) % 2 == 0);
fst::StdArc::StateId StateId
const std::vector< std::vector< int32 > > & IlabelInfo() const
void WriteILabelInfo(std::ostream &os, bool binary, const vector< vector< int32 > > &info)
Utility function for writing ilabel-info vectors to disk.
void CopySetToVector(const std::set< T > &s, std::vector< T > *v)
Copies the elements of a set to a vector.
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
static VectorFst< Arc > * GenAcceptorFromSequence(const vector< typename Arc::Label > &symbols, float cost)
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
void ComposeDeterministicOnDemandInverse(const Fst< Arc > &right, DeterministicOnDemandFst< Arc > *left, MutableFst< Arc > *fst_composed)
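This function does '*fst_composed = Compose(Inverse(*left), right)'. Note that the arguments are reversed relative to the composition order: the right-hand FST is passed first and the left-hand (on-demand) FST second.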
void ReadILabelInfo(std::istream &is, bool binary, vector< vector< int32 > > *info)
Utility function for reading ilabel-info vectors from disk.
int Rand(struct RandomState *state)
fst::StdArc::Weight Weight
static void TestContextFst(bool verbose, bool use_matcher)
static float CheckPhones(const VectorFst< Arc > &linear_fst, const vector< typename Arc::Label > &phone_ids, const vector< typename Arc::Label > &disambig_ids, const vector< typename Arc::Label > &phone_seq, const vector< vector< typename Arc::Label > > &ilabel_info, int N, int P)
bool IsSorted(const std::vector< T > &vec)
Returns true if the vector is sorted.
#define KALDI_ASSERT(cond)
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
Asserts that abs(a - b) <= relative_tolerance * (abs(a) + abs(b)).
static VectorFst< Arc > * GenRandPhoneSeq(vector< typename Arc::Label > &phone_syms, vector< typename Arc::Label > &disambig_syms, typename Arc::Label subsequential_symbol, int num_subseq_syms, float seq_prob, vector< typename Arc::Label > *phoneseq_out)