33 bool allow_zero_words,
34 bool allow_empty_word,
35 bool allow_multiple_prons,
36 std::vector<std::vector<int32> > *lexicon) {
40 for (
int32 word = 1; word <= num_words; word++) {
41 int32 num_prons =
RandInt(1, (allow_multiple_prons ? 2 : 1));
42 bool is_zero_word = allow_zero_words && (
RandInt(1, 5) == 1);
44 for (
int32 j = 0;
j < num_prons;
j++) {
47 int32 pron_length =
RandInt(((allow_empty_word && !is_zero_word) ? 0 : 1),
49 std::vector<int32> this_entry;
50 this_entry.push_back(is_zero_word ? 0 : word);
51 this_entry.push_back(word);
52 for (
int32 p = 0; p < pron_length; p++)
53 this_entry.push_back(phones[
RandInt(0, phones.size() - 1)]);
54 lexicon->push_back(this_entry);
59 std::random_shuffle(lexicon->begin(), lexicon->end());
62 for (
size_t i = 0;
i < lexicon->size();
i++) {
63 if ((*lexicon)[
i].size() > 2) {
76 GenerateLexicon(phones, allow_zero_words, allow_empty_word, allow_multiple_prons,
83 static void PrintLexicon(
const std::vector<std::vector<int32> > &lexicon) {
85 for (
size_t i = 0;
i < lexicon.size();
i++) {
87 const std::vector<int32> &entry = lexicon[
i];
88 std::cerr << entry[0] <<
"\t" << entry[1] <<
"\t";
89 for (
size_t j = 2;
j < entry.size();
j++)
90 std::cerr << entry[
j] <<
" ";
96 const std::vector<int32> &phones) {
97 std::ostringstream word_str, phone_str;
98 for (
size_t i = 0;
i < words.size();
i++)
99 word_str << words[
i] <<
" ";
100 for (
size_t i = 0;
i < phones.size();
i++)
101 phone_str << phones[
i] <<
" ";
102 KALDI_LOG <<
"Word-sequence is: " << word_str.str();
103 KALDI_LOG <<
"Phone-sequence is: " << phone_str.str();
110 std::vector<int32> *phone_seq,
111 std::vector<int32> *word_seq) {
115 for (
int32 i = 0;
i < num_words;
i++) {
116 const std::vector<int32> &lexicon_entry =
117 lexicon[
RandInt(0, lexicon.size() - 1)];
120 int32 word = lexicon_entry[0];
121 if (word != 0) word_seq->push_back(word);
124 phone_seq->insert(phone_seq->end(),
125 lexicon_entry.begin() + 2,
126 lexicon_entry.end());
133 const std::vector<int32> &
words,
135 clat->DeleteStates();
139 size_t word_start = 0, alignment_start = 0,
140 num_words = words.size(), num_transition_ids = alignment.size();
141 for (; word_start < num_words; word_start++) {
142 int32 word = words[word_start];
143 int32 ali_length =
RandInt(0, num_transition_ids - alignment_start);
144 std::vector<int32> this_ali(ali_length);
145 for (
int32 i = 0;
i < ali_length;
i++)
146 this_ali[
i] = alignment[alignment_start +
i];
147 alignment_start += ali_length;
150 int32 next_state = clat->AddState();
152 clat->AddArc(cur_state, arc);
153 cur_state = next_state;
155 if (alignment_start < alignment.size()) {
156 int32 ali_length = num_transition_ids - alignment_start;
157 std::vector<int32> this_ali(ali_length);
158 for (
int32 i = 0;
i < ali_length;
i++)
159 this_ali[
i] = alignment[alignment_start +
i];
160 alignment_start += ali_length;
163 int32 next_state = clat->AddState();
165 clat->AddArc(cur_state, arc);
166 cur_state = next_state;
176 bool allow_zero_words =
true;
177 bool allow_empty_word =
true;
178 bool allow_multiple_prons =
true;
180 const std::vector<int32> &phones = trans_model->
GetPhones();
181 std::vector<std::vector<int32> > lexicon;
183 allow_multiple_prons, &lexicon);
185 std::vector<int32> phone_seq;
186 std::vector<int32> word_seq;
187 while (phone_seq.empty())
193 std::vector<int32> alignment;
194 bool reorder = (
RandInt(0, 1) == 0);
196 phone_seq, &alignment);
221 std::vector<Lattice> nbest_lats;
222 fst::ShortestPath(lat, &nbest_lat, n);
224 KALDI_LOG <<
"Word-aligned lattice has " << nbest_lats.size() <<
" paths.";
235 std::cout <<
"Tests succeeded\n";
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void GenerateCompactLatticeRandomly(const std::vector< int32 > &alignment, const std::vector< int32 > &words, CompactLattice *clat)
const std::vector< int32 > & GetPhones() const
Returns a sorted, unique list of phones.
static const LatticeWeightTpl One()
bool WordAlignLatticeLexicon(const CompactLattice &lat, const TransitionModel &tmodel, const WordAlignLatticeLexiconInfo &lexicon_info, const WordAlignLatticeLexiconOpts &opts, CompactLattice *lat_out)
Align lattice so that each arc has the transition-ids on it that correspond to the word that is on th...
void SortAndUniq(std::vector< T > *vec)
Sorts a vector and removes its duplicate elements.
void TestWordAlignLatticeLexicon()
void GenerateWordAndPhoneSequence(std::vector< std::vector< int32 > > &lexicon, std::vector< int32 > *phone_seq, std::vector< int32 > *word_seq)
void ConvertNbestToVector(const Fst< Arc > &fst, std::vector< VectorFst< Arc > > *fsts_out)
This function converts an FST with a special structure, which is output by the OpenFst functions Shor...
static const CompactLatticeWeightTpl< WeightType, IntType > One()
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
fst::VectorFst< LatticeArc > Lattice
void GenerateRandomAlignment(const ContextDependencyInterface &ctx_dep, const TransitionModel &trans_model, bool reorder, const std::vector< int32 > &phone_sequence, std::vector< int32 > *alignment)
For use in test code, this function generates an alignment (a sequence of transition-ids) correspondi...
void GenerateLexicon(const std::vector< int32 > &phones, bool allow_zero_words, bool allow_empty_word, bool allow_multiple_prons, std::vector< std::vector< int32 > > *lexicon)
fst::VectorFst< CompactLatticeArc > CompactLattice
#define KALDI_ASSERT(cond)
This class extracts some information from the lexicon and stores it in a suitable form for the word-a...
bool WriteCompactLattice(std::ostream &os, bool binary, const CompactLattice &t)
bool allow_duplicate_paths
static void PrintLexicon(const std::vector< std::vector< int32 > > &lexicon)
fst::ArcTpl< CompactLatticeWeight > CompactLatticeArc
TransitionModel * GenRandTransitionModel(ContextDependency **ctx_dep_out)
static void PrintWordsAndPhones(const std::vector< int32 > &words, const std::vector< int32 > &phones)
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)