26 for (
int32 p = 0; p < 2; p++) {
33 std::vector<int32> phone_ids(num_phones);
34 for (
size_t i = 0;
i < (
size_t)num_phones;
i++)
35 phone_ids[
i] = (
i == 0 ? (
Rand() % 2) : phone_ids[
i-1] + 1 + (
Rand()%2));
36 int32 max_phone = *std::max_element(phone_ids.begin(), phone_ids.end());
37 std::vector<int32> hmm_lengths(max_phone+1);
38 std::vector<bool> is_ctx_dep(max_phone+1);
40 for (
int32 i = 0;
i <= max_phone;
i++) {
41 hmm_lengths[
i] = 1 +
Rand() % 3;
44 for (
size_t i = 0;
i < (
size_t) num_phones;
i++) {
45 KALDI_VLOG(2) <<
"For idx = "<<
i <<
", (phone_id, hmm_length, is_ctx_dep) == " << (phone_ids[
i]) <<
" " << (hmm_lengths[phone_ids[
i]]) <<
" " << (is_ctx_dep[phone_ids[
i]]);
51 GenRandStats(dim, num_stats, N, P, phone_ids, hmm_lengths, is_ctx_dep,
false, &stats);
52 std::cout <<
"Writing random stats.";
53 std::cout <<
"dim = " << dim <<
'\n';
54 std::cout <<
"num_phones = " << num_phones <<
'\n';
55 std::cout <<
"num_stats = " << num_stats <<
'\n';
56 std::cout <<
"N = "<< N <<
'\n';
57 std::cout <<
"P = "<< P <<
'\n';
58 std::cout <<
"is-ctx-dep = ";
59 for (
size_t i = 0;
i < is_ctx_dep.size();
i++)
63 std::cout <<
"Stats are: \n";
68 for (
size_t i = 0;i < stats.size();i++) {
75 KALDI_ASSERT(position>=0 && position < hmm_lengths[central_phone]);
91 for (
int32 p = 0; p < 3; p++) {
101 std::vector<int32> phone_ids(num_phones);
102 for (
size_t i = 0;
i < (
size_t)num_phones;
i++)
103 phone_ids[
i] = (
i == 0 ? (
Rand() % 2) : phone_ids[
i-1] + 1 + (
Rand()%2));
104 int32 max_phone = *std::max_element(phone_ids.begin(), phone_ids.end());
105 std::vector<int32> hmm_lengths(max_phone+1);
106 std::vector<bool> is_ctx_dep(max_phone+1);
108 for (
int32 i = 0;
i <= max_phone;
i++) {
109 hmm_lengths[
i] = 1 +
Rand() % 3;
112 for (
size_t i = 0;
i < (
size_t) num_phones;
i++) {
113 KALDI_VLOG(2) <<
"For idx = "<<
i <<
", (phone_id, hmm_length, is_ctx_dep) == " << (phone_ids[
i]) <<
" " << (hmm_lengths[phone_ids[
i]]) <<
" " << (is_ctx_dep[phone_ids[
i]]);
117 bool ensure_all_covered =
false;
118 GenRandStats(dim, num_stats, N, P, phone_ids, hmm_lengths, is_ctx_dep, ensure_all_covered, &stats);
121 std::cout <<
"Writing random stats.";
122 std::cout <<
"dim = " << dim <<
'\n';
123 std::cout <<
"num_phones = " << num_phones <<
'\n';
124 std::cout <<
"num_stats = " << num_stats <<
'\n';
125 std::cout <<
"N = "<< N <<
'\n';
126 std::cout <<
"P = "<< P <<
'\n';
127 std::cout <<
"is-ctx-dep = ";
128 for (
size_t i = 0;
i < is_ctx_dep.size();
i++)
132 std::cout <<
"Stats are: \n";
139 int32 num_quest =
Rand() % 10, num_iters = rand () % 5;
143 std::cout <<
"Printing questions:\n";
144 std::vector<EventKeyType> keys;
146 for (
size_t i = 0;
i < keys.size();
i++) {
150 for (
size_t j = 0;
j < opts.initial_questions.size();
j++) {
151 for (
size_t k = 0;k < opts.initial_questions[
j].size();k++)
152 std::cout << opts.initial_questions[
j][k] <<
" ";
159 int max_leaves = 100;
160 std::cout <<
"Thresh = "<<thresh<<
" for building tree.\n";
163 std::cout <<
"Building tree\n";
165 std::vector<std::vector<int32> > phone_sets(phone_ids.size());
166 for (
size_t i = 0;
i < phone_ids.size();
i++)
167 phone_sets[
i].push_back(phone_ids[
i]);
168 std::vector<bool> share_roots(phone_sets.size(),
true),
169 do_split(phone_sets.size(),
true);
172 bool round_num_leaves =
true;
175 BuildTree(qopts, phone_sets, hmm_lengths, share_roots,
176 do_split, stats, thresh, max_leaves, 0.0, P,
179 tree =
BuildTree(qopts, phone_sets, hmm_lengths, share_roots,
180 do_split, stats, thresh, max_leaves, 0.0, P,
183 BuildTreeStatsType::const_iterator iter, end = stats.end();
185 std::map<EventAnswerType, std::set<EventAnswerType> > mapping;
186 int32 num_removed = 0;
187 for (iter = stats.begin(); iter != end; ++iter) {
195 auto it = mapping.find(ans);
196 if (it == mapping.end()) {
197 std::set<EventAnswerType> leaf_set;
198 leaf_set.insert(ans_not_rounded);
199 mapping.insert(it, std::make_pair(ans, leaf_set));
200 }
else if (it->second.count(ans_not_rounded) == 0) {
202 it->second.insert(ans_not_rounded);
206 std::cout <<
"Leaf rounding map:\n";
207 for (
auto it = mapping.begin(); it != mapping.end(); ++it) {
209 for (
auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
212 std::cout << std::endl;
217 tree =
BuildTree(qopts, phone_sets, hmm_lengths, share_roots,
218 do_split, stats, thresh, max_leaves, 0.0, P,
223 std::cout <<
"Tree [default build] is:\n";
224 tree->
Write(std::cout,
false);
void GetKeysWithQuestions(std::vector< EventKeyType > *keys_out) const
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
This class defines, for each EventKeyType, a set of initial questions that it tries and also a number...
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
const QuestionsForKey & GetQuestionsOf(EventKeyType key) const
void GenRandStats(int32 dim, int32 num_stats, int32 N, int32 P, const std::vector< int32 > &phone_ids, const std::vector< int32 > &phone2hmm_length, const std::vector< bool > &is_ctx_dep, bool ensure_all_phones_covered, BuildTreeStatsType *stats_out)
GenRandStats generates random statistics of the form used by BuildTree.
virtual bool Map(const EventType &event, EventAnswerType *ans) const =0
void DeleteBuildTreeStats(BuildTreeStatsType *stats)
This frees the Clusterable* pointers in "stats", where non-NULL, and sets them to NULL...
static const EventKeyType kPdfClass
std::vector< std::pair< EventKeyType, EventValueType > > EventType
void InitRand(const BuildTreeStatsType &stats, int32 num_quest, int32 num_iters_refine, AllKeysType all_keys_type)
InitRand attempts to generate "reasonable" random questions.
int32 EventKeyType
Things of type EventKeyType can take any value.
QuestionsForKey is a class used to define the questions for a key, and also options that allow us to ...
std::vector< std::vector< EventValueType > > initial_questions
int Rand(struct RandomState *state)
static bool Lookup(const EventType &event, EventKeyType key, EventValueType *ans)
A class that is capable of representing a generic mapping from EventType (which is a vector of (key...
bool HasQuestionsForKey(EventKeyType key) const
#define KALDI_ASSERT(cond)
EventMap * BuildTree(Questions &qopts, const std::vector< std::vector< int32 > > &phone_sets, const std::vector< int32 > &phone2num_pdf_classes, const std::vector< bool > &share_roots, const std::vector< bool > &do_split, const BuildTreeStatsType &stats, BaseFloat thresh, int32 max_leaves, BaseFloat cluster_thresh, int32 P, bool round_num_leaves)
BuildTree is the normal way to build a set of decision trees.
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
int32 EventAnswerType
As far as the event-map code itself is concerned, things of type EventAnswerType may take any value e...
int32 EventValueType
Given current code, things of type EventValueType should generally be nonnegative and in a reasonably...
std::vector< std::pair< EventType, Clusterable * > > BuildTreeStatsType
virtual void Write(std::ostream &os, bool binary)=0
Write to stream.
void WriteBuildTreeStats(std::ostream &os, bool binary, const BuildTreeStatsType &stats)
Writes BuildTreeStats object. This works even if pointers are NULL.