32 bool b = tree->
Map(empty, &ans);
40 std::map<EventKeyType, std::set<EventValueType> > all_key_vals;
41 for (
size_t i = 0;
i < 20;
i++) {
42 std::map<EventKeyType, EventValueType> key_vals;
43 for (
size_t j = 0;
j < 20;
j++) {
46 if (key_vals.count(k) == 0) {
48 all_key_vals[k].insert(v);
53 stats.push_back(std::pair<EventType, Clusterable*>(evec, (
Clusterable*)NULL));
55 for (std::map<
EventKeyType, std::set<EventValueType> >::iterator iter = all_key_vals.begin();
56 iter != all_key_vals.end(); iter++) {
58 std::vector<EventValueType> vals1, vals2;
62 printf(
"vals differ!\n");
63 for (
size_t i = 0;
i < vals1.size();
i++) std::cout << vals1[
i] <<
" ";
65 for (
size_t i = 0;
i < vals2.size();
i++) std::cout << vals2[
i] <<
" ";
77 evec.push_back(std::pair<int32, int32>(-1, 1));
78 evec.push_back(std::pair<int32, int32>(0, 10));
79 evec.push_back(std::pair<int32, int32>(1, 11));
80 evec.push_back(std::pair<int32, int32>(2, 12));
81 stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(NULL)));
82 int32 oldN = 3, oldP = 1, newN = 1, newP = 0;
86 KALDI_ASSERT(new_evec[0].first == -1 && new_evec[0].second == 1);
87 KALDI_ASSERT(new_evec[1].first == 0 && new_evec[1].second == 11);
94 evec.push_back(std::pair<int32, int32>(-1, 1));
95 evec.push_back(std::pair<int32, int32>(0, 10));
96 evec.push_back(std::pair<int32, int32>(1, 11));
97 evec.push_back(std::pair<int32, int32>(2, 12));
98 stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(NULL)));
99 int32 oldN = 3, oldP = 1, newN = 2, newP = 1;
103 KALDI_ASSERT(new_evec[0].first == -1 && new_evec[0].second == 1);
104 KALDI_ASSERT(new_evec[1].first == 0 && new_evec[1].second == 10);
105 KALDI_ASSERT(new_evec[2].first == 1 && new_evec[2].second == 11);
112 evec.push_back(std::pair<int32, int32>(-1, 1));
113 evec.push_back(std::pair<int32, int32>(0, 10));
114 evec.push_back(std::pair<int32, int32>(1, 11));
115 evec.push_back(std::pair<int32, int32>(2, 12));
116 stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(NULL)));
117 int32 oldN = 3, oldP = 1, newN = 3, newP = 1;
129 evec.push_back(std::make_pair(12,
Rand() % 10));
130 evec.push_back(std::make_pair(10,
Rand() % 10));
132 evec.push_back(std::make_pair(8,
Rand() % 10));
133 std::sort(evec.begin(), evec.end());
134 stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(NULL)));
136 std::vector<BuildTreeStatsType> stats_vec;
138 for(
int32 i = 0; i < static_cast<int32>(stats_vec.size());
i++) {
139 for(
int32 j = 0; j < static_cast<int32>(stats_vec[
i].size());
j++) {
149 for (
size_t iter = 0;iter < 10;iter++) {
151 std::set<EventKeyType> all_keys_union;
152 std::set<EventKeyType> all_keys_intersection;
154 for (
size_t i = 0;
i < 3;
i++) {
155 std::map<EventKeyType, EventValueType> key_vals;
156 for (
size_t j = 0;
j < 5;
j++) {
163 stats.push_back(std::pair<EventType, Clusterable*>(evec, (
Clusterable*) NULL));
164 std::set<EventKeyType> s;
166 if (
i == 0) { all_keys_union = s; all_keys_intersection = s; }
168 std::set<EventKeyType> new_intersection;
169 for (std::set<EventKeyType>::iterator iter = s.begin(); iter != s.end(); iter++) {
170 all_keys_union.insert(*iter);
171 if (all_keys_intersection.count(*iter) != 0) new_intersection.insert(*iter);
173 all_keys_intersection = new_intersection;
178 std::vector<EventKeyType> keys1, keys2;
184 std::vector<EventKeyType> keys1, keys2;
190 std::vector<EventKeyType> keys1, keys2;
208 for (
size_t iter = 0;iter < 10;iter++) {
212 std::set<EventValueType> all_vals;
213 for (
size_t i = 0;
i < 10;
i++) {
217 if (kk == k) all_vals.insert(v);
218 evec.push_back(std::make_pair(kk, v));
220 stats.push_back(std::pair<EventType, Clusterable*>(evec, (
Clusterable*) NULL));
227 for (
size_t i = 0;
i < 10;
i++) {
228 size_t idx1 =
RandInt(0, stats.size()-1), idx2 =
RandInt(0, stats.size()-1);
230 table_map->
Map(stats[idx1].first, &ans1);
232 table_map->
Map(stats[idx2].first, &ans2);
242 if (all_vals.count(
i) == 0) {
243 EventType v; v.push_back(std::make_pair(k,
i));
245 bool b = table_map->
Map(v, &ans);
258 for (
size_t iter = 0;iter < 1;iter++) {
261 int32 num_clust = 10;
262 for (
int32 i = 0;
i < num_clust;
i++) {
263 size_t n = 1 +
Rand() % 3;
264 for (
size_t j = 0;
j <
n;
j++) {
267 evec.push_back(std::make_pair(key, cur_value++));
268 stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(
new ScalarClusterable(scalar))));
278 std::vector<EventMap*> mapping;
281 std::cout <<
"TestCluster(): num_reduced = "<<num_reduced<<
", expected: "<<cur_value<<
" - "<<num_clust<<
" = "<<(cur_value-num_clust)<<
'\n';
290 std::vector<EventAnswerType> orig_answers, clustered_answers, renumbered_answers;
293 table_map->
MultiMap(empty_vec, &orig_answers);
294 clustered_map->
MultiMap(empty_vec, &clustered_answers);
295 renumbered_map->
MultiMap(empty_vec, &renumbered_answers);
306 delete renumbered_map;
307 delete clustered_map;
317 for (
size_t iter = 0;iter < 1;iter++) {
321 int32 num_clust = 10;
322 for (
int32 i = 0;
i < num_clust;
i++) {
323 size_t n = 1 +
Rand() % 3;
324 for (
size_t j = 0;
j <
n;
j++) {
327 evec.push_back(std::make_pair(key, cur_value++));
328 stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(
new ScalarClusterable(scalar))));
329 if (
Rand() % 10 < 5) stats_reduced.push_back(stats.back());
339 std::vector<EventMap*> mapping;
342 std::cout <<
"TestCluster(): num_reduced = "<<num_reduced<<
", expected [ignoring gaps]: "<<cur_value<<
" - "<<num_clust<<
" = "<<(cur_value-num_clust)<<
'\n';
351 std::vector<EventAnswerType> orig_answers, clustered_answers, renumbered_answers;
354 table_map->
MultiMap(empty_vec, &orig_answers);
355 clustered_map->MultiMap(empty_vec, &clustered_answers);
356 renumbered_map->
MultiMap(empty_vec, &renumbered_answers);
367 delete renumbered_map;
368 delete clustered_map;
383 for (
size_t iter = 0;iter < 1;iter++) {
386 int32 num_clust = 10;
387 for (
int32 i = 0;
i < num_clust;
i++) {
388 size_t n = 1 +
Rand() % 3;
389 for (
size_t j = 0;
j <
n;
j++) {
392 evec.push_back(std::make_pair(key, cur_value++));
393 stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(
new ScalarClusterable(scalar))));
403 std::set<EventValueType> exclude_leaves;
404 for (
size_t i = 0;
i < 4;
i++) exclude_leaves.insert(
Rand() % num_clust);
407 for (
size_t i = 0;
i < stats.size();
i++) {
408 if (exclude_leaves.count(stats[
i].first[0].second) != 0) {
409 stats_excluded.push_back(stats[i]);
411 stats_included.push_back(stats[i]);
414 KALDI_ASSERT(!stats_excluded.empty()&&!stats_included.empty() && stats_excluded.size()+stats_included.size() == stats.size());
419 std::cout <<
"TestCluster*(): num_reduced = "<<num_reduced<<
", expected [without exclusion]: "<<cur_value<<
" - "<<num_clust<<
" = "<<(cur_value-num_clust)<<
'\n';
422 for (
size_t i = 0;
i < stats_excluded.size();
i++) {
423 const EventType &evec = stats_excluded[
i].first;
429 delete clustered_map;
442 bool test_by_key = (
Rand()%2 == 0);
445 std::vector<EventKeyType> keys;
448 std::set<EventKeyType> keys_set;
449 while (keys_set.size() < (
size_t)num_keys)
450 keys_set.insert( (
Rand() % (num_keys + 10)) - 3 );
460 for (
size_t i = 0;
i < (
size_t)n_stats;
i++) {
463 for (
size_t j = 0;
j < keys.size();
j++) {
465 evec.push_back(std::make_pair(keys[
j], val));
470 std::vector<EventKeyType> special_keys;
471 for (
size_t i = 0;
i < keys.size();
i++)
472 if (
RandUniform() < 0.5) special_keys.push_back(keys[
i]);
482 int32 nleaves_after_table_split = nleaves;
483 std::cout <<
"TestClusterEventMapRestricted: after splitting on "<<special_keys.size()<<
" keys, nleaves = " <<nleaves<<
'\n';
487 int32 num_quest =
Rand() % 10, num_iters = rand () % 5;
489 float thresh = 0.001;
490 int32 max_leaves = 50;
494 &nleaves, &impr, &smallest_split);
495 KALDI_ASSERT((nleaves <= max_leaves || nleaves == nleaves_after_table_split) && smallest_split >= thresh);
497 std::cout <<
"TestClusterEventMapRestricted: after building decision tree, " <<nleaves<<
'\n';
503 thresh, &num_removed);
504 std::cout <<
"ClusterEventMap: num_removed = "<<num_removed;
506 delete map_clustered;
515 thresh, special_keys,
519 thresh, *table_split_map,
522 std::cout <<
"ClusterEventMapRestricted: num_removed = "<<num_removed;
524 KALDI_ASSERT(num_removed == nleaves - nleaves_after_table_split);
525 delete map_clustered;
530 delete table_split_map;
540 std::vector<EventKeyType> keys;
543 std::set<EventKeyType> keys_set;
544 while (keys_set.size() < (
size_t)num_keys)
545 keys_set.insert( (
Rand() % (num_keys + 10)) - 3 );
555 for (
size_t i = 0;
i < (
size_t)n_stats;
i++) {
558 for (
size_t j = 0;
j < keys.size();
j++) {
560 evec.push_back(std::make_pair(keys[
j], val));
565 std::vector<EventKeyType> special_keys;
566 for (
size_t i = 0;
i < keys.size();
i++)
567 if (
RandUniform() < 0.5) special_keys.push_back(keys[
i]);
578 std::cout <<
"TestClusterEventMapRestricted: after splitting on "<<special_keys.size()<<
" keys, nleaves = " <<nleaves<<
'\n';
580 int nleaves_after_table_split = nleaves;
582 int32 num_quest =
Rand() % 10, num_iters = rand () % 5;
584 float thresh = 0.001;
585 int32 max_leaves = 100;
589 &nleaves, &impr, &smallest_split);
590 KALDI_ASSERT((nleaves <= max_leaves || nleaves == nleaves_after_table_split) && smallest_split >= thresh);
592 std::cout <<
"TestShareEventMapLeaves: after building decision tree, " <<nleaves<<
'\n';
594 if (special_keys.size() == 0) {
595 KALDI_WARN <<
"TestShareEventMapLeaves(): could not test since key not always defined.";
598 delete table_split_map;
603 std::vector<EventValueType> values;
607 std::set<EventValueType> to_share;
608 for (
size_t i = 0; i < 3; i++) to_share.insert(values[
Rand() % values.size()]);
610 std::vector<std::vector<EventValueType> > share_value;
611 for (std::set<EventValueType>::iterator iter = to_share.begin();
612 iter != to_share.end();
614 share_value.resize(share_value.size()+1);
615 share_value.back().push_back(*iter);
623 for (
size_t i = 0; i < share_value.size(); i++) {
625 std::vector<EventAnswerType> answers;
635 delete table_split_map;
642 for (
int32 p = 0;p < 10;p++) {
643 std::vector<EventKeyType> keys_all, keys_some;
645 std::set<EventKeyType> keys_all_set, keys_some_set;
647 for (
int32 i = 0;
i < num_all;
i++) keys_all_set.insert(
Rand() % 10);
648 for (
int32 i = 0;
i < num_some;
i++) {
650 if (keys_all_set.count(k) == 0) keys_some_set.insert(k);
655 std::set<EventKeyType> keys_all_saw_set;
659 size_t n_stats =
Rand() % 100;
661 if (n_stats > 90) n_stats = 0;
662 if (n_stats > 80) n_stats = 1;
664 for (
size_t i = 0;
i < n_stats;
i++) {
666 for (
size_t j = 0;
j < keys_all.size();
j++) {
668 keys_all_saw_set.insert(keys_all[j]);
670 for (
size_t j = 0;
j < keys_some.size();
j++)
671 if (
Rand() % 2 == 0) {
673 keys_all_saw_set.insert(keys_some[j]);
675 std::sort(evec.begin(), evec.end());
677 dummy_stats.push_back(std::make_pair(evec, (
Clusterable*)NULL));
680 bool intersection = (p%2 == 0);
681 int32 num_quest =
Rand() % 10, num_iters = rand () % 5;
684 for (
int i = 0;
i < 2;
i++) {
686 bool binary = (
i == 0);
687 std::ostringstream oss;
688 qo.
Write(oss, binary);
690 std::istringstream iss(oss.str());
692 qo2.
Read(iss, binary);
694 std::ostringstream oss2;
695 qo2.Write(oss2, binary);
697 if (oss.str() != oss2.str()) {
698 KALDI_ERR <<
"Questions I/O failure: " << oss.str() <<
" vs. " << oss2.str();
704 for (
size_t i = 0;
i < keys_all.size();
i++) {
708 for (
size_t j = 0;
j < opts.initial_questions.size();
j++) {
709 for (
size_t k = 0;k < opts.initial_questions[
j].size();k++)
710 std::cout << opts.initial_questions[
j][k] <<
" ";
716 for (
size_t i = 0;
i < keys_all.size();
i++) {
721 for (std::set<int32>::iterator iter = keys_all_saw_set.begin(); iter != keys_all_saw_set.end(); iter++) {
732 for (
int32 p = 0;p < 4;p++) {
733 std::vector<EventKeyType> keys_all, keys_some;
735 std::set<EventKeyType> keys_all_set, keys_some_set;
737 for (
int32 i = 0;
i < num_all;
i++) keys_all_set.insert(
Rand() % 10);
738 for (
int32 i = 0;
i < num_some;
i++) {
740 if (keys_all_set.count(k) == 0) keys_some_set.insert(k);
745 std::set<EventKeyType> keys_all_saw_set;
749 size_t n_stats =
Rand() % 100;
751 if (n_stats > 90) n_stats = 0;
752 if (n_stats > 80) n_stats = 1;
754 for (
size_t i = 0;
i < n_stats;
i++) {
756 for (
size_t j = 0;
j < keys_all.size();
j++) {
758 keys_all_saw_set.insert(keys_all[j]);
760 for (
size_t j = 0;
j < keys_some.size();
j++)
761 if (
Rand() % 2 == 0) {
763 keys_all_saw_set.insert(keys_some[j]);
765 std::sort(evec.begin(), evec.end());
773 bool intersection =
true;
775 int32 num_quest =
Rand() % 10, num_iters = rand () % 5;
780 for (
size_t i = 0;
i < keys_all.size();
i++) {
784 for (
size_t j = 0;
j < opts.initial_questions.size();
j++) {
785 for (
size_t k = 0;k < opts.initial_questions[
j].size();k++)
786 std::cout << opts.initial_questions[
j][k] <<
" ";
792 for (
size_t i = 0;
i < keys_all.size();
i++) {
797 for (std::set<int32>::iterator iter = keys_all_saw_set.begin(); iter != keys_all_saw_set.end(); iter++) {
801 std::cout <<
"num_quest = " <<num_quest<<
", num_iters = "<<num_iters<<
'\n';
804 int32 num_leaves = 0;
805 int32 max_leaves = 50;
810 &num_leaves, &impr, &smallest_split);
811 KALDI_ASSERT(num_leaves <= max_leaves && smallest_split >= thresh);
815 std::cout <<
"Objf impr is " << impr <<
", computed differently: " <<impr_check<<
'\n';
820 std::cout <<
"After splitting, num_leaves = " << num_leaves <<
'\n';
822 std::vector<BuildTreeStatsType> mapped_stats;
824 std::cout <<
"Assignments of stats to leaves is:\n";
825 for (
size_t i = 0;
i < mapped_stats.size();
i++) {
826 std::cout <<
" [ leaf "<<
i<<
"]: ";
827 for (
size_t j = 0;
j < mapped_stats[
i].size();
j++) {
839 for (
int32 p = 0; p < 10; p++) {
840 size_t num_stats =
Rand() % 20;
842 for (
size_t i = 0;
i < num_stats;
i++) {
847 ev.push_back(std::make_pair(key, value));
849 stats.push_back(std::make_pair(ev, (
Clusterable*) NULL));
851 const char *filename =
"tmpf";
858 Input ki(filename, &binary_in);
860 binary_in, gc, &stats2);
873 using namespace kaldi;
874 for (
size_t i = 0;
i < 2;
i++) {
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void CopySetToVector(const std::set< T > &s, std::vector< T > *v)
Copies the elements of a set to a vector.
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
std::pair< EventKeyType, EventValueType > MakeEventPair(EventKeyType k, EventValueType v)
bool ConvertStats(int32 oldN, int32 oldP, int32 newN, int32 newP, BuildTreeStatsType *stats)
Converts stats from a given context-window (N) and central-position (P) to a different N and P...
void TestClusterEventMapGetMappingAndRenumberEventMap()
This class defines, for each EventKeyType, a set of initial questions that it tries and also a number...
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
virtual EventAnswerType MaxResult() const
void SplitStatsByMap(const BuildTreeStatsType &stats, const EventMap &e, std::vector< BuildTreeStatsType > *stats_out)
Splits stats according to the EventMap, indexing them at output by the leaf type. ...
void FindAllKeys(const BuildTreeStatsType &stats, AllKeysType keys_type, std::vector< EventKeyType > *keys_out)
FindAllKeys puts in *keys the (sorted, unique) list of all key identities in the stats.
const QuestionsForKey & GetQuestionsOf(EventKeyType key) const
float RandGauss(struct RandomState *state=NULL)
void SortAndUniq(std::vector< T > *vec)
Sorts a vector and removes duplicate elements from it ("uniq").
void SplitStatsByKey(const BuildTreeStatsType &stats_in, EventKeyType key, std::vector< BuildTreeStatsType > *stats_out)
SplitStatsByKey splits stats up according to the value of a particular key, which must always be defined...
virtual bool Map(const EventType &event, EventAnswerType *ans) const =0
bool PossibleValues(EventKeyType key, const BuildTreeStatsType &stats, std::vector< EventValueType > *ans)
Convenience function, e.g. to work out the possible values of a key from just the stats.
void DeleteBuildTreeStats(BuildTreeStatsType *stats)
This frees the Clusterable* pointers in "stats", where non-NULL, and sets them to NULL...
void ReadBuildTreeStats(std::istream &is, bool binary, const Clusterable &example, BuildTreeStatsType *stats)
Reads BuildTreeStats object.
static void Check(const EventType &event)
std::vector< std::pair< EventKeyType, EventValueType > > EventType
void CopyMapKeysToSet(const std::map< A, B > &m, std::set< A > *s)
Copies the keys in a map to a set.
void InitRand(const BuildTreeStatsType &stats, int32 num_quest, int32 num_iters_refine, AllKeysType all_keys_type)
InitRand attempts to generate "reasonable" random questions.
int32 EventKeyType
Things of type EventKeyType can take any value.
QuestionsForKey is a class used to define the questions for a key, and also options that allow us to ...
std::vector< std::vector< EventValueType > > initial_questions
void TestSplitStatsByKey()
void Read(std::istream &is, bool binary)
void TestShareEventMapLeaves()
virtual EventMap * Copy(const std::vector< EventMap *> &new_leaves) const =0
BaseFloat ObjfGivenMap(const BuildTreeStatsType &stats_in, const EventMap &e)
Clusters the stats according to the event map and returns the total objf given those clusters. ...
int Rand(struct RandomState *state)
void TestBuildTreeStatsIo(bool binary)
static bool Lookup(const EventType &event, EventKeyType key, EventValueType *ans)
void TestClusterEventMapRestricted()
A class that is capable of representing a generic mapping from EventType (which is a vector of (key...
bool HasQuestionsForKey(EventKeyType key) const
#define KALDI_ASSERT(cond)
void TestPossibleValues()
void TestQuestionsInitRand()
int32 EventAnswerType
As far as the event-map code itself is concerned, things of type EventAnswerType may take any value e...
int32 EventValueType
Given current code, things of type EventValueType should generally be nonnegative and in a reasonably...
std::vector< std::pair< EventType, Clusterable * > > BuildTreeStatsType
void TestClusterEventMap()
GaussClusterable wraps Gaussian statistics in a form accessible to generic clustering algorithms...
virtual void MultiMap(const EventType &event, std::vector< EventAnswerType > *ans) const =0
void CopyMapToVector(const std::map< A, B > &m, std::vector< std::pair< A, B > > *v)
Copies the (key, value) pairs in a map to a vector of pairs.
void TestClusterEventMapGetMappingAndRenumberEventMap2()
void TestSplitDecisionTree()
void Write(std::ostream &os, bool binary) const
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
void WriteBuildTreeStats(std::ostream &os, bool binary, const BuildTreeStatsType &stats)
Writes BuildTreeStats object. This works even if pointers are NULL.
ScalarClusterable clusters scalars with x^2 loss.