32   bool b = tree->
Map(empty, &ans);
    40   std::map<EventKeyType, std::set<EventValueType> > all_key_vals;
    41   for (
size_t i = 0;
i < 20;
i++) {
    42     std::map<EventKeyType, EventValueType> key_vals;
    43     for (
size_t j = 0;
j < 20;
j++) {
    46       if (key_vals.count(k) == 0) {
    48         all_key_vals[k].insert(v);
    53     stats.push_back(std::pair<EventType, Clusterable*>(evec, (
Clusterable*)NULL));
    55   for (std::map<
EventKeyType, std::set<EventValueType> >::iterator iter = all_key_vals.begin();
    56       iter != all_key_vals.end(); iter++) {
    58     std::vector<EventValueType> vals1, vals2;
    62       printf(
"vals differ!\n");
    63       for (
size_t i = 0;
i < vals1.size();
i++) std::cout << vals1[
i] << 
" ";
    65       for (
size_t i = 0;
i < vals2.size();
i++) std::cout << vals2[
i] << 
" ";
    77     evec.push_back(std::pair<int32, int32>(-1, 1));
    78     evec.push_back(std::pair<int32, int32>(0, 10));
    79     evec.push_back(std::pair<int32, int32>(1, 11));
    80     evec.push_back(std::pair<int32, int32>(2, 12));
    81     stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(NULL)));
    82     int32 oldN = 3, oldP = 1, newN = 1, newP = 0;
    86     KALDI_ASSERT(new_evec[0].first == -1 && new_evec[0].second == 1);
    87     KALDI_ASSERT(new_evec[1].first == 0 && new_evec[1].second == 11);
    94     evec.push_back(std::pair<int32, int32>(-1, 1));
    95     evec.push_back(std::pair<int32, int32>(0, 10));
    96     evec.push_back(std::pair<int32, int32>(1, 11));
    97     evec.push_back(std::pair<int32, int32>(2, 12));
    98     stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(NULL)));
    99     int32 oldN = 3, oldP = 1, newN = 2, newP = 1;
   103     KALDI_ASSERT(new_evec[0].first == -1 && new_evec[0].second == 1);
   104     KALDI_ASSERT(new_evec[1].first == 0 && new_evec[1].second == 10);
   105     KALDI_ASSERT(new_evec[2].first == 1 && new_evec[2].second == 11);
   112     evec.push_back(std::pair<int32, int32>(-1, 1));
   113     evec.push_back(std::pair<int32, int32>(0, 10));
   114     evec.push_back(std::pair<int32, int32>(1, 11));
   115     evec.push_back(std::pair<int32, int32>(2, 12));
   116     stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(NULL)));
   117     int32 oldN = 3, oldP = 1, newN = 3, newP = 1;
   129         evec.push_back(std::make_pair(12, 
Rand() % 10));
   130       evec.push_back(std::make_pair(10, 
Rand() % 10));
   132         evec.push_back(std::make_pair(8, 
Rand() % 10));
   133       std::sort(evec.begin(), evec.end());
   134       stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(NULL)));
   136     std::vector<BuildTreeStatsType> stats_vec;
   138     for(
int32 i = 0; i < static_cast<int32>(stats_vec.size()); 
i++) {
   139       for(
int32 j = 0; j < static_cast<int32>(stats_vec[
i].size()); 
j++) {
   149   for (
size_t iter = 0;iter < 10;iter++) {
   151     std::set<EventKeyType> all_keys_union;
   152     std::set<EventKeyType> all_keys_intersection;
   154     for (
size_t i = 0;
i < 3;
i++) {
   155       std::map<EventKeyType, EventValueType> key_vals;
   156       for (
size_t j = 0;
j < 5;
j++) {
   163       stats.push_back(std::pair<EventType, Clusterable*>(evec, (
Clusterable*) NULL));
   164       std::set<EventKeyType> s;
   166       if (
i == 0) { all_keys_union = s; all_keys_intersection = s; }
   168         std::set<EventKeyType> new_intersection;
   169         for (std::set<EventKeyType>::iterator iter = s.begin(); iter != s.end(); iter++) {
   170           all_keys_union.insert(*iter);
   171           if (all_keys_intersection.count(*iter) != 0) new_intersection.insert(*iter);
   173         all_keys_intersection = new_intersection;
   178       std::vector<EventKeyType> keys1, keys2;
   184       std::vector<EventKeyType> keys1, keys2;
   190       std::vector<EventKeyType> keys1, keys2;
   208   for (
size_t iter = 0;iter < 10;iter++) {
   212     std::set<EventValueType> all_vals;
   213     for (
size_t i = 0;
i < 10;
i++) {
   217         if (kk == k) all_vals.insert(v);
   218         evec.push_back(std::make_pair(kk, v));
   220       stats.push_back(std::pair<EventType, Clusterable*>(evec, (
Clusterable*) NULL));
   227     for (
size_t i = 0;
i < 10;
i++) {
   228       size_t idx1 = 
RandInt(0, stats.size()-1), idx2 = 
RandInt(0, stats.size()-1);
   230       table_map->
Map(stats[idx1].first, &ans1);
   232       table_map->
Map(stats[idx2].first, &ans2);
   242       if (all_vals.count(
i) == 0) {
   243         EventType v; v.push_back(std::make_pair(k, 
i));
   245         bool b = table_map->
Map(v, &ans);
   258   for (
size_t iter = 0;iter < 1;iter++) {  
   261     int32 num_clust = 10;
   262     for (
int32 i = 0;
i < num_clust;
i++) {  
   263       size_t n = 1 + 
Rand() % 3;
   264       for (
size_t j = 0;
j < 
n;
j++) {
   267         evec.push_back(std::make_pair(key, cur_value++));
   268         stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(
new ScalarClusterable(scalar))));
   278     std::vector<EventMap*> mapping;
   281     std::cout << 
"TestCluster(): num_reduced = "<<num_reduced<<
", expected: "<<cur_value<<
" - "<<num_clust<<
" = "<<(cur_value-num_clust)<<
'\n';
   290     std::vector<EventAnswerType> orig_answers, clustered_answers, renumbered_answers;
   293     table_map->
MultiMap(empty_vec, &orig_answers);
   294     clustered_map->
MultiMap(empty_vec, &clustered_answers);
   295     renumbered_map->
MultiMap(empty_vec, &renumbered_answers);
   306     delete renumbered_map;
   307     delete clustered_map;
   317   for (
size_t iter = 0;iter < 1;iter++) {  
   321     int32 num_clust = 10;
   322     for (
int32 i = 0;
i < num_clust;
i++) {  
   323       size_t n = 1 + 
Rand() % 3;
   324       for (
size_t j = 0;
j < 
n;
j++) {
   327         evec.push_back(std::make_pair(key, cur_value++));
   328         stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(
new ScalarClusterable(scalar))));
   329         if (
Rand() % 10 < 5) stats_reduced.push_back(stats.back());
   339     std::vector<EventMap*> mapping;
   342     std::cout << 
"TestCluster(): num_reduced = "<<num_reduced<<
", expected [ignoring gaps]: "<<cur_value<<
" - "<<num_clust<<
" = "<<(cur_value-num_clust)<<
'\n';
   351     std::vector<EventAnswerType> orig_answers, clustered_answers, renumbered_answers;
   354     table_map->
MultiMap(empty_vec, &orig_answers);
   355     clustered_map->MultiMap(empty_vec, &clustered_answers);
   356     renumbered_map->
MultiMap(empty_vec, &renumbered_answers);
   367     delete renumbered_map;
   368     delete clustered_map;
   383   for (
size_t iter = 0;iter < 1;iter++) {  
   386     int32 num_clust = 10;
   387     for (
int32 i = 0;
i < num_clust;
i++) {  
   388       size_t n = 1 + 
Rand() % 3;
   389       for (
size_t j = 0;
j < 
n;
j++) {
   392         evec.push_back(std::make_pair(key, cur_value++));
   393         stats.push_back(std::make_pair(evec, static_cast<Clusterable*>(
new ScalarClusterable(scalar))));
   403     std::set<EventValueType> exclude_leaves;
   404     for (
size_t i = 0;
i < 4;
i++) exclude_leaves.insert(
Rand() % num_clust);
   407     for (
size_t i = 0;
i < stats.size();
i++) {
   408       if (exclude_leaves.count(stats[
i].first[0].second) != 0) {  
   409         stats_excluded.push_back(stats[i]);
   411         stats_included.push_back(stats[i]);
   414     KALDI_ASSERT(!stats_excluded.empty()&&!stats_included.empty() && stats_excluded.size()+stats_included.size() == stats.size());
   419     std::cout << 
"TestCluster*(): num_reduced = "<<num_reduced<<
", expected [without exclusion]: "<<cur_value<<
" - "<<num_clust<<
" = "<<(cur_value-num_clust)<<
'\n';
   422     for (
size_t i = 0;
i < stats_excluded.size();
i++) {
   423       const EventType &evec = stats_excluded[
i].first;
   429     delete clustered_map;
   442   bool test_by_key = (
Rand()%2 == 0);
   445   std::vector<EventKeyType> keys;
   448     std::set<EventKeyType> keys_set;
   449     while (keys_set.size() < (
size_t)num_keys)
   450       keys_set.insert(  (
Rand() % (num_keys + 10)) - 3 );
   460   for (
size_t i = 0; 
i < (
size_t)n_stats; 
i++) {
   463     for (
size_t j = 0; 
j < keys.size(); 
j++) {
   465       evec.push_back(std::make_pair(keys[
j], val));
   470   std::vector<EventKeyType> special_keys;
   471   for (
size_t i = 0; 
i < keys.size(); 
i++)
   472     if (
RandUniform() < 0.5) special_keys.push_back(keys[
i]);
   482   int32 nleaves_after_table_split = nleaves;
   483   std::cout << 
"TestClusterEventMapRestricted: after splitting on "<<special_keys.size()<<
" keys, nleaves = " <<nleaves<<
'\n';
   487   int32 num_quest = 
Rand() % 10, num_iters = rand () % 5;
   489   float thresh = 0.001;
   490   int32 max_leaves = 50;
   494                                            &nleaves, &impr, &smallest_split);
   495   KALDI_ASSERT((nleaves <= max_leaves || nleaves == nleaves_after_table_split) && smallest_split >= thresh);
   497   std::cout << 
"TestClusterEventMapRestricted: after building decision tree, " <<nleaves<<
'\n';
   503                                               thresh, &num_removed);
   504     std::cout << 
"ClusterEventMap: num_removed = "<<num_removed;
   506     delete map_clustered;
   515                                                       thresh, special_keys,
   519                                                      thresh, *table_split_map,
   522     std::cout << 
"ClusterEventMapRestricted: num_removed = "<<num_removed;
   524     KALDI_ASSERT(num_removed == nleaves - nleaves_after_table_split);
   525     delete map_clustered;
   530   delete table_split_map;
   540   std::vector<EventKeyType> keys;
   543     std::set<EventKeyType> keys_set;
   544     while (keys_set.size() < (
size_t)num_keys)
   545       keys_set.insert(  (
Rand() % (num_keys + 10)) - 3 );
   555   for (
size_t i = 0; 
i < (
size_t)n_stats; 
i++) {
   558     for (
size_t j = 0; 
j < keys.size(); 
j++) {
   560       evec.push_back(std::make_pair(keys[
j], val));
   565   std::vector<EventKeyType> special_keys;
   566   for (
size_t i = 0; 
i < keys.size(); 
i++)
   567     if (
RandUniform() < 0.5) special_keys.push_back(keys[
i]);
   578   std::cout << 
"TestClusterEventMapRestricted: after splitting on "<<special_keys.size()<<
" keys, nleaves = " <<nleaves<<
'\n';
   580   int nleaves_after_table_split = nleaves;
   582   int32 num_quest = 
Rand() % 10, num_iters = rand () % 5;
   584   float thresh = 0.001;
   585   int32 max_leaves = 100;
   589                                            &nleaves, &impr, &smallest_split);
   590   KALDI_ASSERT((nleaves <= max_leaves || nleaves == nleaves_after_table_split) && smallest_split >= thresh);
   592   std::cout << 
"TestShareEventMapLeaves: after building decision tree, " <<nleaves<<
'\n';
   594   if (special_keys.size() == 0) {
   595     KALDI_WARN << 
"TestShareEventMapLeaves(): could not test since key not always defined.";
   598     delete table_split_map;
   603   std::vector<EventValueType> values;
   607   std::set<EventValueType> to_share;
   608   for (
size_t i = 0; i < 3; i++) to_share.insert(values[
Rand() % values.size()]);
   610   std::vector<std::vector<EventValueType> > share_value;
   611   for (std::set<EventValueType>::iterator iter = to_share.begin();
   612       iter != to_share.end();
   614     share_value.resize(share_value.size()+1);
   615     share_value.back().push_back(*iter);
   623   for (
size_t i = 0; i < share_value.size(); i++) {
   625     std::vector<EventAnswerType> answers;
   635   delete table_split_map;
   642   for (
int32 p = 0;p < 10;p++) {
   643     std::vector<EventKeyType>  keys_all, keys_some;
   645       std::set<EventKeyType> keys_all_set, keys_some_set;
   647       for (
int32 i = 0;
i < num_all;
i++) keys_all_set.insert(
Rand() % 10);
   648       for (
int32 i = 0;
i < num_some;
i++) {
   650         if (keys_all_set.count(k) == 0) keys_some_set.insert(k);
   655     std::set<EventKeyType> keys_all_saw_set;
   659     size_t n_stats = 
Rand() % 100;
   661     if (n_stats > 90) n_stats = 0;
   662     if (n_stats > 80) n_stats = 1;
   664     for (
size_t i = 0;
i < n_stats;
i++) {  
   666       for (
size_t j = 0;
j < keys_all.size();
j++) {
   668         keys_all_saw_set.insert(keys_all[j]);
   670       for (
size_t j = 0;
j < keys_some.size();
j++)
   671         if (
Rand() % 2 == 0) {  
   673           keys_all_saw_set.insert(keys_some[j]);
   675       std::sort(evec.begin(), evec.end());  
   677       dummy_stats.push_back(std::make_pair(evec, (
Clusterable*)NULL));
   680     bool intersection = (p%2 == 0);
   681     int32 num_quest = 
Rand() % 10, num_iters = rand () % 5;
   684     for (
int i = 0; 
i < 2; 
i++) {
   686       bool binary = (
i == 0);
   687       std::ostringstream oss;
   688       qo.
Write(oss, binary);
   690       std::istringstream iss(oss.str());
   692       qo2.
Read(iss, binary);
   694       std::ostringstream oss2;
   695       qo2.Write(oss2, binary);
   697       if (oss.str() != oss2.str()) {
   698         KALDI_ERR << 
"Questions I/O failure: " << oss.str() << 
" vs. " << oss2.str();
   704         for (
size_t i = 0;
i < keys_all.size();
i++) {
   708           for (
size_t j = 0;
j < opts.initial_questions.size();
j++) {
   709             for (
size_t k = 0;k < opts.initial_questions[
j].size();k++)
   710               std::cout << opts.initial_questions[
j][k] <<
" ";
   716         for (
size_t i = 0;
i < keys_all.size();
i++) {
   721         for (std::set<int32>::iterator iter = keys_all_saw_set.begin(); iter != keys_all_saw_set.end(); iter++) {
   732   for (
int32 p = 0;p < 4;p++) {
   733     std::vector<EventKeyType>  keys_all, keys_some;
   735       std::set<EventKeyType> keys_all_set, keys_some_set;
   737       for (
int32 i = 0;
i < num_all;
i++) keys_all_set.insert(
Rand() % 10);
   738       for (
int32 i = 0;
i < num_some;
i++) {
   740         if (keys_all_set.count(k) == 0) keys_some_set.insert(k);
   745     std::set<EventKeyType> keys_all_saw_set;
   749     size_t n_stats = 
Rand() % 100;
   751     if (n_stats > 90) n_stats = 0;
   752     if (n_stats > 80) n_stats = 1;
   754     for (
size_t i = 0;
i < n_stats;
i++) {  
   756       for (
size_t j = 0;
j < keys_all.size();
j++) {
   758         keys_all_saw_set.insert(keys_all[j]);
   760       for (
size_t j = 0;
j < keys_some.size();
j++)
   761         if (
Rand() % 2 == 0) {  
   763           keys_all_saw_set.insert(keys_some[j]);
   765       std::sort(evec.begin(), evec.end());  
   773     bool intersection = 
true;  
   775     int32 num_quest = 
Rand() % 10, num_iters = rand () % 5;
   780         for (
size_t i = 0;
i < keys_all.size();
i++) {
   784           for (
size_t j = 0;
j < opts.initial_questions.size();
j++) {
   785             for (
size_t k = 0;k < opts.initial_questions[
j].size();k++)
   786               std::cout << opts.initial_questions[
j][k] <<
" ";
   792         for (
size_t i = 0;
i < keys_all.size();
i++) {
   797         for (std::set<int32>::iterator iter = keys_all_saw_set.begin(); iter != keys_all_saw_set.end(); iter++) {
   801       std::cout << 
"num_quest = " <<num_quest<<
", num_iters = "<<num_iters<<
'\n';
   804       int32 num_leaves = 0;
   805       int32 max_leaves = 50;
   810                                                &num_leaves, &impr, &smallest_split);
   811       KALDI_ASSERT(num_leaves <= max_leaves && smallest_split >= thresh);
   815         std::cout << 
"Objf impr is " << impr << 
", computed differently: " <<impr_check<<
'\n';
   820       std::cout << 
"After splitting, num_leaves = " << num_leaves << 
'\n';
   822       std::vector<BuildTreeStatsType> mapped_stats;
   824       std::cout << 
"Assignments of stats to leaves is:\n";
   825       for (
size_t i = 0; 
i < mapped_stats.size(); 
i++) {
   826         std::cout << 
" [ leaf "<<
i<<
"]: ";
   827         for (
size_t j = 0; 
j < mapped_stats[
i].size(); 
j++) {
   839   for (
int32 p = 0; p < 10; p++) {
   840     size_t num_stats = 
Rand() % 20;
   842     for (
size_t i = 0; 
i < num_stats; 
i++) {
   847         ev.push_back(std::make_pair(key, value));
   849       stats.push_back(std::make_pair(ev, (
Clusterable*) NULL));
   851     const char *filename = 
"tmpf";
   858       Input ki(filename, &binary_in);
   860                          binary_in, gc, &stats2);
   873   using namespace kaldi;
   874   for (
size_t i = 0;
i < 2;
i++) {
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
 
void CopySetToVector(const std::set< T > &s, std::vector< T > *v)
Copies the elements of a set to a vector. 
 
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
 
std::pair< EventKeyType, EventValueType > MakeEventPair(EventKeyType k, EventValueType v)
 
bool ConvertStats(int32 oldN, int32 oldP, int32 newN, int32 newP, BuildTreeStatsType *stats)
Converts stats from a given context-window (N) and central-position (P) to a different N and P...
 
void TestClusterEventMapGetMappingAndRenumberEventMap()
 
This class defines, for each EventKeyType, a set of initial questions that it tries and also a number...
 
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1. 
 
virtual EventAnswerType MaxResult() const
 
void SplitStatsByMap(const BuildTreeStatsType &stats, const EventMap &e, std::vector< BuildTreeStatsType > *stats_out)
Splits stats according to the EventMap, indexing them at output by the leaf type. ...
 
void FindAllKeys(const BuildTreeStatsType &stats, AllKeysType keys_type, std::vector< EventKeyType > *keys_out)
FindAllKeys puts in *keys the (sorted, unique) list of all key identities in the stats. 
 
const QuestionsForKey & GetQuestionsOf(EventKeyType key) const
 
float RandGauss(struct RandomState *state=NULL)
 
void SortAndUniq(std::vector< T > *vec)
Sorts a vector and uniq's it (removes duplicate elements). 
 
void SplitStatsByKey(const BuildTreeStatsType &stats_in, EventKeyType key, std::vector< BuildTreeStatsType > *stats_out)
SplitStatsByKey splits stats up according to the value of a particular key, which must be always defi...
 
virtual bool Map(const EventType &event, EventAnswerType *ans) const =0
 
bool PossibleValues(EventKeyType key, const BuildTreeStatsType &stats, std::vector< EventValueType > *ans)
Convenience function e.g. 
 
void DeleteBuildTreeStats(BuildTreeStatsType *stats)
This frees the Clusterable* pointers in "stats", where non-NULL, and sets them to NULL...
 
void ReadBuildTreeStats(std::istream &is, bool binary, const Clusterable &example, BuildTreeStatsType *stats)
Reads BuildTreeStats object. 
 
static void Check(const EventType &event)
 
std::vector< std::pair< EventKeyType, EventValueType > > EventType
 
void CopyMapKeysToSet(const std::map< A, B > &m, std::set< A > *s)
Copies the keys in a map to a set. 
 
void InitRand(const BuildTreeStatsType &stats, int32 num_quest, int32 num_iters_refine, AllKeysType all_keys_type)
InitRand attempts to generate "reasonable" random questions. 
 
int32 EventKeyType
Things of type EventKeyType can take any value. 
 
QuestionsForKey is a class used to define the questions for a key, and also options that allow us to ...
 
std::vector< std::vector< EventValueType > > initial_questions
 
void TestSplitStatsByKey()
 
void Read(std::istream &is, bool binary)
 
void TestShareEventMapLeaves()
 
virtual EventMap * Copy(const std::vector< EventMap *> &new_leaves) const =0
 
BaseFloat ObjfGivenMap(const BuildTreeStatsType &stats_in, const EventMap &e)
Clusters the stats given the event map and returns the total objf given those clusters. ...
 
int Rand(struct RandomState *state)
 
void TestBuildTreeStatsIo(bool binary)
 
static bool Lookup(const EventType &event, EventKeyType key, EventValueType *ans)
 
void TestClusterEventMapRestricted()
 
A class that is capable of representing a generic mapping from EventType (which is a vector of (key...
 
bool HasQuestionsForKey(EventKeyType key) const
 
#define KALDI_ASSERT(cond)
 
void TestPossibleValues()
 
void TestQuestionsInitRand()
 
int32 EventAnswerType
As far as the event-map code itself is concerned, things of type EventAnswerType may take any value e...
 
int32 EventValueType
Given current code, things of type EventValueType should generally be nonnegative and in a reasonably...
 
std::vector< std::pair< EventType, Clusterable * > > BuildTreeStatsType
 
void TestClusterEventMap()
 
GaussClusterable wraps Gaussian statistics in a form accessible to generic clustering algorithms...
 
virtual void MultiMap(const EventType &event, std::vector< EventAnswerType > *ans) const =0
 
void CopyMapToVector(const std::map< A, B > &m, std::vector< std::pair< A, B > > *v)
Copies the (key, value) pairs in a map to a vector of pairs. 
 
void TestClusterEventMapGetMappingAndRenumberEventMap2()
 
void TestSplitDecisionTree()
 
void Write(std::ostream &os, bool binary) const
 
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
 
void WriteBuildTreeStats(std::ostream &os, bool binary, const BuildTreeStatsType &stats)
Writes BuildTreeStats object. This works even if pointers are NULL. 
 
ScalarClusterable clusters scalars with x^2 loss.