21 #ifndef KALDI_TREE_CLUSTER_UTILS_H_
22 #define KALDI_TREE_CLUSTER_UTILS_H_
59 const std::vector<int32> &assignments,
60 std::vector<Clusterable*> *clusters);
71 const std::vector<int32> &assignments,
72 const Clusterable &total,
73 std::vector<Clusterable*> *clusters);
112 std::vector<Clusterable*> *clusters_out,
113 std::vector<int32> *assignments_out);
124 const std::vector< std::vector<Clusterable*> > &points, BaseFloat thresh,
125 int32 min_clust, std::vector< std::vector<Clusterable*> > *clusters_out,
126 std::vector< std::vector<int32> > *assignments_out);
134 : num_iters(num_iters_in), top_n(top_n_in) {}
137 void Write(std::ostream &os, bool binary) const;
138 void Read(std::istream &is, bool binary);
157 std::vector<Clusterable*> *clusters ,
158 std::vector<int32> *assignments ,
167 : refine_cfg(), num_iters(20), num_tries(2), verbose(true) {}
205 std::vector<Clusterable*> *clusters_out,
206 std::vector<int32> *assignments_out,
214 : kmeans_cfg(), branch_factor(2), thresh(0) {
254 std::vector<Clusterable*> *clusters_out,
255 std::vector<int32> *assignments_out,
256 std::vector<int32> *clust_assignments_out,
257 int32 *num_leaves_out,
283 std::vector<Clusterable*> *clusters_out,
284 std::vector<int32> *assignments_out,
291 #endif // KALDI_TREE_CLUSTER_UTILS_H_
RefineClustersOptions(int32 num_iters_in, int32 top_n_in)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void Write(std::ostream &os, bool binary) const
BaseFloat RefineClusters(const std::vector< Clusterable *> &points, std::vector< Clusterable *> *clusters, std::vector< int32 > *assignments, RefineClustersOptions cfg)
RefineClusters is mainly used internally by other clustering algorithms.
void Read(std::istream &is, bool binary)
BaseFloat SumClusterableNormalizer(const std::vector< Clusterable *> &vec)
Returns the total normalizer (usually count) of the cluster (pointers may be NULL).
BaseFloat ClusterKMeans(const std::vector< Clusterable *> &points, int32 num_clust, std::vector< Clusterable *> *clusters_out, std::vector< int32 > *assignments_out, ClusterKMeansOptions cfg)
ClusterKMeans is a K-means-like clustering algorithm.
BaseFloat ClusterBottomUpCompartmentalized(const std::vector< std::vector< Clusterable *> > &points, BaseFloat thresh, int32 min_clust, std::vector< std::vector< Clusterable *> > *clusters_out, std::vector< std::vector< int32 > > *assignments_out)
This is a bottom-up clustering where the points are pre-clustered in a set of compartments, such that only points in the same compartment are clustered together.
void EnsureClusterableVectorNotNull(std::vector< Clusterable *> *stats)
Fills in any (NULL) holes in the "stats" vector with empty stats, because certain algorithms require non...
BaseFloat ClusterBottomUp(const std::vector< Clusterable *> &points, BaseFloat max_merge_thresh, int32 min_clust, std::vector< Clusterable *> *clusters_out, std::vector< int32 > *assignments_out)
A bottom-up clustering algorithm.
void AddToClusters(const std::vector< Clusterable *> &stats, const std::vector< int32 > &assignments, std::vector< Clusterable *> *clusters)
Given stats and a vector "assignments" of the same size (which maps each entry to a cluster index), sums the stats up into "clusters". It adds to any stats already present in "clusters" (although typically "clusters" will be empty when called), and it extends "clusters" with NULL pointers for any unseen indices.
void AddToClustersOptimized(const std::vector< Clusterable *> &stats, const std::vector< int32 > &assignments, const Clusterable &total, std::vector< Clusterable *> *clusters)
AddToClustersOptimized does the same as AddToClusters (it sums up the stats within each cluster...
BaseFloat ClusterTopDown(const std::vector< Clusterable *> &points, int32 max_clust, std::vector< Clusterable *> *clusters_out, std::vector< int32 > *assignments_out, TreeClusterOptions cfg)
A clustering algorithm that internally uses TreeCluster, but does not give you the information about ...
BaseFloat SumClusterableObjf(const std::vector< Clusterable *> &vec)
Returns the total objective function after adding up all the statistics in the vector (pointers may be NULL).
BaseFloat TreeCluster(const std::vector< Clusterable *> &points, int32 max_clust, std::vector< Clusterable *> *clusters_out, std::vector< int32 > *assignments_out, std::vector< int32 > *clust_assignments_out, int32 *num_leaves_out, TreeClusterOptions cfg)
TreeCluster is a top-down clustering algorithm, using a binary tree (not necessarily balanced)...
ClusterKMeansOptions kmeans_cfg
Clusterable * SumClusterable(const std::vector< Clusterable *> &vec)
Sums stats (ptrs may be NULL). Returns NULL if no non-NULL stats present.
RefineClustersOptions refine_cfg