22 #ifndef KALDI_IVECTOR_AGGLOMERATIVE_CLUSTERING_H_ 23 #define KALDI_IVECTOR_AGGLOMERATIVE_CLUSTERING_H_ 28 #include <unordered_map> 48 : id(id), parent1(p1), parent2(p2), utt_ids(utts) {
61 int32 first_pass_max_points,
63 std::vector<int32> *assignments_out)
64 : costs_(costs), threshold_(threshold), min_clusters_(min_clusters),
65 first_pass_max_points_(first_pass_max_points),
66 assignments_(assignments_out) {
73 max_cluster_size_ = ceil(num_points_ * max_cluster_fraction);
84 second_pass_count_ = 0;
91 void ClusterSinglePass();
94 void ClusterTwoPass();
100 std::pair<int32, int32> DecodePair(uint32 key);
102 void InitializeClusters(
int32 first,
int32 last);
104 void ComputeClusters(
int32 min_clusters);
106 void AddClustersToSecondPass();
108 void AssignClusters();
126 typedef std::priority_queue<QueueElement, std::vector<QueueElement>,
184 int32 first_pass_max_points,
186 std::vector<int32> *assignments_out);
190 #endif // KALDI_IVECTOR_AGGLOMERATIVE_CLUSTERING_H_ This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
std::unordered_map< int32, AhcCluster * > clusters_map_
std::set< int32 > second_pass_active_clusters_
const Matrix< BaseFloat > & costs_
std::vector< int32 > * assignments_
std::unordered_map< uint32, BaseFloat > second_pass_cluster_cost_map_
AhcCluster(int32 id, int32 p1, int32 p2, std::vector< int32 > utts)
std::unordered_map< int32, AhcCluster * > second_pass_clusters_map_
AgglomerativeClusterer(const Matrix< BaseFloat > &costs, BaseFloat threshold, int32 min_clusters, int32 first_pass_max_points, BaseFloat max_cluster_fraction, std::vector< int32 > *assignments_out)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
QueueType second_pass_queue_
void AgglomerativeCluster(const Matrix< BaseFloat > &costs, BaseFloat threshold, int32 min_clusters, int32 first_pass_max_points, BaseFloat max_cluster_fraction, std::vector< int32 > *assignments_out)
This is the function that is called to perform the agglomerative clustering.
AhcCluster is the cluster object for the agglomerative clustering.
std::pair< BaseFloat, uint32 > QueueElement
std::priority_queue< QueueElement, std::vector< QueueElement >, std::greater< QueueElement > > QueueType
std::set< int32 > active_clusters_
int32 first_pass_max_points_
The AgglomerativeClusterer class contains the necessary mechanisms for the actual clustering algorith...
std::unordered_map< uint32, BaseFloat > cluster_cost_map_
std::vector< int32 > utt_ids