49 int32 subset_size = ceil(num_points / num_subsets);
73 return (static_cast<uint32>(i) << 16) +
static_cast<uint32
>(
j);
75 return (static_cast<uint32>(j) << 16) + static_cast<uint32>(i);
79 return std::make_pair(static_cast<int32>(key >> 16),
80 static_cast<int32>(key & 0x0000FFFFu));
90 for (
int32 i = first;
i < last;
i++) {
92 std::vector<int32> ids;
104 queue_.push(std::make_pair(cost, key));
111 std::pair<BaseFloat, uint32> pr =
queue_.top();
139 std::set<int32>::iterator it;
145 cluster_cost_map_[new_key] = new_cost;
148 queue_.push(std::make_pair(new_cost / norm, new_key));
159 std::set<int32>::iterator it1, it2;
172 std::vector<int32>::iterator utt_it1, utt_it2;
173 for (utt_it1 = clust1->
utt_ids.begin();
174 utt_it1 != clust1->
utt_ids.end(); ++utt_it1) {
175 for (utt_it2 = clust2->
utt_ids.begin();
176 utt_it2 != clust2->
utt_ids.end(); ++utt_it2) {
177 new_cost +=
costs_(*utt_it1, *utt_it2);
208 std::set<int32>::iterator it;
215 std::vector<int32>::iterator utt_it;
216 for (utt_it = cluster->
utt_ids.begin();
217 utt_it != cluster->
utt_ids.end(); ++utt_it)
227 int32 first_pass_max_points,
229 std::vector<int32> *assignments_out) {
231 KALDI_ASSERT(max_cluster_fraction >= 1.0 / min_clusters);
233 first_pass_max_points, max_cluster_fraction,
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void InitializeClusters(int32 first, int32 last)
std::unordered_map< int32, AhcCluster * > clusters_map_
std::set< int32 > second_pass_active_clusters_
void AddClustersToSecondPass()
const Matrix< BaseFloat > & costs_
std::vector< int32 > * assignments_
std::unordered_map< uint32, BaseFloat > second_pass_cluster_cost_map_
std::unordered_map< int32, AhcCluster * > second_pass_clusters_map_
void MergeClusters(int32 i, int32 j)
std::pair< int32, int32 > DecodePair(uint32 key)
#define KALDI_ASSERT(cond)
QueueType second_pass_queue_
void AgglomerativeCluster(const Matrix< BaseFloat > &costs, BaseFloat threshold, int32 min_clusters, int32 first_pass_max_points, BaseFloat max_cluster_fraction, std::vector< int32 > *assignments_out)
This is the function that is called to perform the agglomerative clustering.
AhcCluster is the cluster object for the agglomerative clustering.
uint32 EncodePair(int32 i, int32 j)
std::priority_queue< QueueElement, std::vector< QueueElement >, std::greater< QueueElement > > QueueType
std::set< int32 > active_clusters_
int32 first_pass_max_points_
The AgglomerativeClusterer class contains the necessary mechanisms for the actual clustering algorith...
void ComputeClusters(int32 min_clusters)
std::unordered_map< uint32, BaseFloat > cluster_cost_map_
std::vector< int32 > utt_ids