CompartmentalizedBottomUpClusterer Class Reference
Collaboration diagram for CompartmentalizedBottomUpClusterer:

Public Member Functions

 CompartmentalizedBottomUpClusterer (const vector< vector< Clusterable *> > &points, BaseFloat max_merge_thresh, int32 min_clust)
 
BaseFloat Cluster (vector< vector< Clusterable *> > *clusters_out, vector< vector< int32 > > *assignments_out)
 
 ~CompartmentalizedBottomUpClusterer ()
 

Private Types

typedef std::priority_queue< CompBotClustElem, std::vector< CompBotClustElem >, std::greater< CompBotClustElem > > QueueType
 

Private Member Functions

void Renumber (int32 compartment)
 
void InitializeAssignments ()
 
void SetInitialDistances ()
 Sets up distances and queue. More...
 
bool CanMerge (int32 compartment, int32 i, int32 j, BaseFloat dist)
 CanMerge returns true if i and j are existing clusters, and the distance (negated objf-change) "dist" is accurate (i.e. More...
 
BaseFloat MergeClusters (int32 compartment, int32 i, int32 j)
 Merge j into i and delete j. Returns obj function change. More...
 
void ReconstructQueue ()
 Reconstructs the priority queue from the distances. More...
 
void SetDistance (int32 compartment, int32 i, int32 j)
 

Private Attributes

const vector< vector< Clusterable * > > & points_
 
BaseFloat max_merge_thresh_
 
int32 min_clust_
 
vector< vector< Clusterable * > > clusters_
 
vector< vector< int32 > > assignments_
 
vector< vector< BaseFloat > > dist_vec_
 
int32 ncompartments_
 
int32 nclusters_
 
vector< int32npoints_
 
QueueType queue_
 

Detailed Description

Definition at line 442 of file cluster-utils.cc.

Member Typedef Documentation

◆ QueueType

typedef std::priority_queue< CompBotClustElem, std::vector<CompBotClustElem>, std::greater<CompBotClustElem> > QueueType
private

Definition at line 491 of file cluster-utils.cc.

Constructor & Destructor Documentation

◆ CompartmentalizedBottomUpClusterer()

CompartmentalizedBottomUpClusterer ( const vector< vector< Clusterable *> > &  points,
BaseFloat  max_merge_thresh,
int32  min_clust 
)
inline

Definition at line 444 of file cluster-utils.cc.

References BottomUpClusterer::Cluster(), BottomUpClusterer::nclusters_, and BottomUpClusterer::npoints_.

447  : points_(points), max_merge_thresh_(max_merge_thresh),
448  min_clust_(min_clust) {
449  ncompartments_ = points.size();
450  nclusters_ = 0;
451  npoints_.resize(ncompartments_);
452  for (int32 comp = 0; comp < ncompartments_; comp++) {
453  npoints_[comp] = points[comp].size();
454  nclusters_ += npoints_[comp];
455  }
456  }
kaldi::int32 int32
const vector< vector< Clusterable * > > & points_

◆ ~CompartmentalizedBottomUpClusterer()

Definition at line 459 of file cluster-utils.cc.

References BottomUpClusterer::CanMerge(), BottomUpClusterer::clusters_, kaldi::DeletePointers(), rnnlm::i, BottomUpClusterer::InitializeAssignments(), rnnlm::j, BottomUpClusterer::MergeClusters(), BottomUpClusterer::ReconstructQueue(), BottomUpClusterer::Renumber(), BottomUpClusterer::SetDistance(), and BottomUpClusterer::SetInitialDistances().

459  {
460  for (vector< vector<Clusterable*> >::iterator itr = clusters_.begin(),
461  end = clusters_.end(); itr != end; ++itr)
462  DeletePointers(&(*itr));
463  }
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
Definition: stl-utils.h:184
vector< vector< Clusterable * > > clusters_

Member Function Documentation

◆ CanMerge()

bool CanMerge ( int32  compartment,
int32  i,
int32  j,
BaseFloat  dist 
)
private

CanMerge returns true if i and j are existing clusters, and the distance (negated objf-change) "dist" is accurate (i.e.

not outdated).

Definition at line 584 of file cluster-utils.cc.

References BottomUpClusterer::clusters_, BottomUpClusterer::dist_vec_, rnnlm::j, KALDI_ASSERT, and BottomUpClusterer::npoints_.

585  {
586  KALDI_ASSERT(comp < ncompartments_ && i < npoints_[comp] && j < i);
587  if (clusters_[comp][i] == NULL || clusters_[comp][j] == NULL)
588  return false;
589  BaseFloat cached_dist = dist_vec_[comp][(i * (i - 1)) / 2 + j];
590  return (std::fabs(cached_dist - dist) <= 1.0e-05 * std::fabs(dist));
591 }
float BaseFloat
Definition: kaldi-types.h:29
vector< vector< Clusterable * > > clusters_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
vector< vector< BaseFloat > > dist_vec_

◆ Cluster()

BaseFloat Cluster ( vector< vector< Clusterable *> > *  clusters_out,
vector< vector< int32 > > *  assignments_out 
)

Definition at line 495 of file cluster-utils.cc.

References BottomUpClusterer::assignments_, BottomUpClusterer::CanMerge(), BottomUpClusterer::clusters_, CompBotClustElem::compartment, CompBotClustElem::dist, BottomUpClusterer::InitializeAssignments(), BottomUpClusterer::MergeClusters(), BottomUpClusterer::min_clust_, BottomUpClusterer::nclusters_, CompBotClustElem::point1, CompBotClustElem::point2, BottomUpClusterer::queue_, BottomUpClusterer::Renumber(), and BottomUpClusterer::SetInitialDistances().

Referenced by kaldi::ClusterBottomUpCompartmentalized().

497  {
500  BaseFloat total_obj_change = 0;
501 
502  while (nclusters_ > min_clust_ && !queue_.empty()) {
503  CompBotClustElem qelem = queue_.top();
504  queue_.pop();
505  if (CanMerge(qelem.compartment, qelem.point1, qelem.point2, qelem.dist))
506  total_obj_change += MergeClusters(qelem.compartment, qelem.point1,
507  qelem.point2);
508  }
509  for (int32 comp = 0; comp < ncompartments_; comp++)
510  Renumber(comp);
511  if (clusters_out != NULL) clusters_out->swap(clusters_);
512  if (assignments_out != NULL) assignments_out->swap(assignments_);
513  return total_obj_change;
514 }
kaldi::int32 int32
vector< vector< int32 > > assignments_
float BaseFloat
Definition: kaldi-types.h:29
vector< vector< Clusterable * > > clusters_
bool CanMerge(int32 compartment, int32 i, int32 j, BaseFloat dist)
CanMerge returns true if i and j are existing clusters, and the distance (negated objf-change) "dist"...
BaseFloat MergeClusters(int32 compartment, int32 i, int32 j)
Merge j into i and delete j. Returns obj function change.
void SetInitialDistances()
Sets up distances and queue.

◆ InitializeAssignments()

void InitializeAssignments ( )
private

Definition at line 561 of file cluster-utils.cc.

References BottomUpClusterer::assignments_, BottomUpClusterer::clusters_, rnnlm::i, BottomUpClusterer::npoints_, and BottomUpClusterer::points_.

561  {
562  clusters_.resize(ncompartments_);
564  for (int32 comp = 0; comp < ncompartments_; comp++) {
565  clusters_[comp].resize(npoints_[comp]);
566  assignments_[comp].resize(npoints_[comp]);
567  for (int32 i = 0; i < npoints_[comp]; i++) { // initialize as 1-1 mapping.
568  clusters_[comp][i] = points_[comp][i]->Copy();
569  assignments_[comp][i] = i;
570  }
571  }
572 }
kaldi::int32 int32
vector< vector< int32 > > assignments_
const vector< vector< Clusterable * > > & points_
vector< vector< Clusterable * > > clusters_

◆ MergeClusters()

BaseFloat MergeClusters ( int32  compartment,
int32  i,
int32  j 
)
private

Merge j into i and delete j. Returns obj function change.

Definition at line 593 of file cluster-utils.cc.

References BottomUpClusterer::assignments_, BottomUpClusterer::clusters_, BottomUpClusterer::dist_vec_, rnnlm::i, rnnlm::j, KALDI_ASSERT, BottomUpClusterer::nclusters_, BottomUpClusterer::npoints_, BottomUpClusterer::queue_, BottomUpClusterer::ReconstructQueue(), and BottomUpClusterer::SetDistance().

594  {
595  KALDI_ASSERT(comp < ncompartments_ && i < npoints_[comp] && j < i);
596  clusters_[comp][i]->Add(*(clusters_[comp][j]));
597  delete clusters_[comp][j];
598  clusters_[comp][j] = NULL;
599  // note that we may have to follow the chain within "assignment_" to get
600  // final assignments.
601  assignments_[comp][j] = i;
602  // objective function change.
603  BaseFloat ans = -dist_vec_[comp][(i * (i - 1)) / 2 + j];
604  nclusters_--;
605  // Now update "distances".
606  for (int32 k = 0; k < npoints_[comp]; k++) {
607  if (k != i && clusters_[comp][k] != NULL) {
608  if (k < i)
609  SetDistance(comp, i, k); // SetDistance requires k < i.
610  else
611  SetDistance(comp, k, i);
612  }
613  }
614  // Control memory use by getting rid of orphaned queue entries every time
615  // it's at least twice the maximum possible size.
616  if (queue_.size() >= static_cast<size_t> (nclusters_ * nclusters_)) {
618  }
619  return ans;
620 }
void ReconstructQueue()
Reconstructs the priority queue from the distances.
kaldi::int32 int32
vector< vector< int32 > > assignments_
float BaseFloat
Definition: kaldi-types.h:29
vector< vector< Clusterable * > > clusters_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void SetDistance(int32 compartment, int32 i, int32 j)
vector< vector< BaseFloat > > dist_vec_

◆ ReconstructQueue()

void ReconstructQueue ( )
private

Reconstructs the priority queue from the distances.

Definition at line 622 of file cluster-utils.cc.

References BottomUpClusterer::clusters_, rnnlm::i, rnnlm::j, BottomUpClusterer::npoints_, BottomUpClusterer::queue_, BottomUpClusterer::SetDistance(), and kaldi::swap().

622  {
623  // empty queue [since there is no clear()]
624  {
625  QueueType tmp;
626  std::swap(tmp, queue_);
627  }
628  for (int32 comp = 0; comp < ncompartments_; comp++) {
629  for (int32 i = 0; i < npoints_[comp]; i++) {
630  if (clusters_[comp][i] == NULL) continue;
631  for (int32 j = 0; j < i; j++) {
632  if (clusters_[comp][j] == NULL) continue;
633  SetDistance(comp, i, j);
634  }
635  }
636  }
637 }
std::priority_queue< CompBotClustElem, std::vector< CompBotClustElem >, std::greater< CompBotClustElem > > QueueType
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
vector< vector< Clusterable * > > clusters_
void SetDistance(int32 compartment, int32 i, int32 j)

◆ Renumber()

void Renumber ( int32  compartment)
private

Definition at line 516 of file cluster-utils.cc.

References BottomUpClusterer::assignments_, BottomUpClusterer::clusters_, rnnlm::i, KALDI_ASSERT, BottomUpClusterer::nclusters_, BottomUpClusterer::npoints_, BottomUpClusterer::queue_, and kaldi::swap().

516  {
517  // first free up memory by getting rid of queue. this is a special trick
518  // to force STL to free memory.
519  {
520  QueueType tmp;
521  std::swap(tmp, queue_);
522  }
523 
524  // First find the number of surviving clusters in the compartment.
525  int32 clusts_in_compartment = 0;
526  for (int32 i = 0; i < npoints_[comp]; i++) {
527  if (clusters_[comp][i] != NULL)
528  clusts_in_compartment++;
529  }
530  KALDI_ASSERT(clusts_in_compartment <= nclusters_);
531 
532  // mapping from intermediate to final clusters.
533  vector<uint_smaller> mapping(npoints_[comp], static_cast<uint_smaller> (-1));
534  vector<Clusterable*> new_clusters(clusts_in_compartment);
535 
536  // Now copy the surviving clusters in a fresh array.
537  clusts_in_compartment = 0;
538  for (int32 i = 0; i < npoints_[comp]; i++) {
539  if (clusters_[comp][i] != NULL) {
540  new_clusters[clusts_in_compartment] = clusters_[comp][i];
541  mapping[i] = clusts_in_compartment;
542  clusts_in_compartment++;
543  }
544  }
545 
546  // Next, process the assignments.
547  std::vector<int32> new_assignments(npoints_[comp]);
548  for (int32 i = 0; i < npoints_[comp]; i++) {
549  int32 ii = i;
550  while (assignments_[comp][ii] != ii)
551  ii = assignments_[comp][ii]; // follow the chain.
552  // cannot assign to nonexistent cluster.
553  KALDI_ASSERT(clusters_[comp][ii] != NULL);
554  KALDI_ASSERT(mapping[ii] != static_cast<uint_smaller>(-1));
555  new_assignments[i] = mapping[ii];
556  }
557  clusters_[comp].swap(new_clusters);
558  assignments_[comp].swap(new_assignments);
559 }
std::priority_queue< CompBotClustElem, std::vector< CompBotClustElem >, std::greater< CompBotClustElem > > QueueType
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
vector< vector< int32 > > assignments_
vector< vector< Clusterable * > > clusters_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ SetDistance()

void SetDistance ( int32  compartment,
int32  i,
int32  j 
)
private

Definition at line 639 of file cluster-utils.cc.

References BottomUpClusterer::clusters_, BottomUpClusterer::dist_vec_, rnnlm::i, rnnlm::j, KALDI_ASSERT, BottomUpClusterer::max_merge_thresh_, BottomUpClusterer::npoints_, and BottomUpClusterer::queue_.

640  {
641  KALDI_ASSERT(comp < ncompartments_ && i < npoints_[comp] && j < i);
642  KALDI_ASSERT(clusters_[comp][i] != NULL && clusters_[comp][j] != NULL);
643  BaseFloat dist = clusters_[comp][i]->Distance(*(clusters_[comp][j]));
644  dist_vec_[comp][(i * (i - 1)) / 2 + j] = dist;
645  if (dist < max_merge_thresh_) {
646  queue_.push(CompBotClustElem(dist, comp, static_cast<uint_smaller>(i),
647  static_cast<uint_smaller>(j)));
648  }
649 }
float BaseFloat
Definition: kaldi-types.h:29
vector< vector< Clusterable * > > clusters_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
vector< vector< BaseFloat > > dist_vec_

◆ SetInitialDistances()

void SetInitialDistances ( )
private

Sets up distances and queue.

Definition at line 574 of file cluster-utils.cc.

References BottomUpClusterer::dist_vec_, rnnlm::i, rnnlm::j, BottomUpClusterer::npoints_, and BottomUpClusterer::SetDistance().

574  {
575  dist_vec_.resize(ncompartments_);
576  for (int32 comp = 0; comp < ncompartments_; comp++) {
577  dist_vec_[comp].resize((npoints_[comp] * (npoints_[comp] - 1)) / 2);
578  for (int32 i = 0; i < npoints_[comp]; i++)
579  for (int32 j = 0; j < i; j++)
580  SetDistance(comp, i, j);
581  }
582 }
kaldi::int32 int32
void SetDistance(int32 compartment, int32 i, int32 j)
vector< vector< BaseFloat > > dist_vec_

Member Data Documentation

◆ assignments_

vector< vector<int32> > assignments_
private

Definition at line 485 of file cluster-utils.cc.

◆ clusters_

vector< vector<Clusterable*> > clusters_
private

Definition at line 484 of file cluster-utils.cc.

◆ dist_vec_

vector< vector<BaseFloat> > dist_vec_
private

Definition at line 486 of file cluster-utils.cc.

◆ max_merge_thresh_

BaseFloat max_merge_thresh_
private

Definition at line 482 of file cluster-utils.cc.

◆ min_clust_

int32 min_clust_
private

Definition at line 483 of file cluster-utils.cc.

◆ nclusters_

int32 nclusters_
private

Definition at line 487 of file cluster-utils.cc.

◆ ncompartments_

int32 ncompartments_
private

Definition at line 487 of file cluster-utils.cc.

◆ npoints_

vector<int32> npoints_
private

Definition at line 488 of file cluster-utils.cc.

◆ points_

const vector< vector<Clusterable*> >& points_
private

Definition at line 481 of file cluster-utils.cc.

◆ queue_

QueueType queue_
private

Definition at line 492 of file cluster-utils.cc.


The documentation for this class was generated from the following file: