All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
compare-feats.cc File Reference
Include dependency graph for compare-feats.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

int main ( int  argc,
char *  argv[] 
)

Definition at line 28 of file compare-feats.cc.

References VectorBase< Real >::AddVec(), VectorBase< Real >::AddVecDivVec(), VectorBase< Real >::CopyColFromMat(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), Vector< Real >::Resize(), VectorBase< Real >::Sum(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), and kaldi::VecVec().

28  {
29  try {
30  using namespace kaldi;
31 
32  const char *usage =
33  "Computes relative difference between two sets of features\n"
34  "per dimension and an average difference\n"
35  "Can be used to figure out how different two sets of features are.\n"
36  "Inputs must have same dimension. Prints to stdout a similarity\n"
37  "metric vector that is 1.0 per dimension if the features identical,\n"
38  "and <1.0 otherwise, and an average overall similarity value.\n"
39  "\n"
40  "Usage: compare-feats [options] <in-rspecifier1> <in-rspecifier2>\n"
41  "e.g.: compare-feats ark:1.ark ark:2.ark\n";
42 
43  ParseOptions po(usage);
44 
45  BaseFloat threshold = 0.99;
46  po.Register("threshold", &threshold, "Similarity threshold, affects "
47  "return status");
48 
49  po.Read(argc, argv);
50 
51  if (po.NumArgs() != 2) {
52  po.PrintUsage();
53  exit(1);
54  }
55 
56  std::string rspecifier1 = po.GetArg(1), rspecifier2 = po.GetArg(2);
57 
58  int32 num_done = 0, num_err = 0, Dim = 0;
59  Vector<double> prod1, prod2, cross_prod, similarity_metric;
60  double overall_similarity = 0;
61 
62  SequentialBaseFloatMatrixReader feat_reader1(rspecifier1);
63  RandomAccessBaseFloatMatrixReader feat_reader2(rspecifier2);
64 
65  for (; !feat_reader1.Done(); feat_reader1.Next()) {
66  std::string utt = feat_reader1.Key();
67  Matrix<BaseFloat> feat1 (feat_reader1.Value());
68 
69 
70  if (!feat_reader2.HasKey(utt)) {
71  KALDI_WARN << "Second table has no feature for utterance "
72  << utt;
73  num_err++;
74  continue;
75  }
76  Matrix<BaseFloat> feat2 (feat_reader2.Value(utt));
77  if (feat1.NumCols() != feat2.NumCols()) {
78  KALDI_WARN << "Feature dimensions differ for utterance "
79  << utt << ", " << feat1.NumCols() << " vs. "
80  << feat2.NumCols() << ", skipping utterance."
81  << utt;
82  num_err++;
83  continue;
84  }
85 
86  if (num_done == 0){
87  Dim=feat1.NumCols();
88  prod1.Resize(Dim);
89  prod2.Resize(Dim);
90  cross_prod.Resize(Dim);
91  similarity_metric.Resize(Dim);
92  }
93 
94  Vector<BaseFloat> feat1_col(feat1.NumRows()), feat2_col(feat2.NumRows());
95  for (MatrixIndexT i = 0; i < feat1.NumCols(); i++){
96  feat1_col.CopyColFromMat(feat1, i);
97  feat2_col.CopyColFromMat(feat2, i);
98  prod1(i) += VecVec(feat1_col, feat1_col);
99  prod2(i) += VecVec(feat2_col, feat2_col);
100  cross_prod(i) += VecVec(feat1_col, feat2_col);
101  }
102  num_done++;
103  }
104 
105  KALDI_LOG << "self-product of 1st features for each column dimension: " << prod1;
106  KALDI_LOG << "self-product of 2nd features for each column dimension: " << prod2;
107  KALDI_LOG << "cross-product for each column dimension: " << cross_prod;
108 
109  prod1.AddVec(1.0, prod2);
110  similarity_metric.AddVecDivVec(2.0, cross_prod, prod1, 0.0);
111  KALDI_LOG << "Similarity metric for each dimension " << similarity_metric
112  << " (1.0 means identical, the smaller the more different)";
113 
114  overall_similarity = similarity_metric.Sum() / static_cast<double>(Dim);
115 
116  KALDI_LOG << "Overall similarity for the two feats is:" << overall_similarity
117  << " (1.0 means identical, the smaller the more different)";
118 
119  KALDI_LOG << "Processed " << num_done << " feature files, "
120  << num_err << " had errors.";
121 
122  bool similar = (overall_similarity >= threshold);
123 
124  if (num_done > 0) {
125  if (similar) {
126  KALDI_LOG << "Features are considered similar since "
127  << overall_similarity << " >= " << threshold;
128  } else {
129  KALDI_LOG << "Features are considered dissimilar since "
130  << overall_similarity << " < " << threshold;
131  }
132  }
133 
134  return (num_done > 0 && similar) ? 0 : 1;
135  } catch(const std::exception &e) {
136  std::cerr << e.what();
137  return -1;
138  }
139 }
Relabels neural network egs with the read pdf-id alignments.
Definition: chain.dox:20
Real Sum() const
Returns sum of the elements.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
int32 MatrixIndexT
Definition: matrix-common.h:96
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_WARN
Definition: kaldi-error.h:130
void AddVecDivVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element quotient of two vectors.
void CopyColFromMat(const MatrixBase< OtherReal > &M, MatrixIndexT col)
Extracts a column of the matrix M.
#define KALDI_LOG
Definition: kaldi-error.h:133
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between a and b.
Definition: kaldi-vector.cc:36
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * v (with casting between floats and doubles) ...