compare-feats.cc
Go to the documentation of this file.
1 // featbin/compare-feats.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 // 2014 Mobvoi Inc. (author: Minhua Wu)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABILITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "matrix/kaldi-matrix.h"
25 #include "matrix/kaldi-vector.h"
26 
27 
28 int main(int argc, char *argv[]) {
29  try {
30  using namespace kaldi;
31 
32  const char *usage =
33  "Computes relative difference between two sets of features\n"
34  "per dimension and an average difference\n"
35  "Can be used to figure out how different two sets of features are.\n"
36  "Inputs must have same dimension. Prints to stdout a similarity\n"
37  "metric vector that is 1.0 per dimension if the features identical,\n"
38  "and <1.0 otherwise, and an average overall similarity value.\n"
39  "\n"
40  "Usage: compare-feats [options] <in-rspecifier1> <in-rspecifier2>\n"
41  "e.g.: compare-feats ark:1.ark ark:2.ark\n";
42 
43  ParseOptions po(usage);
44 
45  BaseFloat threshold = 0.99;
46  po.Register("threshold", &threshold, "Similarity threshold, affects "
47  "return status");
48 
49  po.Read(argc, argv);
50 
51  if (po.NumArgs() != 2) {
52  po.PrintUsage();
53  exit(1);
54  }
55 
56  std::string rspecifier1 = po.GetArg(1), rspecifier2 = po.GetArg(2);
57 
58  int32 num_done = 0, num_err = 0, Dim = 0;
59  Vector<double> prod1, prod2, cross_prod, similarity_metric;
60  double overall_similarity = 0;
61 
62  SequentialBaseFloatMatrixReader feat_reader1(rspecifier1);
63  RandomAccessBaseFloatMatrixReader feat_reader2(rspecifier2);
64 
65  for (; !feat_reader1.Done(); feat_reader1.Next()) {
66  std::string utt = feat_reader1.Key();
67  Matrix<BaseFloat> feat1 (feat_reader1.Value());
68 
69 
70  if (!feat_reader2.HasKey(utt)) {
71  KALDI_WARN << "Second table has no feature for utterance "
72  << utt;
73  num_err++;
74  continue;
75  }
76  Matrix<BaseFloat> feat2 (feat_reader2.Value(utt));
77  if (feat1.NumCols() != feat2.NumCols()) {
78  KALDI_WARN << "Feature dimensions differ for utterance "
79  << utt << ", " << feat1.NumCols() << " vs. "
80  << feat2.NumCols() << ", skipping utterance."
81  << utt;
82  num_err++;
83  continue;
84  }
85 
86  if (num_done == 0){
87  Dim=feat1.NumCols();
88  prod1.Resize(Dim);
89  prod2.Resize(Dim);
90  cross_prod.Resize(Dim);
91  similarity_metric.Resize(Dim);
92  }
93 
94  Vector<BaseFloat> feat1_col(feat1.NumRows()), feat2_col(feat2.NumRows());
95  for (MatrixIndexT i = 0; i < feat1.NumCols(); i++){
96  feat1_col.CopyColFromMat(feat1, i);
97  feat2_col.CopyColFromMat(feat2, i);
98  prod1(i) += VecVec(feat1_col, feat1_col);
99  prod2(i) += VecVec(feat2_col, feat2_col);
100  cross_prod(i) += VecVec(feat1_col, feat2_col);
101  }
102  num_done++;
103  }
104 
105  KALDI_LOG << "self-product of 1st features for each column dimension: " << prod1;
106  KALDI_LOG << "self-product of 2nd features for each column dimension: " << prod2;
107  KALDI_LOG << "cross-product for each column dimension: " << cross_prod;
108 
109  prod1.AddVec(1.0, prod2);
110  similarity_metric.AddVecDivVec(2.0, cross_prod, prod1, 0.0);
111  KALDI_LOG << "Similarity metric for each dimension " << similarity_metric
112  << " (1.0 means identical, the smaller the more different)";
113 
114  overall_similarity = similarity_metric.Sum() / static_cast<double>(Dim);
115 
116  KALDI_LOG << "Overall similarity for the two feats is:" << overall_similarity
117  << " (1.0 means identical, the smaller the more different)";
118 
119  KALDI_LOG << "Processed " << num_done << " feature files, "
120  << num_err << " had errors.";
121 
122  bool similar = (overall_similarity >= threshold);
123 
124  if (num_done > 0) {
125  if (similar) {
126  KALDI_LOG << "Features are considered similar since "
127  << overall_similarity << " >= " << threshold;
128  } else {
129  KALDI_LOG << "Features are considered dissimilar since "
130  << overall_similarity << " < " << threshold;
131  }
132  }
133 
134  return (num_done > 0 && similar) ? 0 : 1;
135  } catch(const std::exception &e) {
136  std::cerr << e.what();
137  return -1;
138  }
139 }
140 
141 
142 /*
143  tested with:
144 compare-feats 'ark:echo foo [ 1.0 2.0 ]|' 'ark:echo foo [ 1.0 2.0 ]|'
145 */
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
kaldi::int32 int32
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
int32 MatrixIndexT
Definition: matrix-common.h:98
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
Real Sum() const
Returns sum of the elements.
int main(int argc, char *argv[])
void AddVecDivVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element quotient of two vectors.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void CopyColFromMat(const MatrixBase< OtherReal > &M, MatrixIndexT col)
Extracts a column of the matrix M.
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...