compute-vad-from-frame-likes.cc File Reference
Include dependency graph for compute-vad-from-frame-likes.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
 

Functions

void PrepareMap (const std::string &map_rxfilename, int32 num_classes, unordered_map< int32, int32 > *map)
 PrepareMap creates a map that specifies the mapping between the input and output class labels. More...
 
void PreparePriors (const std::string &priors_str, int32 num_classes, std::vector< BaseFloat > *priors)
 PreparePriors creates a table specifying the priors for each class. More...
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 98 of file compute-vad-from-frame-likes.cc.

References VectorBase< Real >::Dim(), ParseOptions::GetArg(), rnnlm::i, rnnlm::j, KALDI_ERR, KALDI_LOG, KALDI_WARN, ParseOptions::NumArgs(), kaldi::PrepareMap(), kaldi::PreparePriors(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), and TableWriter< Holder >::Write().

98  {
99  using namespace kaldi;
100  typedef kaldi::int32 int32;
101  try {
102  const char *usage =
103  "This program computes frame-level voice activity decisions from a\n"
104  "set of input frame-level log-likelihoods. Usually, these\n"
105  "log-likelihoods are the output of fgmm-global-get-frame-likes.\n"
106  "Frames are assigned labels according to the class for which the\n"
107  "log-likelihood (optionally weighted by a prior) is maximal. The\n"
108  "class labels are determined by the order of inputs on the command\n"
109  "line. See options for more details.\n"
110  "\n"
111  "Usage: compute-vad-from-frame-likes [options] <likes-rspecifier-1>\n"
112  " ... <likes-rspecifier-n> <vad-wspecifier>\n"
113  "e.g.: compute-vad-from-frame-likes --map=label_map.txt\n"
114  " scp:likes1.scp scp:likes2.scp ark:vad.ark\n"
115  "See also: fgmm-global-get-frame-likes, compute-vad, merge-vads\n";
116 
117  ParseOptions po(usage);
118  std::string map_rxfilename;
119  std::string priors_str;
120 
121  po.Register("map", &map_rxfilename, "Table that defines the frame-level "
122  "labels. For each row, the first field is the zero-based index of the "
123  "input likelihood archive and the second field is the associated "
124  "integer label.");
125 
126  po.Register("priors", &priors_str, "Comma-separated list that specifies "
127  "the priors for each class. The order of the floats corresponds to "
128  "the index of the input archives. E.g., --priors=0.5,0.2,0.3");
129 
130  po.Read(argc, argv);
131  if (po.NumArgs() < 3) {
132  po.PrintUsage();
133  exit(1);
134  }
135 
136  unordered_map<int32, int32> map;
137  std::vector<BaseFloat> priors;
138  int32 num_classes = po.NumArgs() - 1;
139  PrepareMap(map_rxfilename, num_classes, &map);
140  PreparePriors(priors_str, num_classes, &priors);
141 
142  SequentialBaseFloatVectorReader first_reader(po.GetArg(1));
143  std::vector<RandomAccessBaseFloatVectorReader *> readers;
144  std::string vad_wspecifier = po.GetArg(po.NumArgs());
145  BaseFloatVectorWriter vad_writer(vad_wspecifier);
146 
147  for (int32 i = 2; i < po.NumArgs(); i++) {
149  = new RandomAccessBaseFloatVectorReader(po.GetArg(i));
150  readers.push_back(reader);
151  }
152 
153  int32 num_done = 0, num_err = 0;
154  for (;!first_reader.Done(); first_reader.Next()) {
155  std::string utt = first_reader.Key();
156  Vector<BaseFloat> like(first_reader.Value());
157  int32 like_dim = like.Dim();
158  std::vector<Vector<BaseFloat> > likes;
159  likes.push_back(like);
160  if (like_dim == 0) {
161  KALDI_WARN << "Empty vector for utterance " << utt;
162  num_err++;
163  continue;
164  }
165  for (int32 i = 0; i < num_classes - 1; i++) {
166  if (!readers[i]->HasKey(utt)) {
167  KALDI_WARN << "No vector for utterance " << utt;
168  num_err++;
169  continue;
170  }
171  Vector<BaseFloat> other_like(readers[i]->Value(utt));
172  if (like_dim != other_like.Dim()) {
173  KALDI_WARN << "Dimension mismatch in input vectors in " << utt
174  << ": " << like_dim << " vs. " << other_like.Dim();
175  num_err++;
176  continue;
177  }
178  likes.push_back(other_like);
179  }
180 
181  Vector<BaseFloat> vad_result(like_dim);
182  for (int32 i = 0; i < like.Dim(); i++) {
183  int32 max_indx = 0;
184  BaseFloat max_post = likes[0](i) + priors[0];
185  for (int32 j = 0; j < num_classes; j++) {
186  BaseFloat other_post = likes[j](i) + priors[j];
187  if (other_post > max_post) {
188  max_indx = j;
189  max_post = other_post;
190  }
191  }
192  unordered_map<int32, int32>::const_iterator iter = map.find(max_indx);
193  if (iter == map.end()) {
194  KALDI_ERR << "Missing label " << max_indx << " in map";
195  } else {
196  vad_result(i) = iter->second;
197  }
198  }
199  vad_writer.Write(utt, vad_result);
200  num_done++;
201  }
202 
203  for (int32 i = 0; i < num_classes - 1; i++)
204  delete readers[i];
205 
206  KALDI_LOG << "Applied frame-level likelihood-based voice activity "
207  << "detection; processed " << num_done
208  << " utterances successfully; " << num_err
209  << " had empty features.";
210  return (num_done != 0 ? 0 : 1);
211  } catch(const std::exception &e) {
212  std::cerr << e.what();
213  return -1;
214  }
215 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void PreparePriors(const std::string &priors_str, int32 num_classes, std::vector< BaseFloat > *priors)
PreparePriors creates a table specifying the priors for each class.
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
void PrepareMap(const std::string &map_rxfilename, int32 num_classes, unordered_map< int32, int32 > *map)
PrepareMap creates a map that specifies the mapping between the input and output class labels...
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_LOG
Definition: kaldi-error.h:153
RandomAccessTableReader< KaldiObjectHolder< Vector< BaseFloat > > > RandomAccessBaseFloatVectorReader
Definition: table-types.h:62