99 using namespace kaldi;
103 "This program computes frame-level voice activity decisions from a\n" 104 "set of input frame-level log-likelihoods. Usually, these\n" 105 "log-likelihoods are the output of fgmm-global-get-frame-likes.\n" 106 "Frames are assigned labels according to the class for which the\n" 107 "log-likelihood (optionally weighted by a prior) is maximal. The\n" 108 "class labels are determined by the order of inputs on the command\n" 109 "line. See options for more details.\n" 111 "Usage: compute-vad-from-frame-likes [options] <likes-rspecifier-1>\n" 112 " ... <likes-rspecifier-n> <vad-wspecifier>\n" 113 "e.g.: compute-vad-from-frame-likes --map=label_map.txt\n" 114 " scp:likes1.scp scp:likes2.scp ark:vad.ark\n" 115 "See also: fgmm-global-get-frame-likes, compute-vad, merge-vads\n";
118 std::string map_rxfilename;
119 std::string priors_str;
121 po.Register(
"map", &map_rxfilename,
"Table that defines the frame-level " 122 "labels. For each row, the first field is the zero-based index of the " 123 "input likelihood archive and the second field is the associated " 126 po.Register(
"priors", &priors_str,
"Comma-separated list that specifies " 127 "the priors for each class. The order of the floats corresponds to " 128 "the index of the input archives. E.g., --priors=0.5,0.2,0.3");
131 if (po.NumArgs() < 3) {
136 unordered_map<int32, int32> map;
137 std::vector<BaseFloat> priors;
138 int32 num_classes = po.NumArgs() - 1;
139 PrepareMap(map_rxfilename, num_classes, &map);
143 std::vector<RandomAccessBaseFloatVectorReader *> readers;
144 std::string vad_wspecifier = po.GetArg(po.NumArgs());
147 for (int32
i = 2;
i < po.NumArgs();
i++) {
150 readers.push_back(reader);
153 int32 num_done = 0, num_err = 0;
154 for (;!first_reader.Done(); first_reader.Next()) {
155 std::string utt = first_reader.Key();
157 int32 like_dim = like.
Dim();
158 std::vector<Vector<BaseFloat> > likes;
159 likes.push_back(like);
161 KALDI_WARN <<
"Empty vector for utterance " << utt;
165 for (int32
i = 0;
i < num_classes - 1;
i++) {
166 if (!readers[
i]->HasKey(utt)) {
167 KALDI_WARN <<
"No vector for utterance " << utt;
172 if (like_dim != other_like.
Dim()) {
173 KALDI_WARN <<
"Dimension mismatch in input vectors in " << utt
174 <<
": " << like_dim <<
" vs. " << other_like.
Dim();
178 likes.push_back(other_like);
182 for (int32
i = 0;
i < like.Dim();
i++) {
185 for (int32
j = 0;
j < num_classes;
j++) {
187 if (other_post > max_post) {
189 max_post = other_post;
192 unordered_map<int32, int32>::const_iterator iter = map.find(max_indx);
193 if (iter == map.end()) {
194 KALDI_ERR <<
"Missing label " << max_indx <<
" in map";
196 vad_result(
i) = iter->second;
199 vad_writer.Write(utt, vad_result);
203 for (int32
i = 0;
i < num_classes - 1;
i++)
206 KALDI_LOG <<
"Applied frame-level likelihood-based voice activity " 207 <<
"detection; processed " << num_done
208 <<
" utterances successfully; " << num_err
209 <<
" had empty features.";
210 return (num_done != 0 ? 0 : 1);
211 }
catch(
const std::exception &e) {
212 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
A templated class for writing objects to an archive or script file; see The Table concept...
void PreparePriors(const std::string &priors_str, int32 num_classes, std::vector< BaseFloat > *priors)
PreparePriors creates a table specifying the priors for each class.
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
void PrepareMap(const std::string &map_rxfilename, int32 num_classes, unordered_map< int32, int32 > *map)
PrepareMap creates a map that specifies the mapping between the input and output class labels...
MatrixIndexT Dim() const
Returns the dimension of the vector.
A class representing a vector.
RandomAccessTableReader< KaldiObjectHolder< Vector< BaseFloat > > > RandomAccessBaseFloatVectorReader