43 unordered_map<int32, int32> *map) {
44 Input map_input(map_rxfilename);
45 for (
int32 i = 0;
i < num_classes;
i++)
48 if (!map_rxfilename.empty()) {
50 while (std::getline(map_input.
Stream(), line)) {
51 if (line.size() == 0)
continue;
52 int32 start = line.find_first_not_of(
" \t");
53 int32 end = line.find_first_of(
'#');
54 if (start == std::string::npos || start == end)
continue;
55 end = line.find_last_not_of(
" \t", end - 1);
57 std::vector<std::string> fields;
59 " \t\n\r",
true, &fields);
60 if (fields.size() != 2) {
61 KALDI_ERR <<
"Bad line. Expected two fields, got: " 64 (*map)[std::atoi(fields[0].c_str())] = std::atoi(fields[1].c_str());
68 if (map->size() > num_classes)
69 KALDI_ERR <<
"Map table has " << map->size() <<
" classes. " 70 <<
"Expected " << num_classes <<
" or fewer";
81 std::vector<BaseFloat> *priors) {
82 if (priors_str.empty()) {
83 for (
int32 i = 0;
i < num_classes;
i++)
84 priors->push_back(log(1.0/num_classes));
87 for (
int32 i = 0;
i < priors->size();
i++)
88 (*priors)[
i] = log((*priors)[
i]);
91 if (priors->size() != num_classes)
92 KALDI_ERR << priors->size() <<
" priors specified. Expected " 98 int main(
int argc,
char *argv[]) {
99 using namespace kaldi;
103 "This program computes frame-level voice activity decisions from a\n" 104 "set of input frame-level log-likelihoods. Usually, these\n" 105 "log-likelihoods are the output of fgmm-global-get-frame-likes.\n" 106 "Frames are assigned labels according to the class for which the\n" 107 "log-likelihood (optionally weighted by a prior) is maximal. The\n" 108 "class labels are determined by the order of inputs on the command\n" 109 "line. See options for more details.\n" 111 "Usage: compute-vad-from-frame-likes [options] <likes-rspecifier-1>\n" 112 " ... <likes-rspecifier-n> <vad-wspecifier>\n" 113 "e.g.: compute-vad-from-frame-likes --map=label_map.txt\n" 114 " scp:likes1.scp scp:likes2.scp ark:vad.ark\n" 115 "See also: fgmm-global-get-frame-likes, compute-vad, merge-vads\n";
118 std::string map_rxfilename;
119 std::string priors_str;
121 po.
Register(
"map", &map_rxfilename,
"Table that defines the frame-level " 122 "labels. For each row, the first field is the zero-based index of the " 123 "input likelihood archive and the second field is the associated " 126 po.
Register(
"priors", &priors_str,
"Comma-separated list that specifies " 127 "the priors for each class. The order of the floats corresponds to " 128 "the index of the input archives. E.g., --priors=0.5,0.2,0.3");
136 unordered_map<int32, int32> map;
137 std::vector<BaseFloat> priors;
138 int32 num_classes = po.
NumArgs() - 1;
139 PrepareMap(map_rxfilename, num_classes, &map);
143 std::vector<RandomAccessBaseFloatVectorReader *> readers;
150 readers.push_back(reader);
153 int32 num_done = 0, num_err = 0;
154 for (;!first_reader.Done(); first_reader.Next()) {
155 std::string utt = first_reader.Key();
157 int32 like_dim = like.
Dim();
158 std::vector<Vector<BaseFloat> > likes;
159 likes.push_back(like);
161 KALDI_WARN <<
"Empty vector for utterance " << utt;
165 for (int32
i = 0;
i < num_classes - 1;
i++) {
166 if (!readers[
i]->HasKey(utt)) {
167 KALDI_WARN <<
"No vector for utterance " << utt;
172 if (like_dim != other_like.
Dim()) {
173 KALDI_WARN <<
"Dimension mismatch in input vectors in " << utt
174 <<
": " << like_dim <<
" vs. " << other_like.
Dim();
178 likes.push_back(other_like);
182 for (int32
i = 0;
i < like.Dim();
i++) {
185 for (int32
j = 0;
j < num_classes;
j++) {
187 if (other_post > max_post) {
189 max_post = other_post;
192 unordered_map<int32, int32>::const_iterator iter = map.find(max_indx);
193 if (iter == map.end()) {
194 KALDI_ERR <<
"Missing label " << max_indx <<
" in map";
196 vad_result(
i) = iter->second;
199 vad_writer.
Write(utt, vad_result);
203 for (int32
i = 0;
i < num_classes - 1;
i++)
206 KALDI_LOG <<
"Applied frame-level likelihood-based voice activity " 207 <<
"detection; processed " << num_done
208 <<
" utterances successfully; " << num_err
209 <<
" had empty features.";
210 return (num_done != 0 ? 0 : 1);
211 }
catch(
const std::exception &e) {
212 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool SplitStringToFloats(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< F > *out)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int main(int argc, char *argv[])
A templated class for writing objects to an archive or script file; see The Table concept...
void PreparePriors(const std::string &priors_str, int32 num_classes, std::vector< BaseFloat > *priors)
PreparePriors creates a table specifying the priors for each class.
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
void PrepareMap(const std::string &map_rxfilename, int32 num_classes, unordered_map< int32, int32 > *map)
PrepareMap creates a map that specifies the mapping between the input and output class labels...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
int NumArgs() const
Number of positional parameters (cf. argc-1).
A class representing a vector.
#define KALDI_ASSERT(cond)
RandomAccessTableReader< KaldiObjectHolder< Vector< BaseFloat > > > RandomAccessBaseFloatVectorReader