47 Input map_input(map_rxfilename);
54 if (map_rxfilename.empty()) {
55 (*map)[std::pair<int32, int32>(0, 0)] = 0;
56 (*map)[std::pair<int32, int32>(0, 1)] = 0;
57 (*map)[std::pair<int32, int32>(1, 0)] = 0;
58 (*map)[std::pair<int32, int32>(1, 1)] = 1;
61 while (std::getline(map_input.
Stream(), line)) {
62 if (line.size() == 0)
continue;
63 int32 start = line.find_first_not_of(
" \t");
64 int32 end = line.find_first_of(
'#');
65 if (start == std::string::npos || start == end)
continue;
66 end = line.find_last_not_of(
" \t", end - 1);
68 std::vector<std::string> fields;
70 " \t\n\r",
true, &fields);
71 if (fields.size() != 3) {
72 KALDI_ERR <<
"Bad line. Expected three fields, got: " 75 int32 label1 = std::atoi(fields[0].c_str()),
76 label2 = std::atoi(fields[1].c_str()),
77 result_label = std::atoi(fields[2].c_str());
78 (*map)[std::pair<int32, int32>(label1, label2)] = result_label;
85 int main(
int argc,
char *argv[]) {
86 using namespace kaldi;
90 "This program merges two archives of per-frame weights representing\n" 91 "voice activity decisions. By default, the program assumes that the\n" 92 "input vectors consist of floats that are 0.0 if a frame is judged\n" 93 "as nonspeech and 1.0 if it is considered speech. The default\n" 94 "behavior produces a frame-level decision of 1.0 if both input frames\n" 95 "are 1.0, and 0.0 otherwise. Additional classes (e.g., 2.0 for music)\n" 96 "can be handled using the \"map\" option.\n" 98 "Usage: merge-vads [options] <vad-rspecifier-1> <vad-rspecifier-2>\n" 100 "e.g.: merge-vads [options] scp:vad_energy.scp scp:vad_gmm.scp\n" 102 "See also: compute-vad-from-frame-likes, compute-vad, ali-to-post,\n" 106 std::string map_rxfilename;
107 po.
Register(
"map", &map_rxfilename,
"This table specifies a mapping " 108 "between the labels of the frame-level decisions in the first and " 109 "second input archives to the integer output label.");
123 int32 num_done = 0, num_err = 0;
124 for (;!first_vad_reader.Done(); first_vad_reader.Next()) {
125 std::string utt = first_vad_reader.Key();
127 if (!second_vad_reader.HasKey(utt)) {
128 KALDI_WARN <<
"No vector for utterance " << utt;
133 if (vad1.Dim() != vad2.Dim()) {
134 KALDI_WARN <<
"VAD length mismatch for utterance " << utt;
139 for (int32
i = 0;
i < vad1.Dim();
i++) {
140 std::pair<int32, int32> key(static_cast<int32>(vad1(
i)),
141 static_cast<int32>(vad2(
i)));
142 unordered_map<std::pair<int32, int32>,
int32,
144 if (iter == map.end()) {
145 KALDI_ERR <<
"Map is missing combination " 146 << vad1(
i) <<
" and " << vad2(
i);
148 vad_result(
i) = iter->second;
152 vad_writer.Write(utt, vad_result);
155 KALDI_LOG <<
"Merged voice activity detection decisions; " 156 <<
"processed " << num_done <<
" utterances successfully; " 157 << num_err <<
" had errors.";
158 return (num_done != 0 ? 0 : 1);
159 }
catch(
const std::exception &e) {
160 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
int main(int argc, char *argv[])
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
void PrepareMap(const std::string &map_rxfilename, int32 num_classes, unordered_map< int32, int32 > *map)
PrepareMap creates a map that specifies the mapping between the input and output class labels...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (cf. argc-1).
A class representing a vector.
#define KALDI_ASSERT(cond)
A hashing function-object for pairs of ints.