86 using namespace kaldi;
90 "This program merges two archives of per-frame weights representing\n" 91 "voice activity decisions. By default, the program assumes that the\n" 92 "input vectors consist of floats that are 0.0 if a frame is judged\n" 93 "as nonspeech and 1.0 if it is considered speech. The default\n" 94 "behavior produces a frame-level decision of 1.0 if both input frames\n" 95 "are 1.0, and 0.0 otherwise. Additional classes (e.g., 2.0 for music)\n" 96 "can be handled using the \"map\" option.\n" 98 "Usage: merge-vads [options] <vad-rspecifier-1> <vad-rspecifier-2>\n" 100 "e.g.: merge-vads [options] scp:vad_energy.scp scp:vad_gmm.scp\n" 102 "See also: compute-vad-from-frame-likes, compute-vad, ali-to-post,\n" 106 std::string map_rxfilename;
107 po.Register(
"map", &map_rxfilename,
"This table specifies a mapping " 108 "between the labels of the frame-level decisions in the first and " 109 "second input archives to the integer output label.");
112 if (po.NumArgs() != 3) {
123 int32 num_done = 0, num_err = 0;
124 for (;!first_vad_reader.Done(); first_vad_reader.Next()) {
125 std::string utt = first_vad_reader.Key();
127 if (!second_vad_reader.HasKey(utt)) {
128 KALDI_WARN <<
"No vector for utterance " << utt;
133 if (vad1.Dim() != vad2.Dim()) {
134 KALDI_WARN <<
"VAD length mismatch for utterance " << utt;
139 for (int32
i = 0;
i < vad1.Dim();
i++) {
140 std::pair<int32, int32> key(static_cast<int32>(vad1(
i)),
141 static_cast<int32>(vad2(
i)));
142 unordered_map<std::pair<int32, int32>,
int32,
144 if (iter == map.end()) {
145 KALDI_ERR <<
"Map is missing combination " 146 << vad1(
i) <<
" and " << vad2(
i);
148 vad_result(
i) = iter->second;
152 vad_writer.Write(utt, vad_result);
155 KALDI_LOG <<
"Merged voice activity detection decisions; " 156 <<
"processed " << num_done <<
" utterances successfully; " 157 << num_err <<
" had errors.";
158 return (num_done != 0 ? 0 : 1);
159 }
catch(
const std::exception &e) {
160 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
A templated class for writing objects to an archive or script file; see The Table concept.
Allows random access to a collection of objects in an archive or script file; see The Table concept.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
void PrepareMap(const std::string &map_rxfilename, int32 num_classes, unordered_map< int32, int32 > *map)
PrepareMap creates a map that specifies the mapping between the input and output class labels.
A class representing a vector.
A hashing function-object for pairs of ints.